blob: d2964ce823337a72243d83363cb2a059fd5e81fe [file] [log] [blame]
Akronb7e1f352025-05-16 15:45:23 +02001package parser
2
3import (
4 "encoding/json"
5 "testing"
6
7 "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
8 "github.com/stretchr/testify/assert"
9 "github.com/stretchr/testify/require"
10)
11
12func TestParseJSON(t *testing.T) {
13 tests := []struct {
14 name string
15 input string
16 expected ast.Node
17 wantErr bool
18 }{
19 {
20 name: "Parse simple term",
21 input: `{
22 "@type": "koral:term",
23 "foundry": "opennlp",
24 "key": "DET",
25 "layer": "p",
26 "match": "match:eq"
27 }`,
28 expected: &ast.Term{
29 Foundry: "opennlp",
30 Key: "DET",
31 Layer: "p",
32 Match: ast.MatchEqual,
33 },
34 wantErr: false,
35 },
36 {
37 name: "Parse term group with AND relation",
38 input: `{
39 "@type": "koral:termGroup",
40 "operands": [
41 {
42 "@type": "koral:term",
43 "foundry": "opennlp",
44 "key": "DET",
45 "layer": "p",
46 "match": "match:eq"
47 },
48 {
49 "@type": "koral:term",
50 "foundry": "opennlp",
51 "key": "AdjType",
52 "layer": "m",
53 "match": "match:eq",
54 "value": "Pdt"
55 }
56 ],
57 "relation": "relation:and"
58 }`,
59 expected: &ast.TermGroup{
60 Operands: []ast.Node{
61 &ast.Term{
62 Foundry: "opennlp",
63 Key: "DET",
64 Layer: "p",
65 Match: ast.MatchEqual,
66 },
67 &ast.Term{
68 Foundry: "opennlp",
69 Key: "AdjType",
70 Layer: "m",
71 Match: ast.MatchEqual,
72 Value: "Pdt",
73 },
74 },
75 Relation: ast.AndRelation,
76 },
77 wantErr: false,
78 },
79 {
80 name: "Parse token with wrapped term",
81 input: `{
82 "@type": "koral:token",
83 "wrap": {
84 "@type": "koral:term",
85 "foundry": "opennlp",
86 "key": "DET",
87 "layer": "p",
88 "match": "match:eq"
89 }
90 }`,
91 expected: &ast.Token{
92 Wrap: &ast.Term{
93 Foundry: "opennlp",
94 Key: "DET",
95 Layer: "p",
96 Match: ast.MatchEqual,
97 },
98 },
99 wantErr: false,
100 },
101 {
102 name: "Parse complex nested structure",
103 input: `{
104 "@type": "koral:token",
105 "wrap": {
106 "@type": "koral:termGroup",
107 "operands": [
108 {
109 "@type": "koral:term",
110 "foundry": "opennlp",
111 "key": "DET",
112 "layer": "p",
113 "match": "match:eq"
114 },
115 {
116 "@type": "koral:termGroup",
117 "operands": [
118 {
119 "@type": "koral:term",
120 "foundry": "opennlp",
121 "key": "AdjType",
122 "layer": "m",
123 "match": "match:eq",
124 "value": "Pdt"
125 },
126 {
127 "@type": "koral:term",
128 "foundry": "opennlp",
129 "key": "PronType",
130 "layer": "m",
131 "match": "match:ne",
132 "value": "Neg"
133 }
134 ],
135 "relation": "relation:or"
136 }
137 ],
138 "relation": "relation:and"
139 }
140 }`,
141 expected: &ast.Token{
142 Wrap: &ast.TermGroup{
143 Operands: []ast.Node{
144 &ast.Term{
145 Foundry: "opennlp",
146 Key: "DET",
147 Layer: "p",
148 Match: ast.MatchEqual,
149 },
150 &ast.TermGroup{
151 Operands: []ast.Node{
152 &ast.Term{
153 Foundry: "opennlp",
154 Key: "AdjType",
155 Layer: "m",
156 Match: ast.MatchEqual,
157 Value: "Pdt",
158 },
159 &ast.Term{
160 Foundry: "opennlp",
161 Key: "PronType",
162 Layer: "m",
163 Match: ast.MatchNotEqual,
164 Value: "Neg",
165 },
166 },
167 Relation: ast.OrRelation,
168 },
169 },
170 Relation: ast.AndRelation,
171 },
172 },
173 wantErr: false,
174 },
175 {
176 name: "Invalid JSON",
177 input: `{"invalid": json`,
178 wantErr: true,
179 },
180 {
181 name: "Empty JSON",
182 input: `{}`,
183 wantErr: true,
184 },
185 {
Akron32958422025-05-16 16:33:05 +0200186 name: "Unknown node type",
Akronb7e1f352025-05-16 15:45:23 +0200187 input: `{
188 "@type": "koral:unknown",
189 "key": "value"
190 }`,
Akron32958422025-05-16 16:33:05 +0200191 expected: &ast.CatchallNode{
192 NodeType: "koral:unknown",
193 RawContent: json.RawMessage(`{"@type":"koral:unknown","key":"value"}`),
194 },
195 wantErr: false,
Akronb7e1f352025-05-16 15:45:23 +0200196 },
197 }
198
199 for _, tt := range tests {
200 t.Run(tt.name, func(t *testing.T) {
201 result, err := ParseJSON([]byte(tt.input))
202 if tt.wantErr {
203 assert.Error(t, err)
204 return
205 }
206
207 require.NoError(t, err)
208 assert.Equal(t, tt.expected, result)
209 })
210 }
211}
212
213func TestSerializeToJSON(t *testing.T) {
214 tests := []struct {
215 name string
216 input ast.Node
217 expected string
218 wantErr bool
219 }{
220 {
221 name: "Serialize simple term",
222 input: &ast.Term{
223 Foundry: "opennlp",
224 Key: "DET",
225 Layer: "p",
226 Match: ast.MatchEqual,
227 },
228 expected: `{
229 "@type": "koral:term",
230 "foundry": "opennlp",
231 "key": "DET",
232 "layer": "p",
233 "match": "match:eq"
234}`,
235 wantErr: false,
236 },
237 {
238 name: "Serialize term group",
239 input: &ast.TermGroup{
240 Operands: []ast.Node{
241 &ast.Term{
242 Foundry: "opennlp",
243 Key: "DET",
244 Layer: "p",
245 Match: ast.MatchEqual,
246 },
247 &ast.Term{
248 Foundry: "opennlp",
249 Key: "AdjType",
250 Layer: "m",
251 Match: ast.MatchEqual,
252 Value: "Pdt",
253 },
254 },
255 Relation: ast.AndRelation,
256 },
257 expected: `{
258 "@type": "koral:termGroup",
259 "operands": [
260 {
261 "@type": "koral:term",
262 "foundry": "opennlp",
263 "key": "DET",
264 "layer": "p",
265 "match": "match:eq"
266 },
267 {
268 "@type": "koral:term",
269 "foundry": "opennlp",
270 "key": "AdjType",
271 "layer": "m",
272 "match": "match:eq",
273 "value": "Pdt"
274 }
275 ],
276 "relation": "relation:and"
277}`,
278 wantErr: false,
279 },
Akron32958422025-05-16 16:33:05 +0200280 {
281 name: "Serialize unknown node type",
282 input: &ast.CatchallNode{
283 NodeType: "koral:unknown",
284 RawContent: json.RawMessage(`{
285 "@type": "koral:unknown",
286 "key": "value"
287}`),
288 },
289 expected: `{
290 "@type": "koral:unknown",
291 "key": "value"
292}`,
293 wantErr: false,
294 },
Akronb7e1f352025-05-16 15:45:23 +0200295 }
296
297 for _, tt := range tests {
298 t.Run(tt.name, func(t *testing.T) {
299 result, err := SerializeToJSON(tt.input)
300 if tt.wantErr {
301 assert.Error(t, err)
302 return
303 }
304
305 require.NoError(t, err)
306 // Compare JSON objects instead of raw strings to avoid whitespace issues
307 var expected, actual interface{}
308 err = json.Unmarshal([]byte(tt.expected), &expected)
309 require.NoError(t, err)
310 err = json.Unmarshal(result, &actual)
311 require.NoError(t, err)
312 assert.Equal(t, expected, actual)
313 })
314 }
315}
316
317func TestRoundTrip(t *testing.T) {
318 // Test that parsing and then serializing produces equivalent JSON
319 input := `{
320 "@type": "koral:token",
321 "wrap": {
322 "@type": "koral:termGroup",
323 "operands": [
324 {
325 "@type": "koral:term",
326 "foundry": "opennlp",
327 "key": "DET",
328 "layer": "p",
329 "match": "match:eq"
330 },
331 {
332 "@type": "koral:term",
333 "foundry": "opennlp",
334 "key": "AdjType",
335 "layer": "m",
336 "match": "match:eq",
337 "value": "Pdt"
338 }
339 ],
340 "relation": "relation:and"
341 }
342 }`
343
344 // Parse JSON to AST
345 node, err := ParseJSON([]byte(input))
346 require.NoError(t, err)
347
348 // Serialize AST back to JSON
349 output, err := SerializeToJSON(node)
350 require.NoError(t, err)
351
352 // Compare JSON objects
353 var expected, actual interface{}
354 err = json.Unmarshal([]byte(input), &expected)
355 require.NoError(t, err)
356 err = json.Unmarshal(output, &actual)
357 require.NoError(t, err)
358 assert.Equal(t, expected, actual)
359}
Akron32958422025-05-16 16:33:05 +0200360
361func TestRoundTripUnknownType(t *testing.T) {
362 // Test that parsing and then serializing an unknown node type preserves the structure
363 input := `{
364 "@type": "koral:unknown",
365 "key": "value",
366 "wrap": {
367 "@type": "koral:term",
368 "foundry": "opennlp",
369 "key": "DET",
370 "layer": "p",
371 "match": "match:eq"
372 },
373 "operands": [
374 {
375 "@type": "koral:term",
376 "foundry": "opennlp",
377 "key": "AdjType",
378 "layer": "m",
379 "match": "match:eq",
380 "value": "Pdt"
381 }
382 ]
383 }`
384
385 // Parse JSON to AST
386 node, err := ParseJSON([]byte(input))
387 require.NoError(t, err)
388
389 // Check that it's a CatchallNode
390 catchall, ok := node.(*ast.CatchallNode)
391 require.True(t, ok)
392 assert.Equal(t, "koral:unknown", catchall.NodeType)
393
394 // Check that wrap and operands were parsed
395 require.NotNil(t, catchall.Wrap)
396 require.Len(t, catchall.Operands, 1)
397
398 // Serialize AST back to JSON
399 output, err := SerializeToJSON(node)
400 require.NoError(t, err)
401
402 // Compare JSON objects
403 var expected, actual interface{}
404 err = json.Unmarshal([]byte(input), &expected)
405 require.NoError(t, err)
406 err = json.Unmarshal(output, &actual)
407 require.NoError(t, err)
408 assert.Equal(t, expected, actual)
409}