blob: df0761941099041fe78b7c0bb1394a02082e0a66 [file] [log] [blame]
Akronb7e1f352025-05-16 15:45:23 +02001package parser
2
3import (
4 "encoding/json"
5 "testing"
6
Akronfa55bb22025-05-26 15:10:42 +02007 "github.com/KorAP/KoralPipe-TermMapper/ast"
Akronb7e1f352025-05-16 15:45:23 +02008 "github.com/stretchr/testify/assert"
9 "github.com/stretchr/testify/require"
10)
11
Akron56e09e72025-05-22 15:38:35 +020012// normalizeJSON normalizes JSON by parsing and re-marshaling it
13func normalizeJSON(t *testing.T, data json.RawMessage) json.RawMessage {
14 var v interface{}
15 err := json.Unmarshal(data, &v)
16 require.NoError(t, err)
17
18 // Convert to canonical form (sorted keys, no whitespace)
19 normalized, err := json.Marshal(v)
20 require.NoError(t, err)
21 return normalized
22}
23
Akron56e09e72025-05-22 15:38:35 +020024// compareNodes compares two AST nodes, normalizing JSON content in CatchallNodes
25func compareNodes(t *testing.T, expected, actual ast.Node) bool {
26 // If both nodes are CatchallNodes, normalize their JSON content before comparison
27 if expectedCatchall, ok := expected.(*ast.CatchallNode); ok {
28 if actualCatchall, ok := actual.(*ast.CatchallNode); ok {
29 // Compare NodeType
30 if !assert.Equal(t, expectedCatchall.NodeType, actualCatchall.NodeType) {
31 t.Logf("NodeType mismatch: expected '%s', got '%s'", expectedCatchall.NodeType, actualCatchall.NodeType)
32 return false
33 }
34
35 // Normalize and compare RawContent
36 if expectedCatchall.RawContent != nil && actualCatchall.RawContent != nil {
37 expectedNorm := normalizeJSON(t, expectedCatchall.RawContent)
38 actualNorm := normalizeJSON(t, actualCatchall.RawContent)
39 if !assert.Equal(t, string(expectedNorm), string(actualNorm)) {
40 t.Logf("RawContent mismatch:\nExpected: %s\nActual: %s", expectedNorm, actualNorm)
41 return false
42 }
43 } else if !assert.Equal(t, expectedCatchall.RawContent == nil, actualCatchall.RawContent == nil) {
44 t.Log("One node has RawContent while the other doesn't")
45 return false
46 }
47
48 // Compare Operands
49 if !assert.Equal(t, len(expectedCatchall.Operands), len(actualCatchall.Operands)) {
50 t.Logf("Operands length mismatch: expected %d, got %d", len(expectedCatchall.Operands), len(actualCatchall.Operands))
51 return false
52 }
53 for i := range expectedCatchall.Operands {
54 if !compareNodes(t, expectedCatchall.Operands[i], actualCatchall.Operands[i]) {
55 t.Logf("Operand %d mismatch", i)
56 return false
57 }
58 }
59
60 // Compare Wrap
61 if expectedCatchall.Wrap != nil || actualCatchall.Wrap != nil {
62 if !assert.Equal(t, expectedCatchall.Wrap != nil, actualCatchall.Wrap != nil) {
63 t.Log("One node has Wrap while the other doesn't")
64 return false
65 }
66 if expectedCatchall.Wrap != nil {
67 if !compareNodes(t, expectedCatchall.Wrap, actualCatchall.Wrap) {
68 t.Log("Wrap node mismatch")
69 return false
70 }
71 }
72 }
73
74 return true
75 }
76 }
77
78 // For Token nodes, compare their Wrap fields using compareNodes
79 if expectedToken, ok := expected.(*ast.Token); ok {
80 if actualToken, ok := actual.(*ast.Token); ok {
81 if expectedToken.Wrap == nil || actualToken.Wrap == nil {
82 return assert.Equal(t, expectedToken.Wrap == nil, actualToken.Wrap == nil)
83 }
84 return compareNodes(t, expectedToken.Wrap, actualToken.Wrap)
85 }
86 }
87
88 // For TermGroup nodes, compare relation and operands
89 if expectedGroup, ok := expected.(*ast.TermGroup); ok {
90 if actualGroup, ok := actual.(*ast.TermGroup); ok {
91 if !assert.Equal(t, expectedGroup.Relation, actualGroup.Relation) {
92 t.Logf("Relation mismatch: expected '%s', got '%s'", expectedGroup.Relation, actualGroup.Relation)
93 return false
94 }
95 if !assert.Equal(t, len(expectedGroup.Operands), len(actualGroup.Operands)) {
96 t.Logf("Operands length mismatch: expected %d, got %d", len(expectedGroup.Operands), len(actualGroup.Operands))
97 return false
98 }
99 for i := range expectedGroup.Operands {
100 if !compareNodes(t, expectedGroup.Operands[i], actualGroup.Operands[i]) {
101 t.Logf("Operand %d mismatch", i)
102 return false
103 }
104 }
105 return true
106 }
107 }
108
109 // For Term nodes, compare all fields
110 if expectedTerm, ok := expected.(*ast.Term); ok {
111 if actualTerm, ok := actual.(*ast.Term); ok {
112 equal := assert.Equal(t, expectedTerm.Foundry, actualTerm.Foundry) &&
113 assert.Equal(t, expectedTerm.Key, actualTerm.Key) &&
114 assert.Equal(t, expectedTerm.Layer, actualTerm.Layer) &&
115 assert.Equal(t, expectedTerm.Match, actualTerm.Match) &&
116 assert.Equal(t, expectedTerm.Value, actualTerm.Value)
117 if !equal {
118 t.Logf("Term mismatch:\nExpected: %+v\nActual: %+v", expectedTerm, actualTerm)
119 }
120 return equal
121 }
122 }
123
124 // For other node types or mismatched types, use regular equality comparison
125 equal := assert.Equal(t, expected, actual)
126 if !equal {
127 t.Logf("Node type mismatch:\nExpected type: %T\nActual type: %T", expected, actual)
128 }
129 return equal
130}
131
Akronb7e1f352025-05-16 15:45:23 +0200132func TestParseJSON(t *testing.T) {
133 tests := []struct {
134 name string
135 input string
136 expected ast.Node
137 wantErr bool
138 }{
139 {
140 name: "Parse simple term",
141 input: `{
142 "@type": "koral:term",
143 "foundry": "opennlp",
144 "key": "DET",
145 "layer": "p",
146 "match": "match:eq"
147 }`,
148 expected: &ast.Term{
149 Foundry: "opennlp",
150 Key: "DET",
151 Layer: "p",
152 Match: ast.MatchEqual,
153 },
154 wantErr: false,
155 },
156 {
157 name: "Parse term group with AND relation",
158 input: `{
159 "@type": "koral:termGroup",
160 "operands": [
161 {
162 "@type": "koral:term",
163 "foundry": "opennlp",
164 "key": "DET",
165 "layer": "p",
166 "match": "match:eq"
167 },
168 {
169 "@type": "koral:term",
170 "foundry": "opennlp",
171 "key": "AdjType",
172 "layer": "m",
173 "match": "match:eq",
174 "value": "Pdt"
175 }
176 ],
177 "relation": "relation:and"
178 }`,
179 expected: &ast.TermGroup{
180 Operands: []ast.Node{
181 &ast.Term{
182 Foundry: "opennlp",
183 Key: "DET",
184 Layer: "p",
185 Match: ast.MatchEqual,
186 },
187 &ast.Term{
188 Foundry: "opennlp",
189 Key: "AdjType",
190 Layer: "m",
191 Match: ast.MatchEqual,
192 Value: "Pdt",
193 },
194 },
195 Relation: ast.AndRelation,
196 },
197 wantErr: false,
198 },
199 {
200 name: "Parse token with wrapped term",
201 input: `{
202 "@type": "koral:token",
203 "wrap": {
204 "@type": "koral:term",
205 "foundry": "opennlp",
206 "key": "DET",
207 "layer": "p",
208 "match": "match:eq"
209 }
210 }`,
211 expected: &ast.Token{
212 Wrap: &ast.Term{
213 Foundry: "opennlp",
214 Key: "DET",
215 Layer: "p",
216 Match: ast.MatchEqual,
217 },
218 },
219 wantErr: false,
220 },
221 {
222 name: "Parse complex nested structure",
223 input: `{
224 "@type": "koral:token",
225 "wrap": {
226 "@type": "koral:termGroup",
227 "operands": [
228 {
229 "@type": "koral:term",
230 "foundry": "opennlp",
231 "key": "DET",
232 "layer": "p",
233 "match": "match:eq"
234 },
235 {
236 "@type": "koral:termGroup",
237 "operands": [
238 {
239 "@type": "koral:term",
240 "foundry": "opennlp",
241 "key": "AdjType",
242 "layer": "m",
243 "match": "match:eq",
244 "value": "Pdt"
245 },
246 {
247 "@type": "koral:term",
248 "foundry": "opennlp",
249 "key": "PronType",
250 "layer": "m",
251 "match": "match:ne",
252 "value": "Neg"
253 }
254 ],
255 "relation": "relation:or"
256 }
257 ],
258 "relation": "relation:and"
259 }
260 }`,
261 expected: &ast.Token{
262 Wrap: &ast.TermGroup{
263 Operands: []ast.Node{
264 &ast.Term{
265 Foundry: "opennlp",
266 Key: "DET",
267 Layer: "p",
268 Match: ast.MatchEqual,
269 },
270 &ast.TermGroup{
271 Operands: []ast.Node{
272 &ast.Term{
273 Foundry: "opennlp",
274 Key: "AdjType",
275 Layer: "m",
276 Match: ast.MatchEqual,
277 Value: "Pdt",
278 },
279 &ast.Term{
280 Foundry: "opennlp",
281 Key: "PronType",
282 Layer: "m",
283 Match: ast.MatchNotEqual,
284 Value: "Neg",
285 },
286 },
287 Relation: ast.OrRelation,
288 },
289 },
290 Relation: ast.AndRelation,
291 },
292 },
293 wantErr: false,
294 },
295 {
296 name: "Invalid JSON",
297 input: `{"invalid": json`,
298 wantErr: true,
299 },
300 {
301 name: "Empty JSON",
302 input: `{}`,
303 wantErr: true,
304 },
305 {
Akron32958422025-05-16 16:33:05 +0200306 name: "Unknown node type",
Akronb7e1f352025-05-16 15:45:23 +0200307 input: `{
308 "@type": "koral:unknown",
309 "key": "value"
310 }`,
Akron32958422025-05-16 16:33:05 +0200311 expected: &ast.CatchallNode{
312 NodeType: "koral:unknown",
313 RawContent: json.RawMessage(`{"@type":"koral:unknown","key":"value"}`),
314 },
315 wantErr: false,
Akronb7e1f352025-05-16 15:45:23 +0200316 },
317 }
318
319 for _, tt := range tests {
320 t.Run(tt.name, func(t *testing.T) {
321 result, err := ParseJSON([]byte(tt.input))
322 if tt.wantErr {
323 assert.Error(t, err)
324 return
325 }
326
327 require.NoError(t, err)
328 assert.Equal(t, tt.expected, result)
329 })
330 }
331}
332
333func TestSerializeToJSON(t *testing.T) {
334 tests := []struct {
335 name string
336 input ast.Node
337 expected string
338 wantErr bool
339 }{
340 {
341 name: "Serialize simple term",
342 input: &ast.Term{
343 Foundry: "opennlp",
344 Key: "DET",
345 Layer: "p",
346 Match: ast.MatchEqual,
347 },
348 expected: `{
349 "@type": "koral:term",
350 "foundry": "opennlp",
351 "key": "DET",
352 "layer": "p",
353 "match": "match:eq"
354}`,
355 wantErr: false,
356 },
357 {
358 name: "Serialize term group",
359 input: &ast.TermGroup{
360 Operands: []ast.Node{
361 &ast.Term{
362 Foundry: "opennlp",
363 Key: "DET",
364 Layer: "p",
365 Match: ast.MatchEqual,
366 },
367 &ast.Term{
368 Foundry: "opennlp",
369 Key: "AdjType",
370 Layer: "m",
371 Match: ast.MatchEqual,
372 Value: "Pdt",
373 },
374 },
375 Relation: ast.AndRelation,
376 },
377 expected: `{
378 "@type": "koral:termGroup",
379 "operands": [
380 {
381 "@type": "koral:term",
382 "foundry": "opennlp",
383 "key": "DET",
384 "layer": "p",
385 "match": "match:eq"
386 },
387 {
388 "@type": "koral:term",
389 "foundry": "opennlp",
390 "key": "AdjType",
391 "layer": "m",
392 "match": "match:eq",
393 "value": "Pdt"
394 }
395 ],
396 "relation": "relation:and"
397}`,
398 wantErr: false,
399 },
Akron32958422025-05-16 16:33:05 +0200400 {
401 name: "Serialize unknown node type",
402 input: &ast.CatchallNode{
403 NodeType: "koral:unknown",
404 RawContent: json.RawMessage(`{
405 "@type": "koral:unknown",
406 "key": "value"
407}`),
408 },
409 expected: `{
410 "@type": "koral:unknown",
411 "key": "value"
412}`,
413 wantErr: false,
414 },
Akronb7e1f352025-05-16 15:45:23 +0200415 }
416
417 for _, tt := range tests {
418 t.Run(tt.name, func(t *testing.T) {
Akron5ab92b62025-05-26 18:16:27 +0200419 result, err := JSON(tt.input)
Akronb7e1f352025-05-16 15:45:23 +0200420 if tt.wantErr {
421 assert.Error(t, err)
422 return
423 }
424
425 require.NoError(t, err)
426 // Compare JSON objects instead of raw strings to avoid whitespace issues
Akron56e09e72025-05-22 15:38:35 +0200427 var expected, actual any
Akronb7e1f352025-05-16 15:45:23 +0200428 err = json.Unmarshal([]byte(tt.expected), &expected)
429 require.NoError(t, err)
430 err = json.Unmarshal(result, &actual)
431 require.NoError(t, err)
432 assert.Equal(t, expected, actual)
433 })
434 }
435}
436
437func TestRoundTrip(t *testing.T) {
438 // Test that parsing and then serializing produces equivalent JSON
439 input := `{
440 "@type": "koral:token",
441 "wrap": {
442 "@type": "koral:termGroup",
443 "operands": [
444 {
445 "@type": "koral:term",
446 "foundry": "opennlp",
447 "key": "DET",
448 "layer": "p",
449 "match": "match:eq"
450 },
451 {
452 "@type": "koral:term",
453 "foundry": "opennlp",
454 "key": "AdjType",
455 "layer": "m",
456 "match": "match:eq",
457 "value": "Pdt"
458 }
459 ],
460 "relation": "relation:and"
461 }
462 }`
463
464 // Parse JSON to AST
465 node, err := ParseJSON([]byte(input))
466 require.NoError(t, err)
467
468 // Serialize AST back to JSON
Akron5ab92b62025-05-26 18:16:27 +0200469 output, err := JSON(node)
Akronb7e1f352025-05-16 15:45:23 +0200470 require.NoError(t, err)
471
472 // Compare JSON objects
473 var expected, actual interface{}
474 err = json.Unmarshal([]byte(input), &expected)
475 require.NoError(t, err)
476 err = json.Unmarshal(output, &actual)
477 require.NoError(t, err)
478 assert.Equal(t, expected, actual)
479}
Akron32958422025-05-16 16:33:05 +0200480
481func TestRoundTripUnknownType(t *testing.T) {
482 // Test that parsing and then serializing an unknown node type preserves the structure
483 input := `{
484 "@type": "koral:unknown",
485 "key": "value",
486 "wrap": {
487 "@type": "koral:term",
488 "foundry": "opennlp",
489 "key": "DET",
490 "layer": "p",
491 "match": "match:eq"
492 },
493 "operands": [
494 {
495 "@type": "koral:term",
496 "foundry": "opennlp",
497 "key": "AdjType",
498 "layer": "m",
499 "match": "match:eq",
500 "value": "Pdt"
501 }
502 ]
503 }`
504
505 // Parse JSON to AST
506 node, err := ParseJSON([]byte(input))
507 require.NoError(t, err)
508
509 // Check that it's a CatchallNode
510 catchall, ok := node.(*ast.CatchallNode)
511 require.True(t, ok)
512 assert.Equal(t, "koral:unknown", catchall.NodeType)
513
514 // Check that wrap and operands were parsed
515 require.NotNil(t, catchall.Wrap)
516 require.Len(t, catchall.Operands, 1)
517
518 // Serialize AST back to JSON
Akron5ab92b62025-05-26 18:16:27 +0200519 output, err := JSON(node)
Akron32958422025-05-16 16:33:05 +0200520 require.NoError(t, err)
521
522 // Compare JSON objects
523 var expected, actual interface{}
524 err = json.Unmarshal([]byte(input), &expected)
525 require.NoError(t, err)
526 err = json.Unmarshal(output, &actual)
527 require.NoError(t, err)
528 assert.Equal(t, expected, actual)
529}
Akron56e09e72025-05-22 15:38:35 +0200530
531func TestParseJSONEdgeCases(t *testing.T) {
532 tests := []struct {
533 name string
534 input string
535 expected ast.Node
536 wantErr bool
537 }{
538 {
539 name: "Unknown node type",
540 input: `{
541 "@type": "koral:unknown",
542 "customField": "value",
543 "wrap": {
544 "@type": "koral:term",
545 "key": "DET"
546 }
547 }`,
548 expected: &ast.CatchallNode{
549 NodeType: "koral:unknown",
550 RawContent: json.RawMessage(`{
551 "@type": "koral:unknown",
552 "customField": "value",
553 "wrap": {
554 "@type": "koral:term",
555 "key": "DET"
556 }
557 }`),
558 Wrap: &ast.Term{
559 Key: "DET",
560 Match: ast.MatchEqual,
561 },
562 },
563 wantErr: false,
564 },
565 {
566 name: "Unknown node with operands",
567 input: `{
568 "@type": "koral:unknown",
569 "operands": [
570 {
571 "@type": "koral:term",
572 "key": "DET"
573 },
574 {
575 "@type": "koral:term",
576 "key": "NOUN"
577 }
578 ]
579 }`,
580 expected: &ast.CatchallNode{
581 NodeType: "koral:unknown",
582 RawContent: json.RawMessage(`{
583 "@type": "koral:unknown",
584 "operands": [
585 {
586 "@type": "koral:term",
587 "key": "DET"
588 },
589 {
590 "@type": "koral:term",
591 "key": "NOUN"
592 }
593 ]
594 }`),
595 Operands: []ast.Node{
596 &ast.Term{
597 Key: "DET",
598 Match: ast.MatchEqual,
599 },
600 &ast.Term{
601 Key: "NOUN",
602 Match: ast.MatchEqual,
603 },
604 },
605 },
606 wantErr: false,
607 },
608 {
609 name: "Deeply nested unknown nodes",
610 input: `{
611 "@type": "koral:outer",
612 "wrap": {
613 "@type": "koral:middle",
614 "wrap": {
615 "@type": "koral:inner",
616 "wrap": {
617 "@type": "koral:term",
618 "key": "DET"
619 }
620 }
621 }
622 }`,
623 expected: &ast.CatchallNode{
624 NodeType: "koral:outer",
625 RawContent: json.RawMessage(`{
626 "@type": "koral:outer",
627 "wrap": {
628 "@type": "koral:middle",
629 "wrap": {
630 "@type": "koral:inner",
631 "wrap": {
632 "@type": "koral:term",
633 "key": "DET"
634 }
635 }
636 }
637 }`),
638 Wrap: &ast.CatchallNode{
639 NodeType: "koral:middle",
640 RawContent: json.RawMessage(`{
641 "@type": "koral:middle",
642 "wrap": {
643 "@type": "koral:inner",
644 "wrap": {
645 "@type": "koral:term",
646 "key": "DET"
647 }
648 }
649 }`),
650 Wrap: &ast.CatchallNode{
651 NodeType: "koral:inner",
652 RawContent: json.RawMessage(`{
653 "@type": "koral:inner",
654 "wrap": {
655 "@type": "koral:term",
656 "key": "DET"
657 }
658 }`),
659 Wrap: &ast.Term{
660 Key: "DET",
661 Match: ast.MatchEqual,
662 },
663 },
664 },
665 },
666 wantErr: false,
667 },
668 {
669 name: "Mixed known and unknown nodes",
670 input: `{
671 "@type": "koral:token",
672 "wrap": {
673 "@type": "koral:custom",
674 "customField": "value",
675 "operands": [
676 {
677 "@type": "koral:termGroup",
678 "operands": [
679 {
680 "@type": "koral:term",
681 "key": "DET"
682 }
683 ],
684 "relation": "relation:and"
685 }
686 ]
687 }
688 }`,
689 expected: &ast.Token{
690 Wrap: &ast.CatchallNode{
691 NodeType: "koral:custom",
692 RawContent: json.RawMessage(`{
693 "@type": "koral:custom",
694 "customField": "value",
695 "operands": [
696 {
697 "@type": "koral:termGroup",
698 "operands": [
699 {
700 "@type": "koral:term",
701 "key": "DET"
702 }
703 ],
704 "relation": "relation:and"
705 }
706 ]
707 }`),
708 Operands: []ast.Node{
709 &ast.TermGroup{
710 Operands: []ast.Node{
711 &ast.Term{
712 Key: "DET",
713 Match: ast.MatchEqual,
714 },
715 },
716 Relation: ast.AndRelation,
717 },
718 },
719 },
720 },
721 wantErr: false,
722 },
723 {
724 name: "Invalid match type",
725 input: `{
726 "@type": "koral:term",
727 "key": "DET",
728 "match": "match:invalid"
729 }`,
730 wantErr: true,
731 },
732 {
733 name: "Invalid relation type",
734 input: `{
735 "@type": "koral:termGroup",
736 "operands": [
737 {
738 "@type": "koral:term",
739 "key": "DET"
740 }
741 ],
742 "relation": "relation:invalid"
743 }`,
744 wantErr: true,
745 },
746 {
747 name: "Empty operands in term group",
748 input: `{
749 "@type": "koral:termGroup",
750 "operands": [],
751 "relation": "relation:and"
752 }`,
753 wantErr: true,
754 },
755 {
756 name: "Null values in term",
757 input: `{
758 "@type": "koral:term",
759 "foundry": null,
760 "key": "DET",
761 "layer": null,
762 "match": null,
763 "value": null
764 }`,
765 expected: &ast.Term{
766 Key: "DET",
767 Match: ast.MatchEqual,
768 },
769 wantErr: false,
770 },
771 }
772
773 for _, tt := range tests {
774 t.Run(tt.name, func(t *testing.T) {
775 result, err := ParseJSON([]byte(tt.input))
776 if tt.wantErr {
777 assert.Error(t, err)
778 return
779 }
780 require.NoError(t, err)
781 compareNodes(t, tt.expected, result)
782 })
783 }
784}