blob: c2f1f6b5fb84aaf163dbf182c3b05b3807023aaf [file] [log] [blame]
Akronb7e1f352025-05-16 15:45:23 +02001package parser
2
3import (
4 "encoding/json"
5 "testing"
6
Akronfa55bb22025-05-26 15:10:42 +02007 "github.com/KorAP/KoralPipe-TermMapper/ast"
Akronb7e1f352025-05-16 15:45:23 +02008 "github.com/stretchr/testify/assert"
9 "github.com/stretchr/testify/require"
10)
11
Akron56e09e72025-05-22 15:38:35 +020012// normalizeJSON normalizes JSON by parsing and re-marshaling it
13func normalizeJSON(t *testing.T, data json.RawMessage) json.RawMessage {
14 var v interface{}
15 err := json.Unmarshal(data, &v)
16 require.NoError(t, err)
17
18 // Convert to canonical form (sorted keys, no whitespace)
19 normalized, err := json.Marshal(v)
20 require.NoError(t, err)
21 return normalized
22}
23
24// compareJSON compares two JSON strings for equality, ignoring whitespace and field order
25func compareJSON(t *testing.T, expected, actual string) bool {
26 // Parse both JSON strings
27 var expectedObj, actualObj interface{}
28 err := json.Unmarshal([]byte(expected), &expectedObj)
29 require.NoError(t, err, "Failed to parse expected JSON")
30 err = json.Unmarshal([]byte(actual), &actualObj)
31 require.NoError(t, err, "Failed to parse actual JSON")
32
33 // Convert both to canonical form
34 expectedBytes, err := json.Marshal(expectedObj)
35 require.NoError(t, err)
36 actualBytes, err := json.Marshal(actualObj)
37 require.NoError(t, err)
38
39 // Compare the canonical forms
40 return string(expectedBytes) == string(actualBytes)
41}
42
43// compareNodes compares two AST nodes, normalizing JSON content in CatchallNodes
44func compareNodes(t *testing.T, expected, actual ast.Node) bool {
45 // If both nodes are CatchallNodes, normalize their JSON content before comparison
46 if expectedCatchall, ok := expected.(*ast.CatchallNode); ok {
47 if actualCatchall, ok := actual.(*ast.CatchallNode); ok {
48 // Compare NodeType
49 if !assert.Equal(t, expectedCatchall.NodeType, actualCatchall.NodeType) {
50 t.Logf("NodeType mismatch: expected '%s', got '%s'", expectedCatchall.NodeType, actualCatchall.NodeType)
51 return false
52 }
53
54 // Normalize and compare RawContent
55 if expectedCatchall.RawContent != nil && actualCatchall.RawContent != nil {
56 expectedNorm := normalizeJSON(t, expectedCatchall.RawContent)
57 actualNorm := normalizeJSON(t, actualCatchall.RawContent)
58 if !assert.Equal(t, string(expectedNorm), string(actualNorm)) {
59 t.Logf("RawContent mismatch:\nExpected: %s\nActual: %s", expectedNorm, actualNorm)
60 return false
61 }
62 } else if !assert.Equal(t, expectedCatchall.RawContent == nil, actualCatchall.RawContent == nil) {
63 t.Log("One node has RawContent while the other doesn't")
64 return false
65 }
66
67 // Compare Operands
68 if !assert.Equal(t, len(expectedCatchall.Operands), len(actualCatchall.Operands)) {
69 t.Logf("Operands length mismatch: expected %d, got %d", len(expectedCatchall.Operands), len(actualCatchall.Operands))
70 return false
71 }
72 for i := range expectedCatchall.Operands {
73 if !compareNodes(t, expectedCatchall.Operands[i], actualCatchall.Operands[i]) {
74 t.Logf("Operand %d mismatch", i)
75 return false
76 }
77 }
78
79 // Compare Wrap
80 if expectedCatchall.Wrap != nil || actualCatchall.Wrap != nil {
81 if !assert.Equal(t, expectedCatchall.Wrap != nil, actualCatchall.Wrap != nil) {
82 t.Log("One node has Wrap while the other doesn't")
83 return false
84 }
85 if expectedCatchall.Wrap != nil {
86 if !compareNodes(t, expectedCatchall.Wrap, actualCatchall.Wrap) {
87 t.Log("Wrap node mismatch")
88 return false
89 }
90 }
91 }
92
93 return true
94 }
95 }
96
97 // For Token nodes, compare their Wrap fields using compareNodes
98 if expectedToken, ok := expected.(*ast.Token); ok {
99 if actualToken, ok := actual.(*ast.Token); ok {
100 if expectedToken.Wrap == nil || actualToken.Wrap == nil {
101 return assert.Equal(t, expectedToken.Wrap == nil, actualToken.Wrap == nil)
102 }
103 return compareNodes(t, expectedToken.Wrap, actualToken.Wrap)
104 }
105 }
106
107 // For TermGroup nodes, compare relation and operands
108 if expectedGroup, ok := expected.(*ast.TermGroup); ok {
109 if actualGroup, ok := actual.(*ast.TermGroup); ok {
110 if !assert.Equal(t, expectedGroup.Relation, actualGroup.Relation) {
111 t.Logf("Relation mismatch: expected '%s', got '%s'", expectedGroup.Relation, actualGroup.Relation)
112 return false
113 }
114 if !assert.Equal(t, len(expectedGroup.Operands), len(actualGroup.Operands)) {
115 t.Logf("Operands length mismatch: expected %d, got %d", len(expectedGroup.Operands), len(actualGroup.Operands))
116 return false
117 }
118 for i := range expectedGroup.Operands {
119 if !compareNodes(t, expectedGroup.Operands[i], actualGroup.Operands[i]) {
120 t.Logf("Operand %d mismatch", i)
121 return false
122 }
123 }
124 return true
125 }
126 }
127
128 // For Term nodes, compare all fields
129 if expectedTerm, ok := expected.(*ast.Term); ok {
130 if actualTerm, ok := actual.(*ast.Term); ok {
131 equal := assert.Equal(t, expectedTerm.Foundry, actualTerm.Foundry) &&
132 assert.Equal(t, expectedTerm.Key, actualTerm.Key) &&
133 assert.Equal(t, expectedTerm.Layer, actualTerm.Layer) &&
134 assert.Equal(t, expectedTerm.Match, actualTerm.Match) &&
135 assert.Equal(t, expectedTerm.Value, actualTerm.Value)
136 if !equal {
137 t.Logf("Term mismatch:\nExpected: %+v\nActual: %+v", expectedTerm, actualTerm)
138 }
139 return equal
140 }
141 }
142
143 // For other node types or mismatched types, use regular equality comparison
144 equal := assert.Equal(t, expected, actual)
145 if !equal {
146 t.Logf("Node type mismatch:\nExpected type: %T\nActual type: %T", expected, actual)
147 }
148 return equal
149}
150
Akronb7e1f352025-05-16 15:45:23 +0200151func TestParseJSON(t *testing.T) {
152 tests := []struct {
153 name string
154 input string
155 expected ast.Node
156 wantErr bool
157 }{
158 {
159 name: "Parse simple term",
160 input: `{
161 "@type": "koral:term",
162 "foundry": "opennlp",
163 "key": "DET",
164 "layer": "p",
165 "match": "match:eq"
166 }`,
167 expected: &ast.Term{
168 Foundry: "opennlp",
169 Key: "DET",
170 Layer: "p",
171 Match: ast.MatchEqual,
172 },
173 wantErr: false,
174 },
175 {
176 name: "Parse term group with AND relation",
177 input: `{
178 "@type": "koral:termGroup",
179 "operands": [
180 {
181 "@type": "koral:term",
182 "foundry": "opennlp",
183 "key": "DET",
184 "layer": "p",
185 "match": "match:eq"
186 },
187 {
188 "@type": "koral:term",
189 "foundry": "opennlp",
190 "key": "AdjType",
191 "layer": "m",
192 "match": "match:eq",
193 "value": "Pdt"
194 }
195 ],
196 "relation": "relation:and"
197 }`,
198 expected: &ast.TermGroup{
199 Operands: []ast.Node{
200 &ast.Term{
201 Foundry: "opennlp",
202 Key: "DET",
203 Layer: "p",
204 Match: ast.MatchEqual,
205 },
206 &ast.Term{
207 Foundry: "opennlp",
208 Key: "AdjType",
209 Layer: "m",
210 Match: ast.MatchEqual,
211 Value: "Pdt",
212 },
213 },
214 Relation: ast.AndRelation,
215 },
216 wantErr: false,
217 },
218 {
219 name: "Parse token with wrapped term",
220 input: `{
221 "@type": "koral:token",
222 "wrap": {
223 "@type": "koral:term",
224 "foundry": "opennlp",
225 "key": "DET",
226 "layer": "p",
227 "match": "match:eq"
228 }
229 }`,
230 expected: &ast.Token{
231 Wrap: &ast.Term{
232 Foundry: "opennlp",
233 Key: "DET",
234 Layer: "p",
235 Match: ast.MatchEqual,
236 },
237 },
238 wantErr: false,
239 },
240 {
241 name: "Parse complex nested structure",
242 input: `{
243 "@type": "koral:token",
244 "wrap": {
245 "@type": "koral:termGroup",
246 "operands": [
247 {
248 "@type": "koral:term",
249 "foundry": "opennlp",
250 "key": "DET",
251 "layer": "p",
252 "match": "match:eq"
253 },
254 {
255 "@type": "koral:termGroup",
256 "operands": [
257 {
258 "@type": "koral:term",
259 "foundry": "opennlp",
260 "key": "AdjType",
261 "layer": "m",
262 "match": "match:eq",
263 "value": "Pdt"
264 },
265 {
266 "@type": "koral:term",
267 "foundry": "opennlp",
268 "key": "PronType",
269 "layer": "m",
270 "match": "match:ne",
271 "value": "Neg"
272 }
273 ],
274 "relation": "relation:or"
275 }
276 ],
277 "relation": "relation:and"
278 }
279 }`,
280 expected: &ast.Token{
281 Wrap: &ast.TermGroup{
282 Operands: []ast.Node{
283 &ast.Term{
284 Foundry: "opennlp",
285 Key: "DET",
286 Layer: "p",
287 Match: ast.MatchEqual,
288 },
289 &ast.TermGroup{
290 Operands: []ast.Node{
291 &ast.Term{
292 Foundry: "opennlp",
293 Key: "AdjType",
294 Layer: "m",
295 Match: ast.MatchEqual,
296 Value: "Pdt",
297 },
298 &ast.Term{
299 Foundry: "opennlp",
300 Key: "PronType",
301 Layer: "m",
302 Match: ast.MatchNotEqual,
303 Value: "Neg",
304 },
305 },
306 Relation: ast.OrRelation,
307 },
308 },
309 Relation: ast.AndRelation,
310 },
311 },
312 wantErr: false,
313 },
314 {
315 name: "Invalid JSON",
316 input: `{"invalid": json`,
317 wantErr: true,
318 },
319 {
320 name: "Empty JSON",
321 input: `{}`,
322 wantErr: true,
323 },
324 {
Akron32958422025-05-16 16:33:05 +0200325 name: "Unknown node type",
Akronb7e1f352025-05-16 15:45:23 +0200326 input: `{
327 "@type": "koral:unknown",
328 "key": "value"
329 }`,
Akron32958422025-05-16 16:33:05 +0200330 expected: &ast.CatchallNode{
331 NodeType: "koral:unknown",
332 RawContent: json.RawMessage(`{"@type":"koral:unknown","key":"value"}`),
333 },
334 wantErr: false,
Akronb7e1f352025-05-16 15:45:23 +0200335 },
336 }
337
338 for _, tt := range tests {
339 t.Run(tt.name, func(t *testing.T) {
340 result, err := ParseJSON([]byte(tt.input))
341 if tt.wantErr {
342 assert.Error(t, err)
343 return
344 }
345
346 require.NoError(t, err)
347 assert.Equal(t, tt.expected, result)
348 })
349 }
350}
351
352func TestSerializeToJSON(t *testing.T) {
353 tests := []struct {
354 name string
355 input ast.Node
356 expected string
357 wantErr bool
358 }{
359 {
360 name: "Serialize simple term",
361 input: &ast.Term{
362 Foundry: "opennlp",
363 Key: "DET",
364 Layer: "p",
365 Match: ast.MatchEqual,
366 },
367 expected: `{
368 "@type": "koral:term",
369 "foundry": "opennlp",
370 "key": "DET",
371 "layer": "p",
372 "match": "match:eq"
373}`,
374 wantErr: false,
375 },
376 {
377 name: "Serialize term group",
378 input: &ast.TermGroup{
379 Operands: []ast.Node{
380 &ast.Term{
381 Foundry: "opennlp",
382 Key: "DET",
383 Layer: "p",
384 Match: ast.MatchEqual,
385 },
386 &ast.Term{
387 Foundry: "opennlp",
388 Key: "AdjType",
389 Layer: "m",
390 Match: ast.MatchEqual,
391 Value: "Pdt",
392 },
393 },
394 Relation: ast.AndRelation,
395 },
396 expected: `{
397 "@type": "koral:termGroup",
398 "operands": [
399 {
400 "@type": "koral:term",
401 "foundry": "opennlp",
402 "key": "DET",
403 "layer": "p",
404 "match": "match:eq"
405 },
406 {
407 "@type": "koral:term",
408 "foundry": "opennlp",
409 "key": "AdjType",
410 "layer": "m",
411 "match": "match:eq",
412 "value": "Pdt"
413 }
414 ],
415 "relation": "relation:and"
416}`,
417 wantErr: false,
418 },
Akron32958422025-05-16 16:33:05 +0200419 {
420 name: "Serialize unknown node type",
421 input: &ast.CatchallNode{
422 NodeType: "koral:unknown",
423 RawContent: json.RawMessage(`{
424 "@type": "koral:unknown",
425 "key": "value"
426}`),
427 },
428 expected: `{
429 "@type": "koral:unknown",
430 "key": "value"
431}`,
432 wantErr: false,
433 },
Akronb7e1f352025-05-16 15:45:23 +0200434 }
435
436 for _, tt := range tests {
437 t.Run(tt.name, func(t *testing.T) {
438 result, err := SerializeToJSON(tt.input)
439 if tt.wantErr {
440 assert.Error(t, err)
441 return
442 }
443
444 require.NoError(t, err)
445 // Compare JSON objects instead of raw strings to avoid whitespace issues
Akron56e09e72025-05-22 15:38:35 +0200446 var expected, actual any
Akronb7e1f352025-05-16 15:45:23 +0200447 err = json.Unmarshal([]byte(tt.expected), &expected)
448 require.NoError(t, err)
449 err = json.Unmarshal(result, &actual)
450 require.NoError(t, err)
451 assert.Equal(t, expected, actual)
452 })
453 }
454}
455
456func TestRoundTrip(t *testing.T) {
457 // Test that parsing and then serializing produces equivalent JSON
458 input := `{
459 "@type": "koral:token",
460 "wrap": {
461 "@type": "koral:termGroup",
462 "operands": [
463 {
464 "@type": "koral:term",
465 "foundry": "opennlp",
466 "key": "DET",
467 "layer": "p",
468 "match": "match:eq"
469 },
470 {
471 "@type": "koral:term",
472 "foundry": "opennlp",
473 "key": "AdjType",
474 "layer": "m",
475 "match": "match:eq",
476 "value": "Pdt"
477 }
478 ],
479 "relation": "relation:and"
480 }
481 }`
482
483 // Parse JSON to AST
484 node, err := ParseJSON([]byte(input))
485 require.NoError(t, err)
486
487 // Serialize AST back to JSON
488 output, err := SerializeToJSON(node)
489 require.NoError(t, err)
490
491 // Compare JSON objects
492 var expected, actual interface{}
493 err = json.Unmarshal([]byte(input), &expected)
494 require.NoError(t, err)
495 err = json.Unmarshal(output, &actual)
496 require.NoError(t, err)
497 assert.Equal(t, expected, actual)
498}
Akron32958422025-05-16 16:33:05 +0200499
500func TestRoundTripUnknownType(t *testing.T) {
501 // Test that parsing and then serializing an unknown node type preserves the structure
502 input := `{
503 "@type": "koral:unknown",
504 "key": "value",
505 "wrap": {
506 "@type": "koral:term",
507 "foundry": "opennlp",
508 "key": "DET",
509 "layer": "p",
510 "match": "match:eq"
511 },
512 "operands": [
513 {
514 "@type": "koral:term",
515 "foundry": "opennlp",
516 "key": "AdjType",
517 "layer": "m",
518 "match": "match:eq",
519 "value": "Pdt"
520 }
521 ]
522 }`
523
524 // Parse JSON to AST
525 node, err := ParseJSON([]byte(input))
526 require.NoError(t, err)
527
528 // Check that it's a CatchallNode
529 catchall, ok := node.(*ast.CatchallNode)
530 require.True(t, ok)
531 assert.Equal(t, "koral:unknown", catchall.NodeType)
532
533 // Check that wrap and operands were parsed
534 require.NotNil(t, catchall.Wrap)
535 require.Len(t, catchall.Operands, 1)
536
537 // Serialize AST back to JSON
538 output, err := SerializeToJSON(node)
539 require.NoError(t, err)
540
541 // Compare JSON objects
542 var expected, actual interface{}
543 err = json.Unmarshal([]byte(input), &expected)
544 require.NoError(t, err)
545 err = json.Unmarshal(output, &actual)
546 require.NoError(t, err)
547 assert.Equal(t, expected, actual)
548}
Akron56e09e72025-05-22 15:38:35 +0200549
550func TestParseJSONEdgeCases(t *testing.T) {
551 tests := []struct {
552 name string
553 input string
554 expected ast.Node
555 wantErr bool
556 }{
557 {
558 name: "Unknown node type",
559 input: `{
560 "@type": "koral:unknown",
561 "customField": "value",
562 "wrap": {
563 "@type": "koral:term",
564 "key": "DET"
565 }
566 }`,
567 expected: &ast.CatchallNode{
568 NodeType: "koral:unknown",
569 RawContent: json.RawMessage(`{
570 "@type": "koral:unknown",
571 "customField": "value",
572 "wrap": {
573 "@type": "koral:term",
574 "key": "DET"
575 }
576 }`),
577 Wrap: &ast.Term{
578 Key: "DET",
579 Match: ast.MatchEqual,
580 },
581 },
582 wantErr: false,
583 },
584 {
585 name: "Unknown node with operands",
586 input: `{
587 "@type": "koral:unknown",
588 "operands": [
589 {
590 "@type": "koral:term",
591 "key": "DET"
592 },
593 {
594 "@type": "koral:term",
595 "key": "NOUN"
596 }
597 ]
598 }`,
599 expected: &ast.CatchallNode{
600 NodeType: "koral:unknown",
601 RawContent: json.RawMessage(`{
602 "@type": "koral:unknown",
603 "operands": [
604 {
605 "@type": "koral:term",
606 "key": "DET"
607 },
608 {
609 "@type": "koral:term",
610 "key": "NOUN"
611 }
612 ]
613 }`),
614 Operands: []ast.Node{
615 &ast.Term{
616 Key: "DET",
617 Match: ast.MatchEqual,
618 },
619 &ast.Term{
620 Key: "NOUN",
621 Match: ast.MatchEqual,
622 },
623 },
624 },
625 wantErr: false,
626 },
627 {
628 name: "Deeply nested unknown nodes",
629 input: `{
630 "@type": "koral:outer",
631 "wrap": {
632 "@type": "koral:middle",
633 "wrap": {
634 "@type": "koral:inner",
635 "wrap": {
636 "@type": "koral:term",
637 "key": "DET"
638 }
639 }
640 }
641 }`,
642 expected: &ast.CatchallNode{
643 NodeType: "koral:outer",
644 RawContent: json.RawMessage(`{
645 "@type": "koral:outer",
646 "wrap": {
647 "@type": "koral:middle",
648 "wrap": {
649 "@type": "koral:inner",
650 "wrap": {
651 "@type": "koral:term",
652 "key": "DET"
653 }
654 }
655 }
656 }`),
657 Wrap: &ast.CatchallNode{
658 NodeType: "koral:middle",
659 RawContent: json.RawMessage(`{
660 "@type": "koral:middle",
661 "wrap": {
662 "@type": "koral:inner",
663 "wrap": {
664 "@type": "koral:term",
665 "key": "DET"
666 }
667 }
668 }`),
669 Wrap: &ast.CatchallNode{
670 NodeType: "koral:inner",
671 RawContent: json.RawMessage(`{
672 "@type": "koral:inner",
673 "wrap": {
674 "@type": "koral:term",
675 "key": "DET"
676 }
677 }`),
678 Wrap: &ast.Term{
679 Key: "DET",
680 Match: ast.MatchEqual,
681 },
682 },
683 },
684 },
685 wantErr: false,
686 },
687 {
688 name: "Mixed known and unknown nodes",
689 input: `{
690 "@type": "koral:token",
691 "wrap": {
692 "@type": "koral:custom",
693 "customField": "value",
694 "operands": [
695 {
696 "@type": "koral:termGroup",
697 "operands": [
698 {
699 "@type": "koral:term",
700 "key": "DET"
701 }
702 ],
703 "relation": "relation:and"
704 }
705 ]
706 }
707 }`,
708 expected: &ast.Token{
709 Wrap: &ast.CatchallNode{
710 NodeType: "koral:custom",
711 RawContent: json.RawMessage(`{
712 "@type": "koral:custom",
713 "customField": "value",
714 "operands": [
715 {
716 "@type": "koral:termGroup",
717 "operands": [
718 {
719 "@type": "koral:term",
720 "key": "DET"
721 }
722 ],
723 "relation": "relation:and"
724 }
725 ]
726 }`),
727 Operands: []ast.Node{
728 &ast.TermGroup{
729 Operands: []ast.Node{
730 &ast.Term{
731 Key: "DET",
732 Match: ast.MatchEqual,
733 },
734 },
735 Relation: ast.AndRelation,
736 },
737 },
738 },
739 },
740 wantErr: false,
741 },
742 {
743 name: "Invalid match type",
744 input: `{
745 "@type": "koral:term",
746 "key": "DET",
747 "match": "match:invalid"
748 }`,
749 wantErr: true,
750 },
751 {
752 name: "Invalid relation type",
753 input: `{
754 "@type": "koral:termGroup",
755 "operands": [
756 {
757 "@type": "koral:term",
758 "key": "DET"
759 }
760 ],
761 "relation": "relation:invalid"
762 }`,
763 wantErr: true,
764 },
765 {
766 name: "Empty operands in term group",
767 input: `{
768 "@type": "koral:termGroup",
769 "operands": [],
770 "relation": "relation:and"
771 }`,
772 wantErr: true,
773 },
774 {
775 name: "Null values in term",
776 input: `{
777 "@type": "koral:term",
778 "foundry": null,
779 "key": "DET",
780 "layer": null,
781 "match": null,
782 "value": null
783 }`,
784 expected: &ast.Term{
785 Key: "DET",
786 Match: ast.MatchEqual,
787 },
788 wantErr: false,
789 },
790 }
791
792 for _, tt := range tests {
793 t.Run(tt.name, func(t *testing.T) {
794 result, err := ParseJSON([]byte(tt.input))
795 if tt.wantErr {
796 assert.Error(t, err)
797 return
798 }
799 require.NoError(t, err)
800 compareNodes(t, tt.expected, result)
801 })
802 }
803}