blob: 47af1f941d59c64013540307ab9b024f3ed954dc [file] [log] [blame]
Akronb7e1f352025-05-16 15:45:23 +02001package matcher
2
// These tests cover the matcher, which reports whether a pattern matches a
// node in the AST and replaces matching nodes with a replacement.
5
Akronb7e1f352025-05-16 15:45:23 +02006import (
Akronbf5149c2025-05-20 15:53:41 +02007 "encoding/json"
Akronb7e1f352025-05-16 15:45:23 +02008 "testing"
9
10 "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
11 "github.com/stretchr/testify/assert"
12)
13
Akrond5850f82025-05-23 16:44:44 +020014func TestNewMatcherValidation(t *testing.T) {
15 tests := []struct {
16 name string
17 pattern ast.Pattern
18 replacement ast.Replacement
19 expectedError string
20 }{
21 {
22 name: "Valid pattern and replacement",
23 pattern: ast.Pattern{
24 Root: &ast.Term{
25 Foundry: "opennlp",
26 Key: "DET",
27 Layer: "p",
28 Match: ast.MatchEqual,
29 },
30 },
31 replacement: ast.Replacement{
32 Root: &ast.Term{
33 Foundry: "opennlp",
34 Key: "COMBINED_DET",
35 Layer: "p",
36 Match: ast.MatchEqual,
37 },
38 },
39 expectedError: "",
40 },
41 {
42 name: "Invalid pattern - CatchallNode",
43 pattern: ast.Pattern{
44 Root: &ast.CatchallNode{
45 NodeType: "custom",
46 },
47 },
48 replacement: ast.Replacement{
49 Root: &ast.Term{
50 Foundry: "opennlp",
51 Key: "DET",
52 Layer: "p",
53 Match: ast.MatchEqual,
54 },
55 },
56 expectedError: "invalid pattern: catchall nodes are not allowed in pattern/replacement ASTs",
57 },
58 {
59 name: "Invalid replacement - CatchallNode",
60 pattern: ast.Pattern{
61 Root: &ast.Term{
62 Foundry: "opennlp",
63 Key: "DET",
64 Layer: "p",
65 Match: ast.MatchEqual,
66 },
67 },
68 replacement: ast.Replacement{
69 Root: &ast.CatchallNode{
70 NodeType: "custom",
71 },
72 },
73 expectedError: "invalid replacement: catchall nodes are not allowed in pattern/replacement ASTs",
74 },
75 {
76 name: "Invalid pattern - Empty TermGroup",
77 pattern: ast.Pattern{
78 Root: &ast.TermGroup{
79 Operands: []ast.Node{},
80 Relation: ast.AndRelation,
81 },
82 },
83 replacement: ast.Replacement{
84 Root: &ast.Term{
85 Foundry: "opennlp",
86 Key: "DET",
87 Layer: "p",
88 Match: ast.MatchEqual,
89 },
90 },
91 expectedError: "invalid pattern: empty term group",
92 },
93 {
94 name: "Invalid pattern - Nested CatchallNode",
95 pattern: ast.Pattern{
96 Root: &ast.TermGroup{
97 Operands: []ast.Node{
98 &ast.Term{
99 Foundry: "opennlp",
100 Key: "DET",
101 Layer: "p",
102 Match: ast.MatchEqual,
103 },
104 &ast.CatchallNode{
105 NodeType: "custom",
106 },
107 },
108 Relation: ast.AndRelation,
109 },
110 },
111 replacement: ast.Replacement{
112 Root: &ast.Term{
113 Foundry: "opennlp",
114 Key: "DET",
115 Layer: "p",
116 Match: ast.MatchEqual,
117 },
118 },
119 expectedError: "invalid pattern: invalid operand: catchall nodes are not allowed in pattern/replacement ASTs",
120 },
121 }
122
123 for _, tt := range tests {
124 t.Run(tt.name, func(t *testing.T) {
125 matcher, err := NewMatcher(tt.pattern, tt.replacement)
126 if tt.expectedError != "" {
127 assert.Error(t, err)
128 assert.Equal(t, tt.expectedError, err.Error())
129 assert.Nil(t, matcher)
130 } else {
131 assert.NoError(t, err)
132 assert.NotNil(t, matcher)
133 }
134 })
135 }
136}
137
Akronb7e1f352025-05-16 15:45:23 +0200138func TestMatchSimplePattern(t *testing.T) {
139 // Create a simple pattern: match a term with DET
140 pattern := ast.Pattern{
141 Root: &ast.Term{
142 Foundry: "opennlp",
143 Key: "DET",
144 Layer: "p",
145 Match: ast.MatchEqual,
146 },
147 }
148
149 // Create a simple replacement
150 replacement := ast.Replacement{
151 Root: &ast.Term{
152 Foundry: "opennlp",
153 Key: "COMBINED_DET",
154 Layer: "p",
155 Match: ast.MatchEqual,
156 },
157 }
158
Akrond5850f82025-05-23 16:44:44 +0200159 m, err := NewMatcher(pattern, replacement)
160 assert.NoError(t, err)
161 assert.NotNil(t, m)
Akronb7e1f352025-05-16 15:45:23 +0200162
163 tests := []struct {
164 name string
165 input ast.Node
166 expected bool
167 }{
168 {
169 name: "Exact match",
170 input: &ast.Term{
171 Foundry: "opennlp",
172 Key: "DET",
173 Layer: "p",
174 Match: ast.MatchEqual,
175 },
176 expected: true,
177 },
178 {
179 name: "Different key",
180 input: &ast.Term{
181 Foundry: "opennlp",
182 Key: "NOUN",
183 Layer: "p",
184 Match: ast.MatchEqual,
185 },
186 expected: false,
187 },
188 {
189 name: "Different foundry",
190 input: &ast.Term{
191 Foundry: "different",
192 Key: "DET",
193 Layer: "p",
194 Match: ast.MatchEqual,
195 },
196 expected: false,
197 },
198 {
199 name: "Different match type",
200 input: &ast.Term{
201 Foundry: "opennlp",
202 Key: "DET",
203 Layer: "p",
204 Match: ast.MatchNotEqual,
205 },
206 expected: false,
207 },
208 {
Akronbf5149c2025-05-20 15:53:41 +0200209 name: "Nested node",
Akronb7e1f352025-05-16 15:45:23 +0200210 input: &ast.Token{
211 Wrap: &ast.Term{
212 Foundry: "opennlp",
213 Key: "DET",
214 Layer: "p",
215 Match: ast.MatchEqual,
216 },
217 },
Akronbf5149c2025-05-20 15:53:41 +0200218 expected: true,
Akronb7e1f352025-05-16 15:45:23 +0200219 },
220 }
221
222 for _, tt := range tests {
223 t.Run(tt.name, func(t *testing.T) {
224 result := m.Match(tt.input)
225 assert.Equal(t, tt.expected, result)
226 })
227 }
228}
229
230func TestMatchComplexPattern(t *testing.T) {
231 // Create a complex pattern: DET AND (AdjType=Pdt OR PronType=Ind)
232 pattern := ast.Pattern{
233 Root: &ast.Token{
234 Wrap: &ast.TermGroup{
235 Operands: []ast.Node{
236 &ast.Term{
237 Foundry: "opennlp",
238 Key: "DET",
239 Layer: "p",
240 Match: ast.MatchEqual,
241 },
242 &ast.TermGroup{
243 Operands: []ast.Node{
244 &ast.Term{
245 Foundry: "opennlp",
246 Key: "AdjType",
247 Layer: "m",
248 Match: ast.MatchEqual,
249 Value: "Pdt",
250 },
251 &ast.Term{
252 Foundry: "opennlp",
253 Key: "PronType",
254 Layer: "m",
255 Match: ast.MatchEqual,
256 Value: "Ind",
257 },
258 },
259 Relation: ast.OrRelation,
260 },
261 },
262 Relation: ast.AndRelation,
263 },
264 },
265 }
266
267 replacement := ast.Replacement{
268 Root: &ast.Token{
269 Wrap: &ast.Term{
270 Foundry: "opennlp",
271 Key: "COMBINED_DET",
272 Layer: "p",
273 Match: ast.MatchEqual,
274 },
275 },
276 }
277
Akrond5850f82025-05-23 16:44:44 +0200278 m, err := NewMatcher(pattern, replacement)
279 assert.NoError(t, err)
280 assert.NotNil(t, m)
Akronb7e1f352025-05-16 15:45:23 +0200281
282 tests := []struct {
283 name string
284 input ast.Node
285 expected bool
286 }{
287 {
288 name: "Match with AdjType=Pdt",
289 input: &ast.Token{
290 Wrap: &ast.TermGroup{
291 Operands: []ast.Node{
292 &ast.Term{
293 Foundry: "opennlp",
294 Key: "DET",
295 Layer: "p",
296 Match: ast.MatchEqual,
297 },
298 &ast.Term{
299 Foundry: "opennlp",
300 Key: "AdjType",
301 Layer: "m",
302 Match: ast.MatchEqual,
303 Value: "Pdt",
304 },
305 },
306 Relation: ast.AndRelation,
307 },
308 },
309 expected: true,
310 },
311 {
312 name: "Match with PronType=Ind",
313 input: &ast.Token{
314 Wrap: &ast.TermGroup{
315 Operands: []ast.Node{
316 &ast.Term{
317 Foundry: "opennlp",
318 Key: "DET",
319 Layer: "p",
320 Match: ast.MatchEqual,
321 },
322 &ast.Term{
323 Foundry: "opennlp",
324 Key: "PronType",
325 Layer: "m",
326 Match: ast.MatchEqual,
327 Value: "Ind",
328 },
329 },
330 Relation: ast.AndRelation,
331 },
332 },
333 expected: true,
334 },
335 {
336 name: "No match - missing DET",
337 input: &ast.Token{
338 Wrap: &ast.TermGroup{
339 Operands: []ast.Node{
340 &ast.Term{
341 Foundry: "opennlp",
342 Key: "NOUN",
343 Layer: "p",
344 Match: ast.MatchEqual,
345 },
346 &ast.Term{
347 Foundry: "opennlp",
348 Key: "AdjType",
349 Layer: "m",
350 Match: ast.MatchEqual,
351 Value: "Pdt",
352 },
353 },
354 Relation: ast.AndRelation,
355 },
356 },
357 expected: false,
358 },
359 {
360 name: "No match - wrong value",
361 input: &ast.Token{
362 Wrap: &ast.TermGroup{
363 Operands: []ast.Node{
364 &ast.Term{
365 Foundry: "opennlp",
366 Key: "DET",
367 Layer: "p",
368 Match: ast.MatchEqual,
369 },
370 &ast.Term{
371 Foundry: "opennlp",
372 Key: "AdjType",
373 Layer: "m",
374 Match: ast.MatchEqual,
375 Value: "Wrong",
376 },
377 },
378 Relation: ast.AndRelation,
379 },
380 },
381 expected: false,
382 },
383 }
384
385 for _, tt := range tests {
386 t.Run(tt.name, func(t *testing.T) {
387 result := m.Match(tt.input)
388 assert.Equal(t, tt.expected, result)
389 })
390 }
391}
392
393func TestReplace(t *testing.T) {
Akronb7e1f352025-05-16 15:45:23 +0200394 pattern := ast.Pattern{
Akrond5850f82025-05-23 16:44:44 +0200395 Root: &ast.Term{
396 Foundry: "opennlp",
397 Key: "DET",
398 Layer: "p",
399 Match: ast.MatchEqual,
Akronb7e1f352025-05-16 15:45:23 +0200400 },
401 }
402
403 replacement := ast.Replacement{
404 Root: &ast.Term{
405 Foundry: "opennlp",
406 Key: "COMBINED_DET",
407 Layer: "p",
408 Match: ast.MatchEqual,
409 },
410 }
411
Akrond5850f82025-05-23 16:44:44 +0200412 m, err := NewMatcher(pattern, replacement)
413 assert.NoError(t, err)
414 assert.NotNil(t, m)
Akronb7e1f352025-05-16 15:45:23 +0200415
416 tests := []struct {
417 name string
418 input ast.Node
419 expected ast.Node
420 }{
421 {
422 name: "Replace matching pattern",
423 input: &ast.TermGroup{
424 Operands: []ast.Node{
425 &ast.Term{
426 Foundry: "opennlp",
427 Key: "DET",
428 Layer: "p",
429 Match: ast.MatchEqual,
430 },
431 &ast.Term{
432 Foundry: "opennlp",
433 Key: "AdjType",
434 Layer: "m",
435 Match: ast.MatchEqual,
436 Value: "Pdt",
437 },
438 },
439 Relation: ast.AndRelation,
440 },
Akrond5850f82025-05-23 16:44:44 +0200441 expected: &ast.TermGroup{
442 Operands: []ast.Node{
443 &ast.Term{
444 Foundry: "opennlp",
445 Key: "COMBINED_DET",
446 Layer: "p",
447 Match: ast.MatchEqual,
448 },
449 &ast.Term{
450 Foundry: "opennlp",
451 Key: "AdjType",
452 Layer: "m",
453 Match: ast.MatchEqual,
454 Value: "Pdt",
455 },
456 },
457 Relation: ast.AndRelation,
Akronb7e1f352025-05-16 15:45:23 +0200458 },
459 },
460 {
461 name: "No replacement for non-matching pattern",
462 input: &ast.TermGroup{
463 Operands: []ast.Node{
464 &ast.Term{
465 Foundry: "opennlp",
466 Key: "NOUN",
467 Layer: "p",
468 Match: ast.MatchEqual,
469 },
470 &ast.Term{
471 Foundry: "opennlp",
472 Key: "AdjType",
473 Layer: "m",
474 Match: ast.MatchEqual,
475 Value: "Pdt",
476 },
477 },
478 Relation: ast.AndRelation,
479 },
480 expected: &ast.TermGroup{
481 Operands: []ast.Node{
482 &ast.Term{
483 Foundry: "opennlp",
484 Key: "NOUN",
485 Layer: "p",
486 Match: ast.MatchEqual,
487 },
488 &ast.Term{
489 Foundry: "opennlp",
490 Key: "AdjType",
491 Layer: "m",
492 Match: ast.MatchEqual,
493 Value: "Pdt",
494 },
495 },
496 Relation: ast.AndRelation,
497 },
498 },
499 {
500 name: "Replace in nested structure",
501 input: &ast.Token{
502 Wrap: &ast.TermGroup{
503 Operands: []ast.Node{
504 &ast.TermGroup{
505 Operands: []ast.Node{
506 &ast.Term{
507 Foundry: "opennlp",
508 Key: "DET",
509 Layer: "p",
510 Match: ast.MatchEqual,
511 },
512 &ast.Term{
513 Foundry: "opennlp",
514 Key: "AdjType",
515 Layer: "m",
516 Match: ast.MatchEqual,
517 Value: "Pdt",
518 },
519 },
520 Relation: ast.AndRelation,
521 },
522 &ast.Term{
523 Foundry: "opennlp",
524 Key: "NOUN",
525 Layer: "p",
526 Match: ast.MatchEqual,
527 },
528 },
529 Relation: ast.AndRelation,
530 },
531 },
532 expected: &ast.Token{
533 Wrap: &ast.TermGroup{
534 Operands: []ast.Node{
535 &ast.Term{
536 Foundry: "opennlp",
537 Key: "COMBINED_DET",
538 Layer: "p",
539 Match: ast.MatchEqual,
540 },
541 &ast.Term{
542 Foundry: "opennlp",
543 Key: "NOUN",
544 Layer: "p",
545 Match: ast.MatchEqual,
546 },
547 },
548 Relation: ast.AndRelation,
549 },
550 },
551 },
552 }
553
554 for _, tt := range tests {
555 t.Run(tt.name, func(t *testing.T) {
556 result := m.Replace(tt.input)
557 assert.Equal(t, tt.expected, result)
558 })
559 }
560}
561
562func TestMatchNodeOrder(t *testing.T) {
Akronb7e1f352025-05-16 15:45:23 +0200563 pattern := ast.Pattern{
564 Root: &ast.TermGroup{
565 Operands: []ast.Node{
566 &ast.Term{
567 Foundry: "opennlp",
568 Key: "DET",
569 Layer: "p",
570 Match: ast.MatchEqual,
571 },
572 &ast.Term{
573 Foundry: "opennlp",
574 Key: "AdjType",
575 Layer: "m",
576 Match: ast.MatchEqual,
577 Value: "Pdt",
578 },
579 },
580 Relation: ast.AndRelation,
581 },
582 }
583
584 replacement := ast.Replacement{
585 Root: &ast.Term{
586 Foundry: "opennlp",
587 Key: "COMBINED_DET",
588 Layer: "p",
589 Match: ast.MatchEqual,
590 },
591 }
592
Akrond5850f82025-05-23 16:44:44 +0200593 m, err := NewMatcher(pattern, replacement)
594 assert.NoError(t, err)
595 assert.NotNil(t, m)
Akronb7e1f352025-05-16 15:45:23 +0200596
597 // Test with operands in different orders
598 input1 := &ast.TermGroup{
599 Operands: []ast.Node{
600 &ast.Term{
601 Foundry: "opennlp",
602 Key: "DET",
603 Layer: "p",
604 Match: ast.MatchEqual,
605 },
606 &ast.Term{
607 Foundry: "opennlp",
608 Key: "AdjType",
609 Layer: "m",
610 Match: ast.MatchEqual,
611 Value: "Pdt",
612 },
613 },
614 Relation: ast.AndRelation,
615 }
616
617 input2 := &ast.TermGroup{
618 Operands: []ast.Node{
619 &ast.Term{
620 Foundry: "opennlp",
621 Key: "AdjType",
622 Layer: "m",
623 Match: ast.MatchEqual,
624 Value: "Pdt",
625 },
626 &ast.Term{
627 Foundry: "opennlp",
628 Key: "DET",
629 Layer: "p",
630 Match: ast.MatchEqual,
631 },
632 },
633 Relation: ast.AndRelation,
634 }
635
636 assert.True(t, m.Match(input1), "Should match with original order")
637 assert.True(t, m.Match(input2), "Should match with reversed order")
638}
Akronbf5149c2025-05-20 15:53:41 +0200639
640func TestMatchWithUnknownNodes(t *testing.T) {
Akronbf5149c2025-05-20 15:53:41 +0200641 pattern := ast.Pattern{
642 Root: &ast.Term{
643 Foundry: "opennlp",
644 Key: "DET",
645 Layer: "p",
646 Match: ast.MatchEqual,
647 },
648 }
649
650 replacement := ast.Replacement{
651 Root: &ast.Term{
652 Foundry: "opennlp",
653 Key: "COMBINED_DET",
654 Layer: "p",
655 Match: ast.MatchEqual,
656 },
657 }
658
Akrond5850f82025-05-23 16:44:44 +0200659 m, err := NewMatcher(pattern, replacement)
660 assert.NoError(t, err)
661 assert.NotNil(t, m)
Akronbf5149c2025-05-20 15:53:41 +0200662
663 tests := []struct {
664 name string
665 input ast.Node
666 expected bool
667 }{
668 {
669 name: "Match term inside unknown node with wrap",
670 input: &ast.CatchallNode{
671 NodeType: "koral:custom",
672 RawContent: json.RawMessage(`{
673 "@type": "koral:custom",
674 "customField": "value"
675 }`),
676 Wrap: &ast.Term{
677 Foundry: "opennlp",
678 Key: "DET",
679 Layer: "p",
680 Match: ast.MatchEqual,
681 },
682 },
683 expected: true,
684 },
685 {
686 name: "Match term inside unknown node's operands",
687 input: &ast.CatchallNode{
688 NodeType: "koral:custom",
689 RawContent: json.RawMessage(`{
690 "@type": "koral:custom",
691 "customField": "value"
692 }`),
693 Operands: []ast.Node{
694 &ast.Term{
695 Foundry: "opennlp",
696 Key: "DET",
697 Layer: "p",
698 Match: ast.MatchEqual,
699 },
700 },
701 },
702 expected: true,
703 },
704 {
705 name: "No match in unknown node with different term",
706 input: &ast.CatchallNode{
707 NodeType: "koral:custom",
708 RawContent: json.RawMessage(`{
709 "@type": "koral:custom",
710 "customField": "value"
711 }`),
712 Wrap: &ast.Term{
713 Foundry: "opennlp",
714 Key: "NOUN",
715 Layer: "p",
716 Match: ast.MatchEqual,
717 },
718 },
719 expected: false,
720 },
721 {
722 name: "Match in deeply nested unknown nodes",
723 input: &ast.CatchallNode{
724 NodeType: "koral:outer",
725 RawContent: json.RawMessage(`{
726 "@type": "koral:outer",
727 "outerField": "value"
728 }`),
729 Wrap: &ast.CatchallNode{
730 NodeType: "koral:inner",
731 RawContent: json.RawMessage(`{
732 "@type": "koral:inner",
733 "innerField": "value"
734 }`),
735 Wrap: &ast.Term{
736 Foundry: "opennlp",
737 Key: "DET",
738 Layer: "p",
739 Match: ast.MatchEqual,
740 },
741 },
742 },
743 expected: true,
744 },
745 {
746 name: "Match in mixed known and unknown nodes",
747 input: &ast.Token{
748 Wrap: &ast.CatchallNode{
749 NodeType: "koral:custom",
750 RawContent: json.RawMessage(`{
751 "@type": "koral:custom",
752 "customField": "value"
753 }`),
754 Operands: []ast.Node{
755 &ast.TermGroup{
756 Operands: []ast.Node{
757 &ast.Term{
758 Foundry: "opennlp",
759 Key: "DET",
760 Layer: "p",
761 Match: ast.MatchEqual,
762 },
763 },
764 Relation: ast.AndRelation,
765 },
766 },
767 },
768 },
769 expected: true,
770 },
771 }
772
773 for _, tt := range tests {
774 t.Run(tt.name, func(t *testing.T) {
775 result := m.Match(tt.input)
776 assert.Equal(t, tt.expected, result)
777
778 if tt.expected {
779 // Test replacement when there's a match
780 replaced := m.Replace(tt.input)
781 // Verify the replacement happened somewhere in the structure
782 containsReplacement := false
783 var checkNode func(ast.Node)
784 checkNode = func(node ast.Node) {
785 switch n := node.(type) {
786 case *ast.Term:
787 if n.Key == "COMBINED_DET" {
788 containsReplacement = true
789 }
790 case *ast.Token:
791 if n.Wrap != nil {
792 checkNode(n.Wrap)
793 }
794 case *ast.TermGroup:
795 for _, op := range n.Operands {
796 checkNode(op)
797 }
798 case *ast.CatchallNode:
799 if n.Wrap != nil {
800 checkNode(n.Wrap)
801 }
802 for _, op := range n.Operands {
803 checkNode(op)
804 }
805 }
806 }
807 checkNode(replaced)
808 assert.True(t, containsReplacement, "Replacement should be found in the result")
809 }
810 })
811 }
812}