blob: adc87841c4e1d7f0e85107a99e972c6da5909157 [file] [log] [blame]
Akronb7e1f352025-05-16 15:45:23 +02001package matcher
2
// Tests for the matcher: a Matcher is built from a pattern and a replacement,
// Match reports whether a node in the AST matches the pattern, and Replace rewrites matching nodes.
5
Akronb7e1f352025-05-16 15:45:23 +02006import (
Akronbf5149c2025-05-20 15:53:41 +02007 "encoding/json"
Akronb7e1f352025-05-16 15:45:23 +02008 "testing"
9
10 "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
11 "github.com/stretchr/testify/assert"
12)
13
14func TestMatchSimplePattern(t *testing.T) {
15 // Create a simple pattern: match a term with DET
16 pattern := ast.Pattern{
17 Root: &ast.Term{
18 Foundry: "opennlp",
19 Key: "DET",
20 Layer: "p",
21 Match: ast.MatchEqual,
22 },
23 }
24
25 // Create a simple replacement
26 replacement := ast.Replacement{
27 Root: &ast.Term{
28 Foundry: "opennlp",
29 Key: "COMBINED_DET",
30 Layer: "p",
31 Match: ast.MatchEqual,
32 },
33 }
34
35 m := NewMatcher(pattern, replacement)
36
37 tests := []struct {
38 name string
39 input ast.Node
40 expected bool
41 }{
42 {
43 name: "Exact match",
44 input: &ast.Term{
45 Foundry: "opennlp",
46 Key: "DET",
47 Layer: "p",
48 Match: ast.MatchEqual,
49 },
50 expected: true,
51 },
52 {
53 name: "Different key",
54 input: &ast.Term{
55 Foundry: "opennlp",
56 Key: "NOUN",
57 Layer: "p",
58 Match: ast.MatchEqual,
59 },
60 expected: false,
61 },
62 {
63 name: "Different foundry",
64 input: &ast.Term{
65 Foundry: "different",
66 Key: "DET",
67 Layer: "p",
68 Match: ast.MatchEqual,
69 },
70 expected: false,
71 },
72 {
73 name: "Different match type",
74 input: &ast.Term{
75 Foundry: "opennlp",
76 Key: "DET",
77 Layer: "p",
78 Match: ast.MatchNotEqual,
79 },
80 expected: false,
81 },
82 {
Akronbf5149c2025-05-20 15:53:41 +020083 name: "Nested node",
Akronb7e1f352025-05-16 15:45:23 +020084 input: &ast.Token{
85 Wrap: &ast.Term{
86 Foundry: "opennlp",
87 Key: "DET",
88 Layer: "p",
89 Match: ast.MatchEqual,
90 },
91 },
Akronbf5149c2025-05-20 15:53:41 +020092 expected: true,
Akronb7e1f352025-05-16 15:45:23 +020093 },
94 }
95
96 for _, tt := range tests {
97 t.Run(tt.name, func(t *testing.T) {
98 result := m.Match(tt.input)
99 assert.Equal(t, tt.expected, result)
100 })
101 }
102}
103
104func TestMatchComplexPattern(t *testing.T) {
105 // Create a complex pattern: DET AND (AdjType=Pdt OR PronType=Ind)
106 pattern := ast.Pattern{
107 Root: &ast.Token{
108 Wrap: &ast.TermGroup{
109 Operands: []ast.Node{
110 &ast.Term{
111 Foundry: "opennlp",
112 Key: "DET",
113 Layer: "p",
114 Match: ast.MatchEqual,
115 },
116 &ast.TermGroup{
117 Operands: []ast.Node{
118 &ast.Term{
119 Foundry: "opennlp",
120 Key: "AdjType",
121 Layer: "m",
122 Match: ast.MatchEqual,
123 Value: "Pdt",
124 },
125 &ast.Term{
126 Foundry: "opennlp",
127 Key: "PronType",
128 Layer: "m",
129 Match: ast.MatchEqual,
130 Value: "Ind",
131 },
132 },
133 Relation: ast.OrRelation,
134 },
135 },
136 Relation: ast.AndRelation,
137 },
138 },
139 }
140
141 replacement := ast.Replacement{
142 Root: &ast.Token{
143 Wrap: &ast.Term{
144 Foundry: "opennlp",
145 Key: "COMBINED_DET",
146 Layer: "p",
147 Match: ast.MatchEqual,
148 },
149 },
150 }
151
152 m := NewMatcher(pattern, replacement)
153
154 tests := []struct {
155 name string
156 input ast.Node
157 expected bool
158 }{
159 {
160 name: "Match with AdjType=Pdt",
161 input: &ast.Token{
162 Wrap: &ast.TermGroup{
163 Operands: []ast.Node{
164 &ast.Term{
165 Foundry: "opennlp",
166 Key: "DET",
167 Layer: "p",
168 Match: ast.MatchEqual,
169 },
170 &ast.Term{
171 Foundry: "opennlp",
172 Key: "AdjType",
173 Layer: "m",
174 Match: ast.MatchEqual,
175 Value: "Pdt",
176 },
177 },
178 Relation: ast.AndRelation,
179 },
180 },
181 expected: true,
182 },
183 {
184 name: "Match with PronType=Ind",
185 input: &ast.Token{
186 Wrap: &ast.TermGroup{
187 Operands: []ast.Node{
188 &ast.Term{
189 Foundry: "opennlp",
190 Key: "DET",
191 Layer: "p",
192 Match: ast.MatchEqual,
193 },
194 &ast.Term{
195 Foundry: "opennlp",
196 Key: "PronType",
197 Layer: "m",
198 Match: ast.MatchEqual,
199 Value: "Ind",
200 },
201 },
202 Relation: ast.AndRelation,
203 },
204 },
205 expected: true,
206 },
207 {
208 name: "No match - missing DET",
209 input: &ast.Token{
210 Wrap: &ast.TermGroup{
211 Operands: []ast.Node{
212 &ast.Term{
213 Foundry: "opennlp",
214 Key: "NOUN",
215 Layer: "p",
216 Match: ast.MatchEqual,
217 },
218 &ast.Term{
219 Foundry: "opennlp",
220 Key: "AdjType",
221 Layer: "m",
222 Match: ast.MatchEqual,
223 Value: "Pdt",
224 },
225 },
226 Relation: ast.AndRelation,
227 },
228 },
229 expected: false,
230 },
231 {
232 name: "No match - wrong value",
233 input: &ast.Token{
234 Wrap: &ast.TermGroup{
235 Operands: []ast.Node{
236 &ast.Term{
237 Foundry: "opennlp",
238 Key: "DET",
239 Layer: "p",
240 Match: ast.MatchEqual,
241 },
242 &ast.Term{
243 Foundry: "opennlp",
244 Key: "AdjType",
245 Layer: "m",
246 Match: ast.MatchEqual,
247 Value: "Wrong",
248 },
249 },
250 Relation: ast.AndRelation,
251 },
252 },
253 expected: false,
254 },
255 }
256
257 for _, tt := range tests {
258 t.Run(tt.name, func(t *testing.T) {
259 result := m.Match(tt.input)
260 assert.Equal(t, tt.expected, result)
261 })
262 }
263}
264
265func TestReplace(t *testing.T) {
266 // Create pattern and replacement
267 pattern := ast.Pattern{
268 Root: &ast.TermGroup{
269 Operands: []ast.Node{
270 &ast.Term{
271 Foundry: "opennlp",
272 Key: "DET",
273 Layer: "p",
274 Match: ast.MatchEqual,
275 },
276 &ast.Term{
277 Foundry: "opennlp",
278 Key: "AdjType",
279 Layer: "m",
280 Match: ast.MatchEqual,
281 Value: "Pdt",
282 },
283 },
284 Relation: ast.AndRelation,
285 },
286 }
287
288 replacement := ast.Replacement{
289 Root: &ast.Term{
290 Foundry: "opennlp",
291 Key: "COMBINED_DET",
292 Layer: "p",
293 Match: ast.MatchEqual,
294 },
295 }
296
297 m := NewMatcher(pattern, replacement)
298
299 tests := []struct {
300 name string
301 input ast.Node
302 expected ast.Node
303 }{
304 {
305 name: "Replace matching pattern",
306 input: &ast.TermGroup{
307 Operands: []ast.Node{
308 &ast.Term{
309 Foundry: "opennlp",
310 Key: "DET",
311 Layer: "p",
312 Match: ast.MatchEqual,
313 },
314 &ast.Term{
315 Foundry: "opennlp",
316 Key: "AdjType",
317 Layer: "m",
318 Match: ast.MatchEqual,
319 Value: "Pdt",
320 },
321 },
322 Relation: ast.AndRelation,
323 },
324 expected: &ast.Term{
325 Foundry: "opennlp",
326 Key: "COMBINED_DET",
327 Layer: "p",
328 Match: ast.MatchEqual,
329 },
330 },
331 {
332 name: "No replacement for non-matching pattern",
333 input: &ast.TermGroup{
334 Operands: []ast.Node{
335 &ast.Term{
336 Foundry: "opennlp",
337 Key: "NOUN",
338 Layer: "p",
339 Match: ast.MatchEqual,
340 },
341 &ast.Term{
342 Foundry: "opennlp",
343 Key: "AdjType",
344 Layer: "m",
345 Match: ast.MatchEqual,
346 Value: "Pdt",
347 },
348 },
349 Relation: ast.AndRelation,
350 },
351 expected: &ast.TermGroup{
352 Operands: []ast.Node{
353 &ast.Term{
354 Foundry: "opennlp",
355 Key: "NOUN",
356 Layer: "p",
357 Match: ast.MatchEqual,
358 },
359 &ast.Term{
360 Foundry: "opennlp",
361 Key: "AdjType",
362 Layer: "m",
363 Match: ast.MatchEqual,
364 Value: "Pdt",
365 },
366 },
367 Relation: ast.AndRelation,
368 },
369 },
370 {
371 name: "Replace in nested structure",
372 input: &ast.Token{
373 Wrap: &ast.TermGroup{
374 Operands: []ast.Node{
375 &ast.TermGroup{
376 Operands: []ast.Node{
377 &ast.Term{
378 Foundry: "opennlp",
379 Key: "DET",
380 Layer: "p",
381 Match: ast.MatchEqual,
382 },
383 &ast.Term{
384 Foundry: "opennlp",
385 Key: "AdjType",
386 Layer: "m",
387 Match: ast.MatchEqual,
388 Value: "Pdt",
389 },
390 },
391 Relation: ast.AndRelation,
392 },
393 &ast.Term{
394 Foundry: "opennlp",
395 Key: "NOUN",
396 Layer: "p",
397 Match: ast.MatchEqual,
398 },
399 },
400 Relation: ast.AndRelation,
401 },
402 },
403 expected: &ast.Token{
404 Wrap: &ast.TermGroup{
405 Operands: []ast.Node{
406 &ast.Term{
407 Foundry: "opennlp",
408 Key: "COMBINED_DET",
409 Layer: "p",
410 Match: ast.MatchEqual,
411 },
412 &ast.Term{
413 Foundry: "opennlp",
414 Key: "NOUN",
415 Layer: "p",
416 Match: ast.MatchEqual,
417 },
418 },
419 Relation: ast.AndRelation,
420 },
421 },
422 },
423 }
424
425 for _, tt := range tests {
426 t.Run(tt.name, func(t *testing.T) {
427 result := m.Replace(tt.input)
428 assert.Equal(t, tt.expected, result)
429 })
430 }
431}
432
433func TestMatchNodeOrder(t *testing.T) {
434 // Test that operands can match in any order
435 pattern := ast.Pattern{
436 Root: &ast.TermGroup{
437 Operands: []ast.Node{
438 &ast.Term{
439 Foundry: "opennlp",
440 Key: "DET",
441 Layer: "p",
442 Match: ast.MatchEqual,
443 },
444 &ast.Term{
445 Foundry: "opennlp",
446 Key: "AdjType",
447 Layer: "m",
448 Match: ast.MatchEqual,
449 Value: "Pdt",
450 },
451 },
452 Relation: ast.AndRelation,
453 },
454 }
455
456 replacement := ast.Replacement{
457 Root: &ast.Term{
458 Foundry: "opennlp",
459 Key: "COMBINED_DET",
460 Layer: "p",
461 Match: ast.MatchEqual,
462 },
463 }
464
465 m := NewMatcher(pattern, replacement)
466
467 // Test with operands in different orders
468 input1 := &ast.TermGroup{
469 Operands: []ast.Node{
470 &ast.Term{
471 Foundry: "opennlp",
472 Key: "DET",
473 Layer: "p",
474 Match: ast.MatchEqual,
475 },
476 &ast.Term{
477 Foundry: "opennlp",
478 Key: "AdjType",
479 Layer: "m",
480 Match: ast.MatchEqual,
481 Value: "Pdt",
482 },
483 },
484 Relation: ast.AndRelation,
485 }
486
487 input2 := &ast.TermGroup{
488 Operands: []ast.Node{
489 &ast.Term{
490 Foundry: "opennlp",
491 Key: "AdjType",
492 Layer: "m",
493 Match: ast.MatchEqual,
494 Value: "Pdt",
495 },
496 &ast.Term{
497 Foundry: "opennlp",
498 Key: "DET",
499 Layer: "p",
500 Match: ast.MatchEqual,
501 },
502 },
503 Relation: ast.AndRelation,
504 }
505
506 assert.True(t, m.Match(input1), "Should match with original order")
507 assert.True(t, m.Match(input2), "Should match with reversed order")
508}
Akronbf5149c2025-05-20 15:53:41 +0200509
510func TestMatchWithUnknownNodes(t *testing.T) {
511 // Create a pattern that looks for a term with DET inside any structure
512 pattern := ast.Pattern{
513 Root: &ast.Term{
514 Foundry: "opennlp",
515 Key: "DET",
516 Layer: "p",
517 Match: ast.MatchEqual,
518 },
519 }
520
521 replacement := ast.Replacement{
522 Root: &ast.Term{
523 Foundry: "opennlp",
524 Key: "COMBINED_DET",
525 Layer: "p",
526 Match: ast.MatchEqual,
527 },
528 }
529
530 m := NewMatcher(pattern, replacement)
531
532 tests := []struct {
533 name string
534 input ast.Node
535 expected bool
536 }{
537 {
538 name: "Match term inside unknown node with wrap",
539 input: &ast.CatchallNode{
540 NodeType: "koral:custom",
541 RawContent: json.RawMessage(`{
542 "@type": "koral:custom",
543 "customField": "value"
544 }`),
545 Wrap: &ast.Term{
546 Foundry: "opennlp",
547 Key: "DET",
548 Layer: "p",
549 Match: ast.MatchEqual,
550 },
551 },
552 expected: true,
553 },
554 {
555 name: "Match term inside unknown node's operands",
556 input: &ast.CatchallNode{
557 NodeType: "koral:custom",
558 RawContent: json.RawMessage(`{
559 "@type": "koral:custom",
560 "customField": "value"
561 }`),
562 Operands: []ast.Node{
563 &ast.Term{
564 Foundry: "opennlp",
565 Key: "DET",
566 Layer: "p",
567 Match: ast.MatchEqual,
568 },
569 },
570 },
571 expected: true,
572 },
573 {
574 name: "No match in unknown node with different term",
575 input: &ast.CatchallNode{
576 NodeType: "koral:custom",
577 RawContent: json.RawMessage(`{
578 "@type": "koral:custom",
579 "customField": "value"
580 }`),
581 Wrap: &ast.Term{
582 Foundry: "opennlp",
583 Key: "NOUN",
584 Layer: "p",
585 Match: ast.MatchEqual,
586 },
587 },
588 expected: false,
589 },
590 {
591 name: "Match in deeply nested unknown nodes",
592 input: &ast.CatchallNode{
593 NodeType: "koral:outer",
594 RawContent: json.RawMessage(`{
595 "@type": "koral:outer",
596 "outerField": "value"
597 }`),
598 Wrap: &ast.CatchallNode{
599 NodeType: "koral:inner",
600 RawContent: json.RawMessage(`{
601 "@type": "koral:inner",
602 "innerField": "value"
603 }`),
604 Wrap: &ast.Term{
605 Foundry: "opennlp",
606 Key: "DET",
607 Layer: "p",
608 Match: ast.MatchEqual,
609 },
610 },
611 },
612 expected: true,
613 },
614 {
615 name: "Match in mixed known and unknown nodes",
616 input: &ast.Token{
617 Wrap: &ast.CatchallNode{
618 NodeType: "koral:custom",
619 RawContent: json.RawMessage(`{
620 "@type": "koral:custom",
621 "customField": "value"
622 }`),
623 Operands: []ast.Node{
624 &ast.TermGroup{
625 Operands: []ast.Node{
626 &ast.Term{
627 Foundry: "opennlp",
628 Key: "DET",
629 Layer: "p",
630 Match: ast.MatchEqual,
631 },
632 },
633 Relation: ast.AndRelation,
634 },
635 },
636 },
637 },
638 expected: true,
639 },
640 }
641
642 for _, tt := range tests {
643 t.Run(tt.name, func(t *testing.T) {
644 result := m.Match(tt.input)
645 assert.Equal(t, tt.expected, result)
646
647 if tt.expected {
648 // Test replacement when there's a match
649 replaced := m.Replace(tt.input)
650 // Verify the replacement happened somewhere in the structure
651 containsReplacement := false
652 var checkNode func(ast.Node)
653 checkNode = func(node ast.Node) {
654 switch n := node.(type) {
655 case *ast.Term:
656 if n.Key == "COMBINED_DET" {
657 containsReplacement = true
658 }
659 case *ast.Token:
660 if n.Wrap != nil {
661 checkNode(n.Wrap)
662 }
663 case *ast.TermGroup:
664 for _, op := range n.Operands {
665 checkNode(op)
666 }
667 case *ast.CatchallNode:
668 if n.Wrap != nil {
669 checkNode(n.Wrap)
670 }
671 for _, op := range n.Operands {
672 checkNode(op)
673 }
674 }
675 }
676 checkNode(replaced)
677 assert.True(t, containsReplacement, "Replacement should be found in the result")
678 }
679 })
680 }
681}