blob: c97238ba3d7ac398ffe33cb07360e0fe3c69e9be [file] [log] [blame]
Akron32d53de2025-05-22 13:45:32 +02001package mapper
2
3import (
4 "encoding/json"
Akron32d53de2025-05-22 13:45:32 +02005 "testing"
6
Akronfa55bb22025-05-26 15:10:42 +02007 "github.com/KorAP/KoralPipe-TermMapper/ast"
Akrona00d4752025-05-26 17:34:36 +02008 "github.com/KorAP/KoralPipe-TermMapper/config"
Akronfa55bb22025-05-26 15:10:42 +02009 "github.com/KorAP/KoralPipe-TermMapper/matcher"
Akron32d53de2025-05-22 13:45:32 +020010 "github.com/stretchr/testify/assert"
11 "github.com/stretchr/testify/require"
12)
13
14func TestMapper(t *testing.T) {
Akrona00d4752025-05-26 17:34:36 +020015 // Create test mapping list
16 mappingList := config.MappingList{
17 ID: "test-mapper",
18 FoundryA: "opennlp",
19 LayerA: "p",
20 FoundryB: "upos",
21 LayerB: "p",
22 Mappings: []config.MappingRule{
23 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
24 "[DET] <> [opennlp/p=DET]",
25 },
26 }
Akron32d53de2025-05-22 13:45:32 +020027
28 // Create a new mapper
Akrona00d4752025-05-26 17:34:36 +020029 m, err := NewMapper([]config.MappingList{mappingList})
Akron32d53de2025-05-22 13:45:32 +020030 require.NoError(t, err)
31
32 tests := []struct {
33 name string
34 mappingID string
35 opts MappingOptions
36 input string
37 expected string
38 expectError bool
39 }{
40 {
41 name: "Simple A to B mapping",
42 mappingID: "test-mapper",
43 opts: MappingOptions{
44 Direction: AtoB,
45 },
46 input: `{
47 "@type": "koral:token",
48 "wrap": {
49 "@type": "koral:term",
50 "foundry": "opennlp",
51 "key": "PIDAT",
52 "layer": "p",
53 "match": "match:eq"
54 }
55 }`,
56 expected: `{
57 "@type": "koral:token",
58 "wrap": {
59 "@type": "koral:termGroup",
60 "operands": [
61 {
62 "@type": "koral:term",
63 "foundry": "opennlp",
64 "key": "PIDAT",
65 "layer": "p",
66 "match": "match:eq"
67 },
68 {
69 "@type": "koral:term",
70 "foundry": "opennlp",
71 "key": "AdjType",
72 "layer": "p",
73 "match": "match:eq",
74 "value": "Pdt"
75 }
76 ],
77 "relation": "relation:and"
78 }
79 }`,
80 },
81 {
Akron0d9117c2025-05-27 15:20:21 +020082 name: "Simple A to B mapping with rewrites",
Akron32d53de2025-05-22 13:45:32 +020083 mappingID: "test-mapper",
84 opts: MappingOptions{
Akron0d9117c2025-05-27 15:20:21 +020085 Direction: AtoB,
86 AddRewrites: true,
Akron32d53de2025-05-22 13:45:32 +020087 },
88 input: `{
89 "@type": "koral:token",
90 "wrap": {
Akrona1a183f2025-05-26 17:47:33 +020091 "@type": "koral:term",
92 "foundry": "opennlp",
93 "key": "PIDAT",
94 "layer": "p",
95 "match": "match:eq"
Akron32d53de2025-05-22 13:45:32 +020096 }
97 }`,
98 expected: `{
99 "@type": "koral:token",
100 "wrap": {
Akron0d9117c2025-05-27 15:20:21 +0200101 "@type": "koral:termGroup",
102 "operands": [
103 {
104 "@type": "koral:term",
105 "foundry": "opennlp",
106 "key": "PIDAT",
107 "layer": "p",
108 "match": "match:eq"
109 },
110 {
111 "@type": "koral:term",
112 "foundry": "opennlp",
113 "key": "AdjType",
114 "layer": "p",
115 "match": "match:eq",
116 "value": "Pdt"
117 }
118 ],
119 "relation": "relation:and",
120 "rewrites": [
121 {
122 "@type": "koral:rewrite",
123 "editor": "termMapper",
Akron8a87d9a2025-05-27 15:30:48 +0200124 "original": {
Akron0d9117c2025-05-27 15:20:21 +0200125 "@type": "koral:term",
126 "foundry": "opennlp",
127 "key": "PIDAT",
128 "layer": "p",
129 "match": "match:eq"
130 }
131 }
132 ]
Akron32d53de2025-05-22 13:45:32 +0200133 }
134 }`,
135 },
136 {
Akron0d9117c2025-05-27 15:20:21 +0200137 name: "Mapping with foundry override and rewrites",
Akron32d53de2025-05-22 13:45:32 +0200138 mappingID: "test-mapper",
139 opts: MappingOptions{
Akron0d9117c2025-05-27 15:20:21 +0200140 Direction: AtoB,
141 FoundryB: "custom",
142 AddRewrites: true,
Akron32d53de2025-05-22 13:45:32 +0200143 },
144 input: `{
145 "@type": "koral:token",
146 "wrap": {
147 "@type": "koral:term",
148 "foundry": "opennlp",
149 "key": "PIDAT",
150 "layer": "p",
151 "match": "match:eq"
152 }
153 }`,
154 expected: `{
155 "@type": "koral:token",
156 "wrap": {
157 "@type": "koral:termGroup",
158 "operands": [
159 {
160 "@type": "koral:term",
161 "foundry": "custom",
162 "key": "PIDAT",
163 "layer": "p",
164 "match": "match:eq"
165 },
166 {
167 "@type": "koral:term",
168 "foundry": "custom",
169 "key": "AdjType",
170 "layer": "p",
171 "match": "match:eq",
172 "value": "Pdt"
173 }
174 ],
Akron0d9117c2025-05-27 15:20:21 +0200175 "relation": "relation:and",
176 "rewrites": [
177 {
178 "@type": "koral:rewrite",
179 "editor": "termMapper",
Akron8a87d9a2025-05-27 15:30:48 +0200180 "original": {
181 "@type": "koral:term",
182 "foundry": "opennlp",
183 "key": "PIDAT",
184 "layer": "p",
185 "match": "match:eq"
186 }
Akron0d9117c2025-05-27 15:20:21 +0200187 }
188 ]
Akron32d53de2025-05-22 13:45:32 +0200189 }
190 }`,
191 },
192 {
Akron0d9117c2025-05-27 15:20:21 +0200193 name: "B to A direction",
194 mappingID: "test-mapper",
195 opts: MappingOptions{
196 Direction: BtoA,
197 },
198 input: `{
199 "@type": "koral:token",
200 "wrap": {
201 "@type": "koral:term",
202 "foundry": "opennlp",
203 "key": "PIDAT",
204 "layer": "p",
205 "match": "match:eq"
206 }
207 }`,
208 expected: `{
209 "@type": "koral:token",
210 "wrap": {
211 "@type": "koral:term",
212 "foundry": "opennlp",
213 "key": "PIDAT",
214 "layer": "p",
215 "match": "match:eq"
216 }
217 }`,
218 expectError: false,
219 },
220 {
Akron32d53de2025-05-22 13:45:32 +0200221 name: "Invalid mapping ID",
222 mappingID: "nonexistent",
223 opts: MappingOptions{
224 Direction: AtoB,
225 },
226 input: `{
227 "@type": "koral:token",
228 "wrap": {
229 "@type": "koral:term",
230 "foundry": "opennlp",
231 "key": "PIDAT",
232 "layer": "p",
233 "match": "match:eq"
234 }
235 }`,
236 expectError: true,
237 },
238 {
239 name: "Invalid direction",
240 mappingID: "test-mapper",
241 opts: MappingOptions{
Akrona1a183f2025-05-26 17:47:33 +0200242 Direction: Direction(false),
Akron32d53de2025-05-22 13:45:32 +0200243 },
244 input: `{
245 "@type": "koral:token",
246 "wrap": {
247 "@type": "koral:term",
248 "foundry": "opennlp",
249 "key": "PIDAT",
250 "layer": "p",
251 "match": "match:eq"
252 }
253 }`,
Akrona1a183f2025-05-26 17:47:33 +0200254 expected: `{
255 "@type": "koral:token",
256 "wrap": {
257 "@type": "koral:term",
258 "foundry": "opennlp",
259 "key": "PIDAT",
260 "layer": "p",
261 "match": "match:eq"
262 }
263 }`,
264 expectError: false,
Akron32d53de2025-05-22 13:45:32 +0200265 },
266 }
267
268 for _, tt := range tests {
269 t.Run(tt.name, func(t *testing.T) {
270 // Parse input JSON
271 var inputData interface{}
272 err := json.Unmarshal([]byte(tt.input), &inputData)
273 require.NoError(t, err)
274
275 // Apply mappings
Akron7b4984e2025-05-26 19:12:20 +0200276 result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
Akron32d53de2025-05-22 13:45:32 +0200277 if tt.expectError {
278 assert.Error(t, err)
279 return
280 }
281 require.NoError(t, err)
282
283 // Parse expected JSON
284 var expectedData interface{}
285 err = json.Unmarshal([]byte(tt.expected), &expectedData)
286 require.NoError(t, err)
287
288 // Compare results
289 assert.Equal(t, expectedData, result)
290 })
291 }
292}
Akrond5850f82025-05-23 16:44:44 +0200293
Akroncc83eb52025-05-27 14:39:12 +0200294func TestTokenToTermGroupWithRewrites(t *testing.T) {
295 // Create test mapping list specifically for token to termGroup test
296 mappingList := config.MappingList{
297 ID: "test-token-to-termgroup",
298 FoundryA: "opennlp",
299 LayerA: "p",
300 FoundryB: "opennlp", // Keep the same foundry for both sides
301 LayerB: "p",
302 Mappings: []config.MappingRule{
303 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
304 },
305 }
306
307 // Create a new mapper
308 m, err := NewMapper([]config.MappingList{mappingList})
309 require.NoError(t, err)
310
311 input := `{
312 "@type": "koral:token",
313 "rewrites": [
314 {
315 "@type": "koral:rewrite",
316 "_comment": "This rewrite should be preserved",
317 "editor": "TestEditor",
318 "operation": "operation:test",
319 "src": "TestSource"
320 }
321 ],
322 "wrap": {
323 "@type": "koral:term",
324 "foundry": "opennlp",
325 "key": "PIDAT",
326 "layer": "p",
327 "match": "match:eq"
328 }
329 }`
330
331 expected := `{
332 "@type": "koral:token",
333 "rewrites": [
334 {
335 "@type": "koral:rewrite",
336 "_comment": "This rewrite should be preserved",
337 "editor": "TestEditor",
338 "operation": "operation:test",
339 "src": "TestSource"
340 }
341 ],
342 "wrap": {
343 "@type": "koral:termGroup",
344 "operands": [
345 {
346 "@type": "koral:term",
347 "foundry": "opennlp",
348 "key": "PIDAT",
349 "layer": "p",
350 "match": "match:eq"
351 },
352 {
353 "@type": "koral:term",
354 "foundry": "opennlp",
355 "key": "AdjType",
356 "layer": "p",
357 "match": "match:eq",
358 "value": "Pdt"
359 }
360 ],
361 "relation": "relation:and"
362 }
363 }`
364
365 // Parse input JSON
366 var inputData interface{}
367 err = json.Unmarshal([]byte(input), &inputData)
368 require.NoError(t, err)
369
370 // Apply mappings
371 result, err := m.ApplyQueryMappings("test-token-to-termgroup", MappingOptions{Direction: AtoB}, inputData)
372 require.NoError(t, err)
373
374 // Parse expected JSON
375 var expectedData interface{}
376 err = json.Unmarshal([]byte(expected), &expectedData)
377 require.NoError(t, err)
378
379 // Compare results
380 assert.Equal(t, expectedData, result)
381}
382
Akrond5850f82025-05-23 16:44:44 +0200383func TestMatchComplexPatterns(t *testing.T) {
384 tests := []struct {
385 name string
386 pattern ast.Pattern
387 replacement ast.Replacement
388 input ast.Node
389 expected ast.Node
390 }{
391 {
392 name: "Deep nested pattern with mixed operators",
393 pattern: ast.Pattern{
394 Root: &ast.TermGroup{
395 Operands: []ast.Node{
396 &ast.Term{
397 Key: "A",
398 Match: ast.MatchEqual,
399 },
400 &ast.TermGroup{
401 Operands: []ast.Node{
402 &ast.Term{
403 Key: "B",
404 Match: ast.MatchEqual,
405 },
406 &ast.TermGroup{
407 Operands: []ast.Node{
408 &ast.Term{
409 Key: "C",
410 Match: ast.MatchEqual,
411 },
412 &ast.Term{
413 Key: "D",
414 Match: ast.MatchEqual,
415 },
416 },
417 Relation: ast.AndRelation,
418 },
419 },
420 Relation: ast.OrRelation,
421 },
422 },
423 Relation: ast.AndRelation,
424 },
425 },
426 replacement: ast.Replacement{
427 Root: &ast.Term{
428 Key: "RESULT",
429 Match: ast.MatchEqual,
430 },
431 },
432 input: &ast.TermGroup{
433 Operands: []ast.Node{
434 &ast.Term{
435 Key: "A",
436 Match: ast.MatchEqual,
437 },
438 &ast.TermGroup{
439 Operands: []ast.Node{
440 &ast.Term{
441 Key: "C",
442 Match: ast.MatchEqual,
443 },
444 &ast.Term{
445 Key: "D",
446 Match: ast.MatchEqual,
447 },
448 },
449 Relation: ast.AndRelation,
450 },
451 },
452 Relation: ast.AndRelation,
453 },
454 expected: &ast.Term{
455 Key: "RESULT",
456 Match: ast.MatchEqual,
457 },
458 },
459 }
460
461 for _, tt := range tests {
462 t.Run(tt.name, func(t *testing.T) {
463 m, err := matcher.NewMatcher(tt.pattern, tt.replacement)
464 require.NoError(t, err)
465 result := m.Replace(tt.input)
466 assert.Equal(t, tt.expected, result)
467 })
468 }
469}
470
471func TestInvalidPatternReplacement(t *testing.T) {
Akrona00d4752025-05-26 17:34:36 +0200472 // Create test mapping list
473 mappingList := config.MappingList{
474 ID: "test-mapper",
475 FoundryA: "opennlp",
476 LayerA: "p",
477 FoundryB: "upos",
478 LayerB: "p",
479 Mappings: []config.MappingRule{
480 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
481 },
482 }
Akrond5850f82025-05-23 16:44:44 +0200483
484 // Create a new mapper
Akrona00d4752025-05-26 17:34:36 +0200485 m, err := NewMapper([]config.MappingList{mappingList})
Akrond5850f82025-05-23 16:44:44 +0200486 require.NoError(t, err)
487
488 tests := []struct {
489 name string
490 input string
491 expectError bool
492 errorMsg string
493 }{
494 {
495 name: "Invalid input - empty term group",
496 input: `{
497 "@type": "koral:token",
498 "wrap": {
499 "@type": "koral:termGroup",
500 "operands": [],
501 "relation": "relation:and"
502 }
503 }`,
504 expectError: true,
505 errorMsg: "failed to parse JSON into AST: error parsing wrapped node: term group must have at least one operand",
506 },
507 }
508
509 for _, tt := range tests {
510 t.Run(tt.name, func(t *testing.T) {
511 var inputData any
512 err := json.Unmarshal([]byte(tt.input), &inputData)
513 require.NoError(t, err)
514
Akron7b4984e2025-05-26 19:12:20 +0200515 result, err := m.ApplyQueryMappings("test-mapper", MappingOptions{Direction: AtoB}, inputData)
Akrond5850f82025-05-23 16:44:44 +0200516 if tt.expectError {
517 assert.Error(t, err)
518 assert.Equal(t, tt.errorMsg, err.Error())
519 assert.Nil(t, result)
520 } else {
521 assert.NoError(t, err)
522 assert.NotNil(t, result)
523 }
524 })
525 }
526}
Akron7b4984e2025-05-26 19:12:20 +0200527
528func TestQueryWrapperMappings(t *testing.T) {
529
530 mappingList := config.MappingList{
531 ID: "test-wrapper",
532 FoundryA: "opennlp",
533 LayerA: "orth",
534 FoundryB: "upos",
535 LayerB: "orth",
536 Mappings: []config.MappingRule{
537 "[opennlp/orth=Baum] <> [opennlp/orth=X]",
538 },
539 }
540
541 // Create a new mapper
542 m, err := NewMapper([]config.MappingList{mappingList})
543 require.NoError(t, err)
544
545 tests := []struct {
546 name string
547 mappingID string
548 opts MappingOptions
549 input string
550 expected string
551 expectError bool
552 }{
553 {
Akroncc83eb52025-05-27 14:39:12 +0200554 name: "Query wrapper case with rewrites preservation",
Akron7b4984e2025-05-26 19:12:20 +0200555 mappingID: "test-wrapper",
556 opts: MappingOptions{
557 Direction: AtoB,
558 },
559 input: `{
560 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
561 "collection": {
562 "@type": "koral:doc",
563 "key": "availability",
564 "match": "match:eq",
Akron7b4984e2025-05-26 19:12:20 +0200565 "type": "type:regex",
566 "value": "CC.*"
567 },
568 "query": {
569 "@type": "koral:token",
Akroncc83eb52025-05-27 14:39:12 +0200570 "rewrites": [
571 {
572 "@type": "koral:rewrite",
573 "_comment": "Original rewrite that should be preserved",
574 "editor": "Original",
575 "operation": "operation:original",
576 "src": "Original"
577 }
578 ],
Akron7b4984e2025-05-26 19:12:20 +0200579 "wrap": {
580 "@type": "koral:term",
581 "foundry": "opennlp",
582 "key": "Baum",
583 "layer": "orth",
584 "match": "match:eq"
585 }
586 }
587 }`,
588 expected: `{
589 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
590 "collection": {
591 "@type": "koral:doc",
592 "key": "availability",
593 "match": "match:eq",
Akron7b4984e2025-05-26 19:12:20 +0200594 "type": "type:regex",
595 "value": "CC.*"
596 },
597 "query": {
598 "@type": "koral:token",
Akroncc83eb52025-05-27 14:39:12 +0200599 "rewrites": [
600 {
601 "@type": "koral:rewrite",
602 "_comment": "Original rewrite that should be preserved",
603 "editor": "Original",
604 "operation": "operation:original",
605 "src": "Original"
606 }
607 ],
Akron7b4984e2025-05-26 19:12:20 +0200608 "wrap": {
609 "@type": "koral:term",
610 "foundry": "opennlp",
611 "key": "X",
612 "layer": "orth",
613 "match": "match:eq"
614 }
615 }
616 }`,
617 },
618 {
619 name: "Empty query field",
620 mappingID: "test-wrapper",
621 opts: MappingOptions{
622 Direction: AtoB,
623 },
624 input: `{
625 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
626 "query": null
627 }`,
628 expected: `{
629 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
630 "query": null
631 }`,
632 },
633 {
634 name: "Missing query field",
635 mappingID: "test-wrapper",
636 opts: MappingOptions{
637 Direction: AtoB,
638 },
639 input: `{
640 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
641 "collection": {
642 "@type": "koral:doc"
643 }
644 }`,
645 expected: `{
646 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
647 "collection": {
648 "@type": "koral:doc"
649 }
650 }`,
651 },
652 {
653 name: "Query field with non-object value",
654 mappingID: "test-wrapper",
655 opts: MappingOptions{
656 Direction: AtoB,
657 },
658 input: `{
659 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
660 "query": "invalid"
661 }`,
662 expected: `{
663 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
664 "query": "invalid"
665 }`,
666 },
Akroncc83eb52025-05-27 14:39:12 +0200667 {
668 name: "Query with rewrites in nested token",
669 mappingID: "test-wrapper",
670 opts: MappingOptions{
671 Direction: AtoB,
672 },
673 input: `{
674 "@type": "koral:token",
675 "rewrites": [
676 {
677 "@type": "koral:rewrite",
678 "_comment": "Nested rewrite that should be preserved",
679 "editor": "Nested",
680 "operation": "operation:nested",
681 "src": "Nested"
682 }
683 ],
684 "wrap": {
685 "@type": "koral:term",
686 "foundry": "opennlp",
687 "key": "Baum",
688 "layer": "orth",
689 "match": "match:eq"
690 }
691 }`,
692 expected: `{
693 "@type": "koral:token",
694 "rewrites": [
695 {
696 "@type": "koral:rewrite",
697 "_comment": "Nested rewrite that should be preserved",
698 "editor": "Nested",
699 "operation": "operation:nested",
700 "src": "Nested"
701 }
702 ],
703 "wrap": {
704 "@type": "koral:term",
705 "foundry": "opennlp",
706 "key": "X",
707 "layer": "orth",
708 "match": "match:eq"
709 }
710 }`,
711 },
Akron7b4984e2025-05-26 19:12:20 +0200712 }
713
714 for _, tt := range tests {
715 t.Run(tt.name, func(t *testing.T) {
716 // Parse input JSON
717 var inputData interface{}
718 err := json.Unmarshal([]byte(tt.input), &inputData)
719 require.NoError(t, err)
720
721 // Apply mappings
722 result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
723 if tt.expectError {
724 assert.Error(t, err)
725 return
726 }
727 require.NoError(t, err)
728
729 // Parse expected JSON
730 var expectedData interface{}
731 err = json.Unmarshal([]byte(tt.expected), &expectedData)
732 require.NoError(t, err)
733
734 // Compare results
735 assert.Equal(t, expectedData, result)
736 })
737 }
738}