blob: c41f2b9b231a0ae168f02e9731a716fb090a699c [file] [log] [blame]
Akron32d53de2025-05-22 13:45:32 +02001package mapper
2
3import (
4 "encoding/json"
Akron32d53de2025-05-22 13:45:32 +02005 "testing"
6
Akronfa55bb22025-05-26 15:10:42 +02007 "github.com/KorAP/KoralPipe-TermMapper/ast"
Akrona00d4752025-05-26 17:34:36 +02008 "github.com/KorAP/KoralPipe-TermMapper/config"
Akronfa55bb22025-05-26 15:10:42 +02009 "github.com/KorAP/KoralPipe-TermMapper/matcher"
Akron32d53de2025-05-22 13:45:32 +020010 "github.com/stretchr/testify/assert"
11 "github.com/stretchr/testify/require"
12)
13
14func TestMapper(t *testing.T) {
Akrona00d4752025-05-26 17:34:36 +020015 // Create test mapping list
16 mappingList := config.MappingList{
17 ID: "test-mapper",
18 FoundryA: "opennlp",
19 LayerA: "p",
20 FoundryB: "upos",
21 LayerB: "p",
22 Mappings: []config.MappingRule{
23 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
24 "[DET] <> [opennlp/p=DET]",
25 },
26 }
Akron32d53de2025-05-22 13:45:32 +020027
28 // Create a new mapper
Akrona00d4752025-05-26 17:34:36 +020029 m, err := NewMapper([]config.MappingList{mappingList})
Akron32d53de2025-05-22 13:45:32 +020030 require.NoError(t, err)
31
32 tests := []struct {
33 name string
34 mappingID string
35 opts MappingOptions
36 input string
37 expected string
38 expectError bool
39 }{
40 {
41 name: "Simple A to B mapping",
42 mappingID: "test-mapper",
43 opts: MappingOptions{
44 Direction: AtoB,
45 },
46 input: `{
47 "@type": "koral:token",
48 "wrap": {
49 "@type": "koral:term",
50 "foundry": "opennlp",
51 "key": "PIDAT",
52 "layer": "p",
53 "match": "match:eq"
54 }
55 }`,
56 expected: `{
57 "@type": "koral:token",
58 "wrap": {
59 "@type": "koral:termGroup",
60 "operands": [
61 {
62 "@type": "koral:term",
63 "foundry": "opennlp",
64 "key": "PIDAT",
65 "layer": "p",
66 "match": "match:eq"
67 },
68 {
69 "@type": "koral:term",
70 "foundry": "opennlp",
71 "key": "AdjType",
72 "layer": "p",
73 "match": "match:eq",
74 "value": "Pdt"
75 }
76 ],
77 "relation": "relation:and"
78 }
79 }`,
80 },
81 {
Akron0d9117c2025-05-27 15:20:21 +020082 name: "Simple A to B mapping with rewrites",
Akron32d53de2025-05-22 13:45:32 +020083 mappingID: "test-mapper",
84 opts: MappingOptions{
Akron0d9117c2025-05-27 15:20:21 +020085 Direction: AtoB,
86 AddRewrites: true,
Akron32d53de2025-05-22 13:45:32 +020087 },
88 input: `{
89 "@type": "koral:token",
90 "wrap": {
Akrona1a183f2025-05-26 17:47:33 +020091 "@type": "koral:term",
92 "foundry": "opennlp",
93 "key": "PIDAT",
94 "layer": "p",
95 "match": "match:eq"
Akron32d53de2025-05-22 13:45:32 +020096 }
97 }`,
98 expected: `{
99 "@type": "koral:token",
100 "wrap": {
Akron0d9117c2025-05-27 15:20:21 +0200101 "@type": "koral:termGroup",
102 "operands": [
103 {
104 "@type": "koral:term",
105 "foundry": "opennlp",
106 "key": "PIDAT",
107 "layer": "p",
108 "match": "match:eq"
109 },
110 {
111 "@type": "koral:term",
112 "foundry": "opennlp",
113 "key": "AdjType",
114 "layer": "p",
115 "match": "match:eq",
116 "value": "Pdt"
117 }
118 ],
119 "relation": "relation:and",
120 "rewrites": [
121 {
122 "@type": "koral:rewrite",
123 "editor": "termMapper",
124 "src": {
125 "@type": "koral:term",
126 "foundry": "opennlp",
127 "key": "PIDAT",
128 "layer": "p",
129 "match": "match:eq"
130 }
131 }
132 ]
Akron32d53de2025-05-22 13:45:32 +0200133 }
134 }`,
135 },
136 {
Akron0d9117c2025-05-27 15:20:21 +0200137 name: "Mapping with foundry override and rewrites",
Akron32d53de2025-05-22 13:45:32 +0200138 mappingID: "test-mapper",
139 opts: MappingOptions{
Akron0d9117c2025-05-27 15:20:21 +0200140 Direction: AtoB,
141 FoundryB: "custom",
142 AddRewrites: true,
Akron32d53de2025-05-22 13:45:32 +0200143 },
144 input: `{
145 "@type": "koral:token",
146 "wrap": {
147 "@type": "koral:term",
148 "foundry": "opennlp",
149 "key": "PIDAT",
150 "layer": "p",
151 "match": "match:eq"
152 }
153 }`,
154 expected: `{
155 "@type": "koral:token",
156 "wrap": {
157 "@type": "koral:termGroup",
158 "operands": [
159 {
160 "@type": "koral:term",
161 "foundry": "custom",
162 "key": "PIDAT",
163 "layer": "p",
164 "match": "match:eq"
165 },
166 {
167 "@type": "koral:term",
168 "foundry": "custom",
169 "key": "AdjType",
170 "layer": "p",
171 "match": "match:eq",
172 "value": "Pdt"
173 }
174 ],
Akron0d9117c2025-05-27 15:20:21 +0200175 "relation": "relation:and",
176 "rewrites": [
177 {
178 "@type": "koral:rewrite",
179 "editor": "termMapper",
180 "scope": "foundry",
181 "src": "opennlp"
182 }
183 ]
Akron32d53de2025-05-22 13:45:32 +0200184 }
185 }`,
186 },
187 {
Akron0d9117c2025-05-27 15:20:21 +0200188 name: "B to A direction",
189 mappingID: "test-mapper",
190 opts: MappingOptions{
191 Direction: BtoA,
192 },
193 input: `{
194 "@type": "koral:token",
195 "wrap": {
196 "@type": "koral:term",
197 "foundry": "opennlp",
198 "key": "PIDAT",
199 "layer": "p",
200 "match": "match:eq"
201 }
202 }`,
203 expected: `{
204 "@type": "koral:token",
205 "wrap": {
206 "@type": "koral:term",
207 "foundry": "opennlp",
208 "key": "PIDAT",
209 "layer": "p",
210 "match": "match:eq"
211 }
212 }`,
213 expectError: false,
214 },
215 {
Akron32d53de2025-05-22 13:45:32 +0200216 name: "Invalid mapping ID",
217 mappingID: "nonexistent",
218 opts: MappingOptions{
219 Direction: AtoB,
220 },
221 input: `{
222 "@type": "koral:token",
223 "wrap": {
224 "@type": "koral:term",
225 "foundry": "opennlp",
226 "key": "PIDAT",
227 "layer": "p",
228 "match": "match:eq"
229 }
230 }`,
231 expectError: true,
232 },
233 {
234 name: "Invalid direction",
235 mappingID: "test-mapper",
236 opts: MappingOptions{
Akrona1a183f2025-05-26 17:47:33 +0200237 Direction: Direction(false),
Akron32d53de2025-05-22 13:45:32 +0200238 },
239 input: `{
240 "@type": "koral:token",
241 "wrap": {
242 "@type": "koral:term",
243 "foundry": "opennlp",
244 "key": "PIDAT",
245 "layer": "p",
246 "match": "match:eq"
247 }
248 }`,
Akrona1a183f2025-05-26 17:47:33 +0200249 expected: `{
250 "@type": "koral:token",
251 "wrap": {
252 "@type": "koral:term",
253 "foundry": "opennlp",
254 "key": "PIDAT",
255 "layer": "p",
256 "match": "match:eq"
257 }
258 }`,
259 expectError: false,
Akron32d53de2025-05-22 13:45:32 +0200260 },
261 }
262
263 for _, tt := range tests {
264 t.Run(tt.name, func(t *testing.T) {
265 // Parse input JSON
266 var inputData interface{}
267 err := json.Unmarshal([]byte(tt.input), &inputData)
268 require.NoError(t, err)
269
270 // Apply mappings
Akron7b4984e2025-05-26 19:12:20 +0200271 result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
Akron32d53de2025-05-22 13:45:32 +0200272 if tt.expectError {
273 assert.Error(t, err)
274 return
275 }
276 require.NoError(t, err)
277
278 // Parse expected JSON
279 var expectedData interface{}
280 err = json.Unmarshal([]byte(tt.expected), &expectedData)
281 require.NoError(t, err)
282
283 // Compare results
284 assert.Equal(t, expectedData, result)
285 })
286 }
287}
Akrond5850f82025-05-23 16:44:44 +0200288
Akroncc83eb52025-05-27 14:39:12 +0200289func TestTokenToTermGroupWithRewrites(t *testing.T) {
290 // Create test mapping list specifically for token to termGroup test
291 mappingList := config.MappingList{
292 ID: "test-token-to-termgroup",
293 FoundryA: "opennlp",
294 LayerA: "p",
295 FoundryB: "opennlp", // Keep the same foundry for both sides
296 LayerB: "p",
297 Mappings: []config.MappingRule{
298 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
299 },
300 }
301
302 // Create a new mapper
303 m, err := NewMapper([]config.MappingList{mappingList})
304 require.NoError(t, err)
305
306 input := `{
307 "@type": "koral:token",
308 "rewrites": [
309 {
310 "@type": "koral:rewrite",
311 "_comment": "This rewrite should be preserved",
312 "editor": "TestEditor",
313 "operation": "operation:test",
314 "src": "TestSource"
315 }
316 ],
317 "wrap": {
318 "@type": "koral:term",
319 "foundry": "opennlp",
320 "key": "PIDAT",
321 "layer": "p",
322 "match": "match:eq"
323 }
324 }`
325
326 expected := `{
327 "@type": "koral:token",
328 "rewrites": [
329 {
330 "@type": "koral:rewrite",
331 "_comment": "This rewrite should be preserved",
332 "editor": "TestEditor",
333 "operation": "operation:test",
334 "src": "TestSource"
335 }
336 ],
337 "wrap": {
338 "@type": "koral:termGroup",
339 "operands": [
340 {
341 "@type": "koral:term",
342 "foundry": "opennlp",
343 "key": "PIDAT",
344 "layer": "p",
345 "match": "match:eq"
346 },
347 {
348 "@type": "koral:term",
349 "foundry": "opennlp",
350 "key": "AdjType",
351 "layer": "p",
352 "match": "match:eq",
353 "value": "Pdt"
354 }
355 ],
356 "relation": "relation:and"
357 }
358 }`
359
360 // Parse input JSON
361 var inputData interface{}
362 err = json.Unmarshal([]byte(input), &inputData)
363 require.NoError(t, err)
364
365 // Apply mappings
366 result, err := m.ApplyQueryMappings("test-token-to-termgroup", MappingOptions{Direction: AtoB}, inputData)
367 require.NoError(t, err)
368
369 // Parse expected JSON
370 var expectedData interface{}
371 err = json.Unmarshal([]byte(expected), &expectedData)
372 require.NoError(t, err)
373
374 // Compare results
375 assert.Equal(t, expectedData, result)
376}
377
Akrond5850f82025-05-23 16:44:44 +0200378func TestMatchComplexPatterns(t *testing.T) {
379 tests := []struct {
380 name string
381 pattern ast.Pattern
382 replacement ast.Replacement
383 input ast.Node
384 expected ast.Node
385 }{
386 {
387 name: "Deep nested pattern with mixed operators",
388 pattern: ast.Pattern{
389 Root: &ast.TermGroup{
390 Operands: []ast.Node{
391 &ast.Term{
392 Key: "A",
393 Match: ast.MatchEqual,
394 },
395 &ast.TermGroup{
396 Operands: []ast.Node{
397 &ast.Term{
398 Key: "B",
399 Match: ast.MatchEqual,
400 },
401 &ast.TermGroup{
402 Operands: []ast.Node{
403 &ast.Term{
404 Key: "C",
405 Match: ast.MatchEqual,
406 },
407 &ast.Term{
408 Key: "D",
409 Match: ast.MatchEqual,
410 },
411 },
412 Relation: ast.AndRelation,
413 },
414 },
415 Relation: ast.OrRelation,
416 },
417 },
418 Relation: ast.AndRelation,
419 },
420 },
421 replacement: ast.Replacement{
422 Root: &ast.Term{
423 Key: "RESULT",
424 Match: ast.MatchEqual,
425 },
426 },
427 input: &ast.TermGroup{
428 Operands: []ast.Node{
429 &ast.Term{
430 Key: "A",
431 Match: ast.MatchEqual,
432 },
433 &ast.TermGroup{
434 Operands: []ast.Node{
435 &ast.Term{
436 Key: "C",
437 Match: ast.MatchEqual,
438 },
439 &ast.Term{
440 Key: "D",
441 Match: ast.MatchEqual,
442 },
443 },
444 Relation: ast.AndRelation,
445 },
446 },
447 Relation: ast.AndRelation,
448 },
449 expected: &ast.Term{
450 Key: "RESULT",
451 Match: ast.MatchEqual,
452 },
453 },
454 }
455
456 for _, tt := range tests {
457 t.Run(tt.name, func(t *testing.T) {
458 m, err := matcher.NewMatcher(tt.pattern, tt.replacement)
459 require.NoError(t, err)
460 result := m.Replace(tt.input)
461 assert.Equal(t, tt.expected, result)
462 })
463 }
464}
465
466func TestInvalidPatternReplacement(t *testing.T) {
Akrona00d4752025-05-26 17:34:36 +0200467 // Create test mapping list
468 mappingList := config.MappingList{
469 ID: "test-mapper",
470 FoundryA: "opennlp",
471 LayerA: "p",
472 FoundryB: "upos",
473 LayerB: "p",
474 Mappings: []config.MappingRule{
475 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
476 },
477 }
Akrond5850f82025-05-23 16:44:44 +0200478
479 // Create a new mapper
Akrona00d4752025-05-26 17:34:36 +0200480 m, err := NewMapper([]config.MappingList{mappingList})
Akrond5850f82025-05-23 16:44:44 +0200481 require.NoError(t, err)
482
483 tests := []struct {
484 name string
485 input string
486 expectError bool
487 errorMsg string
488 }{
489 {
490 name: "Invalid input - empty term group",
491 input: `{
492 "@type": "koral:token",
493 "wrap": {
494 "@type": "koral:termGroup",
495 "operands": [],
496 "relation": "relation:and"
497 }
498 }`,
499 expectError: true,
500 errorMsg: "failed to parse JSON into AST: error parsing wrapped node: term group must have at least one operand",
501 },
502 }
503
504 for _, tt := range tests {
505 t.Run(tt.name, func(t *testing.T) {
506 var inputData any
507 err := json.Unmarshal([]byte(tt.input), &inputData)
508 require.NoError(t, err)
509
Akron7b4984e2025-05-26 19:12:20 +0200510 result, err := m.ApplyQueryMappings("test-mapper", MappingOptions{Direction: AtoB}, inputData)
Akrond5850f82025-05-23 16:44:44 +0200511 if tt.expectError {
512 assert.Error(t, err)
513 assert.Equal(t, tt.errorMsg, err.Error())
514 assert.Nil(t, result)
515 } else {
516 assert.NoError(t, err)
517 assert.NotNil(t, result)
518 }
519 })
520 }
521}
Akron7b4984e2025-05-26 19:12:20 +0200522
523func TestQueryWrapperMappings(t *testing.T) {
524
525 mappingList := config.MappingList{
526 ID: "test-wrapper",
527 FoundryA: "opennlp",
528 LayerA: "orth",
529 FoundryB: "upos",
530 LayerB: "orth",
531 Mappings: []config.MappingRule{
532 "[opennlp/orth=Baum] <> [opennlp/orth=X]",
533 },
534 }
535
536 // Create a new mapper
537 m, err := NewMapper([]config.MappingList{mappingList})
538 require.NoError(t, err)
539
540 tests := []struct {
541 name string
542 mappingID string
543 opts MappingOptions
544 input string
545 expected string
546 expectError bool
547 }{
548 {
Akroncc83eb52025-05-27 14:39:12 +0200549 name: "Query wrapper case with rewrites preservation",
Akron7b4984e2025-05-26 19:12:20 +0200550 mappingID: "test-wrapper",
551 opts: MappingOptions{
552 Direction: AtoB,
553 },
554 input: `{
555 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
556 "collection": {
557 "@type": "koral:doc",
558 "key": "availability",
559 "match": "match:eq",
Akron7b4984e2025-05-26 19:12:20 +0200560 "type": "type:regex",
561 "value": "CC.*"
562 },
563 "query": {
564 "@type": "koral:token",
Akroncc83eb52025-05-27 14:39:12 +0200565 "rewrites": [
566 {
567 "@type": "koral:rewrite",
568 "_comment": "Original rewrite that should be preserved",
569 "editor": "Original",
570 "operation": "operation:original",
571 "src": "Original"
572 }
573 ],
Akron7b4984e2025-05-26 19:12:20 +0200574 "wrap": {
575 "@type": "koral:term",
576 "foundry": "opennlp",
577 "key": "Baum",
578 "layer": "orth",
579 "match": "match:eq"
580 }
581 }
582 }`,
583 expected: `{
584 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
585 "collection": {
586 "@type": "koral:doc",
587 "key": "availability",
588 "match": "match:eq",
Akron7b4984e2025-05-26 19:12:20 +0200589 "type": "type:regex",
590 "value": "CC.*"
591 },
592 "query": {
593 "@type": "koral:token",
Akroncc83eb52025-05-27 14:39:12 +0200594 "rewrites": [
595 {
596 "@type": "koral:rewrite",
597 "_comment": "Original rewrite that should be preserved",
598 "editor": "Original",
599 "operation": "operation:original",
600 "src": "Original"
601 }
602 ],
Akron7b4984e2025-05-26 19:12:20 +0200603 "wrap": {
604 "@type": "koral:term",
605 "foundry": "opennlp",
606 "key": "X",
607 "layer": "orth",
608 "match": "match:eq"
609 }
610 }
611 }`,
612 },
613 {
614 name: "Empty query field",
615 mappingID: "test-wrapper",
616 opts: MappingOptions{
617 Direction: AtoB,
618 },
619 input: `{
620 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
621 "query": null
622 }`,
623 expected: `{
624 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
625 "query": null
626 }`,
627 },
628 {
629 name: "Missing query field",
630 mappingID: "test-wrapper",
631 opts: MappingOptions{
632 Direction: AtoB,
633 },
634 input: `{
635 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
636 "collection": {
637 "@type": "koral:doc"
638 }
639 }`,
640 expected: `{
641 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
642 "collection": {
643 "@type": "koral:doc"
644 }
645 }`,
646 },
647 {
648 name: "Query field with non-object value",
649 mappingID: "test-wrapper",
650 opts: MappingOptions{
651 Direction: AtoB,
652 },
653 input: `{
654 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
655 "query": "invalid"
656 }`,
657 expected: `{
658 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
659 "query": "invalid"
660 }`,
661 },
Akroncc83eb52025-05-27 14:39:12 +0200662 {
663 name: "Query with rewrites in nested token",
664 mappingID: "test-wrapper",
665 opts: MappingOptions{
666 Direction: AtoB,
667 },
668 input: `{
669 "@type": "koral:token",
670 "rewrites": [
671 {
672 "@type": "koral:rewrite",
673 "_comment": "Nested rewrite that should be preserved",
674 "editor": "Nested",
675 "operation": "operation:nested",
676 "src": "Nested"
677 }
678 ],
679 "wrap": {
680 "@type": "koral:term",
681 "foundry": "opennlp",
682 "key": "Baum",
683 "layer": "orth",
684 "match": "match:eq"
685 }
686 }`,
687 expected: `{
688 "@type": "koral:token",
689 "rewrites": [
690 {
691 "@type": "koral:rewrite",
692 "_comment": "Nested rewrite that should be preserved",
693 "editor": "Nested",
694 "operation": "operation:nested",
695 "src": "Nested"
696 }
697 ],
698 "wrap": {
699 "@type": "koral:term",
700 "foundry": "opennlp",
701 "key": "X",
702 "layer": "orth",
703 "match": "match:eq"
704 }
705 }`,
706 },
Akron7b4984e2025-05-26 19:12:20 +0200707 }
708
709 for _, tt := range tests {
710 t.Run(tt.name, func(t *testing.T) {
711 // Parse input JSON
712 var inputData interface{}
713 err := json.Unmarshal([]byte(tt.input), &inputData)
714 require.NoError(t, err)
715
716 // Apply mappings
717 result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
718 if tt.expectError {
719 assert.Error(t, err)
720 return
721 }
722 require.NoError(t, err)
723
724 // Parse expected JSON
725 var expectedData interface{}
726 err = json.Unmarshal([]byte(tt.expected), &expectedData)
727 require.NoError(t, err)
728
729 // Compare results
730 assert.Equal(t, expectedData, result)
731 })
732 }
733}