blob: 13cb7a813679abcfbee301d0afed77c62647df39 [file] [log] [blame]
Akron32d53de2025-05-22 13:45:32 +02001package mapper
2
3import (
4 "encoding/json"
Akron32d53de2025-05-22 13:45:32 +02005 "testing"
6
Akron2ef703c2025-07-03 15:57:42 +02007 "github.com/KorAP/Koral-Mapper/ast"
8 "github.com/KorAP/Koral-Mapper/config"
9 "github.com/KorAP/Koral-Mapper/matcher"
Akron32d53de2025-05-22 13:45:32 +020010 "github.com/stretchr/testify/assert"
11 "github.com/stretchr/testify/require"
12)
13
14func TestMapper(t *testing.T) {
Akrona00d4752025-05-26 17:34:36 +020015 // Create test mapping list
16 mappingList := config.MappingList{
17 ID: "test-mapper",
18 FoundryA: "opennlp",
19 LayerA: "p",
20 FoundryB: "upos",
21 LayerB: "p",
22 Mappings: []config.MappingRule{
23 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
24 "[DET] <> [opennlp/p=DET]",
25 },
26 }
Akron32d53de2025-05-22 13:45:32 +020027
28 // Create a new mapper
Akrona00d4752025-05-26 17:34:36 +020029 m, err := NewMapper([]config.MappingList{mappingList})
Akron32d53de2025-05-22 13:45:32 +020030 require.NoError(t, err)
31
32 tests := []struct {
33 name string
34 mappingID string
35 opts MappingOptions
36 input string
37 expected string
38 expectError bool
39 }{
40 {
41 name: "Simple A to B mapping",
42 mappingID: "test-mapper",
43 opts: MappingOptions{
44 Direction: AtoB,
45 },
46 input: `{
47 "@type": "koral:token",
48 "wrap": {
49 "@type": "koral:term",
50 "foundry": "opennlp",
51 "key": "PIDAT",
52 "layer": "p",
53 "match": "match:eq"
54 }
55 }`,
56 expected: `{
57 "@type": "koral:token",
58 "wrap": {
59 "@type": "koral:termGroup",
60 "operands": [
61 {
62 "@type": "koral:term",
63 "foundry": "opennlp",
64 "key": "PIDAT",
65 "layer": "p",
66 "match": "match:eq"
67 },
68 {
69 "@type": "koral:term",
70 "foundry": "opennlp",
71 "key": "AdjType",
72 "layer": "p",
73 "match": "match:eq",
74 "value": "Pdt"
75 }
76 ],
77 "relation": "relation:and"
78 }
79 }`,
80 },
81 {
Akron0d9117c2025-05-27 15:20:21 +020082 name: "Simple A to B mapping with rewrites",
Akron32d53de2025-05-22 13:45:32 +020083 mappingID: "test-mapper",
84 opts: MappingOptions{
Akron0d9117c2025-05-27 15:20:21 +020085 Direction: AtoB,
86 AddRewrites: true,
Akron32d53de2025-05-22 13:45:32 +020087 },
88 input: `{
89 "@type": "koral:token",
90 "wrap": {
Akrona1a183f2025-05-26 17:47:33 +020091 "@type": "koral:term",
92 "foundry": "opennlp",
93 "key": "PIDAT",
94 "layer": "p",
95 "match": "match:eq"
Akron32d53de2025-05-22 13:45:32 +020096 }
97 }`,
98 expected: `{
99 "@type": "koral:token",
100 "wrap": {
Akron0d9117c2025-05-27 15:20:21 +0200101 "@type": "koral:termGroup",
102 "operands": [
103 {
104 "@type": "koral:term",
105 "foundry": "opennlp",
106 "key": "PIDAT",
107 "layer": "p",
108 "match": "match:eq"
109 },
110 {
111 "@type": "koral:term",
112 "foundry": "opennlp",
113 "key": "AdjType",
114 "layer": "p",
115 "match": "match:eq",
116 "value": "Pdt"
117 }
118 ],
119 "relation": "relation:and",
120 "rewrites": [
121 {
122 "@type": "koral:rewrite",
Akron2f93c582026-02-19 16:49:13 +0100123 "editor": "Koral-Mapper",
Akron8a87d9a2025-05-27 15:30:48 +0200124 "original": {
Akron0d9117c2025-05-27 15:20:21 +0200125 "@type": "koral:term",
126 "foundry": "opennlp",
127 "key": "PIDAT",
128 "layer": "p",
129 "match": "match:eq"
130 }
131 }
132 ]
Akron32d53de2025-05-22 13:45:32 +0200133 }
134 }`,
135 },
136 {
Akron0d9117c2025-05-27 15:20:21 +0200137 name: "Mapping with foundry override and rewrites",
Akron32d53de2025-05-22 13:45:32 +0200138 mappingID: "test-mapper",
139 opts: MappingOptions{
Akron0d9117c2025-05-27 15:20:21 +0200140 Direction: AtoB,
141 FoundryB: "custom",
142 AddRewrites: true,
Akron32d53de2025-05-22 13:45:32 +0200143 },
144 input: `{
145 "@type": "koral:token",
146 "wrap": {
147 "@type": "koral:term",
148 "foundry": "opennlp",
149 "key": "PIDAT",
150 "layer": "p",
151 "match": "match:eq"
152 }
153 }`,
154 expected: `{
155 "@type": "koral:token",
156 "wrap": {
157 "@type": "koral:termGroup",
158 "operands": [
159 {
160 "@type": "koral:term",
161 "foundry": "custom",
162 "key": "PIDAT",
163 "layer": "p",
164 "match": "match:eq"
165 },
166 {
167 "@type": "koral:term",
168 "foundry": "custom",
169 "key": "AdjType",
170 "layer": "p",
171 "match": "match:eq",
172 "value": "Pdt"
173 }
174 ],
Akron0d9117c2025-05-27 15:20:21 +0200175 "relation": "relation:and",
176 "rewrites": [
177 {
178 "@type": "koral:rewrite",
Akron2f93c582026-02-19 16:49:13 +0100179 "editor": "Koral-Mapper",
Akron8a87d9a2025-05-27 15:30:48 +0200180 "original": {
181 "@type": "koral:term",
182 "foundry": "opennlp",
183 "key": "PIDAT",
184 "layer": "p",
185 "match": "match:eq"
186 }
Akron0d9117c2025-05-27 15:20:21 +0200187 }
188 ]
Akron32d53de2025-05-22 13:45:32 +0200189 }
190 }`,
191 },
192 {
Akron0d9117c2025-05-27 15:20:21 +0200193 name: "B to A direction",
194 mappingID: "test-mapper",
195 opts: MappingOptions{
196 Direction: BtoA,
197 },
198 input: `{
199 "@type": "koral:token",
200 "wrap": {
201 "@type": "koral:term",
202 "foundry": "opennlp",
203 "key": "PIDAT",
204 "layer": "p",
205 "match": "match:eq"
206 }
207 }`,
208 expected: `{
209 "@type": "koral:token",
210 "wrap": {
211 "@type": "koral:term",
212 "foundry": "opennlp",
213 "key": "PIDAT",
214 "layer": "p",
215 "match": "match:eq"
216 }
217 }`,
218 expectError: false,
219 },
220 {
Akron32d53de2025-05-22 13:45:32 +0200221 name: "Invalid mapping ID",
222 mappingID: "nonexistent",
223 opts: MappingOptions{
224 Direction: AtoB,
225 },
226 input: `{
227 "@type": "koral:token",
228 "wrap": {
229 "@type": "koral:term",
230 "foundry": "opennlp",
231 "key": "PIDAT",
232 "layer": "p",
233 "match": "match:eq"
234 }
235 }`,
236 expectError: true,
237 },
238 {
239 name: "Invalid direction",
240 mappingID: "test-mapper",
241 opts: MappingOptions{
Akrona1a183f2025-05-26 17:47:33 +0200242 Direction: Direction(false),
Akron32d53de2025-05-22 13:45:32 +0200243 },
244 input: `{
245 "@type": "koral:token",
246 "wrap": {
247 "@type": "koral:term",
248 "foundry": "opennlp",
249 "key": "PIDAT",
250 "layer": "p",
251 "match": "match:eq"
252 }
253 }`,
Akrona1a183f2025-05-26 17:47:33 +0200254 expected: `{
255 "@type": "koral:token",
256 "wrap": {
257 "@type": "koral:term",
258 "foundry": "opennlp",
259 "key": "PIDAT",
260 "layer": "p",
261 "match": "match:eq"
262 }
263 }`,
264 expectError: false,
Akron32d53de2025-05-22 13:45:32 +0200265 },
Akron8f1970f2025-05-30 12:52:03 +0200266 {
267 name: "Query with legacy rewrite field names",
268 mappingID: "test-mapper",
269 opts: MappingOptions{
270 Direction: AtoB,
271 },
272 input: `{
273 "@type": "koral:token",
274 "rewrites": [
275 {
276 "@type": "koral:rewrite",
277 "_comment": "Legacy rewrite with source instead of editor",
278 "source": "LegacyEditor",
279 "operation": "operation:legacy",
280 "origin": "LegacySource"
281 }
282 ],
283 "wrap": {
284 "@type": "koral:term",
285 "foundry": "opennlp",
286 "key": "PIDAT",
287 "layer": "p",
288 "match": "match:eq"
289 }
290 }`,
291 expected: `{
292 "@type": "koral:token",
293 "rewrites": [
294 {
295 "@type": "koral:rewrite",
296 "_comment": "Legacy rewrite with source instead of editor",
297 "editor": "LegacyEditor",
298 "operation": "operation:legacy",
299 "src": "LegacySource"
300 }
301 ],
302 "wrap": {
303 "@type": "koral:termGroup",
304 "operands": [
305 {
306 "@type": "koral:term",
307 "foundry": "opennlp",
308 "key": "PIDAT",
309 "layer": "p",
310 "match": "match:eq"
311 },
312 {
313 "@type": "koral:term",
314 "foundry": "opennlp",
315 "key": "AdjType",
316 "layer": "p",
317 "match": "match:eq",
318 "value": "Pdt"
319 }
320 ],
321 "relation": "relation:and"
322 }
323 }`,
324 },
325 {
326 name: "Query with mixed legacy and modern rewrite fields",
327 mappingID: "test-mapper",
328 opts: MappingOptions{
329 Direction: AtoB,
330 },
331 input: `{
332 "@type": "koral:token",
333 "rewrites": [
334 {
335 "@type": "koral:rewrite",
336 "_comment": "Modern rewrite",
337 "editor": "ModernEditor",
338 "operation": "operation:modern",
339 "original": {
340 "@type": "koral:term",
341 "foundry": "original",
342 "key": "original-key"
343 }
344 },
345 {
346 "@type": "koral:rewrite",
347 "_comment": "Legacy rewrite with precedence test",
348 "editor": "PreferredEditor",
349 "source": "IgnoredSource",
350 "operation": "operation:precedence",
351 "original": "PreferredOriginal",
352 "src": "IgnoredSrc",
353 "origin": "IgnoredOrigin"
354 }
355 ],
356 "wrap": {
357 "@type": "koral:term",
358 "foundry": "opennlp",
359 "key": "PIDAT",
360 "layer": "p",
361 "match": "match:eq"
362 }
363 }`,
364 expected: `{
365 "@type": "koral:token",
366 "rewrites": [
367 {
368 "@type": "koral:rewrite",
369 "_comment": "Modern rewrite",
370 "editor": "ModernEditor",
371 "operation": "operation:modern",
372 "original": {
373 "@type": "koral:term",
374 "foundry": "original",
375 "key": "original-key"
376 }
377 },
378 {
379 "@type": "koral:rewrite",
380 "_comment": "Legacy rewrite with precedence test",
381 "editor": "PreferredEditor",
382 "operation": "operation:precedence",
383 "original": "PreferredOriginal"
384 }
385 ],
386 "wrap": {
387 "@type": "koral:termGroup",
388 "operands": [
389 {
390 "@type": "koral:term",
391 "foundry": "opennlp",
392 "key": "PIDAT",
393 "layer": "p",
394 "match": "match:eq"
395 },
396 {
397 "@type": "koral:term",
398 "foundry": "opennlp",
399 "key": "AdjType",
400 "layer": "p",
401 "match": "match:eq",
402 "value": "Pdt"
403 }
404 ],
405 "relation": "relation:and"
406 }
407 }`,
408 },
Akron32d53de2025-05-22 13:45:32 +0200409 }
410
411 for _, tt := range tests {
412 t.Run(tt.name, func(t *testing.T) {
413 // Parse input JSON
Akron121c66e2025-06-02 16:34:05 +0200414 var inputData any
Akron32d53de2025-05-22 13:45:32 +0200415 err := json.Unmarshal([]byte(tt.input), &inputData)
416 require.NoError(t, err)
417
418 // Apply mappings
Akron7b4984e2025-05-26 19:12:20 +0200419 result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
Akron32d53de2025-05-22 13:45:32 +0200420 if tt.expectError {
421 assert.Error(t, err)
422 return
423 }
424 require.NoError(t, err)
425
426 // Parse expected JSON
Akron121c66e2025-06-02 16:34:05 +0200427 var expectedData any
Akron32d53de2025-05-22 13:45:32 +0200428 err = json.Unmarshal([]byte(tt.expected), &expectedData)
429 require.NoError(t, err)
430
431 // Compare results
432 assert.Equal(t, expectedData, result)
433 })
434 }
435}
Akrond5850f82025-05-23 16:44:44 +0200436
Akroncc83eb52025-05-27 14:39:12 +0200437func TestTokenToTermGroupWithRewrites(t *testing.T) {
438 // Create test mapping list specifically for token to termGroup test
439 mappingList := config.MappingList{
440 ID: "test-token-to-termgroup",
441 FoundryA: "opennlp",
442 LayerA: "p",
Akron422cd252026-05-19 16:31:19 +0200443 FoundryB: "tt",
444 LayerB: "pos",
Akroncc83eb52025-05-27 14:39:12 +0200445 Mappings: []config.MappingRule{
446 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
447 },
448 }
449
450 // Create a new mapper
451 m, err := NewMapper([]config.MappingList{mappingList})
452 require.NoError(t, err)
453
454 input := `{
455 "@type": "koral:token",
456 "rewrites": [
457 {
458 "@type": "koral:rewrite",
459 "_comment": "This rewrite should be preserved",
460 "editor": "TestEditor",
461 "operation": "operation:test",
462 "src": "TestSource"
463 }
464 ],
465 "wrap": {
466 "@type": "koral:term",
467 "foundry": "opennlp",
468 "key": "PIDAT",
469 "layer": "p",
470 "match": "match:eq"
471 }
472 }`
473
474 expected := `{
475 "@type": "koral:token",
476 "rewrites": [
477 {
478 "@type": "koral:rewrite",
479 "_comment": "This rewrite should be preserved",
480 "editor": "TestEditor",
481 "operation": "operation:test",
482 "src": "TestSource"
483 }
484 ],
485 "wrap": {
486 "@type": "koral:termGroup",
487 "operands": [
488 {
489 "@type": "koral:term",
490 "foundry": "opennlp",
491 "key": "PIDAT",
492 "layer": "p",
493 "match": "match:eq"
494 },
495 {
496 "@type": "koral:term",
497 "foundry": "opennlp",
498 "key": "AdjType",
499 "layer": "p",
500 "match": "match:eq",
501 "value": "Pdt"
502 }
503 ],
504 "relation": "relation:and"
505 }
506 }`
507
508 // Parse input JSON
Akron121c66e2025-06-02 16:34:05 +0200509 var inputData any
Akroncc83eb52025-05-27 14:39:12 +0200510 err = json.Unmarshal([]byte(input), &inputData)
511 require.NoError(t, err)
512
513 // Apply mappings
514 result, err := m.ApplyQueryMappings("test-token-to-termgroup", MappingOptions{Direction: AtoB}, inputData)
515 require.NoError(t, err)
516
517 // Parse expected JSON
Akron121c66e2025-06-02 16:34:05 +0200518 var expectedData any
Akroncc83eb52025-05-27 14:39:12 +0200519 err = json.Unmarshal([]byte(expected), &expectedData)
520 require.NoError(t, err)
521
522 // Compare results
523 assert.Equal(t, expectedData, result)
524}
525
Akrond5850f82025-05-23 16:44:44 +0200526func TestMatchComplexPatterns(t *testing.T) {
527 tests := []struct {
528 name string
529 pattern ast.Pattern
530 replacement ast.Replacement
531 input ast.Node
532 expected ast.Node
533 }{
534 {
535 name: "Deep nested pattern with mixed operators",
536 pattern: ast.Pattern{
537 Root: &ast.TermGroup{
538 Operands: []ast.Node{
539 &ast.Term{
540 Key: "A",
541 Match: ast.MatchEqual,
542 },
543 &ast.TermGroup{
544 Operands: []ast.Node{
545 &ast.Term{
546 Key: "B",
547 Match: ast.MatchEqual,
548 },
549 &ast.TermGroup{
550 Operands: []ast.Node{
551 &ast.Term{
552 Key: "C",
553 Match: ast.MatchEqual,
554 },
555 &ast.Term{
556 Key: "D",
557 Match: ast.MatchEqual,
558 },
559 },
560 Relation: ast.AndRelation,
561 },
562 },
563 Relation: ast.OrRelation,
564 },
565 },
566 Relation: ast.AndRelation,
567 },
568 },
569 replacement: ast.Replacement{
570 Root: &ast.Term{
571 Key: "RESULT",
572 Match: ast.MatchEqual,
573 },
574 },
575 input: &ast.TermGroup{
576 Operands: []ast.Node{
577 &ast.Term{
578 Key: "A",
579 Match: ast.MatchEqual,
580 },
581 &ast.TermGroup{
582 Operands: []ast.Node{
583 &ast.Term{
584 Key: "C",
585 Match: ast.MatchEqual,
586 },
587 &ast.Term{
588 Key: "D",
589 Match: ast.MatchEqual,
590 },
591 },
592 Relation: ast.AndRelation,
593 },
594 },
595 Relation: ast.AndRelation,
596 },
597 expected: &ast.Term{
598 Key: "RESULT",
599 Match: ast.MatchEqual,
600 },
601 },
602 }
603
604 for _, tt := range tests {
605 t.Run(tt.name, func(t *testing.T) {
606 m, err := matcher.NewMatcher(tt.pattern, tt.replacement)
607 require.NoError(t, err)
608 result := m.Replace(tt.input)
609 assert.Equal(t, tt.expected, result)
610 })
611 }
612}
613
614func TestInvalidPatternReplacement(t *testing.T) {
Akrona00d4752025-05-26 17:34:36 +0200615 // Create test mapping list
616 mappingList := config.MappingList{
617 ID: "test-mapper",
618 FoundryA: "opennlp",
619 LayerA: "p",
620 FoundryB: "upos",
621 LayerB: "p",
622 Mappings: []config.MappingRule{
623 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
624 },
625 }
Akrond5850f82025-05-23 16:44:44 +0200626
627 // Create a new mapper
Akrona00d4752025-05-26 17:34:36 +0200628 m, err := NewMapper([]config.MappingList{mappingList})
Akrond5850f82025-05-23 16:44:44 +0200629 require.NoError(t, err)
630
631 tests := []struct {
632 name string
633 input string
634 expectError bool
635 errorMsg string
636 }{
637 {
638 name: "Invalid input - empty term group",
639 input: `{
640 "@type": "koral:token",
641 "wrap": {
642 "@type": "koral:termGroup",
643 "operands": [],
644 "relation": "relation:and"
645 }
646 }`,
647 expectError: true,
648 errorMsg: "failed to parse JSON into AST: error parsing wrapped node: term group must have at least one operand",
649 },
650 }
651
652 for _, tt := range tests {
653 t.Run(tt.name, func(t *testing.T) {
654 var inputData any
655 err := json.Unmarshal([]byte(tt.input), &inputData)
656 require.NoError(t, err)
657
Akron7b4984e2025-05-26 19:12:20 +0200658 result, err := m.ApplyQueryMappings("test-mapper", MappingOptions{Direction: AtoB}, inputData)
Akrond5850f82025-05-23 16:44:44 +0200659 if tt.expectError {
660 assert.Error(t, err)
661 assert.Equal(t, tt.errorMsg, err.Error())
662 assert.Nil(t, result)
663 } else {
664 assert.NoError(t, err)
665 assert.NotNil(t, result)
666 }
667 })
668 }
669}
Akron7b4984e2025-05-26 19:12:20 +0200670
Akron958fc472026-05-19 13:58:52 +0200671func TestMultiFieldRewritesAreReversible(t *testing.T) {
672 mappingList := config.MappingList{
673 ID: "multi-field",
674 FoundryA: "opennlp",
675 LayerA: "p",
676 FoundryB: "upos",
677 LayerB: "pos",
678 Mappings: []config.MappingRule{
679 "[DET] <> [PRON]",
680 },
681 }
682
683 m, err := NewMapper([]config.MappingList{mappingList})
684 require.NoError(t, err)
685
686 tests := []struct {
687 name string
688 opts MappingOptions
689 input string
690 expected string
691 }{
692 {
Akrona0174352026-05-19 17:04:42 +0200693 name: "Multi-field change: single rewrite with full original",
Akron958fc472026-05-19 13:58:52 +0200694 opts: MappingOptions{
695 Direction: AtoB,
696 AddRewrites: true,
697 },
698 input: `{
699 "@type": "koral:token",
700 "wrap": {
701 "@type": "koral:term",
702 "foundry": "opennlp",
703 "key": "DET",
704 "layer": "p",
705 "match": "match:eq"
706 }
707 }`,
708 expected: `{
709 "@type": "koral:token",
710 "wrap": {
711 "@type": "koral:term",
712 "foundry": "upos",
713 "key": "PRON",
714 "layer": "pos",
715 "match": "match:eq",
716 "rewrites": [
717 {
718 "@type": "koral:rewrite",
719 "editor": "Koral-Mapper",
Akrona0174352026-05-19 17:04:42 +0200720 "original": {
721 "@type": "koral:term",
722 "foundry": "opennlp",
723 "key": "DET",
724 "layer": "p",
725 "match": "match:eq"
726 }
Akron958fc472026-05-19 13:58:52 +0200727 }
728 ]
729 }
730 }`,
731 },
732 {
Akrona0174352026-05-19 17:04:42 +0200733 name: "Reverse direction: single rewrite with full original",
Akron958fc472026-05-19 13:58:52 +0200734 opts: MappingOptions{
735 Direction: BtoA,
736 AddRewrites: true,
737 },
738 input: `{
739 "@type": "koral:token",
740 "wrap": {
741 "@type": "koral:term",
742 "foundry": "upos",
743 "key": "PRON",
744 "layer": "pos",
745 "match": "match:eq"
746 }
747 }`,
748 expected: `{
749 "@type": "koral:token",
750 "wrap": {
751 "@type": "koral:term",
752 "foundry": "opennlp",
753 "key": "DET",
754 "layer": "p",
755 "match": "match:eq",
756 "rewrites": [
757 {
758 "@type": "koral:rewrite",
759 "editor": "Koral-Mapper",
Akrona0174352026-05-19 17:04:42 +0200760 "original": {
761 "@type": "koral:term",
762 "foundry": "upos",
763 "key": "PRON",
764 "layer": "pos",
765 "match": "match:eq"
766 }
Akron958fc472026-05-19 13:58:52 +0200767 }
768 ]
769 }
770 }`,
771 },
772 }
773
774 for _, tt := range tests {
775 t.Run(tt.name, func(t *testing.T) {
776 var inputData any
777 err := json.Unmarshal([]byte(tt.input), &inputData)
778 require.NoError(t, err)
779
780 result, err := m.ApplyQueryMappings("multi-field", tt.opts, inputData)
781 require.NoError(t, err)
782
783 var expectedData any
784 err = json.Unmarshal([]byte(tt.expected), &expectedData)
785 require.NoError(t, err)
786
787 assert.Equal(t, expectedData, result)
788 })
789 }
790}
791
792func TestSingleFieldRewrite(t *testing.T) {
793 mappingList := config.MappingList{
794 ID: "same-fl",
795 FoundryA: "opennlp",
796 LayerA: "p",
797 FoundryB: "opennlp",
Akron422cd252026-05-19 16:31:19 +0200798 LayerB: "pos",
Akron958fc472026-05-19 13:58:52 +0200799 Mappings: []config.MappingRule{
800 "[DET] <> [PRON]",
801 },
802 }
803
804 m, err := NewMapper([]config.MappingList{mappingList})
805 require.NoError(t, err)
806
807 var inputData any
808 err = json.Unmarshal([]byte(`{
809 "@type": "koral:token",
810 "wrap": {
811 "@type": "koral:term",
812 "foundry": "opennlp",
813 "key": "DET",
814 "layer": "p",
815 "match": "match:eq"
816 }
817 }`), &inputData)
818 require.NoError(t, err)
819
820 result, err := m.ApplyQueryMappings("same-fl", MappingOptions{
821 Direction: AtoB,
822 AddRewrites: true,
823 }, inputData)
824 require.NoError(t, err)
825
826 var expectedData any
827 err = json.Unmarshal([]byte(`{
828 "@type": "koral:token",
829 "wrap": {
830 "@type": "koral:term",
831 "foundry": "opennlp",
832 "key": "PRON",
Akron422cd252026-05-19 16:31:19 +0200833 "layer": "pos",
Akron958fc472026-05-19 13:58:52 +0200834 "match": "match:eq",
835 "rewrites": [
836 {
837 "@type": "koral:rewrite",
838 "editor": "Koral-Mapper",
Akrona0174352026-05-19 17:04:42 +0200839 "original": {
840 "@type": "koral:term",
841 "foundry": "opennlp",
842 "key": "DET",
843 "layer": "p",
844 "match": "match:eq"
845 }
Akron958fc472026-05-19 13:58:52 +0200846 }
847 ]
848 }
849 }`), &expectedData)
850 require.NoError(t, err)
851
852 assert.Equal(t, expectedData, result)
853}
854
Akrona0174352026-05-19 17:04:42 +0200855func TestBuildRewritesSingleObjectRewrite(t *testing.T) {
Akron958fc472026-05-19 13:58:52 +0200856 tests := []struct {
Akrona0174352026-05-19 17:04:42 +0200857 name string
858 original *ast.Term
859 new_ *ast.Term
Akron958fc472026-05-19 13:58:52 +0200860 }{
861 {
Akrona0174352026-05-19 17:04:42 +0200862 name: "All fields change",
863 original: &ast.Term{Foundry: "a", Layer: "l1", Key: "k1", Value: "v1", Match: ast.MatchEqual},
864 new_: &ast.Term{Foundry: "b", Layer: "l2", Key: "k2", Value: "v2", Match: ast.MatchEqual},
Akron958fc472026-05-19 13:58:52 +0200865 },
866 {
Akrona0174352026-05-19 17:04:42 +0200867 name: "Single field injection: empty value becomes non-empty",
868 original: &ast.Term{Foundry: "a", Layer: "l", Key: "k", Match: ast.MatchEqual},
869 new_: &ast.Term{Foundry: "a", Layer: "l", Key: "k", Value: "v", Match: ast.MatchEqual},
Akron958fc472026-05-19 13:58:52 +0200870 },
871 {
Akrona0174352026-05-19 17:04:42 +0200872 name: "Single field deletion: non-empty value becomes empty",
873 original: &ast.Term{Foundry: "a", Layer: "l", Key: "k", Value: "v", Match: ast.MatchEqual},
874 new_: &ast.Term{Foundry: "a", Layer: "l", Key: "k", Match: ast.MatchEqual},
Akron958fc472026-05-19 13:58:52 +0200875 },
876 }
877
878 for _, tt := range tests {
879 t.Run(tt.name, func(t *testing.T) {
880 rewrites := buildRewrites(tt.original, tt.new_)
Akrona0174352026-05-19 17:04:42 +0200881 require.Len(t, rewrites, 1, "one rule application should produce exactly one rewrite")
882 rw := rewrites[0]
883 assert.Equal(t, RewriteEditor, rw.Editor)
884 assert.Empty(t, rw.Scope, "object-level rewrite should have no scope")
885 assert.NotNil(t, rw.Original, "rewrite should contain the full original")
886 originalMap, ok := rw.Original.(map[string]any)
887 require.True(t, ok)
888 assert.Equal(t, "koral:term", originalMap["@type"])
Akron958fc472026-05-19 13:58:52 +0200889 })
890 }
891}
892
Akron7b4984e2025-05-26 19:12:20 +0200893func TestQueryWrapperMappings(t *testing.T) {
894
895 mappingList := config.MappingList{
896 ID: "test-wrapper",
897 FoundryA: "opennlp",
898 LayerA: "orth",
899 FoundryB: "upos",
900 LayerB: "orth",
901 Mappings: []config.MappingRule{
902 "[opennlp/orth=Baum] <> [opennlp/orth=X]",
903 },
904 }
905
906 // Create a new mapper
907 m, err := NewMapper([]config.MappingList{mappingList})
908 require.NoError(t, err)
909
910 tests := []struct {
911 name string
912 mappingID string
913 opts MappingOptions
914 input string
915 expected string
916 expectError bool
917 }{
918 {
Akroncc83eb52025-05-27 14:39:12 +0200919 name: "Query wrapper case with rewrites preservation",
Akron7b4984e2025-05-26 19:12:20 +0200920 mappingID: "test-wrapper",
921 opts: MappingOptions{
922 Direction: AtoB,
923 },
924 input: `{
925 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
926 "collection": {
927 "@type": "koral:doc",
928 "key": "availability",
929 "match": "match:eq",
Akron7b4984e2025-05-26 19:12:20 +0200930 "type": "type:regex",
931 "value": "CC.*"
932 },
933 "query": {
934 "@type": "koral:token",
Akroncc83eb52025-05-27 14:39:12 +0200935 "rewrites": [
936 {
937 "@type": "koral:rewrite",
938 "_comment": "Original rewrite that should be preserved",
939 "editor": "Original",
940 "operation": "operation:original",
941 "src": "Original"
942 }
943 ],
Akron7b4984e2025-05-26 19:12:20 +0200944 "wrap": {
945 "@type": "koral:term",
946 "foundry": "opennlp",
947 "key": "Baum",
948 "layer": "orth",
949 "match": "match:eq"
950 }
951 }
952 }`,
953 expected: `{
954 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
955 "collection": {
956 "@type": "koral:doc",
957 "key": "availability",
958 "match": "match:eq",
Akron7b4984e2025-05-26 19:12:20 +0200959 "type": "type:regex",
960 "value": "CC.*"
961 },
962 "query": {
963 "@type": "koral:token",
Akroncc83eb52025-05-27 14:39:12 +0200964 "rewrites": [
965 {
966 "@type": "koral:rewrite",
967 "_comment": "Original rewrite that should be preserved",
968 "editor": "Original",
969 "operation": "operation:original",
970 "src": "Original"
971 }
972 ],
Akron7b4984e2025-05-26 19:12:20 +0200973 "wrap": {
974 "@type": "koral:term",
975 "foundry": "opennlp",
976 "key": "X",
977 "layer": "orth",
978 "match": "match:eq"
979 }
980 }
981 }`,
982 },
983 {
984 name: "Empty query field",
985 mappingID: "test-wrapper",
986 opts: MappingOptions{
987 Direction: AtoB,
988 },
989 input: `{
990 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
991 "query": null
992 }`,
993 expected: `{
994 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
995 "query": null
996 }`,
997 },
998 {
999 name: "Missing query field",
1000 mappingID: "test-wrapper",
1001 opts: MappingOptions{
1002 Direction: AtoB,
1003 },
1004 input: `{
1005 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
1006 "collection": {
1007 "@type": "koral:doc"
1008 }
1009 }`,
1010 expected: `{
1011 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
1012 "collection": {
1013 "@type": "koral:doc"
1014 }
1015 }`,
1016 },
1017 {
1018 name: "Query field with non-object value",
1019 mappingID: "test-wrapper",
1020 opts: MappingOptions{
1021 Direction: AtoB,
1022 },
1023 input: `{
1024 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
1025 "query": "invalid"
1026 }`,
1027 expected: `{
1028 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
1029 "query": "invalid"
1030 }`,
1031 },
Akroncc83eb52025-05-27 14:39:12 +02001032 {
1033 name: "Query with rewrites in nested token",
1034 mappingID: "test-wrapper",
1035 opts: MappingOptions{
1036 Direction: AtoB,
1037 },
1038 input: `{
1039 "@type": "koral:token",
1040 "rewrites": [
1041 {
1042 "@type": "koral:rewrite",
1043 "_comment": "Nested rewrite that should be preserved",
1044 "editor": "Nested",
1045 "operation": "operation:nested",
1046 "src": "Nested"
1047 }
1048 ],
1049 "wrap": {
1050 "@type": "koral:term",
1051 "foundry": "opennlp",
1052 "key": "Baum",
1053 "layer": "orth",
1054 "match": "match:eq"
1055 }
1056 }`,
1057 expected: `{
1058 "@type": "koral:token",
1059 "rewrites": [
1060 {
1061 "@type": "koral:rewrite",
1062 "_comment": "Nested rewrite that should be preserved",
1063 "editor": "Nested",
1064 "operation": "operation:nested",
1065 "src": "Nested"
1066 }
1067 ],
1068 "wrap": {
1069 "@type": "koral:term",
1070 "foundry": "opennlp",
1071 "key": "X",
1072 "layer": "orth",
1073 "match": "match:eq"
1074 }
1075 }`,
1076 },
Akron7b4984e2025-05-26 19:12:20 +02001077 }
1078
1079 for _, tt := range tests {
1080 t.Run(tt.name, func(t *testing.T) {
1081 // Parse input JSON
Akron121c66e2025-06-02 16:34:05 +02001082 var inputData any
Akron7b4984e2025-05-26 19:12:20 +02001083 err := json.Unmarshal([]byte(tt.input), &inputData)
1084 require.NoError(t, err)
1085
1086 // Apply mappings
1087 result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
1088 if tt.expectError {
1089 assert.Error(t, err)
1090 return
1091 }
1092 require.NoError(t, err)
1093
1094 // Parse expected JSON
Akron121c66e2025-06-02 16:34:05 +02001095 var expectedData any
Akron7b4984e2025-05-26 19:12:20 +02001096 err = json.Unmarshal([]byte(tt.expected), &expectedData)
1097 require.NoError(t, err)
1098
1099 // Compare results
1100 assert.Equal(t, expectedData, result)
1101 })
1102 }
1103}
Akron422cd252026-05-19 16:31:19 +02001104
1105func TestIdenticalEffectiveFoundryLayerRejected(t *testing.T) {
1106 tests := []struct {
1107 name string
1108 list config.MappingList
1109 opts MappingOptions
1110 wantErr string
1111 }{
1112 {
1113 name: "YAML defaults identical",
1114 list: config.MappingList{
1115 ID: "test", FoundryA: "opennlp", LayerA: "p",
1116 FoundryB: "opennlp", LayerB: "p",
1117 Mappings: []config.MappingRule{"[A] <> [B]"},
1118 },
1119 opts: MappingOptions{Direction: AtoB},
1120 wantErr: "identical source and target",
1121 },
1122 {
1123 name: "Query param override makes them identical",
1124 list: config.MappingList{
1125 ID: "test", FoundryA: "opennlp", LayerA: "p",
1126 FoundryB: "upos", LayerB: "p",
1127 Mappings: []config.MappingRule{"[A] <> [B]"},
1128 },
1129 opts: MappingOptions{Direction: AtoB, FoundryB: "opennlp"},
1130 wantErr: "identical source and target",
1131 },
1132 {
1133 name: "Query param override resolves the conflict",
1134 list: config.MappingList{
1135 ID: "test", FoundryA: "opennlp", LayerA: "p",
1136 FoundryB: "opennlp", LayerB: "p",
1137 Mappings: []config.MappingRule{"[A] <> [B]"},
1138 },
1139 opts: MappingOptions{Direction: AtoB, FoundryB: "upos"},
1140 wantErr: "",
1141 },
1142 {
1143 name: "Different foundry same layer is allowed",
1144 list: config.MappingList{
1145 ID: "test", FoundryA: "opennlp", LayerA: "p",
1146 FoundryB: "upos", LayerB: "p",
1147 Mappings: []config.MappingRule{"[A] <> [B]"},
1148 },
1149 opts: MappingOptions{Direction: AtoB},
1150 wantErr: "",
1151 },
1152 {
1153 name: "Both foundries empty is allowed",
1154 list: config.MappingList{
1155 ID: "test",
1156 Mappings: []config.MappingRule{"[A] <> [B]"},
1157 },
1158 opts: MappingOptions{Direction: AtoB},
1159 wantErr: "",
1160 },
1161 }
1162
1163 for _, tt := range tests {
1164 t.Run(tt.name, func(t *testing.T) {
1165 m, err := NewMapper([]config.MappingList{tt.list})
1166 require.NoError(t, err)
1167
1168 input := map[string]any{
1169 "@type": "koral:token",
1170 "wrap": map[string]any{
1171 "@type": "koral:term",
1172 "key": "A",
1173 },
1174 }
1175
1176 _, err = m.ApplyQueryMappings("test", tt.opts, input)
1177 if tt.wantErr != "" {
1178 require.Error(t, err)
1179 assert.Contains(t, err.Error(), tt.wantErr)
1180 } else {
1181 assert.NoError(t, err)
1182 }
1183 })
1184 }
1185}
1186
1187func TestIdenticalEffectiveFieldRejected(t *testing.T) {
1188 tests := []struct {
1189 name string
1190 list config.MappingList
1191 opts MappingOptions
1192 wantErr string
1193 }{
1194 {
1195 name: "YAML defaults identical",
1196 list: config.MappingList{
1197 ID: "test", Type: "corpus",
1198 FieldA: "textClass", FieldB: "textClass",
1199 Mappings: []config.MappingRule{"novel <> fiction"},
1200 },
1201 opts: MappingOptions{Direction: AtoB},
1202 wantErr: "identical source and target field",
1203 },
1204 {
1205 name: "Query param override makes them identical",
1206 list: config.MappingList{
1207 ID: "test", Type: "corpus",
1208 FieldA: "textClass", FieldB: "genre",
1209 Mappings: []config.MappingRule{"novel <> fiction"},
1210 },
1211 opts: MappingOptions{Direction: AtoB, FieldB: "textClass"},
1212 wantErr: "identical source and target field",
1213 },
1214 {
1215 name: "Query param override resolves the conflict",
1216 list: config.MappingList{
1217 ID: "test", Type: "corpus",
1218 FieldA: "textClass", FieldB: "textClass",
1219 Mappings: []config.MappingRule{"novel <> fiction"},
1220 },
1221 opts: MappingOptions{Direction: AtoB, FieldB: "genre"},
1222 wantErr: "",
1223 },
1224 {
1225 name: "Different fields is allowed",
1226 list: config.MappingList{
1227 ID: "test", Type: "corpus",
1228 FieldA: "textClass", FieldB: "genre",
1229 Mappings: []config.MappingRule{"novel <> fiction"},
1230 },
1231 opts: MappingOptions{Direction: AtoB},
1232 wantErr: "",
1233 },
1234 {
1235 name: "Both fields empty is allowed",
1236 list: config.MappingList{
1237 ID: "test", Type: "corpus",
1238 Mappings: []config.MappingRule{"textClass=novel <> genre=fiction"},
1239 },
1240 opts: MappingOptions{Direction: AtoB},
1241 wantErr: "",
1242 },
1243 }
1244
1245 for _, tt := range tests {
1246 t.Run(tt.name, func(t *testing.T) {
1247 m, err := NewMapper([]config.MappingList{tt.list})
1248 require.NoError(t, err)
1249
1250 input := map[string]any{
1251 "collection": map[string]any{
1252 "@type": "koral:doc",
1253 "key": "textClass",
1254 "value": "novel",
1255 "match": "match:eq",
1256 },
1257 }
1258
1259 _, err = m.ApplyQueryMappings("test", tt.opts, input)
1260 if tt.wantErr != "" {
1261 require.Error(t, err)
1262 assert.Contains(t, err.Error(), tt.wantErr)
1263 } else {
1264 assert.NoError(t, err)
1265 }
1266 })
1267 }
1268}
1269
1270func TestIdenticalEffectiveValuesResponseEndpoint(t *testing.T) {
1271 t.Run("annotation response rejects identical effective foundry/layer", func(t *testing.T) {
1272 m, err := NewMapper([]config.MappingList{{
1273 ID: "test", FoundryA: "marmot", LayerA: "p",
1274 FoundryB: "marmot", LayerB: "p",
1275 Mappings: []config.MappingRule{"[DET] <> [PRON]"},
1276 }})
1277 require.NoError(t, err)
1278
1279 input := map[string]any{
1280 "snippet": `<span title="marmot/p:DET">Der</span>`,
1281 }
1282
1283 _, err = m.ApplyResponseMappings("test", MappingOptions{Direction: AtoB}, input)
1284 require.Error(t, err)
1285 assert.Contains(t, err.Error(), "identical source and target")
1286 })
1287
1288 t.Run("corpus response rejects identical effective field", func(t *testing.T) {
1289 m, err := NewMapper([]config.MappingList{{
1290 ID: "test", Type: "corpus",
1291 FieldA: "textClass", FieldB: "textClass",
1292 Mappings: []config.MappingRule{"novel <> fiction"},
1293 }})
1294 require.NoError(t, err)
1295
1296 input := map[string]any{
1297 "fields": []any{
1298 map[string]any{
1299 "@type": "koral:field",
1300 "key": "textClass",
1301 "value": "novel",
1302 "type": "type:string",
1303 },
1304 },
1305 }
1306
1307 _, err = m.ApplyResponseMappings("test", MappingOptions{Direction: AtoB}, input)
1308 require.Error(t, err)
1309 assert.Contains(t, err.Error(), "identical source and target field")
1310 })
1311}