blob: 8d3e691b70164e673674f5cf8c274d253cd371eb [file] [log] [blame]
Akron32d53de2025-05-22 13:45:32 +02001package mapper
2
3import (
4 "encoding/json"
Akronb4e36f62026-05-21 11:44:25 +02005 "os"
Akron32d53de2025-05-22 13:45:32 +02006 "testing"
7
Akron2ef703c2025-07-03 15:57:42 +02008 "github.com/KorAP/Koral-Mapper/ast"
9 "github.com/KorAP/Koral-Mapper/config"
10 "github.com/KorAP/Koral-Mapper/matcher"
Akron32d53de2025-05-22 13:45:32 +020011 "github.com/stretchr/testify/assert"
12 "github.com/stretchr/testify/require"
Akronb4e36f62026-05-21 11:44:25 +020013 "gopkg.in/yaml.v3"
Akron32d53de2025-05-22 13:45:32 +020014)
15
16func TestMapper(t *testing.T) {
Akrona00d4752025-05-26 17:34:36 +020017 // Create test mapping list
18 mappingList := config.MappingList{
19 ID: "test-mapper",
20 FoundryA: "opennlp",
21 LayerA: "p",
22 FoundryB: "upos",
23 LayerB: "p",
24 Mappings: []config.MappingRule{
25 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
26 "[DET] <> [opennlp/p=DET]",
27 },
28 }
Akron32d53de2025-05-22 13:45:32 +020029
30 // Create a new mapper
Akrona00d4752025-05-26 17:34:36 +020031 m, err := NewMapper([]config.MappingList{mappingList})
Akron32d53de2025-05-22 13:45:32 +020032 require.NoError(t, err)
33
34 tests := []struct {
35 name string
36 mappingID string
37 opts MappingOptions
38 input string
39 expected string
40 expectError bool
41 }{
42 {
43 name: "Simple A to B mapping",
44 mappingID: "test-mapper",
45 opts: MappingOptions{
46 Direction: AtoB,
47 },
48 input: `{
49 "@type": "koral:token",
50 "wrap": {
51 "@type": "koral:term",
52 "foundry": "opennlp",
53 "key": "PIDAT",
54 "layer": "p",
55 "match": "match:eq"
56 }
57 }`,
58 expected: `{
59 "@type": "koral:token",
60 "wrap": {
61 "@type": "koral:termGroup",
62 "operands": [
63 {
64 "@type": "koral:term",
65 "foundry": "opennlp",
66 "key": "PIDAT",
67 "layer": "p",
68 "match": "match:eq"
69 },
70 {
71 "@type": "koral:term",
72 "foundry": "opennlp",
73 "key": "AdjType",
74 "layer": "p",
75 "match": "match:eq",
76 "value": "Pdt"
77 }
78 ],
79 "relation": "relation:and"
80 }
81 }`,
82 },
83 {
Akron0d9117c2025-05-27 15:20:21 +020084 name: "Simple A to B mapping with rewrites",
Akron32d53de2025-05-22 13:45:32 +020085 mappingID: "test-mapper",
86 opts: MappingOptions{
Akron0d9117c2025-05-27 15:20:21 +020087 Direction: AtoB,
88 AddRewrites: true,
Akron32d53de2025-05-22 13:45:32 +020089 },
90 input: `{
91 "@type": "koral:token",
92 "wrap": {
Akrona1a183f2025-05-26 17:47:33 +020093 "@type": "koral:term",
94 "foundry": "opennlp",
95 "key": "PIDAT",
96 "layer": "p",
97 "match": "match:eq"
Akron32d53de2025-05-22 13:45:32 +020098 }
99 }`,
100 expected: `{
101 "@type": "koral:token",
102 "wrap": {
Akron0d9117c2025-05-27 15:20:21 +0200103 "@type": "koral:termGroup",
104 "operands": [
105 {
106 "@type": "koral:term",
107 "foundry": "opennlp",
108 "key": "PIDAT",
109 "layer": "p",
110 "match": "match:eq"
111 },
112 {
113 "@type": "koral:term",
114 "foundry": "opennlp",
115 "key": "AdjType",
116 "layer": "p",
117 "match": "match:eq",
118 "value": "Pdt"
119 }
120 ],
121 "relation": "relation:and",
122 "rewrites": [
123 {
124 "@type": "koral:rewrite",
Akron2f93c582026-02-19 16:49:13 +0100125 "editor": "Koral-Mapper",
Akron8a87d9a2025-05-27 15:30:48 +0200126 "original": {
Akron0d9117c2025-05-27 15:20:21 +0200127 "@type": "koral:term",
128 "foundry": "opennlp",
129 "key": "PIDAT",
130 "layer": "p",
131 "match": "match:eq"
132 }
133 }
134 ]
Akron32d53de2025-05-22 13:45:32 +0200135 }
136 }`,
137 },
138 {
Akron0d9117c2025-05-27 15:20:21 +0200139 name: "Mapping with foundry override and rewrites",
Akron32d53de2025-05-22 13:45:32 +0200140 mappingID: "test-mapper",
141 opts: MappingOptions{
Akron0d9117c2025-05-27 15:20:21 +0200142 Direction: AtoB,
143 FoundryB: "custom",
144 AddRewrites: true,
Akron32d53de2025-05-22 13:45:32 +0200145 },
146 input: `{
147 "@type": "koral:token",
148 "wrap": {
149 "@type": "koral:term",
150 "foundry": "opennlp",
151 "key": "PIDAT",
152 "layer": "p",
153 "match": "match:eq"
154 }
155 }`,
156 expected: `{
157 "@type": "koral:token",
158 "wrap": {
159 "@type": "koral:termGroup",
160 "operands": [
161 {
162 "@type": "koral:term",
163 "foundry": "custom",
164 "key": "PIDAT",
165 "layer": "p",
166 "match": "match:eq"
167 },
168 {
169 "@type": "koral:term",
170 "foundry": "custom",
171 "key": "AdjType",
172 "layer": "p",
173 "match": "match:eq",
174 "value": "Pdt"
175 }
176 ],
Akron0d9117c2025-05-27 15:20:21 +0200177 "relation": "relation:and",
178 "rewrites": [
179 {
180 "@type": "koral:rewrite",
Akron2f93c582026-02-19 16:49:13 +0100181 "editor": "Koral-Mapper",
Akron8a87d9a2025-05-27 15:30:48 +0200182 "original": {
183 "@type": "koral:term",
184 "foundry": "opennlp",
185 "key": "PIDAT",
186 "layer": "p",
187 "match": "match:eq"
188 }
Akron0d9117c2025-05-27 15:20:21 +0200189 }
190 ]
Akron32d53de2025-05-22 13:45:32 +0200191 }
192 }`,
193 },
194 {
Akron0d9117c2025-05-27 15:20:21 +0200195 name: "B to A direction",
196 mappingID: "test-mapper",
197 opts: MappingOptions{
198 Direction: BtoA,
199 },
200 input: `{
201 "@type": "koral:token",
202 "wrap": {
203 "@type": "koral:term",
204 "foundry": "opennlp",
205 "key": "PIDAT",
206 "layer": "p",
207 "match": "match:eq"
208 }
209 }`,
210 expected: `{
211 "@type": "koral:token",
212 "wrap": {
213 "@type": "koral:term",
214 "foundry": "opennlp",
215 "key": "PIDAT",
216 "layer": "p",
217 "match": "match:eq"
218 }
219 }`,
220 expectError: false,
221 },
222 {
Akron32d53de2025-05-22 13:45:32 +0200223 name: "Invalid mapping ID",
224 mappingID: "nonexistent",
225 opts: MappingOptions{
226 Direction: AtoB,
227 },
228 input: `{
229 "@type": "koral:token",
230 "wrap": {
231 "@type": "koral:term",
232 "foundry": "opennlp",
233 "key": "PIDAT",
234 "layer": "p",
235 "match": "match:eq"
236 }
237 }`,
238 expectError: true,
239 },
240 {
241 name: "Invalid direction",
242 mappingID: "test-mapper",
243 opts: MappingOptions{
Akrona1a183f2025-05-26 17:47:33 +0200244 Direction: Direction(false),
Akron32d53de2025-05-22 13:45:32 +0200245 },
246 input: `{
247 "@type": "koral:token",
248 "wrap": {
249 "@type": "koral:term",
250 "foundry": "opennlp",
251 "key": "PIDAT",
252 "layer": "p",
253 "match": "match:eq"
254 }
255 }`,
Akrona1a183f2025-05-26 17:47:33 +0200256 expected: `{
257 "@type": "koral:token",
258 "wrap": {
259 "@type": "koral:term",
260 "foundry": "opennlp",
261 "key": "PIDAT",
262 "layer": "p",
263 "match": "match:eq"
264 }
265 }`,
266 expectError: false,
Akron32d53de2025-05-22 13:45:32 +0200267 },
Akron8f1970f2025-05-30 12:52:03 +0200268 {
269 name: "Query with legacy rewrite field names",
270 mappingID: "test-mapper",
271 opts: MappingOptions{
272 Direction: AtoB,
273 },
274 input: `{
275 "@type": "koral:token",
276 "rewrites": [
277 {
278 "@type": "koral:rewrite",
279 "_comment": "Legacy rewrite with source instead of editor",
280 "source": "LegacyEditor",
281 "operation": "operation:legacy",
282 "origin": "LegacySource"
283 }
284 ],
285 "wrap": {
286 "@type": "koral:term",
287 "foundry": "opennlp",
288 "key": "PIDAT",
289 "layer": "p",
290 "match": "match:eq"
291 }
292 }`,
293 expected: `{
294 "@type": "koral:token",
295 "rewrites": [
296 {
297 "@type": "koral:rewrite",
298 "_comment": "Legacy rewrite with source instead of editor",
299 "editor": "LegacyEditor",
300 "operation": "operation:legacy",
301 "src": "LegacySource"
302 }
303 ],
304 "wrap": {
305 "@type": "koral:termGroup",
306 "operands": [
307 {
308 "@type": "koral:term",
309 "foundry": "opennlp",
310 "key": "PIDAT",
311 "layer": "p",
312 "match": "match:eq"
313 },
314 {
315 "@type": "koral:term",
316 "foundry": "opennlp",
317 "key": "AdjType",
318 "layer": "p",
319 "match": "match:eq",
320 "value": "Pdt"
321 }
322 ],
323 "relation": "relation:and"
324 }
325 }`,
326 },
327 {
328 name: "Query with mixed legacy and modern rewrite fields",
329 mappingID: "test-mapper",
330 opts: MappingOptions{
331 Direction: AtoB,
332 },
333 input: `{
334 "@type": "koral:token",
335 "rewrites": [
336 {
337 "@type": "koral:rewrite",
338 "_comment": "Modern rewrite",
339 "editor": "ModernEditor",
340 "operation": "operation:modern",
341 "original": {
342 "@type": "koral:term",
343 "foundry": "original",
344 "key": "original-key"
345 }
346 },
347 {
348 "@type": "koral:rewrite",
349 "_comment": "Legacy rewrite with precedence test",
350 "editor": "PreferredEditor",
351 "source": "IgnoredSource",
352 "operation": "operation:precedence",
353 "original": "PreferredOriginal",
354 "src": "IgnoredSrc",
355 "origin": "IgnoredOrigin"
356 }
357 ],
358 "wrap": {
359 "@type": "koral:term",
360 "foundry": "opennlp",
361 "key": "PIDAT",
362 "layer": "p",
363 "match": "match:eq"
364 }
365 }`,
366 expected: `{
367 "@type": "koral:token",
368 "rewrites": [
369 {
370 "@type": "koral:rewrite",
371 "_comment": "Modern rewrite",
372 "editor": "ModernEditor",
373 "operation": "operation:modern",
374 "original": {
375 "@type": "koral:term",
376 "foundry": "original",
377 "key": "original-key"
378 }
379 },
380 {
381 "@type": "koral:rewrite",
382 "_comment": "Legacy rewrite with precedence test",
383 "editor": "PreferredEditor",
384 "operation": "operation:precedence",
385 "original": "PreferredOriginal"
386 }
387 ],
388 "wrap": {
389 "@type": "koral:termGroup",
390 "operands": [
391 {
392 "@type": "koral:term",
393 "foundry": "opennlp",
394 "key": "PIDAT",
395 "layer": "p",
396 "match": "match:eq"
397 },
398 {
399 "@type": "koral:term",
400 "foundry": "opennlp",
401 "key": "AdjType",
402 "layer": "p",
403 "match": "match:eq",
404 "value": "Pdt"
405 }
406 ],
407 "relation": "relation:and"
408 }
409 }`,
410 },
Akron32d53de2025-05-22 13:45:32 +0200411 }
412
413 for _, tt := range tests {
414 t.Run(tt.name, func(t *testing.T) {
415 // Parse input JSON
Akron121c66e2025-06-02 16:34:05 +0200416 var inputData any
Akron32d53de2025-05-22 13:45:32 +0200417 err := json.Unmarshal([]byte(tt.input), &inputData)
418 require.NoError(t, err)
419
420 // Apply mappings
Akron7b4984e2025-05-26 19:12:20 +0200421 result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
Akron32d53de2025-05-22 13:45:32 +0200422 if tt.expectError {
423 assert.Error(t, err)
424 return
425 }
426 require.NoError(t, err)
427
428 // Parse expected JSON
Akron121c66e2025-06-02 16:34:05 +0200429 var expectedData any
Akron32d53de2025-05-22 13:45:32 +0200430 err = json.Unmarshal([]byte(tt.expected), &expectedData)
431 require.NoError(t, err)
432
433 // Compare results
434 assert.Equal(t, expectedData, result)
435 })
436 }
437}
Akrond5850f82025-05-23 16:44:44 +0200438
Akroncc83eb52025-05-27 14:39:12 +0200439func TestTokenToTermGroupWithRewrites(t *testing.T) {
440 // Create test mapping list specifically for token to termGroup test
441 mappingList := config.MappingList{
442 ID: "test-token-to-termgroup",
443 FoundryA: "opennlp",
444 LayerA: "p",
Akron422cd252026-05-19 16:31:19 +0200445 FoundryB: "tt",
446 LayerB: "pos",
Akroncc83eb52025-05-27 14:39:12 +0200447 Mappings: []config.MappingRule{
448 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
449 },
450 }
451
452 // Create a new mapper
453 m, err := NewMapper([]config.MappingList{mappingList})
454 require.NoError(t, err)
455
456 input := `{
457 "@type": "koral:token",
458 "rewrites": [
459 {
460 "@type": "koral:rewrite",
461 "_comment": "This rewrite should be preserved",
462 "editor": "TestEditor",
463 "operation": "operation:test",
464 "src": "TestSource"
465 }
466 ],
467 "wrap": {
468 "@type": "koral:term",
469 "foundry": "opennlp",
470 "key": "PIDAT",
471 "layer": "p",
472 "match": "match:eq"
473 }
474 }`
475
476 expected := `{
477 "@type": "koral:token",
478 "rewrites": [
479 {
480 "@type": "koral:rewrite",
481 "_comment": "This rewrite should be preserved",
482 "editor": "TestEditor",
483 "operation": "operation:test",
484 "src": "TestSource"
485 }
486 ],
487 "wrap": {
488 "@type": "koral:termGroup",
489 "operands": [
490 {
491 "@type": "koral:term",
492 "foundry": "opennlp",
493 "key": "PIDAT",
494 "layer": "p",
495 "match": "match:eq"
496 },
497 {
498 "@type": "koral:term",
499 "foundry": "opennlp",
500 "key": "AdjType",
501 "layer": "p",
502 "match": "match:eq",
503 "value": "Pdt"
504 }
505 ],
506 "relation": "relation:and"
507 }
508 }`
509
510 // Parse input JSON
Akron121c66e2025-06-02 16:34:05 +0200511 var inputData any
Akroncc83eb52025-05-27 14:39:12 +0200512 err = json.Unmarshal([]byte(input), &inputData)
513 require.NoError(t, err)
514
515 // Apply mappings
516 result, err := m.ApplyQueryMappings("test-token-to-termgroup", MappingOptions{Direction: AtoB}, inputData)
517 require.NoError(t, err)
518
519 // Parse expected JSON
Akron121c66e2025-06-02 16:34:05 +0200520 var expectedData any
Akroncc83eb52025-05-27 14:39:12 +0200521 err = json.Unmarshal([]byte(expected), &expectedData)
522 require.NoError(t, err)
523
524 // Compare results
525 assert.Equal(t, expectedData, result)
526}
527
Akrond5850f82025-05-23 16:44:44 +0200528func TestMatchComplexPatterns(t *testing.T) {
529 tests := []struct {
530 name string
531 pattern ast.Pattern
532 replacement ast.Replacement
533 input ast.Node
534 expected ast.Node
535 }{
536 {
537 name: "Deep nested pattern with mixed operators",
538 pattern: ast.Pattern{
539 Root: &ast.TermGroup{
540 Operands: []ast.Node{
541 &ast.Term{
542 Key: "A",
543 Match: ast.MatchEqual,
544 },
545 &ast.TermGroup{
546 Operands: []ast.Node{
547 &ast.Term{
548 Key: "B",
549 Match: ast.MatchEqual,
550 },
551 &ast.TermGroup{
552 Operands: []ast.Node{
553 &ast.Term{
554 Key: "C",
555 Match: ast.MatchEqual,
556 },
557 &ast.Term{
558 Key: "D",
559 Match: ast.MatchEqual,
560 },
561 },
562 Relation: ast.AndRelation,
563 },
564 },
565 Relation: ast.OrRelation,
566 },
567 },
568 Relation: ast.AndRelation,
569 },
570 },
571 replacement: ast.Replacement{
572 Root: &ast.Term{
573 Key: "RESULT",
574 Match: ast.MatchEqual,
575 },
576 },
577 input: &ast.TermGroup{
578 Operands: []ast.Node{
579 &ast.Term{
580 Key: "A",
581 Match: ast.MatchEqual,
582 },
583 &ast.TermGroup{
584 Operands: []ast.Node{
585 &ast.Term{
586 Key: "C",
587 Match: ast.MatchEqual,
588 },
589 &ast.Term{
590 Key: "D",
591 Match: ast.MatchEqual,
592 },
593 },
594 Relation: ast.AndRelation,
595 },
596 },
597 Relation: ast.AndRelation,
598 },
599 expected: &ast.Term{
600 Key: "RESULT",
601 Match: ast.MatchEqual,
602 },
603 },
604 }
605
606 for _, tt := range tests {
607 t.Run(tt.name, func(t *testing.T) {
608 m, err := matcher.NewMatcher(tt.pattern, tt.replacement)
609 require.NoError(t, err)
610 result := m.Replace(tt.input)
611 assert.Equal(t, tt.expected, result)
612 })
613 }
614}
615
616func TestInvalidPatternReplacement(t *testing.T) {
Akrona00d4752025-05-26 17:34:36 +0200617 // Create test mapping list
618 mappingList := config.MappingList{
619 ID: "test-mapper",
620 FoundryA: "opennlp",
621 LayerA: "p",
622 FoundryB: "upos",
623 LayerB: "p",
624 Mappings: []config.MappingRule{
625 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
626 },
627 }
Akrond5850f82025-05-23 16:44:44 +0200628
629 // Create a new mapper
Akrona00d4752025-05-26 17:34:36 +0200630 m, err := NewMapper([]config.MappingList{mappingList})
Akrond5850f82025-05-23 16:44:44 +0200631 require.NoError(t, err)
632
633 tests := []struct {
634 name string
635 input string
636 expectError bool
637 errorMsg string
638 }{
639 {
640 name: "Invalid input - empty term group",
641 input: `{
642 "@type": "koral:token",
643 "wrap": {
644 "@type": "koral:termGroup",
645 "operands": [],
646 "relation": "relation:and"
647 }
648 }`,
649 expectError: true,
650 errorMsg: "failed to parse JSON into AST: error parsing wrapped node: term group must have at least one operand",
651 },
652 }
653
654 for _, tt := range tests {
655 t.Run(tt.name, func(t *testing.T) {
656 var inputData any
657 err := json.Unmarshal([]byte(tt.input), &inputData)
658 require.NoError(t, err)
659
Akron7b4984e2025-05-26 19:12:20 +0200660 result, err := m.ApplyQueryMappings("test-mapper", MappingOptions{Direction: AtoB}, inputData)
Akrond5850f82025-05-23 16:44:44 +0200661 if tt.expectError {
662 assert.Error(t, err)
663 assert.Equal(t, tt.errorMsg, err.Error())
664 assert.Nil(t, result)
665 } else {
666 assert.NoError(t, err)
667 assert.NotNil(t, result)
668 }
669 })
670 }
671}
Akron7b4984e2025-05-26 19:12:20 +0200672
Akron958fc472026-05-19 13:58:52 +0200673func TestMultiFieldRewritesAreReversible(t *testing.T) {
674 mappingList := config.MappingList{
675 ID: "multi-field",
676 FoundryA: "opennlp",
677 LayerA: "p",
678 FoundryB: "upos",
679 LayerB: "pos",
680 Mappings: []config.MappingRule{
681 "[DET] <> [PRON]",
682 },
683 }
684
685 m, err := NewMapper([]config.MappingList{mappingList})
686 require.NoError(t, err)
687
688 tests := []struct {
689 name string
690 opts MappingOptions
691 input string
692 expected string
693 }{
694 {
Akrona0174352026-05-19 17:04:42 +0200695 name: "Multi-field change: single rewrite with full original",
Akron958fc472026-05-19 13:58:52 +0200696 opts: MappingOptions{
697 Direction: AtoB,
698 AddRewrites: true,
699 },
700 input: `{
701 "@type": "koral:token",
702 "wrap": {
703 "@type": "koral:term",
704 "foundry": "opennlp",
705 "key": "DET",
706 "layer": "p",
707 "match": "match:eq"
708 }
709 }`,
710 expected: `{
711 "@type": "koral:token",
712 "wrap": {
713 "@type": "koral:term",
714 "foundry": "upos",
715 "key": "PRON",
716 "layer": "pos",
717 "match": "match:eq",
718 "rewrites": [
719 {
720 "@type": "koral:rewrite",
721 "editor": "Koral-Mapper",
Akrona0174352026-05-19 17:04:42 +0200722 "original": {
723 "@type": "koral:term",
724 "foundry": "opennlp",
725 "key": "DET",
726 "layer": "p",
727 "match": "match:eq"
728 }
Akron958fc472026-05-19 13:58:52 +0200729 }
730 ]
731 }
732 }`,
733 },
734 {
Akrona0174352026-05-19 17:04:42 +0200735 name: "Reverse direction: single rewrite with full original",
Akron958fc472026-05-19 13:58:52 +0200736 opts: MappingOptions{
737 Direction: BtoA,
738 AddRewrites: true,
739 },
740 input: `{
741 "@type": "koral:token",
742 "wrap": {
743 "@type": "koral:term",
744 "foundry": "upos",
745 "key": "PRON",
746 "layer": "pos",
747 "match": "match:eq"
748 }
749 }`,
750 expected: `{
751 "@type": "koral:token",
752 "wrap": {
753 "@type": "koral:term",
754 "foundry": "opennlp",
755 "key": "DET",
756 "layer": "p",
757 "match": "match:eq",
758 "rewrites": [
759 {
760 "@type": "koral:rewrite",
761 "editor": "Koral-Mapper",
Akrona0174352026-05-19 17:04:42 +0200762 "original": {
763 "@type": "koral:term",
764 "foundry": "upos",
765 "key": "PRON",
766 "layer": "pos",
767 "match": "match:eq"
768 }
Akron958fc472026-05-19 13:58:52 +0200769 }
770 ]
771 }
772 }`,
773 },
774 }
775
776 for _, tt := range tests {
777 t.Run(tt.name, func(t *testing.T) {
778 var inputData any
779 err := json.Unmarshal([]byte(tt.input), &inputData)
780 require.NoError(t, err)
781
782 result, err := m.ApplyQueryMappings("multi-field", tt.opts, inputData)
783 require.NoError(t, err)
784
785 var expectedData any
786 err = json.Unmarshal([]byte(tt.expected), &expectedData)
787 require.NoError(t, err)
788
789 assert.Equal(t, expectedData, result)
790 })
791 }
792}
793
794func TestSingleFieldRewrite(t *testing.T) {
795 mappingList := config.MappingList{
796 ID: "same-fl",
797 FoundryA: "opennlp",
798 LayerA: "p",
799 FoundryB: "opennlp",
Akron422cd252026-05-19 16:31:19 +0200800 LayerB: "pos",
Akron958fc472026-05-19 13:58:52 +0200801 Mappings: []config.MappingRule{
802 "[DET] <> [PRON]",
803 },
804 }
805
806 m, err := NewMapper([]config.MappingList{mappingList})
807 require.NoError(t, err)
808
809 var inputData any
810 err = json.Unmarshal([]byte(`{
811 "@type": "koral:token",
812 "wrap": {
813 "@type": "koral:term",
814 "foundry": "opennlp",
815 "key": "DET",
816 "layer": "p",
817 "match": "match:eq"
818 }
819 }`), &inputData)
820 require.NoError(t, err)
821
822 result, err := m.ApplyQueryMappings("same-fl", MappingOptions{
823 Direction: AtoB,
824 AddRewrites: true,
825 }, inputData)
826 require.NoError(t, err)
827
828 var expectedData any
829 err = json.Unmarshal([]byte(`{
830 "@type": "koral:token",
831 "wrap": {
832 "@type": "koral:term",
833 "foundry": "opennlp",
834 "key": "PRON",
Akron422cd252026-05-19 16:31:19 +0200835 "layer": "pos",
Akron958fc472026-05-19 13:58:52 +0200836 "match": "match:eq",
837 "rewrites": [
838 {
839 "@type": "koral:rewrite",
840 "editor": "Koral-Mapper",
Akrona0174352026-05-19 17:04:42 +0200841 "original": {
842 "@type": "koral:term",
843 "foundry": "opennlp",
844 "key": "DET",
845 "layer": "p",
846 "match": "match:eq"
847 }
Akron958fc472026-05-19 13:58:52 +0200848 }
849 ]
850 }
851 }`), &expectedData)
852 require.NoError(t, err)
853
854 assert.Equal(t, expectedData, result)
855}
856
Akrona0174352026-05-19 17:04:42 +0200857func TestBuildRewritesSingleObjectRewrite(t *testing.T) {
Akron958fc472026-05-19 13:58:52 +0200858 tests := []struct {
Akrona0174352026-05-19 17:04:42 +0200859 name string
860 original *ast.Term
861 new_ *ast.Term
Akron958fc472026-05-19 13:58:52 +0200862 }{
863 {
Akrona0174352026-05-19 17:04:42 +0200864 name: "All fields change",
865 original: &ast.Term{Foundry: "a", Layer: "l1", Key: "k1", Value: "v1", Match: ast.MatchEqual},
866 new_: &ast.Term{Foundry: "b", Layer: "l2", Key: "k2", Value: "v2", Match: ast.MatchEqual},
Akron958fc472026-05-19 13:58:52 +0200867 },
868 {
Akrona0174352026-05-19 17:04:42 +0200869 name: "Single field injection: empty value becomes non-empty",
870 original: &ast.Term{Foundry: "a", Layer: "l", Key: "k", Match: ast.MatchEqual},
871 new_: &ast.Term{Foundry: "a", Layer: "l", Key: "k", Value: "v", Match: ast.MatchEqual},
Akron958fc472026-05-19 13:58:52 +0200872 },
873 {
Akrona0174352026-05-19 17:04:42 +0200874 name: "Single field deletion: non-empty value becomes empty",
875 original: &ast.Term{Foundry: "a", Layer: "l", Key: "k", Value: "v", Match: ast.MatchEqual},
876 new_: &ast.Term{Foundry: "a", Layer: "l", Key: "k", Match: ast.MatchEqual},
Akron958fc472026-05-19 13:58:52 +0200877 },
878 }
879
880 for _, tt := range tests {
881 t.Run(tt.name, func(t *testing.T) {
882 rewrites := buildRewrites(tt.original, tt.new_)
Akrona0174352026-05-19 17:04:42 +0200883 require.Len(t, rewrites, 1, "one rule application should produce exactly one rewrite")
884 rw := rewrites[0]
885 assert.Equal(t, RewriteEditor, rw.Editor)
886 assert.Empty(t, rw.Scope, "object-level rewrite should have no scope")
887 assert.NotNil(t, rw.Original, "rewrite should contain the full original")
888 originalMap, ok := rw.Original.(map[string]any)
889 require.True(t, ok)
890 assert.Equal(t, "koral:term", originalMap["@type"])
Akron958fc472026-05-19 13:58:52 +0200891 })
892 }
893}
894
Akron7b4984e2025-05-26 19:12:20 +0200895func TestQueryWrapperMappings(t *testing.T) {
896
897 mappingList := config.MappingList{
898 ID: "test-wrapper",
899 FoundryA: "opennlp",
900 LayerA: "orth",
901 FoundryB: "upos",
902 LayerB: "orth",
903 Mappings: []config.MappingRule{
904 "[opennlp/orth=Baum] <> [opennlp/orth=X]",
905 },
906 }
907
908 // Create a new mapper
909 m, err := NewMapper([]config.MappingList{mappingList})
910 require.NoError(t, err)
911
912 tests := []struct {
913 name string
914 mappingID string
915 opts MappingOptions
916 input string
917 expected string
918 expectError bool
919 }{
920 {
Akroncc83eb52025-05-27 14:39:12 +0200921 name: "Query wrapper case with rewrites preservation",
Akron7b4984e2025-05-26 19:12:20 +0200922 mappingID: "test-wrapper",
923 opts: MappingOptions{
924 Direction: AtoB,
925 },
926 input: `{
927 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
928 "collection": {
929 "@type": "koral:doc",
930 "key": "availability",
931 "match": "match:eq",
Akron7b4984e2025-05-26 19:12:20 +0200932 "type": "type:regex",
933 "value": "CC.*"
934 },
935 "query": {
936 "@type": "koral:token",
Akroncc83eb52025-05-27 14:39:12 +0200937 "rewrites": [
938 {
939 "@type": "koral:rewrite",
940 "_comment": "Original rewrite that should be preserved",
941 "editor": "Original",
942 "operation": "operation:original",
943 "src": "Original"
944 }
945 ],
Akron7b4984e2025-05-26 19:12:20 +0200946 "wrap": {
947 "@type": "koral:term",
948 "foundry": "opennlp",
949 "key": "Baum",
950 "layer": "orth",
951 "match": "match:eq"
952 }
953 }
954 }`,
955 expected: `{
956 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
957 "collection": {
958 "@type": "koral:doc",
959 "key": "availability",
960 "match": "match:eq",
Akron7b4984e2025-05-26 19:12:20 +0200961 "type": "type:regex",
962 "value": "CC.*"
963 },
964 "query": {
965 "@type": "koral:token",
Akroncc83eb52025-05-27 14:39:12 +0200966 "rewrites": [
967 {
968 "@type": "koral:rewrite",
969 "_comment": "Original rewrite that should be preserved",
970 "editor": "Original",
971 "operation": "operation:original",
972 "src": "Original"
973 }
974 ],
Akron7b4984e2025-05-26 19:12:20 +0200975 "wrap": {
976 "@type": "koral:term",
977 "foundry": "opennlp",
978 "key": "X",
979 "layer": "orth",
980 "match": "match:eq"
981 }
982 }
983 }`,
984 },
985 {
986 name: "Empty query field",
987 mappingID: "test-wrapper",
988 opts: MappingOptions{
989 Direction: AtoB,
990 },
991 input: `{
992 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
993 "query": null
994 }`,
995 expected: `{
996 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
997 "query": null
998 }`,
999 },
1000 {
1001 name: "Missing query field",
1002 mappingID: "test-wrapper",
1003 opts: MappingOptions{
1004 Direction: AtoB,
1005 },
1006 input: `{
1007 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
1008 "collection": {
1009 "@type": "koral:doc"
1010 }
1011 }`,
1012 expected: `{
1013 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
1014 "collection": {
1015 "@type": "koral:doc"
1016 }
1017 }`,
1018 },
1019 {
1020 name: "Query field with non-object value",
1021 mappingID: "test-wrapper",
1022 opts: MappingOptions{
1023 Direction: AtoB,
1024 },
1025 input: `{
1026 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
1027 "query": "invalid"
1028 }`,
1029 expected: `{
1030 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
1031 "query": "invalid"
1032 }`,
1033 },
Akroncc83eb52025-05-27 14:39:12 +02001034 {
1035 name: "Query with rewrites in nested token",
1036 mappingID: "test-wrapper",
1037 opts: MappingOptions{
1038 Direction: AtoB,
1039 },
1040 input: `{
1041 "@type": "koral:token",
1042 "rewrites": [
1043 {
1044 "@type": "koral:rewrite",
1045 "_comment": "Nested rewrite that should be preserved",
1046 "editor": "Nested",
1047 "operation": "operation:nested",
1048 "src": "Nested"
1049 }
1050 ],
1051 "wrap": {
1052 "@type": "koral:term",
1053 "foundry": "opennlp",
1054 "key": "Baum",
1055 "layer": "orth",
1056 "match": "match:eq"
1057 }
1058 }`,
1059 expected: `{
1060 "@type": "koral:token",
1061 "rewrites": [
1062 {
1063 "@type": "koral:rewrite",
1064 "_comment": "Nested rewrite that should be preserved",
1065 "editor": "Nested",
1066 "operation": "operation:nested",
1067 "src": "Nested"
1068 }
1069 ],
1070 "wrap": {
1071 "@type": "koral:term",
1072 "foundry": "opennlp",
1073 "key": "X",
1074 "layer": "orth",
1075 "match": "match:eq"
1076 }
1077 }`,
1078 },
Akron7b4984e2025-05-26 19:12:20 +02001079 }
1080
1081 for _, tt := range tests {
1082 t.Run(tt.name, func(t *testing.T) {
1083 // Parse input JSON
Akron121c66e2025-06-02 16:34:05 +02001084 var inputData any
Akron7b4984e2025-05-26 19:12:20 +02001085 err := json.Unmarshal([]byte(tt.input), &inputData)
1086 require.NoError(t, err)
1087
1088 // Apply mappings
1089 result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
1090 if tt.expectError {
1091 assert.Error(t, err)
1092 return
1093 }
1094 require.NoError(t, err)
1095
1096 // Parse expected JSON
Akron121c66e2025-06-02 16:34:05 +02001097 var expectedData any
Akron7b4984e2025-05-26 19:12:20 +02001098 err = json.Unmarshal([]byte(tt.expected), &expectedData)
1099 require.NoError(t, err)
1100
1101 // Compare results
1102 assert.Equal(t, expectedData, result)
1103 })
1104 }
1105}
Akron422cd252026-05-19 16:31:19 +02001106
1107func TestIdenticalEffectiveFoundryLayerRejected(t *testing.T) {
1108 tests := []struct {
1109 name string
1110 list config.MappingList
1111 opts MappingOptions
1112 wantErr string
1113 }{
1114 {
1115 name: "YAML defaults identical",
1116 list: config.MappingList{
1117 ID: "test", FoundryA: "opennlp", LayerA: "p",
1118 FoundryB: "opennlp", LayerB: "p",
1119 Mappings: []config.MappingRule{"[A] <> [B]"},
1120 },
1121 opts: MappingOptions{Direction: AtoB},
1122 wantErr: "identical source and target",
1123 },
1124 {
1125 name: "Query param override makes them identical",
1126 list: config.MappingList{
1127 ID: "test", FoundryA: "opennlp", LayerA: "p",
1128 FoundryB: "upos", LayerB: "p",
1129 Mappings: []config.MappingRule{"[A] <> [B]"},
1130 },
1131 opts: MappingOptions{Direction: AtoB, FoundryB: "opennlp"},
1132 wantErr: "identical source and target",
1133 },
1134 {
1135 name: "Query param override resolves the conflict",
1136 list: config.MappingList{
1137 ID: "test", FoundryA: "opennlp", LayerA: "p",
1138 FoundryB: "opennlp", LayerB: "p",
1139 Mappings: []config.MappingRule{"[A] <> [B]"},
1140 },
1141 opts: MappingOptions{Direction: AtoB, FoundryB: "upos"},
1142 wantErr: "",
1143 },
1144 {
1145 name: "Different foundry same layer is allowed",
1146 list: config.MappingList{
1147 ID: "test", FoundryA: "opennlp", LayerA: "p",
1148 FoundryB: "upos", LayerB: "p",
1149 Mappings: []config.MappingRule{"[A] <> [B]"},
1150 },
1151 opts: MappingOptions{Direction: AtoB},
1152 wantErr: "",
1153 },
1154 {
1155 name: "Both foundries empty is allowed",
1156 list: config.MappingList{
1157 ID: "test",
1158 Mappings: []config.MappingRule{"[A] <> [B]"},
1159 },
1160 opts: MappingOptions{Direction: AtoB},
1161 wantErr: "",
1162 },
1163 }
1164
1165 for _, tt := range tests {
1166 t.Run(tt.name, func(t *testing.T) {
1167 m, err := NewMapper([]config.MappingList{tt.list})
1168 require.NoError(t, err)
1169
1170 input := map[string]any{
1171 "@type": "koral:token",
1172 "wrap": map[string]any{
1173 "@type": "koral:term",
1174 "key": "A",
1175 },
1176 }
1177
1178 _, err = m.ApplyQueryMappings("test", tt.opts, input)
1179 if tt.wantErr != "" {
1180 require.Error(t, err)
1181 assert.Contains(t, err.Error(), tt.wantErr)
1182 } else {
1183 assert.NoError(t, err)
1184 }
1185 })
1186 }
1187}
1188
1189func TestIdenticalEffectiveFieldRejected(t *testing.T) {
1190 tests := []struct {
1191 name string
1192 list config.MappingList
1193 opts MappingOptions
1194 wantErr string
1195 }{
1196 {
1197 name: "YAML defaults identical",
1198 list: config.MappingList{
1199 ID: "test", Type: "corpus",
1200 FieldA: "textClass", FieldB: "textClass",
1201 Mappings: []config.MappingRule{"novel <> fiction"},
1202 },
1203 opts: MappingOptions{Direction: AtoB},
1204 wantErr: "identical source and target field",
1205 },
1206 {
1207 name: "Query param override makes them identical",
1208 list: config.MappingList{
1209 ID: "test", Type: "corpus",
1210 FieldA: "textClass", FieldB: "genre",
1211 Mappings: []config.MappingRule{"novel <> fiction"},
1212 },
1213 opts: MappingOptions{Direction: AtoB, FieldB: "textClass"},
1214 wantErr: "identical source and target field",
1215 },
1216 {
1217 name: "Query param override resolves the conflict",
1218 list: config.MappingList{
1219 ID: "test", Type: "corpus",
1220 FieldA: "textClass", FieldB: "textClass",
1221 Mappings: []config.MappingRule{"novel <> fiction"},
1222 },
1223 opts: MappingOptions{Direction: AtoB, FieldB: "genre"},
1224 wantErr: "",
1225 },
1226 {
1227 name: "Different fields is allowed",
1228 list: config.MappingList{
1229 ID: "test", Type: "corpus",
1230 FieldA: "textClass", FieldB: "genre",
1231 Mappings: []config.MappingRule{"novel <> fiction"},
1232 },
1233 opts: MappingOptions{Direction: AtoB},
1234 wantErr: "",
1235 },
1236 {
1237 name: "Both fields empty is allowed",
1238 list: config.MappingList{
1239 ID: "test", Type: "corpus",
1240 Mappings: []config.MappingRule{"textClass=novel <> genre=fiction"},
1241 },
1242 opts: MappingOptions{Direction: AtoB},
1243 wantErr: "",
1244 },
1245 }
1246
1247 for _, tt := range tests {
1248 t.Run(tt.name, func(t *testing.T) {
1249 m, err := NewMapper([]config.MappingList{tt.list})
1250 require.NoError(t, err)
1251
1252 input := map[string]any{
1253 "collection": map[string]any{
1254 "@type": "koral:doc",
1255 "key": "textClass",
1256 "value": "novel",
1257 "match": "match:eq",
1258 },
1259 }
1260
1261 _, err = m.ApplyQueryMappings("test", tt.opts, input)
1262 if tt.wantErr != "" {
1263 require.Error(t, err)
1264 assert.Contains(t, err.Error(), tt.wantErr)
1265 } else {
1266 assert.NoError(t, err)
1267 }
1268 })
1269 }
1270}
1271
1272func TestIdenticalEffectiveValuesResponseEndpoint(t *testing.T) {
1273 t.Run("annotation response rejects identical effective foundry/layer", func(t *testing.T) {
1274 m, err := NewMapper([]config.MappingList{{
1275 ID: "test", FoundryA: "marmot", LayerA: "p",
1276 FoundryB: "marmot", LayerB: "p",
1277 Mappings: []config.MappingRule{"[DET] <> [PRON]"},
1278 }})
1279 require.NoError(t, err)
1280
1281 input := map[string]any{
1282 "snippet": `<span title="marmot/p:DET">Der</span>`,
1283 }
1284
1285 _, err = m.ApplyResponseMappings("test", MappingOptions{Direction: AtoB}, input)
1286 require.Error(t, err)
1287 assert.Contains(t, err.Error(), "identical source and target")
1288 })
1289
1290 t.Run("corpus response rejects identical effective field", func(t *testing.T) {
1291 m, err := NewMapper([]config.MappingList{{
1292 ID: "test", Type: "corpus",
1293 FieldA: "textClass", FieldB: "textClass",
1294 Mappings: []config.MappingRule{"novel <> fiction"},
1295 }})
1296 require.NoError(t, err)
1297
1298 input := map[string]any{
1299 "fields": []any{
1300 map[string]any{
1301 "@type": "koral:field",
1302 "key": "textClass",
1303 "value": "novel",
1304 "type": "type:string",
1305 },
1306 },
1307 }
1308
1309 _, err = m.ApplyResponseMappings("test", MappingOptions{Direction: AtoB}, input)
1310 require.Error(t, err)
1311 assert.Contains(t, err.Error(), "identical source and target field")
1312 })
1313}
Akronb4e36f62026-05-21 11:44:25 +02001314
1315func newSTTSUPoSMapper(t *testing.T) *Mapper {
1316 t.Helper()
1317 data, err := os.ReadFile("../mappings/stts-upos.yaml")
1318 require.NoError(t, err, "failed to read stts-upos.yaml from disk")
1319
1320 var mappingList config.MappingList
1321 err = yaml.Unmarshal(data, &mappingList)
1322 require.NoError(t, err, "failed to parse stts-upos.yaml")
1323
1324 m, err := NewMapper([]config.MappingList{mappingList})
1325 require.NoError(t, err)
1326 return m
1327}
1328
1329func TestFallbackRules(t *testing.T) {
1330 m := newSTTSUPoSMapper(t)
1331
1332 t.Run("Bare ADJ (BtoA) maps to ADJA|ADJD disjunction", func(t *testing.T) {
1333 input := `{
1334 "@type": "koral:token",
1335 "wrap": {
1336 "@type": "koral:term",
1337 "foundry": "upos",
1338 "key": "ADJ",
1339 "layer": "p",
1340 "match": "match:eq"
1341 }
1342 }`
1343 var inputData any
1344 err := json.Unmarshal([]byte(input), &inputData)
1345 require.NoError(t, err)
1346
1347 result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
1348 require.NoError(t, err)
1349
1350 resultMap := result.(map[string]any)
1351 wrap := resultMap["wrap"].(map[string]any)
1352 assert.Equal(t, "koral:termGroup", wrap["@type"])
1353 assert.Equal(t, "relation:or", wrap["relation"])
1354 operands := wrap["operands"].([]any)
1355 assert.Len(t, operands, 2)
1356 keys := []string{
1357 operands[0].(map[string]any)["key"].(string),
1358 operands[1].(map[string]any)["key"].(string),
1359 }
1360 assert.Contains(t, keys, "ADJA")
1361 assert.Contains(t, keys, "ADJD")
1362 })
1363
1364 t.Run("ADJ & Variant=Short (BtoA) maps to ADJD only", func(t *testing.T) {
1365 input := `{
1366 "@type": "koral:token",
1367 "wrap": {
1368 "@type": "koral:termGroup",
1369 "operands": [
1370 {
1371 "@type": "koral:term",
1372 "foundry": "upos",
1373 "key": "ADJ",
1374 "layer": "p",
1375 "match": "match:eq"
1376 },
1377 {
1378 "@type": "koral:term",
1379 "foundry": "upos",
1380 "key": "Short",
1381 "layer": "Variant",
1382 "match": "match:eq"
1383 }
1384 ],
1385 "relation": "relation:and"
1386 }
1387 }`
1388 var inputData any
1389 err := json.Unmarshal([]byte(input), &inputData)
1390 require.NoError(t, err)
1391
1392 result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
1393 require.NoError(t, err)
1394
1395 resultMap := result.(map[string]any)
1396 wrap := resultMap["wrap"].(map[string]any)
1397 assert.Equal(t, "koral:term", wrap["@type"])
1398 assert.Equal(t, "ADJD", wrap["key"])
1399 })
1400
1401 t.Run("Bare DET (BtoA) maps to DET subtypes disjunction", func(t *testing.T) {
1402 input := `{
1403 "@type": "koral:token",
1404 "wrap": {
1405 "@type": "koral:term",
1406 "foundry": "upos",
1407 "key": "DET",
1408 "layer": "p",
1409 "match": "match:eq"
1410 }
1411 }`
1412 var inputData any
1413 err := json.Unmarshal([]byte(input), &inputData)
1414 require.NoError(t, err)
1415
1416 result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
1417 require.NoError(t, err)
1418
1419 resultMap := result.(map[string]any)
1420 wrap := resultMap["wrap"].(map[string]any)
1421 assert.Equal(t, "koral:termGroup", wrap["@type"])
1422 assert.Equal(t, "relation:or", wrap["relation"])
1423 operands := wrap["operands"].([]any)
1424 assert.Len(t, operands, 7)
1425 var keys []string
1426 for _, op := range operands {
1427 keys = append(keys, op.(map[string]any)["key"].(string))
1428 }
1429 assert.Contains(t, keys, "ART")
1430 assert.Contains(t, keys, "PDAT")
1431 assert.Contains(t, keys, "PWAT")
1432 })
1433
1434 t.Run("DET & PronType=Art (BtoA) maps to ART only", func(t *testing.T) {
1435 input := `{
1436 "@type": "koral:token",
1437 "wrap": {
1438 "@type": "koral:termGroup",
1439 "operands": [
1440 {
1441 "@type": "koral:term",
1442 "foundry": "upos",
1443 "key": "DET",
1444 "layer": "p",
1445 "match": "match:eq"
1446 },
1447 {
1448 "@type": "koral:term",
1449 "foundry": "upos",
1450 "key": "Art",
1451 "layer": "PronType",
1452 "match": "match:eq"
1453 }
1454 ],
1455 "relation": "relation:and"
1456 }
1457 }`
1458 var inputData any
1459 err := json.Unmarshal([]byte(input), &inputData)
1460 require.NoError(t, err)
1461
1462 result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
1463 require.NoError(t, err)
1464
1465 resultMap := result.(map[string]any)
1466 wrap := resultMap["wrap"].(map[string]any)
1467 assert.Equal(t, "koral:term", wrap["@type"])
1468 assert.Equal(t, "ART", wrap["key"])
1469 })
1470
1471 t.Run("Bare SCONJ (BtoA) maps to KOUI|KOUS disjunction", func(t *testing.T) {
1472 input := `{
1473 "@type": "koral:token",
1474 "wrap": {
1475 "@type": "koral:term",
1476 "foundry": "upos",
1477 "key": "SCONJ",
1478 "layer": "p",
1479 "match": "match:eq"
1480 }
1481 }`
1482 var inputData any
1483 err := json.Unmarshal([]byte(input), &inputData)
1484 require.NoError(t, err)
1485
1486 result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
1487 require.NoError(t, err)
1488
1489 resultMap := result.(map[string]any)
1490 wrap := resultMap["wrap"].(map[string]any)
1491 assert.Equal(t, "koral:termGroup", wrap["@type"])
1492 assert.Equal(t, "relation:or", wrap["relation"])
1493 operands := wrap["operands"].([]any)
1494 assert.Len(t, operands, 2)
1495 })
1496
1497 t.Run("Bare VERB (BtoA) maps to STTS verb subtypes disjunction", func(t *testing.T) {
1498 input := `{
1499 "@type": "koral:token",
1500 "wrap": {
1501 "@type": "koral:term",
1502 "foundry": "upos",
1503 "key": "VERB",
1504 "layer": "p",
1505 "match": "match:eq"
1506 }
1507 }`
1508 var inputData any
1509 err := json.Unmarshal([]byte(input), &inputData)
1510 require.NoError(t, err)
1511
1512 result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
1513 require.NoError(t, err)
1514
1515 resultMap := result.(map[string]any)
1516 wrap := resultMap["wrap"].(map[string]any)
1517 assert.Equal(t, "koral:termGroup", wrap["@type"])
1518 assert.Equal(t, "relation:or", wrap["relation"])
1519 operands := wrap["operands"].([]any)
1520 assert.Len(t, operands, 8)
1521 })
1522
1523 t.Run("Bare AUX (BtoA) maps to AUX subtypes disjunction", func(t *testing.T) {
1524 input := `{
1525 "@type": "koral:token",
1526 "wrap": {
1527 "@type": "koral:term",
1528 "foundry": "upos",
1529 "key": "AUX",
1530 "layer": "p",
1531 "match": "match:eq"
1532 }
1533 }`
1534 var inputData any
1535 err := json.Unmarshal([]byte(input), &inputData)
1536 require.NoError(t, err)
1537
1538 result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
1539 require.NoError(t, err)
1540
1541 resultMap := result.(map[string]any)
1542 wrap := resultMap["wrap"].(map[string]any)
1543 assert.Equal(t, "koral:termGroup", wrap["@type"])
1544 assert.Equal(t, "relation:or", wrap["relation"])
1545 operands := wrap["operands"].([]any)
1546 assert.Len(t, operands, 4)
1547 })
1548
1549 t.Run("Forward direction AtoB: ADJA maps to ADJ", func(t *testing.T) {
1550 input := `{
1551 "@type": "koral:token",
1552 "wrap": {
1553 "@type": "koral:term",
1554 "foundry": "opennlp",
1555 "key": "ADJA",
1556 "layer": "p",
1557 "match": "match:eq"
1558 }
1559 }`
1560 var inputData any
1561 err := json.Unmarshal([]byte(input), &inputData)
1562 require.NoError(t, err)
1563
1564 result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: AtoB}, inputData)
1565 require.NoError(t, err)
1566
1567 resultMap := result.(map[string]any)
1568 wrap := resultMap["wrap"].(map[string]any)
1569 assert.Equal(t, "koral:term", wrap["@type"])
1570 assert.Equal(t, "ADJ", wrap["key"])
1571 })
1572
1573 t.Run("Forward direction AtoB: ART maps to DET & PronType=Art", func(t *testing.T) {
1574 input := `{
1575 "@type": "koral:token",
1576 "wrap": {
1577 "@type": "koral:term",
1578 "foundry": "opennlp",
1579 "key": "ART",
1580 "layer": "p",
1581 "match": "match:eq"
1582 }
1583 }`
1584 var inputData any
1585 err := json.Unmarshal([]byte(input), &inputData)
1586 require.NoError(t, err)
1587
1588 result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: AtoB}, inputData)
1589 require.NoError(t, err)
1590
1591 resultMap := result.(map[string]any)
1592 wrap := resultMap["wrap"].(map[string]any)
1593 assert.Equal(t, "koral:termGroup", wrap["@type"])
1594 assert.Equal(t, "relation:and", wrap["relation"])
1595 })
1596}
1597
1598func TestOriginalProblemMultiTokenQuery(t *testing.T) {
1599 m := newSTTSUPoSMapper(t)
1600
1601 t.Run("Multi-token [DET][ADJ][NOUN] BtoA produces correct disjunctions", func(t *testing.T) {
1602 // This reproduces the exact problem from the issue:
1603 // [upos/p=DET][upos/p=ADJ][upos/p=NOUN] mapped B->A
1604 input := `{
1605 "@type": "koral:group",
1606 "operation": "operation:sequence",
1607 "operands": [
1608 {
1609 "@type": "koral:token",
1610 "wrap": {
1611 "@type": "koral:term",
1612 "foundry": "upos",
1613 "key": "DET",
1614 "layer": "p",
1615 "match": "match:eq"
1616 }
1617 },
1618 {
1619 "@type": "koral:token",
1620 "wrap": {
1621 "@type": "koral:term",
1622 "foundry": "upos",
1623 "key": "ADJ",
1624 "layer": "p",
1625 "match": "match:eq"
1626 }
1627 },
1628 {
1629 "@type": "koral:token",
1630 "wrap": {
1631 "@type": "koral:term",
1632 "foundry": "upos",
1633 "key": "NOUN",
1634 "layer": "p",
1635 "match": "match:eq"
1636 }
1637 }
1638 ]
1639 }`
1640
1641 var inputData any
1642 err := json.Unmarshal([]byte(input), &inputData)
1643 require.NoError(t, err)
1644
1645 result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
1646 require.NoError(t, err)
1647
1648 resultMap := result.(map[string]any)
1649 operands := resultMap["operands"].([]any)
1650 require.Len(t, operands, 3)
1651
1652 // Token 1: DET -> ART | PDAT | PIAT | PIDAT | PPOSAT | PRELAT | PWAT
1653 token1 := operands[0].(map[string]any)
1654 wrap1 := token1["wrap"].(map[string]any)
1655 assert.Equal(t, "koral:termGroup", wrap1["@type"], "DET should be mapped to OR group")
1656 assert.Equal(t, "relation:or", wrap1["relation"])
1657 ops1 := wrap1["operands"].([]any)
1658 assert.Len(t, ops1, 7, "DET fallback should have 7 alternatives")
1659
1660 // Token 2: ADJ -> ADJA | ADJD
1661 token2 := operands[1].(map[string]any)
1662 wrap2 := token2["wrap"].(map[string]any)
1663 assert.Equal(t, "koral:termGroup", wrap2["@type"], "ADJ should be mapped to OR group")
1664 assert.Equal(t, "relation:or", wrap2["relation"])
1665 ops2 := wrap2["operands"].([]any)
1666 assert.Len(t, ops2, 2, "ADJ fallback should have 2 alternatives")
1667
1668 adjKeys := []string{
1669 ops2[0].(map[string]any)["key"].(string),
1670 ops2[1].(map[string]any)["key"].(string),
1671 }
1672 assert.Contains(t, adjKeys, "ADJA")
1673 assert.Contains(t, adjKeys, "ADJD")
1674
1675 // Token 3: NOUN -> NN (specific rule, not fallback, because
1676 // [NN] <> [NOUN] has specificity 1 and [NN | NE] <> [NOUN | PROPN]
1677 // has pattern specificity 0 on B-side (OR group))
1678 token3 := operands[2].(map[string]any)
1679 wrap3 := token3["wrap"].(map[string]any)
1680 assert.Equal(t, "koral:term", wrap3["@type"], "NOUN should map to single NN term")
1681 assert.Equal(t, "NN", wrap3["key"])
1682 })
1683
1684 t.Run("Specific input [ADJ & Variant=Short] maps to ADJD only", func(t *testing.T) {
1685 input := `{
1686 "@type": "koral:token",
1687 "wrap": {
1688 "@type": "koral:termGroup",
1689 "operands": [
1690 {
1691 "@type": "koral:term",
1692 "foundry": "upos",
1693 "key": "ADJ",
1694 "layer": "p",
1695 "match": "match:eq"
1696 },
1697 {
1698 "@type": "koral:term",
1699 "foundry": "upos",
1700 "key": "Short",
1701 "layer": "Variant",
1702 "match": "match:eq"
1703 }
1704 ],
1705 "relation": "relation:and"
1706 }
1707 }`
1708
1709 var inputData any
1710 err := json.Unmarshal([]byte(input), &inputData)
1711 require.NoError(t, err)
1712
1713 result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
1714 require.NoError(t, err)
1715
1716 resultMap := result.(map[string]any)
1717 wrap := resultMap["wrap"].(map[string]any)
1718 assert.Equal(t, "koral:term", wrap["@type"])
1719 assert.Equal(t, "ADJD", wrap["key"])
1720 })
1721
1722 t.Run("Specific input [DET & PronType=Art] maps to ART only", func(t *testing.T) {
1723 input := `{
1724 "@type": "koral:token",
1725 "wrap": {
1726 "@type": "koral:termGroup",
1727 "operands": [
1728 {
1729 "@type": "koral:term",
1730 "foundry": "upos",
1731 "key": "DET",
1732 "layer": "p",
1733 "match": "match:eq"
1734 },
1735 {
1736 "@type": "koral:term",
1737 "foundry": "upos",
1738 "key": "Art",
1739 "layer": "PronType",
1740 "match": "match:eq"
1741 }
1742 ],
1743 "relation": "relation:and"
1744 }
1745 }`
1746
1747 var inputData any
1748 err := json.Unmarshal([]byte(input), &inputData)
1749 require.NoError(t, err)
1750
1751 result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
1752 require.NoError(t, err)
1753
1754 resultMap := result.(map[string]any)
1755 wrap := resultMap["wrap"].(map[string]any)
1756 assert.Equal(t, "koral:term", wrap["@type"])
1757 assert.Equal(t, "ART", wrap["key"])
1758 })
1759}
1760
1761func TestSpecificityBasedRuleSelection(t *testing.T) {
1762 t.Run("More specific rule wins over less specific", func(t *testing.T) {
1763 mappingList := config.MappingList{
1764 ID: "spec-test",
1765 FoundryA: "opennlp",
1766 LayerA: "p",
1767 FoundryB: "upos",
1768 LayerB: "p",
1769 Mappings: []config.MappingRule{
1770 "[ADJA] <> [ADJ]",
1771 "[ADJD] <> [ADJ & Variant=Short]",
1772 },
1773 }
1774
1775 m, err := NewMapper([]config.MappingList{mappingList})
1776 require.NoError(t, err)
1777
1778 // Input: ADJ & Variant=Short — matches the internal representation
1779 // where "Variant=Short" is parsed as layer="Variant", key="Short"
1780 input := `{
1781 "@type": "koral:token",
1782 "wrap": {
1783 "@type": "koral:termGroup",
1784 "operands": [
1785 {
1786 "@type": "koral:term",
1787 "foundry": "upos",
1788 "key": "ADJ",
1789 "layer": "p",
1790 "match": "match:eq"
1791 },
1792 {
1793 "@type": "koral:term",
1794 "foundry": "upos",
1795 "key": "Short",
1796 "layer": "Variant",
1797 "match": "match:eq"
1798 }
1799 ],
1800 "relation": "relation:and"
1801 }
1802 }`
1803
1804 var inputData any
1805 err = json.Unmarshal([]byte(input), &inputData)
1806 require.NoError(t, err)
1807
1808 result, err := m.ApplyQueryMappings("spec-test", MappingOptions{Direction: BtoA}, inputData)
1809 require.NoError(t, err)
1810
1811 resultMap := result.(map[string]any)
1812 wrap := resultMap["wrap"].(map[string]any)
1813 assert.Equal(t, "koral:term", wrap["@type"])
1814 assert.Equal(t, "ADJD", wrap["key"])
1815 })
1816
1817 t.Run("Same specificity - first rule in file order wins", func(t *testing.T) {
1818 mappingList := config.MappingList{
1819 ID: "tie-test",
1820 FoundryA: "opennlp",
1821 LayerA: "p",
1822 FoundryB: "upos",
1823 LayerB: "p",
1824 Mappings: []config.MappingRule{
1825 "[KOUI] <> [SCONJ]",
1826 "[KOUS] <> [SCONJ]",
1827 },
1828 }
1829
1830 m, err := NewMapper([]config.MappingList{mappingList})
1831 require.NoError(t, err)
1832
1833 input := `{
1834 "@type": "koral:token",
1835 "wrap": {
1836 "@type": "koral:term",
1837 "foundry": "upos",
1838 "key": "SCONJ",
1839 "layer": "p",
1840 "match": "match:eq"
1841 }
1842 }`
1843
1844 var inputData any
1845 err = json.Unmarshal([]byte(input), &inputData)
1846 require.NoError(t, err)
1847
1848 result, err := m.ApplyQueryMappings("tie-test", MappingOptions{Direction: BtoA}, inputData)
1849 require.NoError(t, err)
1850
1851 resultMap := result.(map[string]any)
1852 wrap := resultMap["wrap"].(map[string]any)
1853 assert.Equal(t, "KOUI", wrap["key"])
1854 })
1855
1856 t.Run("Single matching rule - identical to first-match-wins", func(t *testing.T) {
1857 mappingList := config.MappingList{
1858 ID: "single-test",
1859 FoundryA: "opennlp",
1860 LayerA: "p",
1861 FoundryB: "upos",
1862 LayerB: "p",
1863 Mappings: []config.MappingRule{
1864 "[NN] <> [NOUN]",
1865 },
1866 }
1867
1868 m, err := NewMapper([]config.MappingList{mappingList})
1869 require.NoError(t, err)
1870
1871 input := `{
1872 "@type": "koral:token",
1873 "wrap": {
1874 "@type": "koral:term",
1875 "foundry": "upos",
1876 "key": "NOUN",
1877 "layer": "p",
1878 "match": "match:eq"
1879 }
1880 }`
1881
1882 var inputData any
1883 err = json.Unmarshal([]byte(input), &inputData)
1884 require.NoError(t, err)
1885
1886 result, err := m.ApplyQueryMappings("single-test", MappingOptions{Direction: BtoA}, inputData)
1887 require.NoError(t, err)
1888
1889 resultMap := result.(map[string]any)
1890 wrap := resultMap["wrap"].(map[string]any)
1891 assert.Equal(t, "NN", wrap["key"])
1892 })
1893
1894 t.Run("No matching rule - node passes through unchanged", func(t *testing.T) {
1895 mappingList := config.MappingList{
1896 ID: "nomatch-test",
1897 FoundryA: "opennlp",
1898 LayerA: "p",
1899 FoundryB: "upos",
1900 LayerB: "p",
1901 Mappings: []config.MappingRule{
1902 "[NN] <> [NOUN]",
1903 },
1904 }
1905
1906 m, err := NewMapper([]config.MappingList{mappingList})
1907 require.NoError(t, err)
1908
1909 input := `{
1910 "@type": "koral:token",
1911 "wrap": {
1912 "@type": "koral:term",
1913 "foundry": "upos",
1914 "key": "VERB",
1915 "layer": "p",
1916 "match": "match:eq"
1917 }
1918 }`
1919
1920 var inputData any
1921 err = json.Unmarshal([]byte(input), &inputData)
1922 require.NoError(t, err)
1923
1924 result, err := m.ApplyQueryMappings("nomatch-test", MappingOptions{Direction: BtoA}, inputData)
1925 require.NoError(t, err)
1926
1927 resultMap := result.(map[string]any)
1928 wrap := resultMap["wrap"].(map[string]any)
1929 assert.Equal(t, "VERB", wrap["key"])
1930 })
1931
1932 t.Run("Fallback OR-disjunction rule loses to specific rule", func(t *testing.T) {
1933 mappingList := config.MappingList{
1934 ID: "fallback-test",
1935 FoundryA: "opennlp",
1936 LayerA: "p",
1937 FoundryB: "upos",
1938 LayerB: "p",
1939 Mappings: []config.MappingRule{
1940 "[ADJA] <> [ADJ]",
1941 "[ADJA | ADJD] <> [ADJ]",
1942 },
1943 }
1944
1945 m, err := NewMapper([]config.MappingList{mappingList})
1946 require.NoError(t, err)
1947
1948 input := `{
1949 "@type": "koral:token",
1950 "wrap": {
1951 "@type": "koral:term",
1952 "foundry": "upos",
1953 "key": "ADJ",
1954 "layer": "p",
1955 "match": "match:eq"
1956 }
1957 }`
1958
1959 var inputData any
1960 err = json.Unmarshal([]byte(input), &inputData)
1961 require.NoError(t, err)
1962
1963 result, err := m.ApplyQueryMappings("fallback-test", MappingOptions{Direction: BtoA}, inputData)
1964 require.NoError(t, err)
1965
1966 // Both rules match with pattern specificity 1 on B-side.
1967 // Rule 1 replacement specificity = 1 (Term), Rule 2 replacement specificity = 0 (OR group).
1968 // Lower replacement specificity wins (broader/fallback output) => rule 2 wins.
1969 resultMap := result.(map[string]any)
1970 wrap := resultMap["wrap"].(map[string]any)
1971 assert.Equal(t, "koral:termGroup", wrap["@type"])
1972 assert.Equal(t, "relation:or", wrap["relation"])
1973 })
1974}