blob: 39389c9a7b8511e7bfcfc5135acb2e9174aca404 [file] [log] [blame]
Akron32d53de2025-05-22 13:45:32 +02001package mapper
2
3import (
4 "encoding/json"
Akron32d53de2025-05-22 13:45:32 +02005 "testing"
6
Akronfa55bb22025-05-26 15:10:42 +02007 "github.com/KorAP/KoralPipe-TermMapper/ast"
Akrona00d4752025-05-26 17:34:36 +02008 "github.com/KorAP/KoralPipe-TermMapper/config"
Akronfa55bb22025-05-26 15:10:42 +02009 "github.com/KorAP/KoralPipe-TermMapper/matcher"
Akron32d53de2025-05-22 13:45:32 +020010 "github.com/stretchr/testify/assert"
11 "github.com/stretchr/testify/require"
12)
13
14func TestMapper(t *testing.T) {
Akrona00d4752025-05-26 17:34:36 +020015 // Create test mapping list
16 mappingList := config.MappingList{
17 ID: "test-mapper",
18 FoundryA: "opennlp",
19 LayerA: "p",
20 FoundryB: "upos",
21 LayerB: "p",
22 Mappings: []config.MappingRule{
23 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
24 "[DET] <> [opennlp/p=DET]",
25 },
26 }
Akron32d53de2025-05-22 13:45:32 +020027
28 // Create a new mapper
Akrona00d4752025-05-26 17:34:36 +020029 m, err := NewMapper([]config.MappingList{mappingList})
Akron32d53de2025-05-22 13:45:32 +020030 require.NoError(t, err)
31
32 tests := []struct {
33 name string
34 mappingID string
35 opts MappingOptions
36 input string
37 expected string
38 expectError bool
39 }{
40 {
41 name: "Simple A to B mapping",
42 mappingID: "test-mapper",
43 opts: MappingOptions{
44 Direction: AtoB,
45 },
46 input: `{
47 "@type": "koral:token",
48 "wrap": {
49 "@type": "koral:term",
50 "foundry": "opennlp",
51 "key": "PIDAT",
52 "layer": "p",
53 "match": "match:eq"
54 }
55 }`,
56 expected: `{
57 "@type": "koral:token",
58 "wrap": {
59 "@type": "koral:termGroup",
60 "operands": [
61 {
62 "@type": "koral:term",
63 "foundry": "opennlp",
64 "key": "PIDAT",
65 "layer": "p",
66 "match": "match:eq"
67 },
68 {
69 "@type": "koral:term",
70 "foundry": "opennlp",
71 "key": "AdjType",
72 "layer": "p",
73 "match": "match:eq",
74 "value": "Pdt"
75 }
76 ],
77 "relation": "relation:and"
78 }
79 }`,
80 },
81 {
Akrona1a183f2025-05-26 17:47:33 +020082 name: "B to A direction",
Akron32d53de2025-05-22 13:45:32 +020083 mappingID: "test-mapper",
84 opts: MappingOptions{
85 Direction: BtoA,
86 },
87 input: `{
88 "@type": "koral:token",
89 "wrap": {
Akrona1a183f2025-05-26 17:47:33 +020090 "@type": "koral:term",
91 "foundry": "opennlp",
92 "key": "PIDAT",
93 "layer": "p",
94 "match": "match:eq"
Akron32d53de2025-05-22 13:45:32 +020095 }
96 }`,
97 expected: `{
98 "@type": "koral:token",
99 "wrap": {
100 "@type": "koral:term",
101 "foundry": "opennlp",
102 "key": "PIDAT",
103 "layer": "p",
104 "match": "match:eq"
105 }
106 }`,
Akrona1a183f2025-05-26 17:47:33 +0200107 expectError: false,
Akron32d53de2025-05-22 13:45:32 +0200108 },
109 {
110 name: "Mapping with foundry override",
111 mappingID: "test-mapper",
112 opts: MappingOptions{
113 Direction: AtoB,
114 FoundryB: "custom",
115 },
116 input: `{
117 "@type": "koral:token",
118 "wrap": {
119 "@type": "koral:term",
120 "foundry": "opennlp",
121 "key": "PIDAT",
122 "layer": "p",
123 "match": "match:eq"
124 }
125 }`,
126 expected: `{
127 "@type": "koral:token",
128 "wrap": {
129 "@type": "koral:termGroup",
130 "operands": [
131 {
132 "@type": "koral:term",
133 "foundry": "custom",
134 "key": "PIDAT",
135 "layer": "p",
136 "match": "match:eq"
137 },
138 {
139 "@type": "koral:term",
140 "foundry": "custom",
141 "key": "AdjType",
142 "layer": "p",
143 "match": "match:eq",
144 "value": "Pdt"
145 }
146 ],
147 "relation": "relation:and"
148 }
149 }`,
150 },
151 {
152 name: "Invalid mapping ID",
153 mappingID: "nonexistent",
154 opts: MappingOptions{
155 Direction: AtoB,
156 },
157 input: `{
158 "@type": "koral:token",
159 "wrap": {
160 "@type": "koral:term",
161 "foundry": "opennlp",
162 "key": "PIDAT",
163 "layer": "p",
164 "match": "match:eq"
165 }
166 }`,
167 expectError: true,
168 },
169 {
170 name: "Invalid direction",
171 mappingID: "test-mapper",
172 opts: MappingOptions{
Akrona1a183f2025-05-26 17:47:33 +0200173 Direction: Direction(false),
Akron32d53de2025-05-22 13:45:32 +0200174 },
175 input: `{
176 "@type": "koral:token",
177 "wrap": {
178 "@type": "koral:term",
179 "foundry": "opennlp",
180 "key": "PIDAT",
181 "layer": "p",
182 "match": "match:eq"
183 }
184 }`,
Akrona1a183f2025-05-26 17:47:33 +0200185 expected: `{
186 "@type": "koral:token",
187 "wrap": {
188 "@type": "koral:term",
189 "foundry": "opennlp",
190 "key": "PIDAT",
191 "layer": "p",
192 "match": "match:eq"
193 }
194 }`,
195 expectError: false,
Akron32d53de2025-05-22 13:45:32 +0200196 },
197 }
198
199 for _, tt := range tests {
200 t.Run(tt.name, func(t *testing.T) {
201 // Parse input JSON
202 var inputData interface{}
203 err := json.Unmarshal([]byte(tt.input), &inputData)
204 require.NoError(t, err)
205
206 // Apply mappings
Akron7b4984e2025-05-26 19:12:20 +0200207 result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
Akron32d53de2025-05-22 13:45:32 +0200208 if tt.expectError {
209 assert.Error(t, err)
210 return
211 }
212 require.NoError(t, err)
213
214 // Parse expected JSON
215 var expectedData interface{}
216 err = json.Unmarshal([]byte(tt.expected), &expectedData)
217 require.NoError(t, err)
218
219 // Compare results
220 assert.Equal(t, expectedData, result)
221 })
222 }
223}
Akrond5850f82025-05-23 16:44:44 +0200224
Akroncc83eb52025-05-27 14:39:12 +0200225func TestTokenToTermGroupWithRewrites(t *testing.T) {
226 // Create test mapping list specifically for token to termGroup test
227 mappingList := config.MappingList{
228 ID: "test-token-to-termgroup",
229 FoundryA: "opennlp",
230 LayerA: "p",
231 FoundryB: "opennlp", // Keep the same foundry for both sides
232 LayerB: "p",
233 Mappings: []config.MappingRule{
234 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
235 },
236 }
237
238 // Create a new mapper
239 m, err := NewMapper([]config.MappingList{mappingList})
240 require.NoError(t, err)
241
242 input := `{
243 "@type": "koral:token",
244 "rewrites": [
245 {
246 "@type": "koral:rewrite",
247 "_comment": "This rewrite should be preserved",
248 "editor": "TestEditor",
249 "operation": "operation:test",
250 "src": "TestSource"
251 }
252 ],
253 "wrap": {
254 "@type": "koral:term",
255 "foundry": "opennlp",
256 "key": "PIDAT",
257 "layer": "p",
258 "match": "match:eq"
259 }
260 }`
261
262 expected := `{
263 "@type": "koral:token",
264 "rewrites": [
265 {
266 "@type": "koral:rewrite",
267 "_comment": "This rewrite should be preserved",
268 "editor": "TestEditor",
269 "operation": "operation:test",
270 "src": "TestSource"
271 }
272 ],
273 "wrap": {
274 "@type": "koral:termGroup",
275 "operands": [
276 {
277 "@type": "koral:term",
278 "foundry": "opennlp",
279 "key": "PIDAT",
280 "layer": "p",
281 "match": "match:eq"
282 },
283 {
284 "@type": "koral:term",
285 "foundry": "opennlp",
286 "key": "AdjType",
287 "layer": "p",
288 "match": "match:eq",
289 "value": "Pdt"
290 }
291 ],
292 "relation": "relation:and"
293 }
294 }`
295
296 // Parse input JSON
297 var inputData interface{}
298 err = json.Unmarshal([]byte(input), &inputData)
299 require.NoError(t, err)
300
301 // Apply mappings
302 result, err := m.ApplyQueryMappings("test-token-to-termgroup", MappingOptions{Direction: AtoB}, inputData)
303 require.NoError(t, err)
304
305 // Parse expected JSON
306 var expectedData interface{}
307 err = json.Unmarshal([]byte(expected), &expectedData)
308 require.NoError(t, err)
309
310 // Compare results
311 assert.Equal(t, expectedData, result)
312}
313
Akrond5850f82025-05-23 16:44:44 +0200314func TestMatchComplexPatterns(t *testing.T) {
315 tests := []struct {
316 name string
317 pattern ast.Pattern
318 replacement ast.Replacement
319 input ast.Node
320 expected ast.Node
321 }{
322 {
323 name: "Deep nested pattern with mixed operators",
324 pattern: ast.Pattern{
325 Root: &ast.TermGroup{
326 Operands: []ast.Node{
327 &ast.Term{
328 Key: "A",
329 Match: ast.MatchEqual,
330 },
331 &ast.TermGroup{
332 Operands: []ast.Node{
333 &ast.Term{
334 Key: "B",
335 Match: ast.MatchEqual,
336 },
337 &ast.TermGroup{
338 Operands: []ast.Node{
339 &ast.Term{
340 Key: "C",
341 Match: ast.MatchEqual,
342 },
343 &ast.Term{
344 Key: "D",
345 Match: ast.MatchEqual,
346 },
347 },
348 Relation: ast.AndRelation,
349 },
350 },
351 Relation: ast.OrRelation,
352 },
353 },
354 Relation: ast.AndRelation,
355 },
356 },
357 replacement: ast.Replacement{
358 Root: &ast.Term{
359 Key: "RESULT",
360 Match: ast.MatchEqual,
361 },
362 },
363 input: &ast.TermGroup{
364 Operands: []ast.Node{
365 &ast.Term{
366 Key: "A",
367 Match: ast.MatchEqual,
368 },
369 &ast.TermGroup{
370 Operands: []ast.Node{
371 &ast.Term{
372 Key: "C",
373 Match: ast.MatchEqual,
374 },
375 &ast.Term{
376 Key: "D",
377 Match: ast.MatchEqual,
378 },
379 },
380 Relation: ast.AndRelation,
381 },
382 },
383 Relation: ast.AndRelation,
384 },
385 expected: &ast.Term{
386 Key: "RESULT",
387 Match: ast.MatchEqual,
388 },
389 },
390 }
391
392 for _, tt := range tests {
393 t.Run(tt.name, func(t *testing.T) {
394 m, err := matcher.NewMatcher(tt.pattern, tt.replacement)
395 require.NoError(t, err)
396 result := m.Replace(tt.input)
397 assert.Equal(t, tt.expected, result)
398 })
399 }
400}
401
402func TestInvalidPatternReplacement(t *testing.T) {
Akrona00d4752025-05-26 17:34:36 +0200403 // Create test mapping list
404 mappingList := config.MappingList{
405 ID: "test-mapper",
406 FoundryA: "opennlp",
407 LayerA: "p",
408 FoundryB: "upos",
409 LayerB: "p",
410 Mappings: []config.MappingRule{
411 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
412 },
413 }
Akrond5850f82025-05-23 16:44:44 +0200414
415 // Create a new mapper
Akrona00d4752025-05-26 17:34:36 +0200416 m, err := NewMapper([]config.MappingList{mappingList})
Akrond5850f82025-05-23 16:44:44 +0200417 require.NoError(t, err)
418
419 tests := []struct {
420 name string
421 input string
422 expectError bool
423 errorMsg string
424 }{
425 {
426 name: "Invalid input - empty term group",
427 input: `{
428 "@type": "koral:token",
429 "wrap": {
430 "@type": "koral:termGroup",
431 "operands": [],
432 "relation": "relation:and"
433 }
434 }`,
435 expectError: true,
436 errorMsg: "failed to parse JSON into AST: error parsing wrapped node: term group must have at least one operand",
437 },
438 }
439
440 for _, tt := range tests {
441 t.Run(tt.name, func(t *testing.T) {
442 var inputData any
443 err := json.Unmarshal([]byte(tt.input), &inputData)
444 require.NoError(t, err)
445
Akron7b4984e2025-05-26 19:12:20 +0200446 result, err := m.ApplyQueryMappings("test-mapper", MappingOptions{Direction: AtoB}, inputData)
Akrond5850f82025-05-23 16:44:44 +0200447 if tt.expectError {
448 assert.Error(t, err)
449 assert.Equal(t, tt.errorMsg, err.Error())
450 assert.Nil(t, result)
451 } else {
452 assert.NoError(t, err)
453 assert.NotNil(t, result)
454 }
455 })
456 }
457}
Akron7b4984e2025-05-26 19:12:20 +0200458
459func TestQueryWrapperMappings(t *testing.T) {
460
461 mappingList := config.MappingList{
462 ID: "test-wrapper",
463 FoundryA: "opennlp",
464 LayerA: "orth",
465 FoundryB: "upos",
466 LayerB: "orth",
467 Mappings: []config.MappingRule{
468 "[opennlp/orth=Baum] <> [opennlp/orth=X]",
469 },
470 }
471
472 // Create a new mapper
473 m, err := NewMapper([]config.MappingList{mappingList})
474 require.NoError(t, err)
475
476 tests := []struct {
477 name string
478 mappingID string
479 opts MappingOptions
480 input string
481 expected string
482 expectError bool
483 }{
484 {
Akroncc83eb52025-05-27 14:39:12 +0200485 name: "Query wrapper case with rewrites preservation",
Akron7b4984e2025-05-26 19:12:20 +0200486 mappingID: "test-wrapper",
487 opts: MappingOptions{
488 Direction: AtoB,
489 },
490 input: `{
491 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
492 "collection": {
493 "@type": "koral:doc",
494 "key": "availability",
495 "match": "match:eq",
Akron7b4984e2025-05-26 19:12:20 +0200496 "type": "type:regex",
497 "value": "CC.*"
498 },
499 "query": {
500 "@type": "koral:token",
Akroncc83eb52025-05-27 14:39:12 +0200501 "rewrites": [
502 {
503 "@type": "koral:rewrite",
504 "_comment": "Original rewrite that should be preserved",
505 "editor": "Original",
506 "operation": "operation:original",
507 "src": "Original"
508 }
509 ],
Akron7b4984e2025-05-26 19:12:20 +0200510 "wrap": {
511 "@type": "koral:term",
512 "foundry": "opennlp",
513 "key": "Baum",
514 "layer": "orth",
515 "match": "match:eq"
516 }
517 }
518 }`,
519 expected: `{
520 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
521 "collection": {
522 "@type": "koral:doc",
523 "key": "availability",
524 "match": "match:eq",
Akron7b4984e2025-05-26 19:12:20 +0200525 "type": "type:regex",
526 "value": "CC.*"
527 },
528 "query": {
529 "@type": "koral:token",
Akroncc83eb52025-05-27 14:39:12 +0200530 "rewrites": [
531 {
532 "@type": "koral:rewrite",
533 "_comment": "Original rewrite that should be preserved",
534 "editor": "Original",
535 "operation": "operation:original",
536 "src": "Original"
537 }
538 ],
Akron7b4984e2025-05-26 19:12:20 +0200539 "wrap": {
540 "@type": "koral:term",
541 "foundry": "opennlp",
542 "key": "X",
543 "layer": "orth",
544 "match": "match:eq"
545 }
546 }
547 }`,
548 },
549 {
550 name: "Empty query field",
551 mappingID: "test-wrapper",
552 opts: MappingOptions{
553 Direction: AtoB,
554 },
555 input: `{
556 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
557 "query": null
558 }`,
559 expected: `{
560 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
561 "query": null
562 }`,
563 },
564 {
565 name: "Missing query field",
566 mappingID: "test-wrapper",
567 opts: MappingOptions{
568 Direction: AtoB,
569 },
570 input: `{
571 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
572 "collection": {
573 "@type": "koral:doc"
574 }
575 }`,
576 expected: `{
577 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
578 "collection": {
579 "@type": "koral:doc"
580 }
581 }`,
582 },
583 {
584 name: "Query field with non-object value",
585 mappingID: "test-wrapper",
586 opts: MappingOptions{
587 Direction: AtoB,
588 },
589 input: `{
590 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
591 "query": "invalid"
592 }`,
593 expected: `{
594 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
595 "query": "invalid"
596 }`,
597 },
Akroncc83eb52025-05-27 14:39:12 +0200598 {
599 name: "Query with rewrites in nested token",
600 mappingID: "test-wrapper",
601 opts: MappingOptions{
602 Direction: AtoB,
603 },
604 input: `{
605 "@type": "koral:token",
606 "rewrites": [
607 {
608 "@type": "koral:rewrite",
609 "_comment": "Nested rewrite that should be preserved",
610 "editor": "Nested",
611 "operation": "operation:nested",
612 "src": "Nested"
613 }
614 ],
615 "wrap": {
616 "@type": "koral:term",
617 "foundry": "opennlp",
618 "key": "Baum",
619 "layer": "orth",
620 "match": "match:eq"
621 }
622 }`,
623 expected: `{
624 "@type": "koral:token",
625 "rewrites": [
626 {
627 "@type": "koral:rewrite",
628 "_comment": "Nested rewrite that should be preserved",
629 "editor": "Nested",
630 "operation": "operation:nested",
631 "src": "Nested"
632 }
633 ],
634 "wrap": {
635 "@type": "koral:term",
636 "foundry": "opennlp",
637 "key": "X",
638 "layer": "orth",
639 "match": "match:eq"
640 }
641 }`,
642 },
Akron7b4984e2025-05-26 19:12:20 +0200643 }
644
645 for _, tt := range tests {
646 t.Run(tt.name, func(t *testing.T) {
647 // Parse input JSON
648 var inputData interface{}
649 err := json.Unmarshal([]byte(tt.input), &inputData)
650 require.NoError(t, err)
651
652 // Apply mappings
653 result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
654 if tt.expectError {
655 assert.Error(t, err)
656 return
657 }
658 require.NoError(t, err)
659
660 // Parse expected JSON
661 var expectedData interface{}
662 err = json.Unmarshal([]byte(tt.expected), &expectedData)
663 require.NoError(t, err)
664
665 // Compare results
666 assert.Equal(t, expectedData, result)
667 })
668 }
669}