blob: 566f235c25d0cf01fe8cc2284d836cf71bf593af [file] [log] [blame]
Akrona3675e92025-06-26 17:46:59 +02001package mapper
2
3import (
4 "encoding/json"
5 "testing"
6
7 "github.com/KorAP/KoralPipe-TermMapper/config"
8 "github.com/stretchr/testify/assert"
9 "github.com/stretchr/testify/require"
10)
11
Akrona3675e92025-06-26 17:46:59 +020012// TestResponseMappingAnnotationCreation tests creating new annotations based on RestrictToObligatory
13func TestResponseMappingAnnotationCreation(t *testing.T) {
14 // Simple snippet with a single annotated token
15 responseSnippet := `{
16 "snippet": "<span title=\"marmot/m:gender:masc\">Der</span>"
17 }`
18
19 // Create test mapping list
20 mappingList := config.MappingList{
21 ID: "test-mapper",
22 FoundryA: "marmot",
23 LayerA: "m",
24 FoundryB: "opennlp",
25 LayerB: "p",
26 Mappings: []config.MappingRule{
Akron5aa16232025-07-01 12:25:33 +020027 "[gender:masc] <> [p=M & m=M]",
Akrona3675e92025-06-26 17:46:59 +020028 },
29 }
30
31 // Create a new mapper
32 m, err := NewMapper([]config.MappingList{mappingList})
33 require.NoError(t, err)
34
35 var inputData any
36 err = json.Unmarshal([]byte(responseSnippet), &inputData)
37 assert.Nil(t, err)
38
39 result, err := m.ApplyResponseMappings("test-mapper", MappingOptions{Direction: AtoB}, inputData)
40 assert.Nil(t, err)
41
42 // For step 4, we should at least get back a processed result (even if snippet is unchanged)
43 // The main test is that no errors occurred in the processing
44 assert.NotNil(t, result)
45
46 // Verify the result is still a map with a snippet field
47 resultMap, ok := result.(map[string]any)
48 assert.True(t, ok)
49 assert.Contains(t, resultMap, "snippet")
50 assert.Equal(t, "<span title=\"marmot/m:gender:masc\"><span title=\"opennlp/p:M\" class=\"notinindex\"><span title=\"opennlp/m:M\" class=\"notinindex\">Der</span></span></span>", resultMap["snippet"])
51}
52
53// TestResponseMappingDebug helps debug the mapping process
54func TestResponseMappingDebug(t *testing.T) {
55 // Simple snippet with a single annotated token
56 responseSnippet := `{
57 "snippet": "<span title=\"marmot/m:gender:masc\">Der</span>"
58 }`
59
60 // Create test mapping list
61 mappingList := config.MappingList{
62 ID: "test-mapper",
63 FoundryA: "marmot",
64 LayerA: "m",
65 FoundryB: "opennlp",
66 LayerB: "p",
67 Mappings: []config.MappingRule{
68 "[gender=masc] <> [p=M & m=M]",
69 },
70 }
71
72 // Create a new mapper
73 m, err := NewMapper([]config.MappingList{mappingList})
74 require.NoError(t, err)
75
76 // Debug: Print what the parsed rules look like
77 rules := m.parsedRules["test-mapper"]
78 t.Logf("Number of parsed rules: %d", len(rules))
79 for i, rule := range rules {
80 t.Logf("Rule %d - Upper: %+v", i, rule.Upper)
81 t.Logf("Rule %d - Lower: %+v", i, rule.Lower)
82 }
83
84 var inputData any
85 err = json.Unmarshal([]byte(responseSnippet), &inputData)
86 assert.Nil(t, err)
87
88 // Include proper foundry and layer information in the options
89 result, err := m.ApplyResponseMappings("test-mapper", MappingOptions{
90 Direction: AtoB,
91 FoundryA: "marmot",
92 LayerA: "m",
93 FoundryB: "opennlp",
94 LayerB: "p",
95 }, inputData)
96 assert.Nil(t, err)
97 t.Logf("Result: %+v", result)
98}
99
100// TestResponseMappingWithAndRelation tests mapping rules with AND relations
101func TestResponseMappingWithAndRelation(t *testing.T) {
102 // Snippet with multiple annotations on a single token - both must be on the same span for AND to work
103 responseSnippet := `{
104 "snippet": "<span title=\"marmot/p:DET\"><span title=\"marmot/p:gender:masc\">Der</span></span>"
105 }`
106
107 // Create test mapping list with AND relation
108 mappingList := config.MappingList{
109 ID: "test-and-mapper",
110 FoundryA: "marmot",
111 LayerA: "p",
112 FoundryB: "opennlp",
113 LayerB: "p",
114 Mappings: []config.MappingRule{
115 "[DET & gender:masc] <> [p=DT & case=nom]",
116 },
117 }
118
119 // Create a new mapper
120 m, err := NewMapper([]config.MappingList{mappingList})
121 require.NoError(t, err)
122
123 var inputData any
124 err = json.Unmarshal([]byte(responseSnippet), &inputData)
125 assert.Nil(t, err)
126
127 result, err := m.ApplyResponseMappings("test-and-mapper", MappingOptions{
128 Direction: AtoB,
129 FoundryA: "marmot",
130 LayerA: "p",
131 FoundryB: "opennlp",
132 LayerB: "p",
133 }, inputData)
134 assert.Nil(t, err)
135
136 // Verify the result contains the expected annotations
137 resultMap, ok := result.(map[string]any)
138 assert.True(t, ok)
139 assert.Contains(t, resultMap, "snippet")
140
141 snippet := resultMap["snippet"].(string)
142 // Should contain both new annotations - checking the actual format produced
143 assert.Contains(t, snippet, `title="marmot/p:DET"`)
144 assert.Contains(t, snippet, `title="opennlp/p:DT"`)
145 assert.Contains(t, snippet, `title="marmot/p:gender:masc"`)
146 assert.Contains(t, snippet, `title="opennlp/case:nom"`) // Format is foundry/layer:value for single values
147 assert.Contains(t, snippet, `class="notinindex"`)
148}
149
150// TestResponseMappingWithOrRelation tests mapping rules with OR relations
151func TestResponseMappingWithOrRelation(t *testing.T) {
152 // Snippet with one token that matches the OR condition
153 responseSnippet := `{
154 "snippet": "<span title=\"marmot/p:DET\">Der</span>"
155 }`
156
157 // Create test mapping list with OR relation
158 mappingList := config.MappingList{
159 ID: "test-or-mapper",
160 FoundryA: "marmot",
161 LayerA: "p",
162 FoundryB: "opennlp",
163 LayerB: "p",
164 Mappings: []config.MappingRule{
165 "[DET | ART] <> [determiner=true]",
166 },
167 }
168
169 // Create a new mapper
170 m, err := NewMapper([]config.MappingList{mappingList})
171 require.NoError(t, err)
172
173 var inputData any
174 err = json.Unmarshal([]byte(responseSnippet), &inputData)
175 assert.Nil(t, err)
176
177 result, err := m.ApplyResponseMappings("test-or-mapper", MappingOptions{Direction: AtoB}, inputData)
178 assert.Nil(t, err)
179
180 // Verify the result
181 resultMap, ok := result.(map[string]any)
182 assert.True(t, ok)
183 assert.Contains(t, resultMap, "snippet")
184
185 snippet := resultMap["snippet"].(string)
186
187 assert.Contains(t, snippet, `title="marmot/p:DET"`)
188 assert.Contains(t, snippet, `title="opennlp/determiner:true" class="notinindex"`)
189 assert.NotEmpty(t, snippet)
190}
191
192// TestResponseMappingComplexPattern1 tests complex nested patterns
193func TestResponseMappingComplexPattern1(t *testing.T) {
194 // Snippet with a token that has nested annotations
195 responseSnippet := `{
196 "snippet": "<span title=\"marmot/p:ADJA\"><span title=\"marmot/m:gender:masc\"><span title=\"marmot/m:case:nom\">alter</span></span></span>"
197 }`
198
199 // Create test mapping list with complex pattern
200 mappingList := config.MappingList{
201 ID: "test-complex-mapper",
202 FoundryA: "marmot",
203 LayerA: "p",
204 FoundryB: "opennlp",
205 LayerB: "p",
206 Mappings: []config.MappingRule{
207 "[ADJA & gender=masc & case=nom] <> [pos=ADJ & gender=M & case=NOM]",
208 },
209 }
210
211 // Create a new mapper
212 m, err := NewMapper([]config.MappingList{mappingList})
213 require.NoError(t, err)
214
215 var inputData any
216 err = json.Unmarshal([]byte(responseSnippet), &inputData)
217 assert.Nil(t, err)
218
219 result, err := m.ApplyResponseMappings("test-complex-mapper", MappingOptions{Direction: AtoB}, inputData)
220 assert.Nil(t, err)
221
222 // Verify the result contains the expected annotations
223 resultMap, ok := result.(map[string]any)
224 assert.True(t, ok)
225 assert.Contains(t, resultMap, "snippet")
226
227 snippet := resultMap["snippet"].(string)
228 assert.Contains(t, snippet, `title="marmot/p:ADJA`)
229 assert.Contains(t, snippet, `title="marmot/m:gender:masc`)
230 assert.NotContains(t, snippet, `title="opennlp`)
231 assert.NotEmpty(t, snippet) // At minimum, processing should succeed
232}
233
234// TestResponseMappingComplexPattern2 tests complex nested patterns
235func TestResponseMappingComplexPattern2(t *testing.T) {
236 // Snippet with a token that has nested annotations
237 responseSnippet := `{
238 "snippet": "<span title=\"marmot/p:ADJA\"><span title=\"marmot/p:gender:masc\"><span title=\"marmot/p:case:nom\">alter</span></span></span>"
239 }`
240
241 // Create test mapping list with complex pattern
242 mappingList := config.MappingList{
243 ID: "test-complex-mapper",
244 FoundryA: "marmot",
245 LayerA: "p",
246 FoundryB: "opennlp",
247 LayerB: "p",
248 Mappings: []config.MappingRule{
249 "[ADJA & gender:masc & case:nom] <> [pos=ADJ & gender=M & case=NOM]",
250 },
251 }
252
253 // Create a new mapper
254 m, err := NewMapper([]config.MappingList{mappingList})
255 require.NoError(t, err)
256
257 var inputData any
258 err = json.Unmarshal([]byte(responseSnippet), &inputData)
259 assert.Nil(t, err)
260
261 result, err := m.ApplyResponseMappings("test-complex-mapper", MappingOptions{Direction: AtoB}, inputData)
262 assert.Nil(t, err)
263
264 // Verify the result contains the expected annotations
265 resultMap, ok := result.(map[string]any)
266 assert.True(t, ok)
267 assert.Contains(t, resultMap, "snippet")
268
269 snippet := resultMap["snippet"].(string)
270 assert.Contains(t, snippet, `title="marmot/p:ADJA`)
271 assert.Contains(t, snippet, `title="marmot/p:gender:masc`)
272 assert.Contains(t, snippet, `title="opennlp/pos:ADJ" class="notinindex"`)
273 assert.Contains(t, snippet, `title="opennlp/gender:M" class="notinindex"`)
274 assert.Contains(t, snippet, `title="opennlp/case:NOM" class="notinindex"`)
275 assert.NotEmpty(t, snippet) // At minimum, processing should succeed
276}
277
278// TestResponseMappingMultipleTokens tests mapping across multiple tokens
279func TestResponseMappingMultipleTokens(t *testing.T) {
280 // Snippet with multiple tokens
281 responseSnippet := `{
282 "snippet": "<span title=\"marmot/p:DET\">Der</span> <span title=\"marmot/p:ADJA\"><span title=\"marmot/m:gender:masc\">alte</span></span> <span title=\"marmot/p:NN\">Mann</span>"
283 }`
284
285 // Create test mapping list that matches multiple patterns
286 mappingList := config.MappingList{
287 ID: "test-multi-mapper",
288 FoundryA: "marmot",
289 LayerA: "p",
290 FoundryB: "opennlp",
291 LayerB: "p",
292 Mappings: []config.MappingRule{
293 "[DET] <> [determiner=true]",
294 "[ADJA & gender:masc] <> [adjective=true & gender=M]",
295 "[NN] <> [noun=true]",
296 },
297 }
298
299 // Create a new mapper
300 m, err := NewMapper([]config.MappingList{mappingList})
301 require.NoError(t, err)
302
303 var inputData any
304 err = json.Unmarshal([]byte(responseSnippet), &inputData)
305 assert.Nil(t, err)
306
307 result, err := m.ApplyResponseMappings("test-multi-mapper", MappingOptions{Direction: AtoB}, inputData)
308 assert.Nil(t, err)
309
310 // Verify the result
311 resultMap, ok := result.(map[string]any)
312 assert.True(t, ok)
313 assert.Contains(t, resultMap, "snippet")
314
315 snippet := resultMap["snippet"].(string)
316 // Should contain annotations for each matching token (checking actual output format)
317 assert.Contains(t, snippet, `title="marmot/p:DET"`)
318 assert.Contains(t, snippet, `title="opennlp/determiner:true" class="notinindex"`) // Format is foundry/layer:value for single values
319 assert.NotContains(t, snippet, `title="opennlp/adjective:true" class="notinindex"`)
320 assert.Contains(t, snippet, `title="opennlp/noun:true" class="notinindex"`)
321}
322
323// TestResponseMappingNoMatch tests behavior when no patterns match
324func TestResponseMappingNoMatch(t *testing.T) {
325 // Snippet with tokens that don't match the pattern
326 responseSnippet := `{
327 "snippet": "<span title=\"marmot/p:VERB\">läuft</span>"
328 }`
329
330 // Create test mapping list with pattern that won't match
331 mappingList := config.MappingList{
332 ID: "test-nomatch-mapper",
333 FoundryA: "marmot",
334 LayerA: "p",
335 FoundryB: "opennlp",
336 LayerB: "p",
337 Mappings: []config.MappingRule{
338 "[DET] <> [determiner=true]",
339 },
340 }
341
342 // Create a new mapper
343 m, err := NewMapper([]config.MappingList{mappingList})
344 require.NoError(t, err)
345
346 var inputData any
347 err = json.Unmarshal([]byte(responseSnippet), &inputData)
348 assert.Nil(t, err)
349
350 result, err := m.ApplyResponseMappings("test-nomatch-mapper", MappingOptions{Direction: AtoB}, inputData)
351 assert.Nil(t, err)
352
353 // Verify the result is unchanged since no patterns matched
354 resultMap, ok := result.(map[string]any)
355 assert.True(t, ok)
356 assert.Contains(t, resultMap, "snippet")
357
358 snippet := resultMap["snippet"].(string)
359 // Should be the original snippet without new annotations
360 assert.Equal(t, `<span title="marmot/p:VERB">läuft</span>`, snippet)
361 assert.NotContains(t, snippet, `class="notinindex"`)
362}
363
364// TestResponseMappingBidirectional tests bidirectional mapping (B to A direction)
365func TestResponseMappingBidirectional(t *testing.T) {
366 // Snippet with opennlp annotations
367 responseSnippet := `{
368 "snippet": "<span title=\"opennlp/p:DT\"><span title=\"opennlp/p:determiner:true\">Der</span></span>"
369 }`
370
371 // Create test mapping list
372 mappingList := config.MappingList{
373 ID: "test-bidirectional-mapper",
374 FoundryA: "marmot",
375 LayerA: "p",
376 FoundryB: "opennlp",
377 LayerB: "p",
378 Mappings: []config.MappingRule{
379 "[DET] <> [DT & determiner:true]",
380 },
381 }
382
383 // Create a new mapper
384 m, err := NewMapper([]config.MappingList{mappingList})
385 require.NoError(t, err)
386
387 var inputData any
388 err = json.Unmarshal([]byte(responseSnippet), &inputData)
389 assert.Nil(t, err)
390
391 // Test B to A direction
392 result, err := m.ApplyResponseMappings("test-bidirectional-mapper", MappingOptions{Direction: BtoA}, inputData)
393 assert.Nil(t, err)
394
395 // Verify the result
396 resultMap, ok := result.(map[string]any)
397 assert.True(t, ok)
398 assert.Contains(t, resultMap, "snippet")
399
400 snippet := resultMap["snippet"].(string)
401
402 assert.Contains(t, snippet, `title="opennlp/p:DT"`)
403 assert.Contains(t, snippet, `title="marmot/p:DET" class="notinindex"`)
404 assert.NotEmpty(t, snippet) // At minimum, processing should succeed
405}
406
407// TestResponseMappingWithValuePatterns tests patterns with specific values
408func TestResponseMappingWithValuePatterns(t *testing.T) {
409 // Snippet with value-specific annotations
410 responseSnippet := `{
411 "snippet": "<span title=\"marmot/m:case:nom\"><span title=\"marmot/m:gender:fem\">die</span></span>"
412 }`
413
414 // Create test mapping list with value-specific patterns
415 mappingList := config.MappingList{
416 ID: "test-value-mapper",
417 FoundryA: "marmot",
418 LayerA: "m",
419 FoundryB: "opennlp",
420 LayerB: "m",
421 Mappings: []config.MappingRule{
422 "[case:nom & gender:fem] <> [case=NOM & gender=F]",
423 },
424 }
425
426 // Create a new mapper
427 m, err := NewMapper([]config.MappingList{mappingList})
428 require.NoError(t, err)
429
430 var inputData any
431 err = json.Unmarshal([]byte(responseSnippet), &inputData)
432 assert.Nil(t, err)
433
434 result, err := m.ApplyResponseMappings("test-value-mapper", MappingOptions{Direction: AtoB}, inputData)
435 assert.Nil(t, err)
436
437 // Verify the result
438 resultMap, ok := result.(map[string]any)
439 assert.True(t, ok)
440 assert.Contains(t, resultMap, "snippet")
441
442 snippet := resultMap["snippet"].(string)
443 assert.Contains(t, snippet, `title="marmot/m:case:nom"`) // Format is foundry/layer:value
444 assert.Contains(t, snippet, `title="opennlp/case:NOM" class="notinindex"`) // Format is foundry/layer:value
445 assert.Contains(t, snippet, `title="opennlp/gender:F" class="notinindex"`)
446}
447
448// TestResponseMappingNestedSpans tests handling of deeply nested span structures
449func TestResponseMappingNestedSpans(t *testing.T) {
450 // Snippet with deeply nested spans
451 responseSnippet := `{
Akron4de47a92025-06-27 11:58:11 +0200452 "snippet": "<span title=\"level1/l:outer\"><span title=\"level2/l:middle\"><span title=\"marmot/p:DET\">der</span></span></span>",
453 "author": "John Doe"
Akrona3675e92025-06-26 17:46:59 +0200454 }`
455
456 // Create test mapping list
457 mappingList := config.MappingList{
458 ID: "test-nested-mapper",
459 FoundryA: "marmot",
460 LayerA: "p",
461 FoundryB: "opennlp",
462 LayerB: "p",
463 Mappings: []config.MappingRule{
464 "[DET] <> [determiner=yes]",
465 },
466 }
467
468 // Create a new mapper
469 m, err := NewMapper([]config.MappingList{mappingList})
470 require.NoError(t, err)
471
472 var inputData any
473 err = json.Unmarshal([]byte(responseSnippet), &inputData)
474 assert.Nil(t, err)
475
476 result, err := m.ApplyResponseMappings("test-nested-mapper", MappingOptions{Direction: AtoB}, inputData)
477 assert.Nil(t, err)
478
479 // Verify the result preserves the nested structure and adds new annotations
480 resultMap, ok := result.(map[string]any)
481 assert.True(t, ok)
482 assert.Contains(t, resultMap, "snippet")
483
484 snippet := resultMap["snippet"].(string)
485 // Should contain the new annotation while preserving existing structure
486 assert.Contains(t, snippet, `title="opennlp/determiner:yes"`) // Format is foundry/layer:value
487 assert.Contains(t, snippet, `class="notinindex"`)
488 assert.Contains(t, snippet, `title="level1/l:outer"`)
489 assert.Contains(t, snippet, `title="level2/l:middle"`)
490 assert.Contains(t, snippet, `title="marmot/p:DET"`)
Akron4de47a92025-06-27 11:58:11 +0200491
492 author := resultMap["author"].(string)
493 assert.Equal(t, "John Doe", author)
Akrona3675e92025-06-26 17:46:59 +0200494}
Akron497cfe82025-07-03 13:26:54 +0200495
496// TestResponseMappingWithLayerOverride tests layer precedence rules
497func TestResponseMappingWithLayerOverride(t *testing.T) {
498 // Test 1: Explicit layer in mapping rule should take precedence over MappingOptions
499 t.Run("Explicit layer takes precedence", func(t *testing.T) {
500 responseSnippet := `{
501 "snippet": "<span title=\"marmot/p:DET\">Der</span>"
502 }`
503
504 // Mapping rule with explicit layer [p=DT] - this should NOT be overridden
505 mappingList := config.MappingList{
506 ID: "test-layer-precedence",
507 FoundryA: "marmot",
508 LayerA: "p",
509 FoundryB: "opennlp",
510 LayerB: "p", // default layer
511 Mappings: []config.MappingRule{
512 "[DET] <> [p=DT]", // Explicit layer "p" should not be overridden
513 },
514 }
515
516 m, err := NewMapper([]config.MappingList{mappingList})
517 require.NoError(t, err)
518
519 var inputData any
520 err = json.Unmarshal([]byte(responseSnippet), &inputData)
521 require.NoError(t, err)
522
523 // Apply with layer override - should NOT affect explicit layer in mapping rule
524 result, err := m.ApplyResponseMappings("test-layer-precedence", MappingOptions{
525 Direction: AtoB,
526 LayerB: "pos", // This should NOT override the explicit "p" layer in [p=DT]
527 }, inputData)
528 require.NoError(t, err)
529
530 resultMap := result.(map[string]any)
531 snippet := resultMap["snippet"].(string)
532
533 // Should use explicit layer "p" from mapping rule, NOT "pos" from override
534 assert.Contains(t, snippet, `title="opennlp/p:DT" class="notinindex"`)
535 assert.NotContains(t, snippet, `title="opennlp/pos:DT" class="notinindex"`)
536 })
537
538 // Test 2: Implicit layer in mapping rule should use MappingOptions layer override
539 t.Run("Implicit layer uses MappingOptions override", func(t *testing.T) {
540 responseSnippet := `{
541 "snippet": "<span title=\"marmot/p:DET\">Der</span>"
542 }`
543
544 // Mapping rule with implicit layer [DT] - this should use layer override
545 mappingList := config.MappingList{
546 ID: "test-layer-override",
547 FoundryA: "marmot",
548 LayerA: "p",
549 FoundryB: "opennlp",
550 LayerB: "p", // default layer
551 Mappings: []config.MappingRule{
552 "[DET] <> [DT]", // No explicit layer - should use override
553 },
554 }
555
556 m, err := NewMapper([]config.MappingList{mappingList})
557 require.NoError(t, err)
558
559 var inputData any
560 err = json.Unmarshal([]byte(responseSnippet), &inputData)
561 require.NoError(t, err)
562
563 // Apply with layer override - should affect implicit layer in mapping rule
564 result, err := m.ApplyResponseMappings("test-layer-override", MappingOptions{
565 Direction: AtoB,
566 LayerB: "pos", // This should override the default layer for [DT]
567 }, inputData)
568 require.NoError(t, err)
569
570 resultMap := result.(map[string]any)
571 snippet := resultMap["snippet"].(string)
572
573 // Should use layer "pos" from override, NOT default "p" layer
574 assert.Contains(t, snippet, `title="opennlp/pos:DT" class="notinindex"`)
575 assert.NotContains(t, snippet, `title="opennlp/p:DT" class="notinindex"`)
576 })
577}