blob: 9a0c668a81c62a05353bea4bee72baa0971884c0 [file] [log] [blame]
Akrona3675e92025-06-26 17:46:59 +02001package mapper
2
3import (
4 "encoding/json"
5 "testing"
6
Akron2ef703c2025-07-03 15:57:42 +02007 "github.com/KorAP/Koral-Mapper/config"
Akrona3675e92025-06-26 17:46:59 +02008 "github.com/stretchr/testify/assert"
9 "github.com/stretchr/testify/require"
10)
11
Akrona3675e92025-06-26 17:46:59 +020012// TestResponseMappingAnnotationCreation tests creating new annotations based on RestrictToObligatory
13func TestResponseMappingAnnotationCreation(t *testing.T) {
14 // Simple snippet with a single annotated token
15 responseSnippet := `{
16 "snippet": "<span title=\"marmot/m:gender:masc\">Der</span>"
17 }`
18
19 // Create test mapping list
20 mappingList := config.MappingList{
21 ID: "test-mapper",
22 FoundryA: "marmot",
23 LayerA: "m",
24 FoundryB: "opennlp",
25 LayerB: "p",
26 Mappings: []config.MappingRule{
Akron5aa16232025-07-01 12:25:33 +020027 "[gender:masc] <> [p=M & m=M]",
Akrona3675e92025-06-26 17:46:59 +020028 },
29 }
30
31 // Create a new mapper
32 m, err := NewMapper([]config.MappingList{mappingList})
33 require.NoError(t, err)
34
35 var inputData any
36 err = json.Unmarshal([]byte(responseSnippet), &inputData)
37 assert.Nil(t, err)
38
39 result, err := m.ApplyResponseMappings("test-mapper", MappingOptions{Direction: AtoB}, inputData)
40 assert.Nil(t, err)
41
42 // For step 4, we should at least get back a processed result (even if snippet is unchanged)
43 // The main test is that no errors occurred in the processing
44 assert.NotNil(t, result)
45
46 // Verify the result is still a map with a snippet field
47 resultMap, ok := result.(map[string]any)
48 assert.True(t, ok)
49 assert.Contains(t, resultMap, "snippet")
50 assert.Equal(t, "<span title=\"marmot/m:gender:masc\"><span title=\"opennlp/p:M\" class=\"notinindex\"><span title=\"opennlp/m:M\" class=\"notinindex\">Der</span></span></span>", resultMap["snippet"])
51}
52
53// TestResponseMappingDebug helps debug the mapping process
54func TestResponseMappingDebug(t *testing.T) {
55 // Simple snippet with a single annotated token
56 responseSnippet := `{
57 "snippet": "<span title=\"marmot/m:gender:masc\">Der</span>"
58 }`
59
60 // Create test mapping list
61 mappingList := config.MappingList{
62 ID: "test-mapper",
63 FoundryA: "marmot",
64 LayerA: "m",
65 FoundryB: "opennlp",
66 LayerB: "p",
67 Mappings: []config.MappingRule{
68 "[gender=masc] <> [p=M & m=M]",
69 },
70 }
71
72 // Create a new mapper
73 m, err := NewMapper([]config.MappingList{mappingList})
74 require.NoError(t, err)
75
76 // Debug: Print what the parsed rules look like
Akron2f93c582026-02-19 16:49:13 +010077 rules := m.parsedQueryRules["test-mapper"]
Akrona3675e92025-06-26 17:46:59 +020078 t.Logf("Number of parsed rules: %d", len(rules))
79 for i, rule := range rules {
80 t.Logf("Rule %d - Upper: %+v", i, rule.Upper)
81 t.Logf("Rule %d - Lower: %+v", i, rule.Lower)
82 }
83
84 var inputData any
85 err = json.Unmarshal([]byte(responseSnippet), &inputData)
86 assert.Nil(t, err)
87
88 // Include proper foundry and layer information in the options
89 result, err := m.ApplyResponseMappings("test-mapper", MappingOptions{
90 Direction: AtoB,
91 FoundryA: "marmot",
92 LayerA: "m",
93 FoundryB: "opennlp",
94 LayerB: "p",
95 }, inputData)
96 assert.Nil(t, err)
97 t.Logf("Result: %+v", result)
98}
99
100// TestResponseMappingWithAndRelation tests mapping rules with AND relations
101func TestResponseMappingWithAndRelation(t *testing.T) {
102 // Snippet with multiple annotations on a single token - both must be on the same span for AND to work
103 responseSnippet := `{
104 "snippet": "<span title=\"marmot/p:DET\"><span title=\"marmot/p:gender:masc\">Der</span></span>"
105 }`
106
107 // Create test mapping list with AND relation
108 mappingList := config.MappingList{
109 ID: "test-and-mapper",
110 FoundryA: "marmot",
111 LayerA: "p",
112 FoundryB: "opennlp",
113 LayerB: "p",
114 Mappings: []config.MappingRule{
115 "[DET & gender:masc] <> [p=DT & case=nom]",
116 },
117 }
118
119 // Create a new mapper
120 m, err := NewMapper([]config.MappingList{mappingList})
121 require.NoError(t, err)
122
123 var inputData any
124 err = json.Unmarshal([]byte(responseSnippet), &inputData)
125 assert.Nil(t, err)
126
127 result, err := m.ApplyResponseMappings("test-and-mapper", MappingOptions{
128 Direction: AtoB,
129 FoundryA: "marmot",
130 LayerA: "p",
131 FoundryB: "opennlp",
132 LayerB: "p",
133 }, inputData)
134 assert.Nil(t, err)
135
136 // Verify the result contains the expected annotations
137 resultMap, ok := result.(map[string]any)
138 assert.True(t, ok)
139 assert.Contains(t, resultMap, "snippet")
140
141 snippet := resultMap["snippet"].(string)
142 // Should contain both new annotations - checking the actual format produced
143 assert.Contains(t, snippet, `title="marmot/p:DET"`)
144 assert.Contains(t, snippet, `title="opennlp/p:DT"`)
145 assert.Contains(t, snippet, `title="marmot/p:gender:masc"`)
146 assert.Contains(t, snippet, `title="opennlp/case:nom"`) // Format is foundry/layer:value for single values
147 assert.Contains(t, snippet, `class="notinindex"`)
148}
149
150// TestResponseMappingWithOrRelation tests mapping rules with OR relations
151func TestResponseMappingWithOrRelation(t *testing.T) {
152 // Snippet with one token that matches the OR condition
153 responseSnippet := `{
154 "snippet": "<span title=\"marmot/p:DET\">Der</span>"
155 }`
156
157 // Create test mapping list with OR relation
158 mappingList := config.MappingList{
159 ID: "test-or-mapper",
160 FoundryA: "marmot",
161 LayerA: "p",
162 FoundryB: "opennlp",
163 LayerB: "p",
164 Mappings: []config.MappingRule{
165 "[DET | ART] <> [determiner=true]",
166 },
167 }
168
169 // Create a new mapper
170 m, err := NewMapper([]config.MappingList{mappingList})
171 require.NoError(t, err)
172
173 var inputData any
174 err = json.Unmarshal([]byte(responseSnippet), &inputData)
175 assert.Nil(t, err)
176
177 result, err := m.ApplyResponseMappings("test-or-mapper", MappingOptions{Direction: AtoB}, inputData)
178 assert.Nil(t, err)
179
180 // Verify the result
181 resultMap, ok := result.(map[string]any)
182 assert.True(t, ok)
183 assert.Contains(t, resultMap, "snippet")
184
185 snippet := resultMap["snippet"].(string)
186
187 assert.Contains(t, snippet, `title="marmot/p:DET"`)
188 assert.Contains(t, snippet, `title="opennlp/determiner:true" class="notinindex"`)
189 assert.NotEmpty(t, snippet)
190}
191
192// TestResponseMappingComplexPattern1 tests complex nested patterns
193func TestResponseMappingComplexPattern1(t *testing.T) {
194 // Snippet with a token that has nested annotations
195 responseSnippet := `{
196 "snippet": "<span title=\"marmot/p:ADJA\"><span title=\"marmot/m:gender:masc\"><span title=\"marmot/m:case:nom\">alter</span></span></span>"
197 }`
198
199 // Create test mapping list with complex pattern
200 mappingList := config.MappingList{
201 ID: "test-complex-mapper",
202 FoundryA: "marmot",
203 LayerA: "p",
204 FoundryB: "opennlp",
205 LayerB: "p",
206 Mappings: []config.MappingRule{
207 "[ADJA & gender=masc & case=nom] <> [pos=ADJ & gender=M & case=NOM]",
208 },
209 }
210
211 // Create a new mapper
212 m, err := NewMapper([]config.MappingList{mappingList})
213 require.NoError(t, err)
214
215 var inputData any
216 err = json.Unmarshal([]byte(responseSnippet), &inputData)
217 assert.Nil(t, err)
218
219 result, err := m.ApplyResponseMappings("test-complex-mapper", MappingOptions{Direction: AtoB}, inputData)
220 assert.Nil(t, err)
221
222 // Verify the result contains the expected annotations
223 resultMap, ok := result.(map[string]any)
224 assert.True(t, ok)
225 assert.Contains(t, resultMap, "snippet")
226
227 snippet := resultMap["snippet"].(string)
228 assert.Contains(t, snippet, `title="marmot/p:ADJA`)
229 assert.Contains(t, snippet, `title="marmot/m:gender:masc`)
230 assert.NotContains(t, snippet, `title="opennlp`)
231 assert.NotEmpty(t, snippet) // At minimum, processing should succeed
232}
233
234// TestResponseMappingComplexPattern2 tests complex nested patterns
235func TestResponseMappingComplexPattern2(t *testing.T) {
236 // Snippet with a token that has nested annotations
237 responseSnippet := `{
238 "snippet": "<span title=\"marmot/p:ADJA\"><span title=\"marmot/p:gender:masc\"><span title=\"marmot/p:case:nom\">alter</span></span></span>"
239 }`
240
241 // Create test mapping list with complex pattern
242 mappingList := config.MappingList{
243 ID: "test-complex-mapper",
244 FoundryA: "marmot",
245 LayerA: "p",
246 FoundryB: "opennlp",
247 LayerB: "p",
248 Mappings: []config.MappingRule{
249 "[ADJA & gender:masc & case:nom] <> [pos=ADJ & gender=M & case=NOM]",
250 },
251 }
252
253 // Create a new mapper
254 m, err := NewMapper([]config.MappingList{mappingList})
255 require.NoError(t, err)
256
257 var inputData any
258 err = json.Unmarshal([]byte(responseSnippet), &inputData)
259 assert.Nil(t, err)
260
261 result, err := m.ApplyResponseMappings("test-complex-mapper", MappingOptions{Direction: AtoB}, inputData)
262 assert.Nil(t, err)
263
264 // Verify the result contains the expected annotations
265 resultMap, ok := result.(map[string]any)
266 assert.True(t, ok)
267 assert.Contains(t, resultMap, "snippet")
268
269 snippet := resultMap["snippet"].(string)
270 assert.Contains(t, snippet, `title="marmot/p:ADJA`)
271 assert.Contains(t, snippet, `title="marmot/p:gender:masc`)
272 assert.Contains(t, snippet, `title="opennlp/pos:ADJ" class="notinindex"`)
273 assert.Contains(t, snippet, `title="opennlp/gender:M" class="notinindex"`)
274 assert.Contains(t, snippet, `title="opennlp/case:NOM" class="notinindex"`)
275 assert.NotEmpty(t, snippet) // At minimum, processing should succeed
276}
277
278// TestResponseMappingMultipleTokens tests mapping across multiple tokens
279func TestResponseMappingMultipleTokens(t *testing.T) {
280 // Snippet with multiple tokens
281 responseSnippet := `{
282 "snippet": "<span title=\"marmot/p:DET\">Der</span> <span title=\"marmot/p:ADJA\"><span title=\"marmot/m:gender:masc\">alte</span></span> <span title=\"marmot/p:NN\">Mann</span>"
283 }`
284
285 // Create test mapping list that matches multiple patterns
286 mappingList := config.MappingList{
287 ID: "test-multi-mapper",
288 FoundryA: "marmot",
289 LayerA: "p",
290 FoundryB: "opennlp",
291 LayerB: "p",
292 Mappings: []config.MappingRule{
293 "[DET] <> [determiner=true]",
294 "[ADJA & gender:masc] <> [adjective=true & gender=M]",
295 "[NN] <> [noun=true]",
296 },
297 }
298
299 // Create a new mapper
300 m, err := NewMapper([]config.MappingList{mappingList})
301 require.NoError(t, err)
302
303 var inputData any
304 err = json.Unmarshal([]byte(responseSnippet), &inputData)
305 assert.Nil(t, err)
306
307 result, err := m.ApplyResponseMappings("test-multi-mapper", MappingOptions{Direction: AtoB}, inputData)
308 assert.Nil(t, err)
309
310 // Verify the result
311 resultMap, ok := result.(map[string]any)
312 assert.True(t, ok)
313 assert.Contains(t, resultMap, "snippet")
314
315 snippet := resultMap["snippet"].(string)
316 // Should contain annotations for each matching token (checking actual output format)
317 assert.Contains(t, snippet, `title="marmot/p:DET"`)
318 assert.Contains(t, snippet, `title="opennlp/determiner:true" class="notinindex"`) // Format is foundry/layer:value for single values
319 assert.NotContains(t, snippet, `title="opennlp/adjective:true" class="notinindex"`)
320 assert.Contains(t, snippet, `title="opennlp/noun:true" class="notinindex"`)
321}
322
323// TestResponseMappingNoMatch tests behavior when no patterns match
324func TestResponseMappingNoMatch(t *testing.T) {
325 // Snippet with tokens that don't match the pattern
326 responseSnippet := `{
327 "snippet": "<span title=\"marmot/p:VERB\">läuft</span>"
328 }`
329
330 // Create test mapping list with pattern that won't match
331 mappingList := config.MappingList{
332 ID: "test-nomatch-mapper",
333 FoundryA: "marmot",
334 LayerA: "p",
335 FoundryB: "opennlp",
336 LayerB: "p",
337 Mappings: []config.MappingRule{
338 "[DET] <> [determiner=true]",
339 },
340 }
341
342 // Create a new mapper
343 m, err := NewMapper([]config.MappingList{mappingList})
344 require.NoError(t, err)
345
346 var inputData any
347 err = json.Unmarshal([]byte(responseSnippet), &inputData)
348 assert.Nil(t, err)
349
350 result, err := m.ApplyResponseMappings("test-nomatch-mapper", MappingOptions{Direction: AtoB}, inputData)
351 assert.Nil(t, err)
352
353 // Verify the result is unchanged since no patterns matched
354 resultMap, ok := result.(map[string]any)
355 assert.True(t, ok)
356 assert.Contains(t, resultMap, "snippet")
357
358 snippet := resultMap["snippet"].(string)
359 // Should be the original snippet without new annotations
360 assert.Equal(t, `<span title="marmot/p:VERB">läuft</span>`, snippet)
361 assert.NotContains(t, snippet, `class="notinindex"`)
362}
363
364// TestResponseMappingBidirectional tests bidirectional mapping (B to A direction)
365func TestResponseMappingBidirectional(t *testing.T) {
366 // Snippet with opennlp annotations
367 responseSnippet := `{
368 "snippet": "<span title=\"opennlp/p:DT\"><span title=\"opennlp/p:determiner:true\">Der</span></span>"
369 }`
370
371 // Create test mapping list
372 mappingList := config.MappingList{
373 ID: "test-bidirectional-mapper",
374 FoundryA: "marmot",
375 LayerA: "p",
376 FoundryB: "opennlp",
377 LayerB: "p",
378 Mappings: []config.MappingRule{
379 "[DET] <> [DT & determiner:true]",
380 },
381 }
382
383 // Create a new mapper
384 m, err := NewMapper([]config.MappingList{mappingList})
385 require.NoError(t, err)
386
387 var inputData any
388 err = json.Unmarshal([]byte(responseSnippet), &inputData)
389 assert.Nil(t, err)
390
391 // Test B to A direction
392 result, err := m.ApplyResponseMappings("test-bidirectional-mapper", MappingOptions{Direction: BtoA}, inputData)
393 assert.Nil(t, err)
394
395 // Verify the result
396 resultMap, ok := result.(map[string]any)
397 assert.True(t, ok)
398 assert.Contains(t, resultMap, "snippet")
399
400 snippet := resultMap["snippet"].(string)
401
402 assert.Contains(t, snippet, `title="opennlp/p:DT"`)
403 assert.Contains(t, snippet, `title="marmot/p:DET" class="notinindex"`)
404 assert.NotEmpty(t, snippet) // At minimum, processing should succeed
405}
406
407// TestResponseMappingWithValuePatterns tests patterns with specific values
408func TestResponseMappingWithValuePatterns(t *testing.T) {
409 // Snippet with value-specific annotations
410 responseSnippet := `{
411 "snippet": "<span title=\"marmot/m:case:nom\"><span title=\"marmot/m:gender:fem\">die</span></span>"
412 }`
413
414 // Create test mapping list with value-specific patterns
415 mappingList := config.MappingList{
416 ID: "test-value-mapper",
417 FoundryA: "marmot",
418 LayerA: "m",
419 FoundryB: "opennlp",
420 LayerB: "m",
421 Mappings: []config.MappingRule{
422 "[case:nom & gender:fem] <> [case=NOM & gender=F]",
423 },
424 }
425
426 // Create a new mapper
427 m, err := NewMapper([]config.MappingList{mappingList})
428 require.NoError(t, err)
429
430 var inputData any
431 err = json.Unmarshal([]byte(responseSnippet), &inputData)
432 assert.Nil(t, err)
433
434 result, err := m.ApplyResponseMappings("test-value-mapper", MappingOptions{Direction: AtoB}, inputData)
435 assert.Nil(t, err)
436
437 // Verify the result
438 resultMap, ok := result.(map[string]any)
439 assert.True(t, ok)
440 assert.Contains(t, resultMap, "snippet")
441
442 snippet := resultMap["snippet"].(string)
443 assert.Contains(t, snippet, `title="marmot/m:case:nom"`) // Format is foundry/layer:value
444 assert.Contains(t, snippet, `title="opennlp/case:NOM" class="notinindex"`) // Format is foundry/layer:value
445 assert.Contains(t, snippet, `title="opennlp/gender:F" class="notinindex"`)
446}
447
448// TestResponseMappingNestedSpans tests handling of deeply nested span structures
449func TestResponseMappingNestedSpans(t *testing.T) {
450 // Snippet with deeply nested spans
451 responseSnippet := `{
Akron4de47a92025-06-27 11:58:11 +0200452 "snippet": "<span title=\"level1/l:outer\"><span title=\"level2/l:middle\"><span title=\"marmot/p:DET\">der</span></span></span>",
453 "author": "John Doe"
Akrona3675e92025-06-26 17:46:59 +0200454 }`
455
456 // Create test mapping list
457 mappingList := config.MappingList{
458 ID: "test-nested-mapper",
459 FoundryA: "marmot",
460 LayerA: "p",
461 FoundryB: "opennlp",
462 LayerB: "p",
463 Mappings: []config.MappingRule{
464 "[DET] <> [determiner=yes]",
465 },
466 }
467
468 // Create a new mapper
469 m, err := NewMapper([]config.MappingList{mappingList})
470 require.NoError(t, err)
471
472 var inputData any
473 err = json.Unmarshal([]byte(responseSnippet), &inputData)
474 assert.Nil(t, err)
475
476 result, err := m.ApplyResponseMappings("test-nested-mapper", MappingOptions{Direction: AtoB}, inputData)
477 assert.Nil(t, err)
478
479 // Verify the result preserves the nested structure and adds new annotations
480 resultMap, ok := result.(map[string]any)
481 assert.True(t, ok)
482 assert.Contains(t, resultMap, "snippet")
483
484 snippet := resultMap["snippet"].(string)
485 // Should contain the new annotation while preserving existing structure
486 assert.Contains(t, snippet, `title="opennlp/determiner:yes"`) // Format is foundry/layer:value
487 assert.Contains(t, snippet, `class="notinindex"`)
488 assert.Contains(t, snippet, `title="level1/l:outer"`)
489 assert.Contains(t, snippet, `title="level2/l:middle"`)
490 assert.Contains(t, snippet, `title="marmot/p:DET"`)
Akron4de47a92025-06-27 11:58:11 +0200491
492 author := resultMap["author"].(string)
493 assert.Equal(t, "John Doe", author)
Akrona3675e92025-06-26 17:46:59 +0200494}
Akron497cfe82025-07-03 13:26:54 +0200495
Akron9663af92026-02-20 13:45:08 +0100496// TestResponseAnnotationDuplicateTokenText tests that when the same token text
497// appears multiple times, only the correct occurrence is annotated based on its
498// annotation context (not string position).
499func TestResponseAnnotationDuplicateTokenText(t *testing.T) {
500 // "Der" appears twice: first as NN (no match), then as DET (match).
501 // The old string-heuristic would annotate the first "Der" because it
502 // finds the first occurrence preceded by ">".
503 responseSnippet := `{
504 "snippet": "<span title=\"marmot/p:NN\">Der</span> <span title=\"marmot/p:DET\">Der</span>"
505 }`
506
507 mappingList := config.MappingList{
508 ID: "test-dup-mapper",
509 FoundryA: "marmot",
510 LayerA: "p",
511 FoundryB: "opennlp",
512 LayerB: "p",
513 Mappings: []config.MappingRule{
514 "[DET] <> [DT]",
515 },
516 }
517
518 m, err := NewMapper([]config.MappingList{mappingList})
519 require.NoError(t, err)
520
521 var inputData any
522 err = json.Unmarshal([]byte(responseSnippet), &inputData)
523 require.NoError(t, err)
524
525 result, err := m.ApplyResponseMappings("test-dup-mapper", MappingOptions{Direction: AtoB}, inputData)
526 require.NoError(t, err)
527
528 resultMap := result.(map[string]any)
529 snippet := resultMap["snippet"].(string)
530
531 // Only the second "Der" (DET) should be annotated
532 expected := `<span title="marmot/p:NN">Der</span> <span title="marmot/p:DET"><span title="opennlp/p:DT" class="notinindex">Der</span></span>`
533 assert.Equal(t, expected, snippet)
534}
535
536// TestResponseAnnotationTextInTitle verifies that the SAX rewriter only wraps
537// text nodes, not content inside title attributes, even when the token text
538// matches part of an attribute value.
539func TestResponseAnnotationTextInTitle(t *testing.T) {
540 responseSnippet := `{
541 "snippet": "<span title=\"marmot/p:NN\">NN</span>"
542 }`
543
544 mappingList := config.MappingList{
545 ID: "test-title-mapper",
546 FoundryA: "marmot",
547 LayerA: "p",
548 FoundryB: "opennlp",
549 LayerB: "p",
550 Mappings: []config.MappingRule{
551 "[NN] <> [NOUN]",
552 },
553 }
554
555 m, err := NewMapper([]config.MappingList{mappingList})
556 require.NoError(t, err)
557
558 var inputData any
559 err = json.Unmarshal([]byte(responseSnippet), &inputData)
560 require.NoError(t, err)
561
562 result, err := m.ApplyResponseMappings("test-title-mapper", MappingOptions{Direction: AtoB}, inputData)
563 require.NoError(t, err)
564
565 resultMap := result.(map[string]any)
566 snippet := resultMap["snippet"].(string)
567
568 expected := `<span title="marmot/p:NN"><span title="opennlp/p:NOUN" class="notinindex">NN</span></span>`
569 assert.Equal(t, expected, snippet)
570}
571
572// TestResponseAnnotationWhitespaceAroundText tests that annotations are applied
573// even when there is whitespace between the enclosing tag and the text content.
574// The old string-heuristic fails because it requires ">" immediately before the text.
575func TestResponseAnnotationWhitespaceAroundText(t *testing.T) {
576 responseSnippet := `{
577 "snippet": "<span title=\"marmot/p:DET\"> Der </span>"
578 }`
579
580 mappingList := config.MappingList{
581 ID: "test-ws-mapper",
582 FoundryA: "marmot",
583 LayerA: "p",
584 FoundryB: "opennlp",
585 LayerB: "p",
586 Mappings: []config.MappingRule{
587 "[DET] <> [DT]",
588 },
589 }
590
591 m, err := NewMapper([]config.MappingList{mappingList})
592 require.NoError(t, err)
593
594 var inputData any
595 err = json.Unmarshal([]byte(responseSnippet), &inputData)
596 require.NoError(t, err)
597
598 result, err := m.ApplyResponseMappings("test-ws-mapper", MappingOptions{Direction: AtoB}, inputData)
599 require.NoError(t, err)
600
601 resultMap := result.(map[string]any)
602 snippet := resultMap["snippet"].(string)
603
604 // Whitespace should be preserved, annotation wraps only the token text
605 expected := `<span title="marmot/p:DET"> <span title="opennlp/p:DT" class="notinindex">Der</span> </span>`
606 assert.Equal(t, expected, snippet)
607}
608
609// TestResponseAnnotationCrossElementText tests annotation of individual tokens
610// whose text spans across sibling/child elements.
611func TestResponseAnnotationCrossElementText(t *testing.T) {
612 responseSnippet := `{
613 "snippet": "<span title=\"marmot/p:DET\">Die</span> <span title=\"base/s:s\"><span title=\"marmot/p:NN\">Sonne</span></span>"
614 }`
615
616 mappingList := config.MappingList{
617 ID: "test-cross-mapper",
618 FoundryA: "marmot",
619 LayerA: "p",
620 FoundryB: "opennlp",
621 LayerB: "p",
622 Mappings: []config.MappingRule{
623 "[DET] <> [DT]",
624 "[NN] <> [NOUN]",
625 },
626 }
627
628 m, err := NewMapper([]config.MappingList{mappingList})
629 require.NoError(t, err)
630
631 var inputData any
632 err = json.Unmarshal([]byte(responseSnippet), &inputData)
633 require.NoError(t, err)
634
635 result, err := m.ApplyResponseMappings("test-cross-mapper", MappingOptions{Direction: AtoB}, inputData)
636 require.NoError(t, err)
637
638 resultMap := result.(map[string]any)
639 snippet := resultMap["snippet"].(string)
640
641 assert.Contains(t, snippet, `<span title="opennlp/p:DT" class="notinindex">Die</span>`)
642 assert.Contains(t, snippet, `<span title="opennlp/p:NOUN" class="notinindex">Sonne</span>`)
643 assert.Contains(t, snippet, `title="base/s:s"`)
644}
645
646// TestResponseAnnotationSubstringToken tests that a short token ("er") is
647// annotated only in its own text node and not when it appears as a prefix of
648// another word ("er Mann") in an earlier text node.
649func TestResponseAnnotationSubstringToken(t *testing.T) {
650 // "er" appears at the start of "er Mann" (NN span) and as standalone (PPER span).
651 // The old heuristic matches the first occurrence because "er" is preceded by ">"
652 // and followed by " ".
653 responseSnippet := `{
654 "snippet": "<span title=\"marmot/p:NN\">er Mann</span> <span title=\"marmot/p:PPER\">er</span>"
655 }`
656
657 mappingList := config.MappingList{
658 ID: "test-sub-mapper",
659 FoundryA: "marmot",
660 LayerA: "p",
661 FoundryB: "opennlp",
662 LayerB: "p",
663 Mappings: []config.MappingRule{
664 "[PPER] <> [PRP]",
665 },
666 }
667
668 m, err := NewMapper([]config.MappingList{mappingList})
669 require.NoError(t, err)
670
671 var inputData any
672 err = json.Unmarshal([]byte(responseSnippet), &inputData)
673 require.NoError(t, err)
674
675 result, err := m.ApplyResponseMappings("test-sub-mapper", MappingOptions{Direction: AtoB}, inputData)
676 require.NoError(t, err)
677
678 resultMap := result.(map[string]any)
679 snippet := resultMap["snippet"].(string)
680
681 // The NN "er Mann" must remain unchanged; only the PPER "er" gets annotated
682 expected := `<span title="marmot/p:NN">er Mann</span> <span title="marmot/p:PPER"><span title="opennlp/p:PRP" class="notinindex">er</span></span>`
683 assert.Equal(t, expected, snippet)
684}
685
686// TestResponseAnnotationSelfClosingTags verifies that self-closing tags like
687// <br/> are preserved and do not interfere with annotation insertion.
688func TestResponseAnnotationSelfClosingTags(t *testing.T) {
689 responseSnippet := `{
690 "snippet": "<span title=\"marmot/p:DET\">Der</span><br/><span title=\"marmot/p:NN\">Mann</span>"
691 }`
692
693 mappingList := config.MappingList{
694 ID: "test-br-mapper",
695 FoundryA: "marmot",
696 LayerA: "p",
697 FoundryB: "opennlp",
698 LayerB: "p",
699 Mappings: []config.MappingRule{
700 "[DET] <> [DT]",
701 "[NN] <> [NOUN]",
702 },
703 }
704
705 m, err := NewMapper([]config.MappingList{mappingList})
706 require.NoError(t, err)
707
708 var inputData any
709 err = json.Unmarshal([]byte(responseSnippet), &inputData)
710 require.NoError(t, err)
711
712 result, err := m.ApplyResponseMappings("test-br-mapper", MappingOptions{Direction: AtoB}, inputData)
713 require.NoError(t, err)
714
715 snippet := result.(map[string]any)["snippet"].(string)
716
717 assert.Contains(t, snippet, "<br/>")
718 assert.Contains(t, snippet, `<span title="opennlp/p:DT" class="notinindex">Der</span>`)
719 assert.Contains(t, snippet, `<span title="opennlp/p:NOUN" class="notinindex">Mann</span>`)
720}
721
722// TestResponseAnnotationEntityReferences verifies that entity references
723// (&amp;, &lt;, etc.) are correctly preserved in output.
724func TestResponseAnnotationEntityReferences(t *testing.T) {
725 responseSnippet := `{
726 "snippet": "<span title=\"marmot/p:NN\">Haus &amp; Hof</span>"
727 }`
728
729 mappingList := config.MappingList{
730 ID: "test-entity-mapper",
731 FoundryA: "marmot",
732 LayerA: "p",
733 FoundryB: "opennlp",
734 LayerB: "p",
735 Mappings: []config.MappingRule{
736 "[NN] <> [NOUN]",
737 },
738 }
739
740 m, err := NewMapper([]config.MappingList{mappingList})
741 require.NoError(t, err)
742
743 var inputData any
744 err = json.Unmarshal([]byte(responseSnippet), &inputData)
745 require.NoError(t, err)
746
747 result, err := m.ApplyResponseMappings("test-entity-mapper", MappingOptions{Direction: AtoB}, inputData)
748 require.NoError(t, err)
749
750 snippet := result.(map[string]any)["snippet"].(string)
751
752 // Entity reference must be preserved (re-encoded) in the annotated output
753 expected := `<span title="marmot/p:NN"><span title="opennlp/p:NOUN" class="notinindex">Haus &amp; Hof</span></span>`
754 assert.Equal(t, expected, snippet)
755}
756
757// TestResponseAnnotationEntityLtGt verifies &lt; and &gt; are re-encoded.
758func TestResponseAnnotationEntityLtGt(t *testing.T) {
759 responseSnippet := `{
760 "snippet": "<span title=\"marmot/p:SYM\">&lt;tag&gt;</span>"
761 }`
762
763 mappingList := config.MappingList{
764 ID: "test-ltgt-mapper",
765 FoundryA: "marmot",
766 LayerA: "p",
767 FoundryB: "opennlp",
768 LayerB: "p",
769 Mappings: []config.MappingRule{
770 "[SYM] <> [PUNCT]",
771 },
772 }
773
774 m, err := NewMapper([]config.MappingList{mappingList})
775 require.NoError(t, err)
776
777 var inputData any
778 err = json.Unmarshal([]byte(responseSnippet), &inputData)
779 require.NoError(t, err)
780
781 result, err := m.ApplyResponseMappings("test-ltgt-mapper", MappingOptions{Direction: AtoB}, inputData)
782 require.NoError(t, err)
783
784 snippet := result.(map[string]any)["snippet"].(string)
785
786 expected := `<span title="marmot/p:SYM"><span title="opennlp/p:PUNCT" class="notinindex">&lt;tag&gt;</span></span>`
787 assert.Equal(t, expected, snippet)
788}
789
790// TestResponseAnnotationCDATAGraceful verifies that a CDATA section in the
791// snippet does not cause errors and is passed through unchanged.
792func TestResponseAnnotationCDATAGraceful(t *testing.T) {
793 responseSnippet := `{
794 "snippet": "<span title=\"marmot/p:DET\">Der</span><![CDATA[ raw ]]><span title=\"marmot/p:NN\">Mann</span>"
795 }`
796
797 mappingList := config.MappingList{
798 ID: "test-cdata-mapper",
799 FoundryA: "marmot",
800 LayerA: "p",
801 FoundryB: "opennlp",
802 LayerB: "p",
803 Mappings: []config.MappingRule{
804 "[NN] <> [NOUN]",
805 },
806 }
807
808 m, err := NewMapper([]config.MappingList{mappingList})
809 require.NoError(t, err)
810
811 var inputData any
812 err = json.Unmarshal([]byte(responseSnippet), &inputData)
813 require.NoError(t, err)
814
815 result, err := m.ApplyResponseMappings("test-cdata-mapper", MappingOptions{Direction: AtoB}, inputData)
816 require.NoError(t, err)
817
818 snippet := result.(map[string]any)["snippet"].(string)
819
820 assert.Contains(t, snippet, "<![CDATA[ raw ]]>")
821 assert.Contains(t, snippet, `<span title="opennlp/p:NOUN" class="notinindex">Mann</span>`)
822}
823
824// TestResponseAnnotationOverlappingSpans verifies that when two independent
825// rules match the same token, both annotations are applied.
826func TestResponseAnnotationOverlappingSpans(t *testing.T) {
827 responseSnippet := `{
828 "snippet": "<span title=\"marmot/p:DET\"><span title=\"marmot/m:case:nom\">Der</span></span>"
829 }`
830
831 mappingList := config.MappingList{
832 ID: "test-overlap-mapper",
833 FoundryA: "marmot",
834 LayerA: "p",
835 FoundryB: "opennlp",
836 LayerB: "p",
837 Mappings: []config.MappingRule{
838 "[DET] <> [DT]",
839 },
840 }
841
842 m, err := NewMapper([]config.MappingList{mappingList})
843 require.NoError(t, err)
844
845 var inputData any
846 err = json.Unmarshal([]byte(responseSnippet), &inputData)
847 require.NoError(t, err)
848
849 result, err := m.ApplyResponseMappings("test-overlap-mapper", MappingOptions{Direction: AtoB}, inputData)
850 require.NoError(t, err)
851
852 snippet := result.(map[string]any)["snippet"].(string)
853
854 // The existing nested structure must be preserved, with new annotation added
855 assert.Contains(t, snippet, `title="marmot/p:DET"`)
856 assert.Contains(t, snippet, `title="marmot/m:case:nom"`)
857 assert.Contains(t, snippet, `title="opennlp/p:DT" class="notinindex"`)
858 assert.Contains(t, snippet, "Der")
859}
860
861// TestResponseAnnotationEmptyTextNodes verifies that empty or whitespace-only
862// text nodes are passed through without errors and without spurious annotations.
863func TestResponseAnnotationEmptyTextNodes(t *testing.T) {
864 responseSnippet := `{
865 "snippet": "<span title=\"marmot/p:DET\"></span> <span title=\"marmot/p:NN\">Mann</span>"
866 }`
867
868 mappingList := config.MappingList{
869 ID: "test-empty-mapper",
870 FoundryA: "marmot",
871 LayerA: "p",
872 FoundryB: "opennlp",
873 LayerB: "p",
874 Mappings: []config.MappingRule{
875 "[DET] <> [DT]",
876 "[NN] <> [NOUN]",
877 },
878 }
879
880 m, err := NewMapper([]config.MappingList{mappingList})
881 require.NoError(t, err)
882
883 var inputData any
884 err = json.Unmarshal([]byte(responseSnippet), &inputData)
885 require.NoError(t, err)
886
887 result, err := m.ApplyResponseMappings("test-empty-mapper", MappingOptions{Direction: AtoB}, inputData)
888 require.NoError(t, err)
889
890 snippet := result.(map[string]any)["snippet"].(string)
891
892 // The empty DET span should not get an annotation
893 // The NN token "Mann" should be annotated
894 assert.Contains(t, snippet, `<span title="marmot/p:DET"></span>`)
895 assert.Contains(t, snippet, `<span title="opennlp/p:NOUN" class="notinindex">Mann</span>`)
896}
897
898// TestResponseAnnotationWhitespaceOnlyNodes verifies that whitespace-only text
899// nodes are preserved without annotations.
900func TestResponseAnnotationWhitespaceOnlyNodes(t *testing.T) {
901 responseSnippet := `{
902 "snippet": "<span title=\"marmot/p:DET\"> </span><span title=\"marmot/p:NN\">Mann</span>"
903 }`
904
905 mappingList := config.MappingList{
906 ID: "test-wsonly-mapper",
907 FoundryA: "marmot",
908 LayerA: "p",
909 FoundryB: "opennlp",
910 LayerB: "p",
911 Mappings: []config.MappingRule{
912 "[DET] <> [DT]",
913 "[NN] <> [NOUN]",
914 },
915 }
916
917 m, err := NewMapper([]config.MappingList{mappingList})
918 require.NoError(t, err)
919
920 var inputData any
921 err = json.Unmarshal([]byte(responseSnippet), &inputData)
922 require.NoError(t, err)
923
924 result, err := m.ApplyResponseMappings("test-wsonly-mapper", MappingOptions{Direction: AtoB}, inputData)
925 require.NoError(t, err)
926
927 snippet := result.(map[string]any)["snippet"].(string)
928
929 // Whitespace-only text should not be annotated
930 assert.Contains(t, snippet, `<span title="marmot/p:DET"> </span>`)
931 assert.Contains(t, snippet, `<span title="opennlp/p:NOUN" class="notinindex">Mann</span>`)
932}
933
Akron497cfe82025-07-03 13:26:54 +0200934// TestResponseMappingWithLayerOverride tests layer precedence rules
935func TestResponseMappingWithLayerOverride(t *testing.T) {
936 // Test 1: Explicit layer in mapping rule should take precedence over MappingOptions
937 t.Run("Explicit layer takes precedence", func(t *testing.T) {
938 responseSnippet := `{
939 "snippet": "<span title=\"marmot/p:DET\">Der</span>"
940 }`
941
942 // Mapping rule with explicit layer [p=DT] - this should NOT be overridden
943 mappingList := config.MappingList{
944 ID: "test-layer-precedence",
945 FoundryA: "marmot",
946 LayerA: "p",
947 FoundryB: "opennlp",
948 LayerB: "p", // default layer
949 Mappings: []config.MappingRule{
950 "[DET] <> [p=DT]", // Explicit layer "p" should not be overridden
951 },
952 }
953
954 m, err := NewMapper([]config.MappingList{mappingList})
955 require.NoError(t, err)
956
957 var inputData any
958 err = json.Unmarshal([]byte(responseSnippet), &inputData)
959 require.NoError(t, err)
960
961 // Apply with layer override - should NOT affect explicit layer in mapping rule
962 result, err := m.ApplyResponseMappings("test-layer-precedence", MappingOptions{
963 Direction: AtoB,
964 LayerB: "pos", // This should NOT override the explicit "p" layer in [p=DT]
965 }, inputData)
966 require.NoError(t, err)
967
968 resultMap := result.(map[string]any)
969 snippet := resultMap["snippet"].(string)
970
971 // Should use explicit layer "p" from mapping rule, NOT "pos" from override
972 assert.Contains(t, snippet, `title="opennlp/p:DT" class="notinindex"`)
973 assert.NotContains(t, snippet, `title="opennlp/pos:DT" class="notinindex"`)
974 })
975
976 // Test 2: Implicit layer in mapping rule should use MappingOptions layer override
977 t.Run("Implicit layer uses MappingOptions override", func(t *testing.T) {
978 responseSnippet := `{
979 "snippet": "<span title=\"marmot/p:DET\">Der</span>"
980 }`
981
982 // Mapping rule with implicit layer [DT] - this should use layer override
983 mappingList := config.MappingList{
984 ID: "test-layer-override",
985 FoundryA: "marmot",
986 LayerA: "p",
987 FoundryB: "opennlp",
988 LayerB: "p", // default layer
989 Mappings: []config.MappingRule{
990 "[DET] <> [DT]", // No explicit layer - should use override
991 },
992 }
993
994 m, err := NewMapper([]config.MappingList{mappingList})
995 require.NoError(t, err)
996
997 var inputData any
998 err = json.Unmarshal([]byte(responseSnippet), &inputData)
999 require.NoError(t, err)
1000
1001 // Apply with layer override - should affect implicit layer in mapping rule
1002 result, err := m.ApplyResponseMappings("test-layer-override", MappingOptions{
1003 Direction: AtoB,
1004 LayerB: "pos", // This should override the default layer for [DT]
1005 }, inputData)
1006 require.NoError(t, err)
1007
1008 resultMap := result.(map[string]any)
1009 snippet := resultMap["snippet"].(string)
1010
1011 // Should use layer "pos" from override, NOT default "p" layer
1012 assert.Contains(t, snippet, `title="opennlp/pos:DT" class="notinindex"`)
1013 assert.NotContains(t, snippet, `title="opennlp/p:DT" class="notinindex"`)
1014 })
1015}