blob: 76fd45ec76f3a12fb0790d86dce10fbaf38e74fc [file] [log] [blame]
Akrona3675e92025-06-26 17:46:59 +02001package mapper
2
3import (
4 "encoding/json"
5 "testing"
6
7 "github.com/KorAP/KoralPipe-TermMapper/config"
8 "github.com/stretchr/testify/assert"
9 "github.com/stretchr/testify/require"
10)
11
12func XTestResponseMapping(t *testing.T) {
13
14 responseSnippet := `{
15 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
16 "ID": null,
17 "author": "Schmelzle, u.a.",
18 "availability": "CC-BY-SA",
19 "context": {
20 "left": [
21 "token",
22 0
23 ],
24 "right": [
25 "token",
26 0
27 ]
28 },
29 "corpusID": null,
30 "corpusSigle": "WPD17",
31 "docID": null,
32 "docSigle": "WPD17/J80",
33 "fields": [
34 {
35 "@type": "koral:field",
36 "key": "ID"
37 },
38 {
39 "@type": "koral:field",
40 "key": "textSigle",
41 "type": "type:string",
42 "value": "WPD17/J80/33968"
43 },
44 {
45 "@type": "koral:field",
46 "key": "corpusID"
47 },
48 {
49 "@type": "koral:field",
50 "key": "author",
51 "type": "type:text",
52 "value": "Schmelzle, u.a."
53 },
54 {
55 "@type": "koral:field",
56 "key": "title",
57 "type": "type:text",
58 "value": "Johanne von Gemmingen"
59 },
60 {
61 "@type": "koral:field",
62 "key": "subTitle"
63 },
64 {
65 "@type": "koral:field",
66 "key": "textClass"
67 },
68 {
69 "@type": "koral:field",
70 "key": "pubPlace",
71 "type": "type:string",
72 "value": "URL:http://de.wikipedia.org"
73 },
74 {
75 "@type": "koral:field",
76 "key": "pubDate",
77 "type": "type:date",
78 "value": "2017-07-01"
79 },
80 {
81 "@type": "koral:field",
82 "key": "availability",
83 "type": "type:string",
84 "value": "CC-BY-SA"
85 },
86 {
87 "@type": "koral:field",
88 "key": "layerInfos",
89 "type": "type:store",
90 "value": "corenlp/c=spans corenlp/p=tokens corenlp/s=spans dereko/s=spans malt/d=rels marmot/m=tokens marmot/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens"
91 },
92 {
93 "@type": "koral:field",
94 "key": "docSigle",
95 "type": "type:string",
96 "value": "WPD17/J80"
97 },
98 {
99 "@type": "koral:field",
100 "key": "corpusSigle",
101 "type": "type:string",
102 "value": "WPD17"
103 }
104 ],
105 "hasSnippet": true,
106 "hasTokens": false,
107 "layerInfos": "corenlp/c=spans corenlp/p=tokens corenlp/s=spans dereko/s=spans malt/d=rels marmot/m=tokens marmot/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens",
108 "matchID": "p162-165(1)163-163x_yuvMM6VZLzLe_qZ0zb9yguvk37eDi-pSoL1nBdUkhNs",
109 "meta": {
110 "version": "Krill-0.64.1"
111 },
112 "pubDate": "2017-07-01",
113 "pubPlace": "URL:http://de.wikipedia.org",
114 "snippet": "<span class=\"context-left\">` +
115 `</span>` +
116 `<span class=\"match\">` +
117 `<mark>` +
118 `<span title=\"corenlp/p:ART\">` +
119 `<span title=\"marmot/m:case:nom\">` +
120 `<span title=\"marmot/m:gender:masc\">` +
121 `<span title=\"marmot/m:number:sg\">` +
122 `<span title=\"marmot/p:ART\">` +
123 `<span title=\"opennlp/p:ART\">` +
124 `<span title=\"tt/l:die\">` +
125 `<span title=\"tt/p:ART\">Der</span>` +
126 `</span>` +
127 `</span>` +
128 `</span>` +
129 `</span>` +
130 `</span>` +
131 `</span>` +
132 `</span> ` +
133 `<span title=\"corenlp/p:ADJA\">` +
134 `<span title=\"marmot/m:case:nom\">` +
135 `<span title=\"marmot/m:degree:pos\">` +
136 `<span title=\"marmot/m:gender:masc\">` +
137 `<span title=\"marmot/m:number:sg\">` +
138 `<span title=\"marmot/p:ADJA\">` +
139 `<span title=\"opennlp/p:ADJA\">` +
140 `<span title=\"tt/l:alt\">` +
141 `<span title=\"tt/p:ADJA\">alte</span>` +
142 `</span>` +
143 `</span>` +
144 `</span>` +
145 `</span>` +
146 `</span>` +
147 `</span>` +
148 `</span>` +
149 `</span> ` +
150 `<span title=\"corenlp/p:NN\">` +
151 `<span title=\"marmot/m:case:nom\">` +
152 `<span title=\"marmot/m:gender:masc\">` +
153 `<span title=\"marmot/m:number:sg\">` +
154 `<span title=\"marmot/p:NN\">` +
155 `<span title=\"opennlp/p:NN\">` +
156 `<span title=\"tt/l:Baum\">` +
157 `<span title=\"tt/p:NN\">Baum</span>` +
158 `</span>` +
159 `</span>` +
160 `</span>` +
161 `</span>` +
162 `</span>` +
163 `</span>` +
164 `</span>` +
165 `</mark> ` +
166 `<span title=\"corenlp/p:KON\">` +
167 `<span title=\"marmot/p:KON\">` +
168 `<span title=\"opennlp/p:KON\">` +
169 `<span title=\"tt/l:und\">` +
170 `<span title=\"tt/p:KON\">und</span>` +
171 `</span>` +
172 `</span>` +
173 `</span>` +
174 `</span> ` +
175 `<span title=\"corenlp/p:ADJA\">` +
176 `<span title=\"marmot/m:case:nom\">` +
177 `<span title=\"marmot/m:degree:pos\">` +
178 `<span title=\"marmot/m:gender:masc\">` +
179 `<span title=\"marmot/m:number:pl\">` +
180 `<span title=\"marmot/p:ADJA\">` +
181 `<span title=\"opennlp/p:ADJA\">` +
182 `<span title=\"tt/l:andere\">` +
183 `<span title=\"tt/p:PIAT\">` +
184 `<span title=\"tt/p:PIS\">andere</span>` +
185 `</span>` +
186 `</span>` +
187 `</span>` +
188 `</span>` +
189 `</span>` +
190 `</span>` +
191 `</span>` +
192 `</span>` +
193 `</span> ` +
194 `<span title=\"corenlp/p:NN\">` +
195 `<span title=\"marmot/m:case:nom\">` +
196 `<span title=\"marmot/m:gender:masc\">` +
197 `<span title=\"marmot/m:number:pl\">` +
198 `<span title=\"marmot/p:NN\">` +
199 `<span title=\"opennlp/p:NN\">` +
200 `<span title=\"tt/l:Märchen\">` +
201 `<span title=\"tt/p:NN\">Märchen</span>` +
202 `</span>` +
203 `</span>` +
204 `</span>` +
205 `</span>` +
206 `</span>` +
207 `</span>` +
208 `</span>, ` +
209 `<span title=\"corenlp/p:CARD\">` +
210 `<span title=\"marmot/p:CARD\">` +
211 `<span title=\"opennlp/p:CARD\">` +
212 `<span title=\"tt/l:@card@\">` +
213 `<span title=\"tt/p:CARD\">1946</span>` +
214 `</span>` +
215 `</span>` +
216 `</span>` +
217 `</span> ` +
218 `</span>` +
219 `<span class=\"context-right\"></span>",` +
220 `"subTitle": null,
221 "textClass": null,
222 "textID": null,
223 "textSigle": "WPD17/J80/33968",
224 "title": "Johanne von Gemmingen"
225}`
226
227 expectedOutput := `{
228 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
229 "ID": null,
230 "author": "Schmelzle, u.a.",
231 "availability": "CC-BY-SA",
232 "context": {
233 "left": [
234 "token",
235 0
236 ],
237 "right": [
238 "token",
239 0
240 ]
241 },
242 "corpusID": null,
243 "corpusSigle": "WPD17",
244 "docID": null,
245 "docSigle": "WPD17/J80",
246 "fields": [
247 {
248 "@type": "koral:field",
249 "key": "ID"
250 },
251 {
252 "@type": "koral:field",
253 "key": "textSigle",
254 "type": "type:string",
255 "value": "WPD17/J80/33968"
256 },
257 {
258 "@type": "koral:field",
259 "key": "corpusID"
260 },
261 {
262 "@type": "koral:field",
263 "key": "author",
264 "type": "type:text",
265 "value": "Schmelzle, u.a."
266 },
267 {
268 "@type": "koral:field",
269 "key": "title",
270 "type": "type:text",
271 "value": "Johanne von Gemmingen"
272 },
273 {
274 "@type": "koral:field",
275 "key": "subTitle"
276 },
277 {
278 "@type": "koral:field",
279 "key": "textClass"
280 },
281 {
282 "@type": "koral:field",
283 "key": "pubPlace",
284 "type": "type:string",
285 "value": "URL:http://de.wikipedia.org"
286 },
287 {
288 "@type": "koral:field",
289 "key": "pubDate",
290 "type": "type:date",
291 "value": "2017-07-01"
292 },
293 {
294 "@type": "koral:field",
295 "key": "availability",
296 "type": "type:string",
297 "value": "CC-BY-SA"
298 },
299 {
300 "@type": "koral:field",
301 "key": "layerInfos",
302 "type": "type:store",
303 "value": "corenlp/c=spans corenlp/p=tokens corenlp/s=spans dereko/s=spans malt/d=rels marmot/m=tokens marmot/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens"
304 },
305 {
306 "@type": "koral:field",
307 "key": "docSigle",
308 "type": "type:string",
309 "value": "WPD17/J80"
310 },
311 {
312 "@type": "koral:field",
313 "key": "corpusSigle",
314 "type": "type:string",
315 "value": "WPD17"
316 }
317 ],
318 "hasSnippet": true,
319 "hasTokens": false,
320 "layerInfos": "corenlp/c=spans corenlp/p=tokens corenlp/s=spans dereko/s=spans malt/d=rels marmot/m=tokens marmot/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens",
321 "matchID": "p162-165(1)163-163x_yuvMM6VZLzLe_qZ0zb9yguvk37eDi-pSoL1nBdUkhNs",
322 "meta": {
323 "version": "Krill-0.64.1"
324 },
325 "pubDate": "2017-07-01",
326 "pubPlace": "URL:http://de.wikipedia.org",
327 "snippet": "<span class=\"context-left\">` +
328 `</span>` +
329 `<span class=\"match\">` +
330 `<mark>` +
331 `<span title=\"corenlp/p:ART\">` +
332 `<span title=\"marmot/m:case:nom\">` +
333 `<span title=\"marmot/m:gender:masc\">` +
334 `<span title=\"marmot/m:number:sg\">` +
335 `<span title=\"marmot/p:ART\">` +
336 `<span title=\"opennlp/p:ART\">` +
337 `<span title=\"tt/l:die\">` +
338 `<span title=\"tt/p:ART\">` +
339 `<span title=\"opennlp/p:M\" class=\"notinindex\">` +
340 `<span title=\"opennlp/m:M\" class=\"notinindex\">Der</span>` +
341 `</span>` +
342 `</span>` +
343 `</span>` +
344 `</span>` +
345 `</span>` +
346 `</span>` +
347 `</span>` +
348 `</span>` +
349 `</span> ` +
350 `<span title=\"corenlp/p:ADJA\">` +
351 `<span title=\"marmot/m:case:nom\">` +
352 `<span title=\"marmot/m:degree:pos\">` +
353 `<span title=\"marmot/m:gender:masc\">` +
354 `<span title=\"marmot/m:number:sg\">` +
355 `<span title=\"marmot/p:ADJA\">` +
356 `<span title=\"opennlp/p:ADJA\">` +
357 `<span title=\"tt/l:alt\">` +
358 `<span title=\"tt/p:ADJA\">` +
359 `<span title=\"opennlp/p:M\" class=\"notinindex\">` +
360 `<span title=\"opennlp/m:M\" class=\"notinindex\">alte</span>` +
361 `</span>` +
362 `</span>` +
363 `</span>` +
364 `</span>` +
365 `</span>` +
366 `</span>` +
367 `</span>` +
368 `</span>` +
369 `</span>` +
370 `</span> ` +
371 `<span title=\"corenlp/p:NN\">` +
372 `<span title=\"marmot/m:case:nom\">` +
373 `<span title=\"marmot/m:gender:masc\">` +
374 `<span title=\"marmot/m:number:sg\">` +
375 `<span title=\"marmot/p:NN\">` +
376 `<span title=\"opennlp/p:NN\">` +
377 `<span title=\"tt/l:Baum\">` +
378 `<span title=\"tt/p:NN\">` +
379 `<span title=\"opennlp/p:M\" class=\"notinindex\">` +
380 `<span title=\"opennlp/m:M\" class=\"notinindex\">Baum</span>` +
381 `</span>` +
382 `</span>` +
383 `</span>` +
384 `</span>` +
385 `</span>` +
386 `</span>` +
387 `</span>` +
388 `</span>` +
389 `</span>` +
390 `</mark> ` +
391 `<span title=\"corenlp/p:KON\">` +
392 `<span title=\"marmot/p:KON\">` +
393 `<span title=\"opennlp/p:KON\">` +
394 `<span title=\"tt/l:und\">` +
395 `<span title=\"tt/p:KON\">und</span>` +
396 `</span>` +
397 `</span>` +
398 `</span>` +
399 `</span> ` +
400 `<span title=\"corenlp/p:ADJA\">` +
401 `<span title=\"marmot/m:case:nom\">` +
402 `<span title=\"marmot/m:degree:pos\">` +
403 `<span title=\"marmot/m:gender:masc\">` +
404 `<span title=\"marmot/m:number:pl\">` +
405 `<span title=\"marmot/p:ADJA\">` +
406 `<span title=\"opennlp/p:ADJA\">` +
407 `<span title=\"tt/l:andere\">` +
408 `<span title=\"tt/p:PIAT\">` +
409 `<span title=\"tt/p:PIS\">` +
410 `<span title=\"opennlp/p:M\" class=\"notinindex\">` +
411 `<span title=\"opennlp/m:M\" class=\"notinindex\">andere</span>` +
412 `</span>` +
413 `</span>` +
414 `</span>` +
415 `</span>` +
416 `</span>` +
417 `</span>` +
418 `</span>` +
419 `</span>` +
420 `</span>` +
421 `</span>` +
422 `</span> ` +
423 `<span title=\"corenlp/p:NN\">` +
424 `<span title=\"marmot/m:case:nom\">` +
425 `<span title=\"marmot/m:gender:masc\">` +
426 `<span title=\"marmot/m:number:pl\">` +
427 `<span title=\"marmot/p:NN\">` +
428 `<span title=\"opennlp/p:NN\">` +
429 `<span title=\"tt/l:Märchen\">` +
430 `<span title=\"tt/p:NN\">` +
431 `<span title=\"opennlp/p:M\" class=\"notinindex\">` +
432 `<span title=\"opennlp/p:M\" class=\"notinindex\">Märchen</span>` +
433 `</span>` +
434 `</span>` +
435 `</span>` +
436 `</span>` +
437 `</span>` +
438 `</span>` +
439 `</span>` +
440 `</span>` +
441 `</span>, ` +
442 `<span title=\"corenlp/p:CARD\">` +
443 `<span title=\"marmot/p:CARD\">` +
444 `<span title=\"opennlp/p:CARD\">` +
445 `<span title=\"tt/l:@card@\">` +
446 `<span title=\"tt/p:CARD\">1946</span>` +
447 `</span>` +
448 `</span>` +
449 `</span>` +
450 `</span> ` +
451 `</span>` +
452 `<span class=\"context-right\"></span>",` +
453 `"subTitle": null,
454 "textClass": null,
455 "textID": null,
456 "textSigle": "WPD17/J80/33968",
457 "title": "Johanne von Gemmingen"
458}`
459
460 // Create test mapping list specifically for token to termGroup test
461 mappingList := config.MappingList{
462 ID: "test-mapper",
463 FoundryA: "marmot",
464 LayerA: "m",
465 FoundryB: "opennlp", // Keep the same foundry for both sides
466 LayerB: "p",
467 Mappings: []config.MappingRule{
468 "[gender=masc] <> [opennlp/p=M & opennlp/m=M]",
469 },
470 }
471
472 // Create a new mapper
473 m, err := NewMapper([]config.MappingList{mappingList})
474 require.NoError(t, err)
475
476 var inputData any
477 err = json.Unmarshal([]byte(responseSnippet), &inputData)
478 assert.Nil(t, err)
479
480 result, err := m.ApplyResponseMappings("test-mapper", MappingOptions{Direction: AtoB}, inputData)
481 assert.Nil(t, err)
482
483 var expectedData any
484 err = json.Unmarshal([]byte(expectedOutput), &expectedData)
485
486 assert.Equal(t, expectedData, result)
Akron3caee162025-07-01 17:44:58 +0200487 assert.Nil(t, err)
Akrona3675e92025-06-26 17:46:59 +0200488}
489
490// TestResponseMappingAnnotationCreation tests creating new annotations based on RestrictToObligatory
491func TestResponseMappingAnnotationCreation(t *testing.T) {
492 // Simple snippet with a single annotated token
493 responseSnippet := `{
494 "snippet": "<span title=\"marmot/m:gender:masc\">Der</span>"
495 }`
496
497 // Create test mapping list
498 mappingList := config.MappingList{
499 ID: "test-mapper",
500 FoundryA: "marmot",
501 LayerA: "m",
502 FoundryB: "opennlp",
503 LayerB: "p",
504 Mappings: []config.MappingRule{
Akron5aa16232025-07-01 12:25:33 +0200505 "[gender:masc] <> [p=M & m=M]",
Akrona3675e92025-06-26 17:46:59 +0200506 },
507 }
508
509 // Create a new mapper
510 m, err := NewMapper([]config.MappingList{mappingList})
511 require.NoError(t, err)
512
513 var inputData any
514 err = json.Unmarshal([]byte(responseSnippet), &inputData)
515 assert.Nil(t, err)
516
517 result, err := m.ApplyResponseMappings("test-mapper", MappingOptions{Direction: AtoB}, inputData)
518 assert.Nil(t, err)
519
520 // For step 4, we should at least get back a processed result (even if snippet is unchanged)
521 // The main test is that no errors occurred in the processing
522 assert.NotNil(t, result)
523
524 // Verify the result is still a map with a snippet field
525 resultMap, ok := result.(map[string]any)
526 assert.True(t, ok)
527 assert.Contains(t, resultMap, "snippet")
528 assert.Equal(t, "<span title=\"marmot/m:gender:masc\"><span title=\"opennlp/p:M\" class=\"notinindex\"><span title=\"opennlp/m:M\" class=\"notinindex\">Der</span></span></span>", resultMap["snippet"])
529}
530
531// TestResponseMappingDebug helps debug the mapping process
532func TestResponseMappingDebug(t *testing.T) {
533 // Simple snippet with a single annotated token
534 responseSnippet := `{
535 "snippet": "<span title=\"marmot/m:gender:masc\">Der</span>"
536 }`
537
538 // Create test mapping list
539 mappingList := config.MappingList{
540 ID: "test-mapper",
541 FoundryA: "marmot",
542 LayerA: "m",
543 FoundryB: "opennlp",
544 LayerB: "p",
545 Mappings: []config.MappingRule{
546 "[gender=masc] <> [p=M & m=M]",
547 },
548 }
549
550 // Create a new mapper
551 m, err := NewMapper([]config.MappingList{mappingList})
552 require.NoError(t, err)
553
554 // Debug: Print what the parsed rules look like
555 rules := m.parsedRules["test-mapper"]
556 t.Logf("Number of parsed rules: %d", len(rules))
557 for i, rule := range rules {
558 t.Logf("Rule %d - Upper: %+v", i, rule.Upper)
559 t.Logf("Rule %d - Lower: %+v", i, rule.Lower)
560 }
561
562 var inputData any
563 err = json.Unmarshal([]byte(responseSnippet), &inputData)
564 assert.Nil(t, err)
565
566 // Include proper foundry and layer information in the options
567 result, err := m.ApplyResponseMappings("test-mapper", MappingOptions{
568 Direction: AtoB,
569 FoundryA: "marmot",
570 LayerA: "m",
571 FoundryB: "opennlp",
572 LayerB: "p",
573 }, inputData)
574 assert.Nil(t, err)
575 t.Logf("Result: %+v", result)
576}
577
578// TestResponseMappingWithAndRelation tests mapping rules with AND relations
579func TestResponseMappingWithAndRelation(t *testing.T) {
580 // Snippet with multiple annotations on a single token - both must be on the same span for AND to work
581 responseSnippet := `{
582 "snippet": "<span title=\"marmot/p:DET\"><span title=\"marmot/p:gender:masc\">Der</span></span>"
583 }`
584
585 // Create test mapping list with AND relation
586 mappingList := config.MappingList{
587 ID: "test-and-mapper",
588 FoundryA: "marmot",
589 LayerA: "p",
590 FoundryB: "opennlp",
591 LayerB: "p",
592 Mappings: []config.MappingRule{
593 "[DET & gender:masc] <> [p=DT & case=nom]",
594 },
595 }
596
597 // Create a new mapper
598 m, err := NewMapper([]config.MappingList{mappingList})
599 require.NoError(t, err)
600
601 var inputData any
602 err = json.Unmarshal([]byte(responseSnippet), &inputData)
603 assert.Nil(t, err)
604
605 result, err := m.ApplyResponseMappings("test-and-mapper", MappingOptions{
606 Direction: AtoB,
607 FoundryA: "marmot",
608 LayerA: "p",
609 FoundryB: "opennlp",
610 LayerB: "p",
611 }, inputData)
612 assert.Nil(t, err)
613
614 // Verify the result contains the expected annotations
615 resultMap, ok := result.(map[string]any)
616 assert.True(t, ok)
617 assert.Contains(t, resultMap, "snippet")
618
619 snippet := resultMap["snippet"].(string)
620 // Should contain both new annotations - checking the actual format produced
621 assert.Contains(t, snippet, `title="marmot/p:DET"`)
622 assert.Contains(t, snippet, `title="opennlp/p:DT"`)
623 assert.Contains(t, snippet, `title="marmot/p:gender:masc"`)
624 assert.Contains(t, snippet, `title="opennlp/case:nom"`) // Format is foundry/layer:value for single values
625 assert.Contains(t, snippet, `class="notinindex"`)
626}
627
628// TestResponseMappingWithOrRelation tests mapping rules with OR relations
629func TestResponseMappingWithOrRelation(t *testing.T) {
630 // Snippet with one token that matches the OR condition
631 responseSnippet := `{
632 "snippet": "<span title=\"marmot/p:DET\">Der</span>"
633 }`
634
635 // Create test mapping list with OR relation
636 mappingList := config.MappingList{
637 ID: "test-or-mapper",
638 FoundryA: "marmot",
639 LayerA: "p",
640 FoundryB: "opennlp",
641 LayerB: "p",
642 Mappings: []config.MappingRule{
643 "[DET | ART] <> [determiner=true]",
644 },
645 }
646
647 // Create a new mapper
648 m, err := NewMapper([]config.MappingList{mappingList})
649 require.NoError(t, err)
650
651 var inputData any
652 err = json.Unmarshal([]byte(responseSnippet), &inputData)
653 assert.Nil(t, err)
654
655 result, err := m.ApplyResponseMappings("test-or-mapper", MappingOptions{Direction: AtoB}, inputData)
656 assert.Nil(t, err)
657
658 // Verify the result
659 resultMap, ok := result.(map[string]any)
660 assert.True(t, ok)
661 assert.Contains(t, resultMap, "snippet")
662
663 snippet := resultMap["snippet"].(string)
664
665 assert.Contains(t, snippet, `title="marmot/p:DET"`)
666 assert.Contains(t, snippet, `title="opennlp/determiner:true" class="notinindex"`)
667 assert.NotEmpty(t, snippet)
668}
669
670// TestResponseMappingComplexPattern1 tests complex nested patterns
671func TestResponseMappingComplexPattern1(t *testing.T) {
672 // Snippet with a token that has nested annotations
673 responseSnippet := `{
674 "snippet": "<span title=\"marmot/p:ADJA\"><span title=\"marmot/m:gender:masc\"><span title=\"marmot/m:case:nom\">alter</span></span></span>"
675 }`
676
677 // Create test mapping list with complex pattern
678 mappingList := config.MappingList{
679 ID: "test-complex-mapper",
680 FoundryA: "marmot",
681 LayerA: "p",
682 FoundryB: "opennlp",
683 LayerB: "p",
684 Mappings: []config.MappingRule{
685 "[ADJA & gender=masc & case=nom] <> [pos=ADJ & gender=M & case=NOM]",
686 },
687 }
688
689 // Create a new mapper
690 m, err := NewMapper([]config.MappingList{mappingList})
691 require.NoError(t, err)
692
693 var inputData any
694 err = json.Unmarshal([]byte(responseSnippet), &inputData)
695 assert.Nil(t, err)
696
697 result, err := m.ApplyResponseMappings("test-complex-mapper", MappingOptions{Direction: AtoB}, inputData)
698 assert.Nil(t, err)
699
700 // Verify the result contains the expected annotations
701 resultMap, ok := result.(map[string]any)
702 assert.True(t, ok)
703 assert.Contains(t, resultMap, "snippet")
704
705 snippet := resultMap["snippet"].(string)
706 assert.Contains(t, snippet, `title="marmot/p:ADJA`)
707 assert.Contains(t, snippet, `title="marmot/m:gender:masc`)
708 assert.NotContains(t, snippet, `title="opennlp`)
709 assert.NotEmpty(t, snippet) // At minimum, processing should succeed
710}
711
712// TestResponseMappingComplexPattern2 tests complex nested patterns
713func TestResponseMappingComplexPattern2(t *testing.T) {
714 // Snippet with a token that has nested annotations
715 responseSnippet := `{
716 "snippet": "<span title=\"marmot/p:ADJA\"><span title=\"marmot/p:gender:masc\"><span title=\"marmot/p:case:nom\">alter</span></span></span>"
717 }`
718
719 // Create test mapping list with complex pattern
720 mappingList := config.MappingList{
721 ID: "test-complex-mapper",
722 FoundryA: "marmot",
723 LayerA: "p",
724 FoundryB: "opennlp",
725 LayerB: "p",
726 Mappings: []config.MappingRule{
727 "[ADJA & gender:masc & case:nom] <> [pos=ADJ & gender=M & case=NOM]",
728 },
729 }
730
731 // Create a new mapper
732 m, err := NewMapper([]config.MappingList{mappingList})
733 require.NoError(t, err)
734
735 var inputData any
736 err = json.Unmarshal([]byte(responseSnippet), &inputData)
737 assert.Nil(t, err)
738
739 result, err := m.ApplyResponseMappings("test-complex-mapper", MappingOptions{Direction: AtoB}, inputData)
740 assert.Nil(t, err)
741
742 // Verify the result contains the expected annotations
743 resultMap, ok := result.(map[string]any)
744 assert.True(t, ok)
745 assert.Contains(t, resultMap, "snippet")
746
747 snippet := resultMap["snippet"].(string)
748 assert.Contains(t, snippet, `title="marmot/p:ADJA`)
749 assert.Contains(t, snippet, `title="marmot/p:gender:masc`)
750 assert.Contains(t, snippet, `title="opennlp/pos:ADJ" class="notinindex"`)
751 assert.Contains(t, snippet, `title="opennlp/gender:M" class="notinindex"`)
752 assert.Contains(t, snippet, `title="opennlp/case:NOM" class="notinindex"`)
753 assert.NotEmpty(t, snippet) // At minimum, processing should succeed
754}
755
756// TestResponseMappingMultipleTokens tests mapping across multiple tokens
757func TestResponseMappingMultipleTokens(t *testing.T) {
758 // Snippet with multiple tokens
759 responseSnippet := `{
760 "snippet": "<span title=\"marmot/p:DET\">Der</span> <span title=\"marmot/p:ADJA\"><span title=\"marmot/m:gender:masc\">alte</span></span> <span title=\"marmot/p:NN\">Mann</span>"
761 }`
762
763 // Create test mapping list that matches multiple patterns
764 mappingList := config.MappingList{
765 ID: "test-multi-mapper",
766 FoundryA: "marmot",
767 LayerA: "p",
768 FoundryB: "opennlp",
769 LayerB: "p",
770 Mappings: []config.MappingRule{
771 "[DET] <> [determiner=true]",
772 "[ADJA & gender:masc] <> [adjective=true & gender=M]",
773 "[NN] <> [noun=true]",
774 },
775 }
776
777 // Create a new mapper
778 m, err := NewMapper([]config.MappingList{mappingList})
779 require.NoError(t, err)
780
781 var inputData any
782 err = json.Unmarshal([]byte(responseSnippet), &inputData)
783 assert.Nil(t, err)
784
785 result, err := m.ApplyResponseMappings("test-multi-mapper", MappingOptions{Direction: AtoB}, inputData)
786 assert.Nil(t, err)
787
788 // Verify the result
789 resultMap, ok := result.(map[string]any)
790 assert.True(t, ok)
791 assert.Contains(t, resultMap, "snippet")
792
793 snippet := resultMap["snippet"].(string)
794 // Should contain annotations for each matching token (checking actual output format)
795 assert.Contains(t, snippet, `title="marmot/p:DET"`)
796 assert.Contains(t, snippet, `title="opennlp/determiner:true" class="notinindex"`) // Format is foundry/layer:value for single values
797 assert.NotContains(t, snippet, `title="opennlp/adjective:true" class="notinindex"`)
798 assert.Contains(t, snippet, `title="opennlp/noun:true" class="notinindex"`)
799}
800
801// TestResponseMappingNoMatch tests behavior when no patterns match
802func TestResponseMappingNoMatch(t *testing.T) {
803 // Snippet with tokens that don't match the pattern
804 responseSnippet := `{
805 "snippet": "<span title=\"marmot/p:VERB\">läuft</span>"
806 }`
807
808 // Create test mapping list with pattern that won't match
809 mappingList := config.MappingList{
810 ID: "test-nomatch-mapper",
811 FoundryA: "marmot",
812 LayerA: "p",
813 FoundryB: "opennlp",
814 LayerB: "p",
815 Mappings: []config.MappingRule{
816 "[DET] <> [determiner=true]",
817 },
818 }
819
820 // Create a new mapper
821 m, err := NewMapper([]config.MappingList{mappingList})
822 require.NoError(t, err)
823
824 var inputData any
825 err = json.Unmarshal([]byte(responseSnippet), &inputData)
826 assert.Nil(t, err)
827
828 result, err := m.ApplyResponseMappings("test-nomatch-mapper", MappingOptions{Direction: AtoB}, inputData)
829 assert.Nil(t, err)
830
831 // Verify the result is unchanged since no patterns matched
832 resultMap, ok := result.(map[string]any)
833 assert.True(t, ok)
834 assert.Contains(t, resultMap, "snippet")
835
836 snippet := resultMap["snippet"].(string)
837 // Should be the original snippet without new annotations
838 assert.Equal(t, `<span title="marmot/p:VERB">läuft</span>`, snippet)
839 assert.NotContains(t, snippet, `class="notinindex"`)
840}
841
842// TestResponseMappingBidirectional tests bidirectional mapping (B to A direction)
843func TestResponseMappingBidirectional(t *testing.T) {
844 // Snippet with opennlp annotations
845 responseSnippet := `{
846 "snippet": "<span title=\"opennlp/p:DT\"><span title=\"opennlp/p:determiner:true\">Der</span></span>"
847 }`
848
849 // Create test mapping list
850 mappingList := config.MappingList{
851 ID: "test-bidirectional-mapper",
852 FoundryA: "marmot",
853 LayerA: "p",
854 FoundryB: "opennlp",
855 LayerB: "p",
856 Mappings: []config.MappingRule{
857 "[DET] <> [DT & determiner:true]",
858 },
859 }
860
861 // Create a new mapper
862 m, err := NewMapper([]config.MappingList{mappingList})
863 require.NoError(t, err)
864
865 var inputData any
866 err = json.Unmarshal([]byte(responseSnippet), &inputData)
867 assert.Nil(t, err)
868
869 // Test B to A direction
870 result, err := m.ApplyResponseMappings("test-bidirectional-mapper", MappingOptions{Direction: BtoA}, inputData)
871 assert.Nil(t, err)
872
873 // Verify the result
874 resultMap, ok := result.(map[string]any)
875 assert.True(t, ok)
876 assert.Contains(t, resultMap, "snippet")
877
878 snippet := resultMap["snippet"].(string)
879
880 assert.Contains(t, snippet, `title="opennlp/p:DT"`)
881 assert.Contains(t, snippet, `title="marmot/p:DET" class="notinindex"`)
882 assert.NotEmpty(t, snippet) // At minimum, processing should succeed
883}
884
885// TestResponseMappingWithValuePatterns tests patterns with specific values
886func TestResponseMappingWithValuePatterns(t *testing.T) {
887 // Snippet with value-specific annotations
888 responseSnippet := `{
889 "snippet": "<span title=\"marmot/m:case:nom\"><span title=\"marmot/m:gender:fem\">die</span></span>"
890 }`
891
892 // Create test mapping list with value-specific patterns
893 mappingList := config.MappingList{
894 ID: "test-value-mapper",
895 FoundryA: "marmot",
896 LayerA: "m",
897 FoundryB: "opennlp",
898 LayerB: "m",
899 Mappings: []config.MappingRule{
900 "[case:nom & gender:fem] <> [case=NOM & gender=F]",
901 },
902 }
903
904 // Create a new mapper
905 m, err := NewMapper([]config.MappingList{mappingList})
906 require.NoError(t, err)
907
908 var inputData any
909 err = json.Unmarshal([]byte(responseSnippet), &inputData)
910 assert.Nil(t, err)
911
912 result, err := m.ApplyResponseMappings("test-value-mapper", MappingOptions{Direction: AtoB}, inputData)
913 assert.Nil(t, err)
914
915 // Verify the result
916 resultMap, ok := result.(map[string]any)
917 assert.True(t, ok)
918 assert.Contains(t, resultMap, "snippet")
919
920 snippet := resultMap["snippet"].(string)
921 assert.Contains(t, snippet, `title="marmot/m:case:nom"`) // Format is foundry/layer:value
922 assert.Contains(t, snippet, `title="opennlp/case:NOM" class="notinindex"`) // Format is foundry/layer:value
923 assert.Contains(t, snippet, `title="opennlp/gender:F" class="notinindex"`)
924}
925
926// TestResponseMappingNestedSpans tests handling of deeply nested span structures
927func TestResponseMappingNestedSpans(t *testing.T) {
928 // Snippet with deeply nested spans
929 responseSnippet := `{
Akron4de47a92025-06-27 11:58:11 +0200930 "snippet": "<span title=\"level1/l:outer\"><span title=\"level2/l:middle\"><span title=\"marmot/p:DET\">der</span></span></span>",
931 "author": "John Doe"
Akrona3675e92025-06-26 17:46:59 +0200932 }`
933
934 // Create test mapping list
935 mappingList := config.MappingList{
936 ID: "test-nested-mapper",
937 FoundryA: "marmot",
938 LayerA: "p",
939 FoundryB: "opennlp",
940 LayerB: "p",
941 Mappings: []config.MappingRule{
942 "[DET] <> [determiner=yes]",
943 },
944 }
945
946 // Create a new mapper
947 m, err := NewMapper([]config.MappingList{mappingList})
948 require.NoError(t, err)
949
950 var inputData any
951 err = json.Unmarshal([]byte(responseSnippet), &inputData)
952 assert.Nil(t, err)
953
954 result, err := m.ApplyResponseMappings("test-nested-mapper", MappingOptions{Direction: AtoB}, inputData)
955 assert.Nil(t, err)
956
957 // Verify the result preserves the nested structure and adds new annotations
958 resultMap, ok := result.(map[string]any)
959 assert.True(t, ok)
960 assert.Contains(t, resultMap, "snippet")
961
962 snippet := resultMap["snippet"].(string)
963 // Should contain the new annotation while preserving existing structure
964 assert.Contains(t, snippet, `title="opennlp/determiner:yes"`) // Format is foundry/layer:value
965 assert.Contains(t, snippet, `class="notinindex"`)
966 assert.Contains(t, snippet, `title="level1/l:outer"`)
967 assert.Contains(t, snippet, `title="level2/l:middle"`)
968 assert.Contains(t, snippet, `title="marmot/p:DET"`)
Akron4de47a92025-06-27 11:58:11 +0200969
970 author := resultMap["author"].(string)
971 assert.Equal(t, "John Doe", author)
Akrona3675e92025-06-26 17:46:59 +0200972}