blob: cddd9a3a031573f1cfeb333726d09a2bf914607a [file] [log] [blame]
Akrone4f570d2026-02-20 08:18:06 +01001package mapper
2
3import (
4 "encoding/json"
5 "testing"
6
7 "github.com/KorAP/Koral-Mapper/config"
8 "github.com/stretchr/testify/assert"
9 "github.com/stretchr/testify/require"
10)
11
12// parseJSON is a test helper that unmarshals a JSON string.
13func parseJSON(t *testing.T, s string) any {
14 t.Helper()
15 var v any
16 require.NoError(t, json.Unmarshal([]byte(s), &v))
17 return v
18}
19
20func TestCascadeQueryTwoAnnotationMappings(t *testing.T) {
21 m, err := NewMapper([]config.MappingList{
22 {
23 ID: "ann-step1", FoundryA: "opennlp", LayerA: "p",
Akron422cd252026-05-19 16:31:19 +020024 FoundryB: "stts", LayerB: "p",
Akrone4f570d2026-02-20 08:18:06 +010025 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
26 },
27 {
Akron422cd252026-05-19 16:31:19 +020028 ID: "ann-step2", FoundryA: "stts", LayerA: "p",
Akrone4f570d2026-02-20 08:18:06 +010029 FoundryB: "upos", LayerB: "p",
30 Mappings: []config.MappingRule{`[DET] <> [PRON]`},
31 },
32 })
33 require.NoError(t, err)
34
35 input := parseJSON(t, `{
36 "@type": "koral:token",
37 "wrap": {
38 "@type": "koral:term",
39 "foundry": "opennlp",
40 "key": "PIDAT",
41 "layer": "p",
42 "match": "match:eq"
43 }
44 }`)
45
46 result, err := m.CascadeQueryMappings(
47 []string{"ann-step1", "ann-step2"},
48 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
49 input,
50 )
51 require.NoError(t, err)
52
53 expected := parseJSON(t, `{
54 "@type": "koral:token",
55 "wrap": {
56 "@type": "koral:term",
57 "foundry": "upos",
58 "key": "PRON",
59 "layer": "p",
60 "match": "match:eq"
61 }
62 }`)
63 assert.Equal(t, expected, result)
64}
65
66func TestCascadeQueryMixAnnotationAndCorpus(t *testing.T) {
67 m, err := NewMapper([]config.MappingList{
68 {
69 ID: "ann-mapper", FoundryA: "opennlp", LayerA: "p",
70 FoundryB: "upos", LayerB: "p",
71 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
72 },
73 {
74 ID: "corpus-mapper",
75 Type: "corpus",
76 Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
77 },
78 })
79 require.NoError(t, err)
80
81 input := parseJSON(t, `{
82 "query": {
83 "@type": "koral:token",
84 "wrap": {
85 "@type": "koral:term",
86 "foundry": "opennlp",
87 "key": "PIDAT",
88 "layer": "p",
89 "match": "match:eq"
90 }
91 },
92 "collection": {
93 "@type": "koral:doc",
94 "key": "textClass",
95 "value": "novel",
96 "match": "match:eq"
97 }
98 }`)
99
100 result, err := m.CascadeQueryMappings(
101 []string{"ann-mapper", "corpus-mapper"},
102 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
103 input,
104 )
105 require.NoError(t, err)
106
107 resultMap := result.(map[string]any)
108
109 query := resultMap["query"].(map[string]any)
110 wrap := query["wrap"].(map[string]any)
111 assert.Equal(t, "DET", wrap["key"])
112 assert.Equal(t, "upos", wrap["foundry"])
113
114 collection := resultMap["collection"].(map[string]any)
115 assert.Equal(t, "genre", collection["key"])
116 assert.Equal(t, "fiction", collection["value"])
117}
118
119func TestCascadeQuerySingleElement(t *testing.T) {
120 m, err := NewMapper([]config.MappingList{{
121 ID: "single", FoundryA: "opennlp", LayerA: "p",
122 FoundryB: "upos", LayerB: "p",
123 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
124 }})
125 require.NoError(t, err)
126
127 makeInput := func() any {
128 return parseJSON(t, `{
129 "@type": "koral:token",
130 "wrap": {
131 "@type": "koral:term",
132 "foundry": "opennlp",
133 "key": "PIDAT",
134 "layer": "p",
135 "match": "match:eq"
136 }
137 }`)
138 }
139
140 opts := MappingOptions{Direction: AtoB}
141
142 cascadeResult, err := m.CascadeQueryMappings(
143 []string{"single"}, []MappingOptions{opts}, makeInput(),
144 )
145 require.NoError(t, err)
146
147 directResult, err := m.ApplyQueryMappings("single", opts, makeInput())
148 require.NoError(t, err)
149
150 assert.Equal(t, directResult, cascadeResult)
151}
152
153func TestCascadeQueryEmptyList(t *testing.T) {
154 m, err := NewMapper([]config.MappingList{{
155 ID: "dummy", FoundryA: "x", LayerA: "y",
156 FoundryB: "a", LayerB: "b",
157 Mappings: []config.MappingRule{`[X] <> [Y]`},
158 }})
159 require.NoError(t, err)
160
161 input := parseJSON(t, `{
162 "@type": "koral:token",
163 "wrap": {"@type": "koral:term", "key": "Z"}
164 }`)
165
166 result, err := m.CascadeQueryMappings(nil, nil, input)
167 require.NoError(t, err)
168 assert.Equal(t, input, result)
169}
170
171func TestCascadeQueryUnknownID(t *testing.T) {
172 m, err := NewMapper([]config.MappingList{{
173 ID: "known", FoundryA: "x", LayerA: "y",
174 FoundryB: "a", LayerB: "b",
175 Mappings: []config.MappingRule{`[X] <> [Y]`},
176 }})
177 require.NoError(t, err)
178
179 input := parseJSON(t, `{
180 "@type": "koral:token",
181 "wrap": {"@type": "koral:term", "key": "X"}
182 }`)
183
184 _, err = m.CascadeQueryMappings(
185 []string{"known", "nonexistent"},
186 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
187 input,
188 )
189 assert.Error(t, err)
190 assert.Contains(t, err.Error(), "nonexistent")
191}
192
// --- Query cascade tests: rewrite preservation (response cascade tests follow further below) ---
194
Akron330c8212026-05-19 14:12:39 +0200195func TestCascadeQueryRewritesPreservedAcrossSteps(t *testing.T) {
Akron422cd252026-05-19 16:31:19 +0200196 // Step 1 changes foundry (opennlp->stts) and key (PIDAT->DET).
197 // Step 2 changes foundry (stts->upos) and key (DET->PRON).
Akron330c8212026-05-19 14:12:39 +0200198 // Rewrites from step 1 must survive step 2's replacement.
199 m, err := NewMapper([]config.MappingList{
200 {
201 ID: "step1", FoundryA: "opennlp", LayerA: "p",
Akron422cd252026-05-19 16:31:19 +0200202 FoundryB: "stts", LayerB: "p",
Akron330c8212026-05-19 14:12:39 +0200203 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
204 },
205 {
Akron422cd252026-05-19 16:31:19 +0200206 ID: "step2", FoundryA: "stts", LayerA: "p",
Akron330c8212026-05-19 14:12:39 +0200207 FoundryB: "upos", LayerB: "p",
208 Mappings: []config.MappingRule{`[DET] <> [PRON]`},
209 },
210 })
211 require.NoError(t, err)
212
213 input := parseJSON(t, `{
214 "@type": "koral:token",
215 "wrap": {
216 "@type": "koral:term",
217 "foundry": "opennlp",
218 "key": "PIDAT",
219 "layer": "p",
220 "match": "match:eq"
221 }
222 }`)
223
224 result, err := m.CascadeQueryMappings(
225 []string{"step1", "step2"},
226 []MappingOptions{
227 {Direction: AtoB, AddRewrites: true},
228 {Direction: AtoB, AddRewrites: true},
229 },
230 input,
231 )
232 require.NoError(t, err)
233
Akrona0174352026-05-19 17:04:42 +0200234 // After both steps, the term should have one rewrite per step:
235 // step 1 recorded the full original term (opennlp/p/PIDAT),
236 // step 2 recorded the full original term (stts/p/DET).
Akron330c8212026-05-19 14:12:39 +0200237 expected := parseJSON(t, `{
238 "@type": "koral:token",
239 "wrap": {
240 "@type": "koral:term",
241 "foundry": "upos",
242 "key": "PRON",
243 "layer": "p",
244 "match": "match:eq",
245 "rewrites": [
246 {
247 "@type": "koral:rewrite",
248 "editor": "Koral-Mapper",
Akrona0174352026-05-19 17:04:42 +0200249 "original": {
250 "@type": "koral:term",
251 "foundry": "opennlp",
252 "key": "PIDAT",
253 "layer": "p",
254 "match": "match:eq"
255 }
Akron422cd252026-05-19 16:31:19 +0200256 },
257 {
258 "@type": "koral:rewrite",
259 "editor": "Koral-Mapper",
Akrona0174352026-05-19 17:04:42 +0200260 "original": {
261 "@type": "koral:term",
262 "foundry": "stts",
263 "key": "DET",
264 "layer": "p",
265 "match": "match:eq"
266 }
Akron330c8212026-05-19 14:12:39 +0200267 }
268 ]
269 }
270 }`)
271 assert.Equal(t, expected, result)
272}
273
274func TestCascadeQueryRewritesPreservedStructuralChange(t *testing.T) {
Akron422cd252026-05-19 16:31:19 +0200275 // Step 1 changes foundry (opennlp->stts) and key (PIDAT->DET).
Akron330c8212026-05-19 14:12:39 +0200276 // Step 2 replaces Term with TermGroup (structural change).
277 // Rewrites from step 1 must be carried into the new TermGroup.
278 m, err := NewMapper([]config.MappingList{
279 {
280 ID: "sc-step1", FoundryA: "opennlp", LayerA: "p",
Akron422cd252026-05-19 16:31:19 +0200281 FoundryB: "stts", LayerB: "p",
Akron330c8212026-05-19 14:12:39 +0200282 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
283 },
284 {
Akron422cd252026-05-19 16:31:19 +0200285 ID: "sc-step2", FoundryA: "stts", LayerA: "p",
286 FoundryB: "tt", LayerB: "pos",
Akron330c8212026-05-19 14:12:39 +0200287 Mappings: []config.MappingRule{`[DET] <> [opennlp/p=DET & opennlp/p=PronType:Art]`},
288 },
289 })
290 require.NoError(t, err)
291
292 input := parseJSON(t, `{
293 "@type": "koral:token",
294 "wrap": {
295 "@type": "koral:term",
296 "foundry": "opennlp",
297 "key": "PIDAT",
298 "layer": "p",
299 "match": "match:eq"
300 }
301 }`)
302
303 result, err := m.CascadeQueryMappings(
304 []string{"sc-step1", "sc-step2"},
305 []MappingOptions{
306 {Direction: AtoB, AddRewrites: true},
307 {Direction: AtoB, AddRewrites: true},
308 },
309 input,
310 )
311 require.NoError(t, err)
312
Akrona0174352026-05-19 17:04:42 +0200313 // Step 1 produced a single rewrite with the full original term (opennlp/p/PIDAT).
314 // Step 2 replaced the term with a TermGroup (structural change) and produced
315 // another single rewrite with the full original term (stts/p/DET).
316 // Both rewrites must appear on the TermGroup.
Akron330c8212026-05-19 14:12:39 +0200317 resultMap := result.(map[string]any)
318 wrap := resultMap["wrap"].(map[string]any)
319 require.Equal(t, "koral:termGroup", wrap["@type"])
320
321 rewrites := wrap["rewrites"].([]any)
Akrona0174352026-05-19 17:04:42 +0200322 require.Len(t, rewrites, 2)
323
324 // First rewrite is from step 1 (carried forward): full original term
Akron330c8212026-05-19 14:12:39 +0200325 rw0 := rewrites[0].(map[string]any)
Akrona0174352026-05-19 17:04:42 +0200326 assert.Equal(t, "Koral-Mapper", rw0["editor"])
327 original0 := rw0["original"].(map[string]any)
328 assert.Equal(t, "koral:term", original0["@type"])
329 assert.Equal(t, "PIDAT", original0["key"])
330 assert.Equal(t, "opennlp", original0["foundry"])
Akron422cd252026-05-19 16:31:19 +0200331
Akrona0174352026-05-19 17:04:42 +0200332 // Second rewrite is from step 2: full original term
Akron422cd252026-05-19 16:31:19 +0200333 rw1 := rewrites[1].(map[string]any)
Akrona0174352026-05-19 17:04:42 +0200334 assert.Equal(t, "Koral-Mapper", rw1["editor"])
335 original1 := rw1["original"].(map[string]any)
336 assert.Equal(t, "koral:term", original1["@type"])
337 assert.Equal(t, "DET", original1["key"])
Akron330c8212026-05-19 14:12:39 +0200338}
339
Akrone4f570d2026-02-20 08:18:06 +0100340func TestCascadeResponseTwoCorpusMappings(t *testing.T) {
341 m, err := NewMapper([]config.MappingList{
342 {
343 ID: "corpus-step1", Type: "corpus",
344 Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
345 },
346 {
347 ID: "corpus-step2", Type: "corpus",
348 Mappings: []config.MappingRule{`genre=fiction <> category=lit`},
349 },
350 })
351 require.NoError(t, err)
352
353 input := parseJSON(t, `{
354 "fields": [{
355 "@type": "koral:field",
356 "key": "textClass",
357 "value": "novel",
358 "type": "type:string"
359 }]
360 }`)
361
362 result, err := m.CascadeResponseMappings(
363 []string{"corpus-step1", "corpus-step2"},
364 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
365 input,
366 )
367 require.NoError(t, err)
368
369 fields := result.(map[string]any)["fields"].([]any)
370 require.GreaterOrEqual(t, len(fields), 3)
371
372 assert.Equal(t, "textClass", fields[0].(map[string]any)["key"])
373
374 assert.Equal(t, "genre", fields[1].(map[string]any)["key"])
375 assert.Equal(t, "fiction", fields[1].(map[string]any)["value"])
376
377 assert.Equal(t, "category", fields[2].(map[string]any)["key"])
378 assert.Equal(t, "lit", fields[2].(map[string]any)["value"])
379}
380
381func TestCascadeResponseMixAnnotationAndCorpus(t *testing.T) {
382 m, err := NewMapper([]config.MappingList{
383 {
384 ID: "ann-resp", FoundryA: "opennlp", LayerA: "p",
385 FoundryB: "upos", LayerB: "p",
386 Mappings: []config.MappingRule{`[DET] <> [PRON]`},
387 },
388 {
389 ID: "corpus-resp",
390 Type: "corpus",
391 Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
392 },
393 })
394 require.NoError(t, err)
395
396 input := parseJSON(t, `{
397 "snippet": "<span title=\"opennlp/p:DET\">Der</span>",
398 "fields": [{
399 "@type": "koral:field",
400 "key": "textClass",
401 "value": "novel",
402 "type": "type:string"
403 }]
404 }`)
405
406 result, err := m.CascadeResponseMappings(
407 []string{"ann-resp", "corpus-resp"},
408 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
409 input,
410 )
411 require.NoError(t, err)
412
413 resultMap := result.(map[string]any)
414
415 snippet := resultMap["snippet"].(string)
416 assert.Contains(t, snippet, "opennlp/p:DET")
417 assert.Contains(t, snippet, "upos/p:PRON")
418
419 fields := resultMap["fields"].([]any)
420 require.GreaterOrEqual(t, len(fields), 2)
421 assert.Equal(t, "genre", fields[1].(map[string]any)["key"])
422}
423
424func TestCascadeResponseSingleElement(t *testing.T) {
425 m, err := NewMapper([]config.MappingList{{
426 ID: "corpus-single", Type: "corpus",
427 Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
428 }})
429 require.NoError(t, err)
430
431 makeInput := func() any {
432 return parseJSON(t, `{
433 "fields": [{
434 "@type": "koral:field",
435 "key": "textClass",
436 "value": "novel",
437 "type": "type:string"
438 }]
439 }`)
440 }
441
442 opts := MappingOptions{Direction: AtoB}
443
444 cascadeResult, err := m.CascadeResponseMappings(
445 []string{"corpus-single"}, []MappingOptions{opts}, makeInput(),
446 )
447 require.NoError(t, err)
448
449 directResult, err := m.ApplyResponseMappings("corpus-single", opts, makeInput())
450 require.NoError(t, err)
451
452 assert.Equal(t, directResult, cascadeResult)
453}
454
455func TestCascadeResponseEmptyList(t *testing.T) {
456 m, err := NewMapper([]config.MappingList{{
457 ID: "dummy", Type: "corpus",
458 Mappings: []config.MappingRule{`x=y <> a=b`},
459 }})
460 require.NoError(t, err)
461
462 input := parseJSON(t, `{"fields": []}`)
463
464 result, err := m.CascadeResponseMappings(nil, nil, input)
465 require.NoError(t, err)
466 assert.Equal(t, input, result)
467}
468
469func TestCascadeResponseUnknownID(t *testing.T) {
470 m, err := NewMapper([]config.MappingList{{
471 ID: "known", Type: "corpus",
472 Mappings: []config.MappingRule{`x=y <> a=b`},
473 }})
474 require.NoError(t, err)
475
476 _, err = m.CascadeResponseMappings(
477 []string{"nonexistent"},
478 []MappingOptions{{Direction: AtoB}},
479 parseJSON(t, `{"fields": []}`),
480 )
481 assert.Error(t, err)
482 assert.Contains(t, err.Error(), "nonexistent")
483}