blob: b46f3acc2774de0de1b72889e92f0e6fc158a024 [file] [log] [blame]
Akrone4f570d2026-02-20 08:18:06 +01001package mapper
2
3import (
4 "encoding/json"
5 "testing"
6
7 "github.com/KorAP/Koral-Mapper/config"
8 "github.com/stretchr/testify/assert"
9 "github.com/stretchr/testify/require"
10)
11
12// parseJSON is a test helper that unmarshals a JSON string.
13func parseJSON(t *testing.T, s string) any {
14 t.Helper()
15 var v any
16 require.NoError(t, json.Unmarshal([]byte(s), &v))
17 return v
18}
19
20func TestCascadeQueryTwoAnnotationMappings(t *testing.T) {
21 m, err := NewMapper([]config.MappingList{
22 {
23 ID: "ann-step1", FoundryA: "opennlp", LayerA: "p",
Akron422cd252026-05-19 16:31:19 +020024 FoundryB: "stts", LayerB: "p",
Akrone4f570d2026-02-20 08:18:06 +010025 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
26 },
27 {
Akron422cd252026-05-19 16:31:19 +020028 ID: "ann-step2", FoundryA: "stts", LayerA: "p",
Akrone4f570d2026-02-20 08:18:06 +010029 FoundryB: "upos", LayerB: "p",
30 Mappings: []config.MappingRule{`[DET] <> [PRON]`},
31 },
32 })
33 require.NoError(t, err)
34
35 input := parseJSON(t, `{
36 "@type": "koral:token",
37 "wrap": {
38 "@type": "koral:term",
39 "foundry": "opennlp",
40 "key": "PIDAT",
41 "layer": "p",
42 "match": "match:eq"
43 }
44 }`)
45
46 result, err := m.CascadeQueryMappings(
47 []string{"ann-step1", "ann-step2"},
48 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
49 input,
50 )
51 require.NoError(t, err)
52
53 expected := parseJSON(t, `{
54 "@type": "koral:token",
55 "wrap": {
56 "@type": "koral:term",
57 "foundry": "upos",
58 "key": "PRON",
59 "layer": "p",
60 "match": "match:eq"
61 }
62 }`)
63 assert.Equal(t, expected, result)
64}
65
66func TestCascadeQueryMixAnnotationAndCorpus(t *testing.T) {
67 m, err := NewMapper([]config.MappingList{
68 {
69 ID: "ann-mapper", FoundryA: "opennlp", LayerA: "p",
70 FoundryB: "upos", LayerB: "p",
71 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
72 },
73 {
74 ID: "corpus-mapper",
75 Type: "corpus",
76 Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
77 },
78 })
79 require.NoError(t, err)
80
81 input := parseJSON(t, `{
82 "query": {
83 "@type": "koral:token",
84 "wrap": {
85 "@type": "koral:term",
86 "foundry": "opennlp",
87 "key": "PIDAT",
88 "layer": "p",
89 "match": "match:eq"
90 }
91 },
92 "collection": {
93 "@type": "koral:doc",
94 "key": "textClass",
95 "value": "novel",
96 "match": "match:eq"
97 }
98 }`)
99
100 result, err := m.CascadeQueryMappings(
101 []string{"ann-mapper", "corpus-mapper"},
102 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
103 input,
104 )
105 require.NoError(t, err)
106
107 resultMap := result.(map[string]any)
108
109 query := resultMap["query"].(map[string]any)
110 wrap := query["wrap"].(map[string]any)
111 assert.Equal(t, "DET", wrap["key"])
112 assert.Equal(t, "upos", wrap["foundry"])
113
114 collection := resultMap["collection"].(map[string]any)
115 assert.Equal(t, "genre", collection["key"])
116 assert.Equal(t, "fiction", collection["value"])
117}
118
119func TestCascadeQuerySingleElement(t *testing.T) {
120 m, err := NewMapper([]config.MappingList{{
121 ID: "single", FoundryA: "opennlp", LayerA: "p",
122 FoundryB: "upos", LayerB: "p",
123 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
124 }})
125 require.NoError(t, err)
126
127 makeInput := func() any {
128 return parseJSON(t, `{
129 "@type": "koral:token",
130 "wrap": {
131 "@type": "koral:term",
132 "foundry": "opennlp",
133 "key": "PIDAT",
134 "layer": "p",
135 "match": "match:eq"
136 }
137 }`)
138 }
139
140 opts := MappingOptions{Direction: AtoB}
141
142 cascadeResult, err := m.CascadeQueryMappings(
143 []string{"single"}, []MappingOptions{opts}, makeInput(),
144 )
145 require.NoError(t, err)
146
147 directResult, err := m.ApplyQueryMappings("single", opts, makeInput())
148 require.NoError(t, err)
149
150 assert.Equal(t, directResult, cascadeResult)
151}
152
153func TestCascadeQueryEmptyList(t *testing.T) {
154 m, err := NewMapper([]config.MappingList{{
155 ID: "dummy", FoundryA: "x", LayerA: "y",
156 FoundryB: "a", LayerB: "b",
157 Mappings: []config.MappingRule{`[X] <> [Y]`},
158 }})
159 require.NoError(t, err)
160
161 input := parseJSON(t, `{
162 "@type": "koral:token",
163 "wrap": {"@type": "koral:term", "key": "Z"}
164 }`)
165
166 result, err := m.CascadeQueryMappings(nil, nil, input)
167 require.NoError(t, err)
168 assert.Equal(t, input, result)
169}
170
171func TestCascadeQueryUnknownID(t *testing.T) {
172 m, err := NewMapper([]config.MappingList{{
173 ID: "known", FoundryA: "x", LayerA: "y",
174 FoundryB: "a", LayerB: "b",
175 Mappings: []config.MappingRule{`[X] <> [Y]`},
176 }})
177 require.NoError(t, err)
178
179 input := parseJSON(t, `{
180 "@type": "koral:token",
181 "wrap": {"@type": "koral:term", "key": "X"}
182 }`)
183
184 _, err = m.CascadeQueryMappings(
185 []string{"known", "nonexistent"},
186 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
187 input,
188 )
189 assert.Error(t, err)
190 assert.Contains(t, err.Error(), "nonexistent")
191}
192
// --- Query cascade rewrite tests (response cascade tests follow further below) ---
194
Akron330c8212026-05-19 14:12:39 +0200195func TestCascadeQueryRewritesPreservedAcrossSteps(t *testing.T) {
Akron422cd252026-05-19 16:31:19 +0200196 // Step 1 changes foundry (opennlp->stts) and key (PIDAT->DET).
197 // Step 2 changes foundry (stts->upos) and key (DET->PRON).
Akron330c8212026-05-19 14:12:39 +0200198 // Rewrites from step 1 must survive step 2's replacement.
199 m, err := NewMapper([]config.MappingList{
200 {
201 ID: "step1", FoundryA: "opennlp", LayerA: "p",
Akron422cd252026-05-19 16:31:19 +0200202 FoundryB: "stts", LayerB: "p",
Akron330c8212026-05-19 14:12:39 +0200203 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
204 },
205 {
Akron422cd252026-05-19 16:31:19 +0200206 ID: "step2", FoundryA: "stts", LayerA: "p",
Akron330c8212026-05-19 14:12:39 +0200207 FoundryB: "upos", LayerB: "p",
208 Mappings: []config.MappingRule{`[DET] <> [PRON]`},
209 },
210 })
211 require.NoError(t, err)
212
213 input := parseJSON(t, `{
214 "@type": "koral:token",
215 "wrap": {
216 "@type": "koral:term",
217 "foundry": "opennlp",
218 "key": "PIDAT",
219 "layer": "p",
220 "match": "match:eq"
221 }
222 }`)
223
224 result, err := m.CascadeQueryMappings(
225 []string{"step1", "step2"},
226 []MappingOptions{
227 {Direction: AtoB, AddRewrites: true},
228 {Direction: AtoB, AddRewrites: true},
229 },
230 input,
231 )
232 require.NoError(t, err)
233
234 // After both steps, the term should have rewrites from both steps:
Akron422cd252026-05-19 16:31:19 +0200235 // step 1 recorded scope=foundry original=opennlp and scope=key original=PIDAT,
236 // step 2 recorded scope=foundry original=stts and scope=key original=DET.
Akron330c8212026-05-19 14:12:39 +0200237 expected := parseJSON(t, `{
238 "@type": "koral:token",
239 "wrap": {
240 "@type": "koral:term",
241 "foundry": "upos",
242 "key": "PRON",
243 "layer": "p",
244 "match": "match:eq",
245 "rewrites": [
246 {
247 "@type": "koral:rewrite",
248 "editor": "Koral-Mapper",
Akron422cd252026-05-19 16:31:19 +0200249 "scope": "foundry",
250 "original": "opennlp"
251 },
252 {
253 "@type": "koral:rewrite",
254 "editor": "Koral-Mapper",
Akron330c8212026-05-19 14:12:39 +0200255 "scope": "key",
256 "original": "PIDAT"
257 },
258 {
259 "@type": "koral:rewrite",
260 "editor": "Koral-Mapper",
261 "scope": "foundry",
Akron422cd252026-05-19 16:31:19 +0200262 "original": "stts"
Akron330c8212026-05-19 14:12:39 +0200263 },
264 {
265 "@type": "koral:rewrite",
266 "editor": "Koral-Mapper",
267 "scope": "key",
268 "original": "DET"
269 }
270 ]
271 }
272 }`)
273 assert.Equal(t, expected, result)
274}
275
276func TestCascadeQueryRewritesPreservedStructuralChange(t *testing.T) {
Akron422cd252026-05-19 16:31:19 +0200277 // Step 1 changes foundry (opennlp->stts) and key (PIDAT->DET).
Akron330c8212026-05-19 14:12:39 +0200278 // Step 2 replaces Term with TermGroup (structural change).
279 // Rewrites from step 1 must be carried into the new TermGroup.
280 m, err := NewMapper([]config.MappingList{
281 {
282 ID: "sc-step1", FoundryA: "opennlp", LayerA: "p",
Akron422cd252026-05-19 16:31:19 +0200283 FoundryB: "stts", LayerB: "p",
Akron330c8212026-05-19 14:12:39 +0200284 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
285 },
286 {
Akron422cd252026-05-19 16:31:19 +0200287 ID: "sc-step2", FoundryA: "stts", LayerA: "p",
288 FoundryB: "tt", LayerB: "pos",
Akron330c8212026-05-19 14:12:39 +0200289 Mappings: []config.MappingRule{`[DET] <> [opennlp/p=DET & opennlp/p=PronType:Art]`},
290 },
291 })
292 require.NoError(t, err)
293
294 input := parseJSON(t, `{
295 "@type": "koral:token",
296 "wrap": {
297 "@type": "koral:term",
298 "foundry": "opennlp",
299 "key": "PIDAT",
300 "layer": "p",
301 "match": "match:eq"
302 }
303 }`)
304
305 result, err := m.CascadeQueryMappings(
306 []string{"sc-step1", "sc-step2"},
307 []MappingOptions{
308 {Direction: AtoB, AddRewrites: true},
309 {Direction: AtoB, AddRewrites: true},
310 },
311 input,
312 )
313 require.NoError(t, err)
314
Akron422cd252026-05-19 16:31:19 +0200315 // Step 1 rewrites (scope=foundry original=opennlp, scope=key original=PIDAT)
316 // must appear on the TermGroup created by step 2, along with step 2's
317 // own structural rewrite.
Akron330c8212026-05-19 14:12:39 +0200318 resultMap := result.(map[string]any)
319 wrap := resultMap["wrap"].(map[string]any)
320 require.Equal(t, "koral:termGroup", wrap["@type"])
321
322 rewrites := wrap["rewrites"].([]any)
Akron422cd252026-05-19 16:31:19 +0200323 // First rewrite is from step 1 (carried forward): foundry change
Akron330c8212026-05-19 14:12:39 +0200324 rw0 := rewrites[0].(map[string]any)
Akron422cd252026-05-19 16:31:19 +0200325 assert.Equal(t, "foundry", rw0["scope"])
326 assert.Equal(t, "opennlp", rw0["original"])
327
328 // Second rewrite is from step 1 (carried forward): key change
329 rw1 := rewrites[1].(map[string]any)
330 assert.Equal(t, "key", rw1["scope"])
331 assert.Equal(t, "PIDAT", rw1["original"])
Akron330c8212026-05-19 14:12:39 +0200332
333 // Last rewrite is from step 2 (structural: original is the full term)
334 rwLast := rewrites[len(rewrites)-1].(map[string]any)
335 assert.Equal(t, "Koral-Mapper", rwLast["editor"])
336 // Structural rewrite stores the full original node (no scope)
337 original := rwLast["original"].(map[string]any)
338 assert.Equal(t, "koral:term", original["@type"])
339 assert.Equal(t, "DET", original["key"])
340}
341
Akrone4f570d2026-02-20 08:18:06 +0100342func TestCascadeResponseTwoCorpusMappings(t *testing.T) {
343 m, err := NewMapper([]config.MappingList{
344 {
345 ID: "corpus-step1", Type: "corpus",
346 Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
347 },
348 {
349 ID: "corpus-step2", Type: "corpus",
350 Mappings: []config.MappingRule{`genre=fiction <> category=lit`},
351 },
352 })
353 require.NoError(t, err)
354
355 input := parseJSON(t, `{
356 "fields": [{
357 "@type": "koral:field",
358 "key": "textClass",
359 "value": "novel",
360 "type": "type:string"
361 }]
362 }`)
363
364 result, err := m.CascadeResponseMappings(
365 []string{"corpus-step1", "corpus-step2"},
366 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
367 input,
368 )
369 require.NoError(t, err)
370
371 fields := result.(map[string]any)["fields"].([]any)
372 require.GreaterOrEqual(t, len(fields), 3)
373
374 assert.Equal(t, "textClass", fields[0].(map[string]any)["key"])
375
376 assert.Equal(t, "genre", fields[1].(map[string]any)["key"])
377 assert.Equal(t, "fiction", fields[1].(map[string]any)["value"])
378
379 assert.Equal(t, "category", fields[2].(map[string]any)["key"])
380 assert.Equal(t, "lit", fields[2].(map[string]any)["value"])
381}
382
383func TestCascadeResponseMixAnnotationAndCorpus(t *testing.T) {
384 m, err := NewMapper([]config.MappingList{
385 {
386 ID: "ann-resp", FoundryA: "opennlp", LayerA: "p",
387 FoundryB: "upos", LayerB: "p",
388 Mappings: []config.MappingRule{`[DET] <> [PRON]`},
389 },
390 {
391 ID: "corpus-resp",
392 Type: "corpus",
393 Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
394 },
395 })
396 require.NoError(t, err)
397
398 input := parseJSON(t, `{
399 "snippet": "<span title=\"opennlp/p:DET\">Der</span>",
400 "fields": [{
401 "@type": "koral:field",
402 "key": "textClass",
403 "value": "novel",
404 "type": "type:string"
405 }]
406 }`)
407
408 result, err := m.CascadeResponseMappings(
409 []string{"ann-resp", "corpus-resp"},
410 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
411 input,
412 )
413 require.NoError(t, err)
414
415 resultMap := result.(map[string]any)
416
417 snippet := resultMap["snippet"].(string)
418 assert.Contains(t, snippet, "opennlp/p:DET")
419 assert.Contains(t, snippet, "upos/p:PRON")
420
421 fields := resultMap["fields"].([]any)
422 require.GreaterOrEqual(t, len(fields), 2)
423 assert.Equal(t, "genre", fields[1].(map[string]any)["key"])
424}
425
426func TestCascadeResponseSingleElement(t *testing.T) {
427 m, err := NewMapper([]config.MappingList{{
428 ID: "corpus-single", Type: "corpus",
429 Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
430 }})
431 require.NoError(t, err)
432
433 makeInput := func() any {
434 return parseJSON(t, `{
435 "fields": [{
436 "@type": "koral:field",
437 "key": "textClass",
438 "value": "novel",
439 "type": "type:string"
440 }]
441 }`)
442 }
443
444 opts := MappingOptions{Direction: AtoB}
445
446 cascadeResult, err := m.CascadeResponseMappings(
447 []string{"corpus-single"}, []MappingOptions{opts}, makeInput(),
448 )
449 require.NoError(t, err)
450
451 directResult, err := m.ApplyResponseMappings("corpus-single", opts, makeInput())
452 require.NoError(t, err)
453
454 assert.Equal(t, directResult, cascadeResult)
455}
456
457func TestCascadeResponseEmptyList(t *testing.T) {
458 m, err := NewMapper([]config.MappingList{{
459 ID: "dummy", Type: "corpus",
460 Mappings: []config.MappingRule{`x=y <> a=b`},
461 }})
462 require.NoError(t, err)
463
464 input := parseJSON(t, `{"fields": []}`)
465
466 result, err := m.CascadeResponseMappings(nil, nil, input)
467 require.NoError(t, err)
468 assert.Equal(t, input, result)
469}
470
471func TestCascadeResponseUnknownID(t *testing.T) {
472 m, err := NewMapper([]config.MappingList{{
473 ID: "known", Type: "corpus",
474 Mappings: []config.MappingRule{`x=y <> a=b`},
475 }})
476 require.NoError(t, err)
477
478 _, err = m.CascadeResponseMappings(
479 []string{"nonexistent"},
480 []MappingOptions{{Direction: AtoB}},
481 parseJSON(t, `{"fields": []}`),
482 )
483 assert.Error(t, err)
484 assert.Contains(t, err.Error(), "nonexistent")
485}