blob: 3dec81ccfa51681d614292d6198fd4f1f8d4fc53 [file] [log] [blame]
Akrone4f570d2026-02-20 08:18:06 +01001package mapper
2
3import (
4 "encoding/json"
5 "testing"
6
7 "github.com/KorAP/Koral-Mapper/config"
8 "github.com/stretchr/testify/assert"
9 "github.com/stretchr/testify/require"
10)
11
12// parseJSON is a test helper that unmarshals a JSON string.
13func parseJSON(t *testing.T, s string) any {
14 t.Helper()
15 var v any
16 require.NoError(t, json.Unmarshal([]byte(s), &v))
17 return v
18}
19
20func TestCascadeQueryTwoAnnotationMappings(t *testing.T) {
21 m, err := NewMapper([]config.MappingList{
22 {
23 ID: "ann-step1", FoundryA: "opennlp", LayerA: "p",
24 FoundryB: "opennlp", LayerB: "p",
25 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
26 },
27 {
28 ID: "ann-step2", FoundryA: "opennlp", LayerA: "p",
29 FoundryB: "upos", LayerB: "p",
30 Mappings: []config.MappingRule{`[DET] <> [PRON]`},
31 },
32 })
33 require.NoError(t, err)
34
35 input := parseJSON(t, `{
36 "@type": "koral:token",
37 "wrap": {
38 "@type": "koral:term",
39 "foundry": "opennlp",
40 "key": "PIDAT",
41 "layer": "p",
42 "match": "match:eq"
43 }
44 }`)
45
46 result, err := m.CascadeQueryMappings(
47 []string{"ann-step1", "ann-step2"},
48 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
49 input,
50 )
51 require.NoError(t, err)
52
53 expected := parseJSON(t, `{
54 "@type": "koral:token",
55 "wrap": {
56 "@type": "koral:term",
57 "foundry": "upos",
58 "key": "PRON",
59 "layer": "p",
60 "match": "match:eq"
61 }
62 }`)
63 assert.Equal(t, expected, result)
64}
65
66func TestCascadeQueryMixAnnotationAndCorpus(t *testing.T) {
67 m, err := NewMapper([]config.MappingList{
68 {
69 ID: "ann-mapper", FoundryA: "opennlp", LayerA: "p",
70 FoundryB: "upos", LayerB: "p",
71 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
72 },
73 {
74 ID: "corpus-mapper",
75 Type: "corpus",
76 Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
77 },
78 })
79 require.NoError(t, err)
80
81 input := parseJSON(t, `{
82 "query": {
83 "@type": "koral:token",
84 "wrap": {
85 "@type": "koral:term",
86 "foundry": "opennlp",
87 "key": "PIDAT",
88 "layer": "p",
89 "match": "match:eq"
90 }
91 },
92 "collection": {
93 "@type": "koral:doc",
94 "key": "textClass",
95 "value": "novel",
96 "match": "match:eq"
97 }
98 }`)
99
100 result, err := m.CascadeQueryMappings(
101 []string{"ann-mapper", "corpus-mapper"},
102 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
103 input,
104 )
105 require.NoError(t, err)
106
107 resultMap := result.(map[string]any)
108
109 query := resultMap["query"].(map[string]any)
110 wrap := query["wrap"].(map[string]any)
111 assert.Equal(t, "DET", wrap["key"])
112 assert.Equal(t, "upos", wrap["foundry"])
113
114 collection := resultMap["collection"].(map[string]any)
115 assert.Equal(t, "genre", collection["key"])
116 assert.Equal(t, "fiction", collection["value"])
117}
118
119func TestCascadeQuerySingleElement(t *testing.T) {
120 m, err := NewMapper([]config.MappingList{{
121 ID: "single", FoundryA: "opennlp", LayerA: "p",
122 FoundryB: "upos", LayerB: "p",
123 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
124 }})
125 require.NoError(t, err)
126
127 makeInput := func() any {
128 return parseJSON(t, `{
129 "@type": "koral:token",
130 "wrap": {
131 "@type": "koral:term",
132 "foundry": "opennlp",
133 "key": "PIDAT",
134 "layer": "p",
135 "match": "match:eq"
136 }
137 }`)
138 }
139
140 opts := MappingOptions{Direction: AtoB}
141
142 cascadeResult, err := m.CascadeQueryMappings(
143 []string{"single"}, []MappingOptions{opts}, makeInput(),
144 )
145 require.NoError(t, err)
146
147 directResult, err := m.ApplyQueryMappings("single", opts, makeInput())
148 require.NoError(t, err)
149
150 assert.Equal(t, directResult, cascadeResult)
151}
152
153func TestCascadeQueryEmptyList(t *testing.T) {
154 m, err := NewMapper([]config.MappingList{{
155 ID: "dummy", FoundryA: "x", LayerA: "y",
156 FoundryB: "a", LayerB: "b",
157 Mappings: []config.MappingRule{`[X] <> [Y]`},
158 }})
159 require.NoError(t, err)
160
161 input := parseJSON(t, `{
162 "@type": "koral:token",
163 "wrap": {"@type": "koral:term", "key": "Z"}
164 }`)
165
166 result, err := m.CascadeQueryMappings(nil, nil, input)
167 require.NoError(t, err)
168 assert.Equal(t, input, result)
169}
170
171func TestCascadeQueryUnknownID(t *testing.T) {
172 m, err := NewMapper([]config.MappingList{{
173 ID: "known", FoundryA: "x", LayerA: "y",
174 FoundryB: "a", LayerB: "b",
175 Mappings: []config.MappingRule{`[X] <> [Y]`},
176 }})
177 require.NoError(t, err)
178
179 input := parseJSON(t, `{
180 "@type": "koral:token",
181 "wrap": {"@type": "koral:term", "key": "X"}
182 }`)
183
184 _, err = m.CascadeQueryMappings(
185 []string{"known", "nonexistent"},
186 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
187 input,
188 )
189 assert.Error(t, err)
190 assert.Contains(t, err.Error(), "nonexistent")
191}
192
// --- Query cascade rewrite tests ---
// (Response cascade tests begin at TestCascadeResponseTwoCorpusMappings.)
194
Akron330c8212026-05-19 14:12:39 +0200195func TestCascadeQueryRewritesPreservedAcrossSteps(t *testing.T) {
196 // Step 1 changes key (PIDAT->DET) within same foundry/layer.
197 // Step 2 changes foundry+key (DET->PRON, opennlp->upos).
198 // Rewrites from step 1 must survive step 2's replacement.
199 m, err := NewMapper([]config.MappingList{
200 {
201 ID: "step1", FoundryA: "opennlp", LayerA: "p",
202 FoundryB: "opennlp", LayerB: "p",
203 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
204 },
205 {
206 ID: "step2", FoundryA: "opennlp", LayerA: "p",
207 FoundryB: "upos", LayerB: "p",
208 Mappings: []config.MappingRule{`[DET] <> [PRON]`},
209 },
210 })
211 require.NoError(t, err)
212
213 input := parseJSON(t, `{
214 "@type": "koral:token",
215 "wrap": {
216 "@type": "koral:term",
217 "foundry": "opennlp",
218 "key": "PIDAT",
219 "layer": "p",
220 "match": "match:eq"
221 }
222 }`)
223
224 result, err := m.CascadeQueryMappings(
225 []string{"step1", "step2"},
226 []MappingOptions{
227 {Direction: AtoB, AddRewrites: true},
228 {Direction: AtoB, AddRewrites: true},
229 },
230 input,
231 )
232 require.NoError(t, err)
233
234 // After both steps, the term should have rewrites from both steps:
235 // step 1 recorded scope=key original=PIDAT,
236 // step 2 recorded scope=foundry original=opennlp and scope=key original=DET.
237 expected := parseJSON(t, `{
238 "@type": "koral:token",
239 "wrap": {
240 "@type": "koral:term",
241 "foundry": "upos",
242 "key": "PRON",
243 "layer": "p",
244 "match": "match:eq",
245 "rewrites": [
246 {
247 "@type": "koral:rewrite",
248 "editor": "Koral-Mapper",
249 "scope": "key",
250 "original": "PIDAT"
251 },
252 {
253 "@type": "koral:rewrite",
254 "editor": "Koral-Mapper",
255 "scope": "foundry",
256 "original": "opennlp"
257 },
258 {
259 "@type": "koral:rewrite",
260 "editor": "Koral-Mapper",
261 "scope": "key",
262 "original": "DET"
263 }
264 ]
265 }
266 }`)
267 assert.Equal(t, expected, result)
268}
269
270func TestCascadeQueryRewritesPreservedStructuralChange(t *testing.T) {
271 // Step 1 changes key (PIDAT->DET) and records a scoped rewrite.
272 // Step 2 replaces Term with TermGroup (structural change).
273 // Rewrites from step 1 must be carried into the new TermGroup.
274 m, err := NewMapper([]config.MappingList{
275 {
276 ID: "sc-step1", FoundryA: "opennlp", LayerA: "p",
277 FoundryB: "opennlp", LayerB: "p",
278 Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
279 },
280 {
281 ID: "sc-step2", FoundryA: "opennlp", LayerA: "p",
282 FoundryB: "opennlp", LayerB: "p",
283 Mappings: []config.MappingRule{`[DET] <> [opennlp/p=DET & opennlp/p=PronType:Art]`},
284 },
285 })
286 require.NoError(t, err)
287
288 input := parseJSON(t, `{
289 "@type": "koral:token",
290 "wrap": {
291 "@type": "koral:term",
292 "foundry": "opennlp",
293 "key": "PIDAT",
294 "layer": "p",
295 "match": "match:eq"
296 }
297 }`)
298
299 result, err := m.CascadeQueryMappings(
300 []string{"sc-step1", "sc-step2"},
301 []MappingOptions{
302 {Direction: AtoB, AddRewrites: true},
303 {Direction: AtoB, AddRewrites: true},
304 },
305 input,
306 )
307 require.NoError(t, err)
308
309 // Step 1 rewrites (scope=key, original=PIDAT) must appear on the
310 // TermGroup created by step 2, along with step 2's own structural rewrite.
311 resultMap := result.(map[string]any)
312 wrap := resultMap["wrap"].(map[string]any)
313 require.Equal(t, "koral:termGroup", wrap["@type"])
314
315 rewrites := wrap["rewrites"].([]any)
316 // First rewrite is from step 1 (carried forward)
317 rw0 := rewrites[0].(map[string]any)
318 assert.Equal(t, "key", rw0["scope"])
319 assert.Equal(t, "PIDAT", rw0["original"])
320
321 // Last rewrite is from step 2 (structural: original is the full term)
322 rwLast := rewrites[len(rewrites)-1].(map[string]any)
323 assert.Equal(t, "Koral-Mapper", rwLast["editor"])
324 // Structural rewrite stores the full original node (no scope)
325 original := rwLast["original"].(map[string]any)
326 assert.Equal(t, "koral:term", original["@type"])
327 assert.Equal(t, "DET", original["key"])
328}
329
Akrone4f570d2026-02-20 08:18:06 +0100330func TestCascadeResponseTwoCorpusMappings(t *testing.T) {
331 m, err := NewMapper([]config.MappingList{
332 {
333 ID: "corpus-step1", Type: "corpus",
334 Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
335 },
336 {
337 ID: "corpus-step2", Type: "corpus",
338 Mappings: []config.MappingRule{`genre=fiction <> category=lit`},
339 },
340 })
341 require.NoError(t, err)
342
343 input := parseJSON(t, `{
344 "fields": [{
345 "@type": "koral:field",
346 "key": "textClass",
347 "value": "novel",
348 "type": "type:string"
349 }]
350 }`)
351
352 result, err := m.CascadeResponseMappings(
353 []string{"corpus-step1", "corpus-step2"},
354 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
355 input,
356 )
357 require.NoError(t, err)
358
359 fields := result.(map[string]any)["fields"].([]any)
360 require.GreaterOrEqual(t, len(fields), 3)
361
362 assert.Equal(t, "textClass", fields[0].(map[string]any)["key"])
363
364 assert.Equal(t, "genre", fields[1].(map[string]any)["key"])
365 assert.Equal(t, "fiction", fields[1].(map[string]any)["value"])
366
367 assert.Equal(t, "category", fields[2].(map[string]any)["key"])
368 assert.Equal(t, "lit", fields[2].(map[string]any)["value"])
369}
370
371func TestCascadeResponseMixAnnotationAndCorpus(t *testing.T) {
372 m, err := NewMapper([]config.MappingList{
373 {
374 ID: "ann-resp", FoundryA: "opennlp", LayerA: "p",
375 FoundryB: "upos", LayerB: "p",
376 Mappings: []config.MappingRule{`[DET] <> [PRON]`},
377 },
378 {
379 ID: "corpus-resp",
380 Type: "corpus",
381 Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
382 },
383 })
384 require.NoError(t, err)
385
386 input := parseJSON(t, `{
387 "snippet": "<span title=\"opennlp/p:DET\">Der</span>",
388 "fields": [{
389 "@type": "koral:field",
390 "key": "textClass",
391 "value": "novel",
392 "type": "type:string"
393 }]
394 }`)
395
396 result, err := m.CascadeResponseMappings(
397 []string{"ann-resp", "corpus-resp"},
398 []MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
399 input,
400 )
401 require.NoError(t, err)
402
403 resultMap := result.(map[string]any)
404
405 snippet := resultMap["snippet"].(string)
406 assert.Contains(t, snippet, "opennlp/p:DET")
407 assert.Contains(t, snippet, "upos/p:PRON")
408
409 fields := resultMap["fields"].([]any)
410 require.GreaterOrEqual(t, len(fields), 2)
411 assert.Equal(t, "genre", fields[1].(map[string]any)["key"])
412}
413
414func TestCascadeResponseSingleElement(t *testing.T) {
415 m, err := NewMapper([]config.MappingList{{
416 ID: "corpus-single", Type: "corpus",
417 Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
418 }})
419 require.NoError(t, err)
420
421 makeInput := func() any {
422 return parseJSON(t, `{
423 "fields": [{
424 "@type": "koral:field",
425 "key": "textClass",
426 "value": "novel",
427 "type": "type:string"
428 }]
429 }`)
430 }
431
432 opts := MappingOptions{Direction: AtoB}
433
434 cascadeResult, err := m.CascadeResponseMappings(
435 []string{"corpus-single"}, []MappingOptions{opts}, makeInput(),
436 )
437 require.NoError(t, err)
438
439 directResult, err := m.ApplyResponseMappings("corpus-single", opts, makeInput())
440 require.NoError(t, err)
441
442 assert.Equal(t, directResult, cascadeResult)
443}
444
445func TestCascadeResponseEmptyList(t *testing.T) {
446 m, err := NewMapper([]config.MappingList{{
447 ID: "dummy", Type: "corpus",
448 Mappings: []config.MappingRule{`x=y <> a=b`},
449 }})
450 require.NoError(t, err)
451
452 input := parseJSON(t, `{"fields": []}`)
453
454 result, err := m.CascadeResponseMappings(nil, nil, input)
455 require.NoError(t, err)
456 assert.Equal(t, input, result)
457}
458
459func TestCascadeResponseUnknownID(t *testing.T) {
460 m, err := NewMapper([]config.MappingList{{
461 ID: "known", Type: "corpus",
462 Mappings: []config.MappingRule{`x=y <> a=b`},
463 }})
464 require.NoError(t, err)
465
466 _, err = m.CascadeResponseMappings(
467 []string{"nonexistent"},
468 []MappingOptions{{Direction: AtoB}},
469 parseJSON(t, `{"fields": []}`),
470 )
471 assert.Error(t, err)
472 assert.Contains(t, err.Error(), "nonexistent")
473}