blob: 74696ecf1d7b53df17a1a2a7fd412cf854d1b110 [file] [log] [blame]
Akron32d53de2025-05-22 13:45:32 +02001package mapper
2
3import (
4 "encoding/json"
Akron32d53de2025-05-22 13:45:32 +02005 "testing"
6
Akronfa55bb22025-05-26 15:10:42 +02007 "github.com/KorAP/KoralPipe-TermMapper/ast"
Akrona00d4752025-05-26 17:34:36 +02008 "github.com/KorAP/KoralPipe-TermMapper/config"
Akronfa55bb22025-05-26 15:10:42 +02009 "github.com/KorAP/KoralPipe-TermMapper/matcher"
Akron32d53de2025-05-22 13:45:32 +020010 "github.com/stretchr/testify/assert"
11 "github.com/stretchr/testify/require"
12)
13
14func TestMapper(t *testing.T) {
Akrona00d4752025-05-26 17:34:36 +020015 // Create test mapping list
16 mappingList := config.MappingList{
17 ID: "test-mapper",
18 FoundryA: "opennlp",
19 LayerA: "p",
20 FoundryB: "upos",
21 LayerB: "p",
22 Mappings: []config.MappingRule{
23 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
24 "[DET] <> [opennlp/p=DET]",
25 },
26 }
Akron32d53de2025-05-22 13:45:32 +020027
28 // Create a new mapper
Akrona00d4752025-05-26 17:34:36 +020029 m, err := NewMapper([]config.MappingList{mappingList})
Akron32d53de2025-05-22 13:45:32 +020030 require.NoError(t, err)
31
32 tests := []struct {
33 name string
34 mappingID string
35 opts MappingOptions
36 input string
37 expected string
38 expectError bool
39 }{
40 {
41 name: "Simple A to B mapping",
42 mappingID: "test-mapper",
43 opts: MappingOptions{
44 Direction: AtoB,
45 },
46 input: `{
47 "@type": "koral:token",
48 "wrap": {
49 "@type": "koral:term",
50 "foundry": "opennlp",
51 "key": "PIDAT",
52 "layer": "p",
53 "match": "match:eq"
54 }
55 }`,
56 expected: `{
57 "@type": "koral:token",
58 "wrap": {
59 "@type": "koral:termGroup",
60 "operands": [
61 {
62 "@type": "koral:term",
63 "foundry": "opennlp",
64 "key": "PIDAT",
65 "layer": "p",
66 "match": "match:eq"
67 },
68 {
69 "@type": "koral:term",
70 "foundry": "opennlp",
71 "key": "AdjType",
72 "layer": "p",
73 "match": "match:eq",
74 "value": "Pdt"
75 }
76 ],
77 "relation": "relation:and"
78 }
79 }`,
80 },
81 {
Akrona1a183f2025-05-26 17:47:33 +020082 name: "B to A direction",
Akron32d53de2025-05-22 13:45:32 +020083 mappingID: "test-mapper",
84 opts: MappingOptions{
85 Direction: BtoA,
86 },
87 input: `{
88 "@type": "koral:token",
89 "wrap": {
Akrona1a183f2025-05-26 17:47:33 +020090 "@type": "koral:term",
91 "foundry": "opennlp",
92 "key": "PIDAT",
93 "layer": "p",
94 "match": "match:eq"
Akron32d53de2025-05-22 13:45:32 +020095 }
96 }`,
97 expected: `{
98 "@type": "koral:token",
99 "wrap": {
100 "@type": "koral:term",
101 "foundry": "opennlp",
102 "key": "PIDAT",
103 "layer": "p",
104 "match": "match:eq"
105 }
106 }`,
Akrona1a183f2025-05-26 17:47:33 +0200107 expectError: false,
Akron32d53de2025-05-22 13:45:32 +0200108 },
109 {
110 name: "Mapping with foundry override",
111 mappingID: "test-mapper",
112 opts: MappingOptions{
113 Direction: AtoB,
114 FoundryB: "custom",
115 },
116 input: `{
117 "@type": "koral:token",
118 "wrap": {
119 "@type": "koral:term",
120 "foundry": "opennlp",
121 "key": "PIDAT",
122 "layer": "p",
123 "match": "match:eq"
124 }
125 }`,
126 expected: `{
127 "@type": "koral:token",
128 "wrap": {
129 "@type": "koral:termGroup",
130 "operands": [
131 {
132 "@type": "koral:term",
133 "foundry": "custom",
134 "key": "PIDAT",
135 "layer": "p",
136 "match": "match:eq"
137 },
138 {
139 "@type": "koral:term",
140 "foundry": "custom",
141 "key": "AdjType",
142 "layer": "p",
143 "match": "match:eq",
144 "value": "Pdt"
145 }
146 ],
147 "relation": "relation:and"
148 }
149 }`,
150 },
151 {
152 name: "Invalid mapping ID",
153 mappingID: "nonexistent",
154 opts: MappingOptions{
155 Direction: AtoB,
156 },
157 input: `{
158 "@type": "koral:token",
159 "wrap": {
160 "@type": "koral:term",
161 "foundry": "opennlp",
162 "key": "PIDAT",
163 "layer": "p",
164 "match": "match:eq"
165 }
166 }`,
167 expectError: true,
168 },
169 {
170 name: "Invalid direction",
171 mappingID: "test-mapper",
172 opts: MappingOptions{
Akrona1a183f2025-05-26 17:47:33 +0200173 Direction: Direction(false),
Akron32d53de2025-05-22 13:45:32 +0200174 },
175 input: `{
176 "@type": "koral:token",
177 "wrap": {
178 "@type": "koral:term",
179 "foundry": "opennlp",
180 "key": "PIDAT",
181 "layer": "p",
182 "match": "match:eq"
183 }
184 }`,
Akrona1a183f2025-05-26 17:47:33 +0200185 expected: `{
186 "@type": "koral:token",
187 "wrap": {
188 "@type": "koral:term",
189 "foundry": "opennlp",
190 "key": "PIDAT",
191 "layer": "p",
192 "match": "match:eq"
193 }
194 }`,
195 expectError: false,
Akron32d53de2025-05-22 13:45:32 +0200196 },
197 }
198
199 for _, tt := range tests {
200 t.Run(tt.name, func(t *testing.T) {
201 // Parse input JSON
202 var inputData interface{}
203 err := json.Unmarshal([]byte(tt.input), &inputData)
204 require.NoError(t, err)
205
206 // Apply mappings
Akron7b4984e2025-05-26 19:12:20 +0200207 result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
Akron32d53de2025-05-22 13:45:32 +0200208 if tt.expectError {
209 assert.Error(t, err)
210 return
211 }
212 require.NoError(t, err)
213
214 // Parse expected JSON
215 var expectedData interface{}
216 err = json.Unmarshal([]byte(tt.expected), &expectedData)
217 require.NoError(t, err)
218
219 // Compare results
220 assert.Equal(t, expectedData, result)
221 })
222 }
223}
Akrond5850f82025-05-23 16:44:44 +0200224
225func TestMatchComplexPatterns(t *testing.T) {
226 tests := []struct {
227 name string
228 pattern ast.Pattern
229 replacement ast.Replacement
230 input ast.Node
231 expected ast.Node
232 }{
233 {
234 name: "Deep nested pattern with mixed operators",
235 pattern: ast.Pattern{
236 Root: &ast.TermGroup{
237 Operands: []ast.Node{
238 &ast.Term{
239 Key: "A",
240 Match: ast.MatchEqual,
241 },
242 &ast.TermGroup{
243 Operands: []ast.Node{
244 &ast.Term{
245 Key: "B",
246 Match: ast.MatchEqual,
247 },
248 &ast.TermGroup{
249 Operands: []ast.Node{
250 &ast.Term{
251 Key: "C",
252 Match: ast.MatchEqual,
253 },
254 &ast.Term{
255 Key: "D",
256 Match: ast.MatchEqual,
257 },
258 },
259 Relation: ast.AndRelation,
260 },
261 },
262 Relation: ast.OrRelation,
263 },
264 },
265 Relation: ast.AndRelation,
266 },
267 },
268 replacement: ast.Replacement{
269 Root: &ast.Term{
270 Key: "RESULT",
271 Match: ast.MatchEqual,
272 },
273 },
274 input: &ast.TermGroup{
275 Operands: []ast.Node{
276 &ast.Term{
277 Key: "A",
278 Match: ast.MatchEqual,
279 },
280 &ast.TermGroup{
281 Operands: []ast.Node{
282 &ast.Term{
283 Key: "C",
284 Match: ast.MatchEqual,
285 },
286 &ast.Term{
287 Key: "D",
288 Match: ast.MatchEqual,
289 },
290 },
291 Relation: ast.AndRelation,
292 },
293 },
294 Relation: ast.AndRelation,
295 },
296 expected: &ast.Term{
297 Key: "RESULT",
298 Match: ast.MatchEqual,
299 },
300 },
301 }
302
303 for _, tt := range tests {
304 t.Run(tt.name, func(t *testing.T) {
305 m, err := matcher.NewMatcher(tt.pattern, tt.replacement)
306 require.NoError(t, err)
307 result := m.Replace(tt.input)
308 assert.Equal(t, tt.expected, result)
309 })
310 }
311}
312
313func TestInvalidPatternReplacement(t *testing.T) {
Akrona00d4752025-05-26 17:34:36 +0200314 // Create test mapping list
315 mappingList := config.MappingList{
316 ID: "test-mapper",
317 FoundryA: "opennlp",
318 LayerA: "p",
319 FoundryB: "upos",
320 LayerB: "p",
321 Mappings: []config.MappingRule{
322 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
323 },
324 }
Akrond5850f82025-05-23 16:44:44 +0200325
326 // Create a new mapper
Akrona00d4752025-05-26 17:34:36 +0200327 m, err := NewMapper([]config.MappingList{mappingList})
Akrond5850f82025-05-23 16:44:44 +0200328 require.NoError(t, err)
329
330 tests := []struct {
331 name string
332 input string
333 expectError bool
334 errorMsg string
335 }{
336 {
337 name: "Invalid input - empty term group",
338 input: `{
339 "@type": "koral:token",
340 "wrap": {
341 "@type": "koral:termGroup",
342 "operands": [],
343 "relation": "relation:and"
344 }
345 }`,
346 expectError: true,
347 errorMsg: "failed to parse JSON into AST: error parsing wrapped node: term group must have at least one operand",
348 },
349 }
350
351 for _, tt := range tests {
352 t.Run(tt.name, func(t *testing.T) {
353 var inputData any
354 err := json.Unmarshal([]byte(tt.input), &inputData)
355 require.NoError(t, err)
356
Akron7b4984e2025-05-26 19:12:20 +0200357 result, err := m.ApplyQueryMappings("test-mapper", MappingOptions{Direction: AtoB}, inputData)
Akrond5850f82025-05-23 16:44:44 +0200358 if tt.expectError {
359 assert.Error(t, err)
360 assert.Equal(t, tt.errorMsg, err.Error())
361 assert.Nil(t, result)
362 } else {
363 assert.NoError(t, err)
364 assert.NotNil(t, result)
365 }
366 })
367 }
368}
Akron7b4984e2025-05-26 19:12:20 +0200369
370func TestQueryWrapperMappings(t *testing.T) {
371
372 mappingList := config.MappingList{
373 ID: "test-wrapper",
374 FoundryA: "opennlp",
375 LayerA: "orth",
376 FoundryB: "upos",
377 LayerB: "orth",
378 Mappings: []config.MappingRule{
379 "[opennlp/orth=Baum] <> [opennlp/orth=X]",
380 },
381 }
382
383 // Create a new mapper
384 m, err := NewMapper([]config.MappingList{mappingList})
385 require.NoError(t, err)
386
387 tests := []struct {
388 name string
389 mappingID string
390 opts MappingOptions
391 input string
392 expected string
393 expectError bool
394 }{
395 {
396 name: "Query wrapper case",
397 mappingID: "test-wrapper",
398 opts: MappingOptions{
399 Direction: AtoB,
400 },
401 input: `{
402 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
403 "collection": {
404 "@type": "koral:doc",
405 "key": "availability",
406 "match": "match:eq",
407 "rewrites": [
408 {
409 "@type": "koral:rewrite",
410 "_comment": "Free corpus access policy has been added.",
411 "editor": "Kustvakt",
412 "operation": "operation:injection",
413 "src": "Kustvakt"
414 }
415 ],
416 "type": "type:regex",
417 "value": "CC.*"
418 },
419 "query": {
420 "@type": "koral:token",
421 "wrap": {
422 "@type": "koral:term",
423 "foundry": "opennlp",
424 "key": "Baum",
425 "layer": "orth",
426 "match": "match:eq"
427 }
428 }
429 }`,
430 expected: `{
431 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
432 "collection": {
433 "@type": "koral:doc",
434 "key": "availability",
435 "match": "match:eq",
436 "rewrites": [
437 {
438 "@type": "koral:rewrite",
439 "_comment": "Free corpus access policy has been added.",
440 "editor": "Kustvakt",
441 "operation": "operation:injection",
442 "src": "Kustvakt"
443 }
444 ],
445 "type": "type:regex",
446 "value": "CC.*"
447 },
448 "query": {
449 "@type": "koral:token",
450 "wrap": {
451 "@type": "koral:term",
452 "foundry": "opennlp",
453 "key": "X",
454 "layer": "orth",
455 "match": "match:eq"
456 }
457 }
458 }`,
459 },
460 {
461 name: "Empty query field",
462 mappingID: "test-wrapper",
463 opts: MappingOptions{
464 Direction: AtoB,
465 },
466 input: `{
467 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
468 "query": null
469 }`,
470 expected: `{
471 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
472 "query": null
473 }`,
474 },
475 {
476 name: "Missing query field",
477 mappingID: "test-wrapper",
478 opts: MappingOptions{
479 Direction: AtoB,
480 },
481 input: `{
482 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
483 "collection": {
484 "@type": "koral:doc"
485 }
486 }`,
487 expected: `{
488 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
489 "collection": {
490 "@type": "koral:doc"
491 }
492 }`,
493 },
494 {
495 name: "Query field with non-object value",
496 mappingID: "test-wrapper",
497 opts: MappingOptions{
498 Direction: AtoB,
499 },
500 input: `{
501 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
502 "query": "invalid"
503 }`,
504 expected: `{
505 "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
506 "query": "invalid"
507 }`,
508 },
509 }
510
511 for _, tt := range tests {
512 t.Run(tt.name, func(t *testing.T) {
513 // Parse input JSON
514 var inputData interface{}
515 err := json.Unmarshal([]byte(tt.input), &inputData)
516 require.NoError(t, err)
517
518 // Apply mappings
519 result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
520 if tt.expectError {
521 assert.Error(t, err)
522 return
523 }
524 require.NoError(t, err)
525
526 // Parse expected JSON
527 var expectedData interface{}
528 err = json.Unmarshal([]byte(tt.expected), &expectedData)
529 require.NoError(t, err)
530
531 // Compare results
532 assert.Equal(t, expectedData, result)
533 })
534 }
535}