blob: 6519f82c1d72883b6e20cc5eb5eb97a91c226b7c [file] [log] [blame]
Akron57ee5582025-05-21 15:25:13 +02001package config
2
3import (
4 "os"
5 "testing"
6
Akronfa55bb22025-05-26 15:10:42 +02007 "github.com/KorAP/KoralPipe-TermMapper/ast"
Akron57ee5582025-05-21 15:25:13 +02008 "github.com/stretchr/testify/assert"
9 "github.com/stretchr/testify/require"
10)
11
12func TestLoadConfig(t *testing.T) {
13 // Create a temporary YAML file
14 content := `
15- id: opennlp-mapper
16 foundryA: opennlp
17 layerA: p
18 foundryB: upos
19 layerB: p
20 mappings:
21 - "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]"
22 - "[PAV] <> [ADV & PronType:Dem]"
23
24- id: simple-mapper
25 mappings:
26 - "[A] <> [B]"
27`
28 tmpfile, err := os.CreateTemp("", "config-*.yaml")
29 require.NoError(t, err)
30 defer os.Remove(tmpfile.Name())
31
32 _, err = tmpfile.WriteString(content)
33 require.NoError(t, err)
34 err = tmpfile.Close()
35 require.NoError(t, err)
36
37 // Test loading the configuration
38 config, err := LoadConfig(tmpfile.Name())
39 require.NoError(t, err)
40
41 // Verify the configuration
42 require.Len(t, config.Lists, 2)
43
44 // Check first mapping list
45 list1 := config.Lists[0]
46 assert.Equal(t, "opennlp-mapper", list1.ID)
47 assert.Equal(t, "opennlp", list1.FoundryA)
48 assert.Equal(t, "p", list1.LayerA)
49 assert.Equal(t, "upos", list1.FoundryB)
50 assert.Equal(t, "p", list1.LayerB)
51 require.Len(t, list1.Mappings, 2)
52 assert.Equal(t, "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]", string(list1.Mappings[0]))
53 assert.Equal(t, "[PAV] <> [ADV & PronType:Dem]", string(list1.Mappings[1]))
54
55 // Check second mapping list
56 list2 := config.Lists[1]
57 assert.Equal(t, "simple-mapper", list2.ID)
58 assert.Empty(t, list2.FoundryA)
59 assert.Empty(t, list2.LayerA)
60 assert.Empty(t, list2.FoundryB)
61 assert.Empty(t, list2.LayerB)
62 require.Len(t, list2.Mappings, 1)
63 assert.Equal(t, "[A] <> [B]", string(list2.Mappings[0]))
64}
65
66func TestParseMappings(t *testing.T) {
67 list := &MappingList{
68 ID: "test-mapper",
69 FoundryA: "opennlp",
70 LayerA: "p",
71 FoundryB: "upos",
72 LayerB: "p",
73 Mappings: []MappingRule{
74 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
75 },
76 }
77
78 results, err := list.ParseMappings()
79 require.NoError(t, err)
80 require.Len(t, results, 1)
81
82 // Check the parsed upper pattern
83 upper := results[0].Upper
84 require.NotNil(t, upper)
85 require.IsType(t, &ast.Token{}, upper)
86 upperTerm := upper.Wrap.(*ast.Term)
87 assert.Equal(t, "opennlp", upperTerm.Foundry)
88 assert.Equal(t, "p", upperTerm.Layer)
89 assert.Equal(t, "PIDAT", upperTerm.Key)
90
91 // Check the parsed lower pattern
92 lower := results[0].Lower
93 require.NotNil(t, lower)
94 require.IsType(t, &ast.Token{}, lower)
95 lowerGroup := lower.Wrap.(*ast.TermGroup)
96 require.Len(t, lowerGroup.Operands, 2)
97 assert.Equal(t, ast.AndRelation, lowerGroup.Relation)
98
99 // Check first operand
100 term1 := lowerGroup.Operands[0].(*ast.Term)
101 assert.Equal(t, "opennlp", term1.Foundry)
102 assert.Equal(t, "p", term1.Layer)
103 assert.Equal(t, "PIDAT", term1.Key)
104
105 // Check second operand
106 term2 := lowerGroup.Operands[1].(*ast.Term)
107 assert.Equal(t, "opennlp", term2.Foundry)
108 assert.Equal(t, "p", term2.Layer)
109 assert.Equal(t, "AdjType", term2.Key)
110 assert.Equal(t, "Pdt", term2.Value)
111}
112
113func TestLoadConfigValidation(t *testing.T) {
114 tests := []struct {
115 name string
116 content string
117 wantErr string
118 }{
119 {
120 name: "Missing ID",
121 content: `
122- foundryA: opennlp
123 mappings:
124 - "[A] <> [B]"
125`,
126 wantErr: "mapping list at index 0 is missing an ID",
127 },
128 {
129 name: "Empty mappings",
130 content: `
131- id: test
132 foundryA: opennlp
133 mappings: []
134`,
135 wantErr: "mapping list 'test' has no mapping rules",
136 },
137 {
138 name: "Empty rule",
139 content: `
140- id: test
141 mappings:
142 - ""
143`,
144 wantErr: "mapping list 'test' rule at index 0 is empty",
145 },
146 }
147
148 for _, tt := range tests {
149 t.Run(tt.name, func(t *testing.T) {
150 tmpfile, err := os.CreateTemp("", "config-*.yaml")
151 require.NoError(t, err)
152 defer os.Remove(tmpfile.Name())
153
154 _, err = tmpfile.WriteString(tt.content)
155 require.NoError(t, err)
156 err = tmpfile.Close()
157 require.NoError(t, err)
158
159 _, err = LoadConfig(tmpfile.Name())
160 require.Error(t, err)
161 assert.Contains(t, err.Error(), tt.wantErr)
162 })
163 }
164}
Akrona5d88142025-05-22 14:42:09 +0200165
166func TestLoadConfigEdgeCases(t *testing.T) {
167 tests := []struct {
168 name string
169 content string
170 wantErr string
171 }{
172 {
173 name: "Duplicate mapping list IDs",
174 content: `
175- id: test
176 mappings:
177 - "[A] <> [B]"
178- id: test
179 mappings:
180 - "[C] <> [D]"`,
181 wantErr: "duplicate mapping list ID found: test",
182 },
183 {
184 name: "Invalid YAML syntax",
185 content: `
186- id: test
187 mappings:
188 - [A] <> [B] # Unquoted special characters
189`,
190 wantErr: "yaml",
191 },
192 {
193 name: "Empty file",
194 content: "",
195 wantErr: "EOF",
196 },
197 {
198 name: "Non-list YAML",
199 content: `
200id: test
201mappings:
202 - "[A] <> [B]"`,
203 wantErr: "cannot unmarshal",
204 },
205 {
206 name: "Missing required fields",
207 content: `
208- mappings:
209 - "[A] <> [B]"
210- id: test2
211 foundryA: opennlp`,
212 wantErr: "missing an ID",
213 },
214 {
215 name: "Empty mappings list",
216 content: `
217- id: test
218 foundryA: opennlp
219 mappings: []`,
220 wantErr: "has no mapping rules",
221 },
222 {
223 name: "Null values in optional fields",
224 content: `
225- id: test
226 foundryA: null
227 layerA: null
228 foundryB: null
229 layerB: null
230 mappings:
231 - "[A] <> [B]"`,
232 wantErr: "",
233 },
234 {
235 name: "Special characters in IDs",
236 content: `
237- id: "test/special@chars#1"
238 mappings:
239 - "[A] <> [B]"`,
240 wantErr: "",
241 },
242 {
243 name: "Unicode characters in mappings",
244 content: `
245- id: test
246 mappings:
247 - "[ß] <> [ss]"
248 - "[é] <> [e]"`,
249 wantErr: "",
250 },
251 }
252
253 for _, tt := range tests {
254 t.Run(tt.name, func(t *testing.T) {
255 tmpfile, err := os.CreateTemp("", "config-*.yaml")
256 require.NoError(t, err)
257 defer os.Remove(tmpfile.Name())
258
259 _, err = tmpfile.WriteString(tt.content)
260 require.NoError(t, err)
261 err = tmpfile.Close()
262 require.NoError(t, err)
263
264 config, err := LoadConfig(tmpfile.Name())
265 if tt.wantErr != "" {
266 require.Error(t, err)
267 assert.Contains(t, err.Error(), tt.wantErr)
268 return
269 }
270 require.NoError(t, err)
271 require.NotNil(t, config)
272 })
273 }
274}
275
276func TestParseMappingsEdgeCases(t *testing.T) {
277 tests := []struct {
278 name string
279 list *MappingList
280 wantErr bool
281 errCheck func(t *testing.T, err error)
282 }{
283 {
284 name: "Empty mapping rule",
285 list: &MappingList{
286 ID: "test",
287 Mappings: []MappingRule{""},
288 },
289 wantErr: true,
290 errCheck: func(t *testing.T, err error) {
291 assert.Contains(t, err.Error(), "empty")
292 },
293 },
294 {
295 name: "Invalid mapping syntax",
296 list: &MappingList{
297 ID: "test",
298 Mappings: []MappingRule{"[A] -> [B]"},
299 },
300 wantErr: true,
301 errCheck: func(t *testing.T, err error) {
302 assert.Contains(t, err.Error(), "failed to parse")
303 },
304 },
305 {
306 name: "Missing brackets",
307 list: &MappingList{
308 ID: "test",
309 Mappings: []MappingRule{"A <> B"},
310 },
311 wantErr: true,
312 errCheck: func(t *testing.T, err error) {
313 assert.Contains(t, err.Error(), "failed to parse")
314 },
315 },
316 {
317 name: "Complex nested expressions",
318 list: &MappingList{
319 ID: "test",
320 Mappings: []MappingRule{
321 "[A & (B | C) & (D | (E & F))] <> [X & (Y | Z)]",
322 },
323 },
324 wantErr: false,
325 },
326 {
327 name: "Multiple foundry/layer combinations",
328 list: &MappingList{
329 ID: "test",
330 Mappings: []MappingRule{
331 "[foundry1/layer1=A & foundry2/layer2=B] <> [foundry3/layer3=C]",
332 },
333 },
334 wantErr: false,
335 },
336 {
337 name: "Default foundry/layer override",
338 list: &MappingList{
339 ID: "test",
340 FoundryA: "defaultFoundry",
341 LayerA: "defaultLayer",
342 Mappings: []MappingRule{
343 "[A] <> [B]", // Should use defaults
344 },
345 },
346 wantErr: false,
347 },
348 }
349
350 for _, tt := range tests {
351 t.Run(tt.name, func(t *testing.T) {
352 results, err := tt.list.ParseMappings()
353 if tt.wantErr {
354 require.Error(t, err)
355 if tt.errCheck != nil {
356 tt.errCheck(t, err)
357 }
358 return
359 }
360 require.NoError(t, err)
361 require.NotNil(t, results)
362 })
363 }
364}
Akroncc25e932025-06-02 19:39:43 +0200365
366func TestUserProvidedMappingRules(t *testing.T) {
367 // Test the exact YAML mapping rules provided by the user
368 content := `
369- id: stts-ud
370 foundryA: opennlp
371 layerA: p
372 foundryB: upos
373 layerB: p
374 mappings:
375 - "[$\\(] <> [PUNCT & PunctType=Brck]"
376 - "[$,] <> [PUNCT & PunctType=Comm]"
377 - "[$.] <> [PUNCT & PunctType=Peri]"
378 - "[ADJA] <> [ADJ]"
379 - "[ADJD] <> [ADJ & Variant=Short]"
380 - "[ADV] <> [ADV]"
381`
382 tmpfile, err := os.CreateTemp("", "user-config-*.yaml")
383 require.NoError(t, err)
384 defer os.Remove(tmpfile.Name())
385
386 _, err = tmpfile.WriteString(content)
387 require.NoError(t, err)
388 err = tmpfile.Close()
389 require.NoError(t, err)
390
391 // Test loading the configuration
392 config, err := LoadConfig(tmpfile.Name())
393 require.NoError(t, err)
394
395 // Verify the configuration loaded correctly
396 require.Len(t, config.Lists, 1)
397 list := config.Lists[0]
398 assert.Equal(t, "stts-ud", list.ID)
399 assert.Equal(t, "opennlp", list.FoundryA)
400 assert.Equal(t, "p", list.LayerA)
401 assert.Equal(t, "upos", list.FoundryB)
402 assert.Equal(t, "p", list.LayerB)
403 require.Len(t, list.Mappings, 6)
404
405 // First, test individual mappings to isolate the issue
406 t.Run("parenthesis mapping", func(t *testing.T) {
407 singleRule := &MappingList{
408 ID: "test-paren",
409 FoundryA: "opennlp",
410 LayerA: "p",
411 FoundryB: "upos",
412 LayerB: "p",
413 Mappings: []MappingRule{"[$\\(] <> [PUNCT & PunctType=Brck]"},
414 }
415 results, err := singleRule.ParseMappings()
416 require.NoError(t, err)
417 require.Len(t, results, 1)
418
419 upperTerm := results[0].Upper.Wrap.(*ast.Term)
420 assert.Equal(t, "$(", upperTerm.Key)
421 })
422
423 t.Run("comma mapping", func(t *testing.T) {
424 singleRule := &MappingList{
425 ID: "test-comma",
426 FoundryA: "opennlp",
427 LayerA: "p",
428 FoundryB: "upos",
429 LayerB: "p",
430 Mappings: []MappingRule{"[$,] <> [PUNCT & PunctType=Comm]"},
431 }
432 results, err := singleRule.ParseMappings()
433 require.NoError(t, err)
434 require.Len(t, results, 1)
435
436 upperTerm := results[0].Upper.Wrap.(*ast.Term)
437 assert.Equal(t, "$,", upperTerm.Key)
438 })
439
440 t.Run("period mapping", func(t *testing.T) {
441 singleRule := &MappingList{
442 ID: "test-period",
443 FoundryA: "opennlp",
444 LayerA: "p",
445 FoundryB: "upos",
446 LayerB: "p",
447 Mappings: []MappingRule{"[$.] <> [PUNCT & PunctType=Peri]"},
448 }
449 results, err := singleRule.ParseMappings()
450 require.NoError(t, err)
451 require.Len(t, results, 1)
452
453 upperTerm := results[0].Upper.Wrap.(*ast.Term)
454 assert.Equal(t, "$.", upperTerm.Key)
455 })
456
457 // Test that all mapping rules can be parsed successfully
458 results, err := list.ParseMappings()
459 require.NoError(t, err)
460 require.Len(t, results, 6)
461
462 // Verify specific parsing of the special character mapping
463 // The first mapping "[$\\(] <> [PUNCT & PunctType=Brck]" should parse correctly
464 firstMapping := results[0]
465 require.NotNil(t, firstMapping.Upper)
466 upperTerm := firstMapping.Upper.Wrap.(*ast.Term)
467 assert.Equal(t, "$(", upperTerm.Key) // The actual parsed key should be "$("
468 assert.Equal(t, "opennlp", upperTerm.Foundry)
469 assert.Equal(t, "p", upperTerm.Layer)
470
471 require.NotNil(t, firstMapping.Lower)
472 lowerGroup := firstMapping.Lower.Wrap.(*ast.TermGroup)
473 require.Len(t, lowerGroup.Operands, 2)
474 assert.Equal(t, ast.AndRelation, lowerGroup.Relation)
475
476 // Check the PUNCT term
477 punctTerm := lowerGroup.Operands[0].(*ast.Term)
478 assert.Equal(t, "PUNCT", punctTerm.Key)
479 assert.Equal(t, "upos", punctTerm.Foundry)
480 assert.Equal(t, "p", punctTerm.Layer)
481
482 // Check the PunctType term
483 punctTypeTerm := lowerGroup.Operands[1].(*ast.Term)
484 assert.Equal(t, "PunctType", punctTypeTerm.Layer)
485 assert.Equal(t, "Brck", punctTypeTerm.Key)
486 assert.Equal(t, "upos", punctTypeTerm.Foundry)
487
488 // Verify the comma mapping as well
489 secondMapping := results[1]
490 upperTerm2 := secondMapping.Upper.Wrap.(*ast.Term)
491 assert.Equal(t, "$,", upperTerm2.Key)
492
493 // Verify the period mapping
494 thirdMapping := results[2]
495 upperTerm3 := thirdMapping.Upper.Wrap.(*ast.Term)
496 assert.Equal(t, "$.", upperTerm3.Key)
497
498 // Verify basic mappings without special characters
499 fourthMapping := results[3]
500 upperTerm4 := fourthMapping.Upper.Wrap.(*ast.Term)
501 assert.Equal(t, "ADJA", upperTerm4.Key)
502 lowerTerm4 := fourthMapping.Lower.Wrap.(*ast.Term)
503 assert.Equal(t, "ADJ", lowerTerm4.Key)
504}
505
506func TestExistingUposYaml(t *testing.T) {
507 // Test that the existing upos.yaml file can be parsed correctly
508 config, err := LoadConfig("../upos.yaml")
509 require.NoError(t, err)
510
511 // Verify the configuration loaded correctly
512 require.Len(t, config.Lists, 1)
513 list := config.Lists[0]
514 assert.Equal(t, "stts-ud", list.ID)
515 assert.Equal(t, "opennlp", list.FoundryA)
516 assert.Equal(t, "p", list.LayerA)
517 assert.Equal(t, "upos", list.FoundryB)
518 assert.Equal(t, "p", list.LayerB)
519 require.Len(t, list.Mappings, 54) // Should have 54 mapping rules
520
521 // Test that all mapping rules can be parsed successfully
522 results, err := list.ParseMappings()
523 require.NoError(t, err)
524 require.Len(t, results, 54)
525
526 // Test a few specific mappings to ensure they parse correctly
527
528 // Test the special character mappings
529 firstMapping := results[0] // "[$\\(] <> [PUNCT & PunctType=Brck]"
530 upperTerm := firstMapping.Upper.Wrap.(*ast.Term)
531 assert.Equal(t, "$(", upperTerm.Key)
532 assert.Equal(t, "opennlp", upperTerm.Foundry)
533 assert.Equal(t, "p", upperTerm.Layer)
534
535 lowerGroup := firstMapping.Lower.Wrap.(*ast.TermGroup)
536 require.Len(t, lowerGroup.Operands, 2)
537 assert.Equal(t, ast.AndRelation, lowerGroup.Relation)
538
539 punctTerm := lowerGroup.Operands[0].(*ast.Term)
540 assert.Equal(t, "PUNCT", punctTerm.Key)
541 assert.Equal(t, "upos", punctTerm.Foundry)
542 assert.Equal(t, "p", punctTerm.Layer)
543
544 punctTypeTerm := lowerGroup.Operands[1].(*ast.Term)
545 assert.Equal(t, "PunctType", punctTypeTerm.Layer)
546 assert.Equal(t, "Brck", punctTypeTerm.Key)
547 assert.Equal(t, "upos", punctTypeTerm.Foundry)
548
549 // Test a complex mapping with multiple attributes
550 // "[PIDAT] <> [DET & AdjType=Pdt & (PronType=Ind | PronType=Neg | PronType=Tot)]"
551 pidatMapping := results[24] // This should be the PIDAT mapping
552 pidatUpper := pidatMapping.Upper.Wrap.(*ast.Term)
553 assert.Equal(t, "PIDAT", pidatUpper.Key)
554
555 pidatLower := pidatMapping.Lower.Wrap.(*ast.TermGroup)
556 assert.Equal(t, ast.AndRelation, pidatLower.Relation)
557 require.Len(t, pidatLower.Operands, 3) // DET, AdjType=Pdt, and the parenthesized group
558
559 detTerm := pidatLower.Operands[0].(*ast.Term)
560 assert.Equal(t, "DET", detTerm.Key)
561
562 adjTypeTerm := pidatLower.Operands[1].(*ast.Term)
563 assert.Equal(t, "AdjType", adjTypeTerm.Layer)
564 assert.Equal(t, "Pdt", adjTypeTerm.Key)
565
566 // The third operand should be a nested TermGroup with OR relation
567 nestedGroup := pidatLower.Operands[2].(*ast.TermGroup)
568 assert.Equal(t, ast.OrRelation, nestedGroup.Relation)
569 require.Len(t, nestedGroup.Operands, 3) // PronType=Ind, PronType=Neg, PronType=Tot
570
571 for i, expectedValue := range []string{"Ind", "Neg", "Tot"} {
572 pronTypeTerm := nestedGroup.Operands[i].(*ast.Term)
573 assert.Equal(t, "PronType", pronTypeTerm.Layer)
574 assert.Equal(t, expectedValue, pronTypeTerm.Key)
575 }
576}
Akron06d21f02025-06-04 14:36:07 +0200577
578func TestConfigWithSdkAndServer(t *testing.T) {
579 tests := []struct {
580 name string
581 content string
582 expectedSDK string
583 expectedServer string
584 wantErr bool
585 }{
586 {
587 name: "Configuration with SDK and Server values",
588 content: `
589sdk: "https://custom.example.com/sdk.js"
590server: "https://custom.example.com/"
591lists:
592- id: test-mapper
593 foundryA: opennlp
594 layerA: p
595 foundryB: upos
596 layerB: p
597 mappings:
598 - "[A] <> [B]"
599`,
600 expectedSDK: "https://custom.example.com/sdk.js",
601 expectedServer: "https://custom.example.com/",
602 wantErr: false,
603 },
604 {
605 name: "Configuration with only SDK value",
606 content: `
607sdk: "https://custom.example.com/sdk.js"
608lists:
609- id: test-mapper
610 mappings:
611 - "[A] <> [B]"
612`,
613 expectedSDK: "https://custom.example.com/sdk.js",
614 expectedServer: "https://korap.ids-mannheim.de/", // default applied
615 wantErr: false,
616 },
617 {
618 name: "Configuration with only Server value",
619 content: `
620server: "https://custom.example.com/"
621lists:
622- id: test-mapper
623 mappings:
624 - "[A] <> [B]"
625`,
626 expectedSDK: "https://korap.ids-mannheim.de/js/korap-plugin-latest.js", // default applied
627 expectedServer: "https://custom.example.com/",
628 wantErr: false,
629 },
630 {
631 name: "Configuration without SDK and Server (old format with defaults applied)",
632 content: `
633- id: test-mapper
634 mappings:
635 - "[A] <> [B]"
636`,
637 expectedSDK: "https://korap.ids-mannheim.de/js/korap-plugin-latest.js", // default applied
638 expectedServer: "https://korap.ids-mannheim.de/", // default applied
639 wantErr: false,
640 },
641 {
642 name: "Configuration with lists field explicitly",
643 content: `
644sdk: "https://custom.example.com/sdk.js"
645server: "https://custom.example.com/"
646lists:
647- id: test-mapper-1
648 mappings:
649 - "[A] <> [B]"
650- id: test-mapper-2
651 mappings:
652 - "[C] <> [D]"
653`,
654 expectedSDK: "https://custom.example.com/sdk.js",
655 expectedServer: "https://custom.example.com/",
656 wantErr: false,
657 },
658 }
659
660 for _, tt := range tests {
661 t.Run(tt.name, func(t *testing.T) {
662 tmpfile, err := os.CreateTemp("", "config-*.yaml")
663 require.NoError(t, err)
664 defer os.Remove(tmpfile.Name())
665
666 _, err = tmpfile.WriteString(tt.content)
667 require.NoError(t, err)
668 err = tmpfile.Close()
669 require.NoError(t, err)
670
671 config, err := LoadConfig(tmpfile.Name())
672 if tt.wantErr {
673 require.Error(t, err)
674 return
675 }
676
677 require.NoError(t, err)
678 require.NotNil(t, config)
679
680 // Check SDK and Server values
681 assert.Equal(t, tt.expectedSDK, config.SDK)
682 assert.Equal(t, tt.expectedServer, config.Server)
683
684 // Ensure lists are still loaded correctly
685 require.Greater(t, len(config.Lists), 0)
686
687 // Verify first mapping list
688 firstList := config.Lists[0]
689 assert.NotEmpty(t, firstList.ID)
690 assert.Greater(t, len(firstList.Mappings), 0)
691 })
692 }
693}