blob: 088115726568a4ea58760362dcb83548f88b9cad [file] [log] [blame]
Akron57ee5582025-05-21 15:25:13 +02001package config
2
3import (
Akron7e8da932025-07-01 11:56:46 +02004 "bytes"
Akron57ee5582025-05-21 15:25:13 +02005 "os"
Akroned787d02026-05-20 12:31:07 +02006 "path/filepath"
Akron57ee5582025-05-21 15:25:13 +02007 "testing"
8
Akron2ef703c2025-07-03 15:57:42 +02009 "github.com/KorAP/Koral-Mapper/ast"
Akrona67de8f2026-02-23 17:54:26 +010010 "github.com/KorAP/Koral-Mapper/parser"
Akron7e8da932025-07-01 11:56:46 +020011 "github.com/rs/zerolog/log"
Akron57ee5582025-05-21 15:25:13 +020012 "github.com/stretchr/testify/assert"
13 "github.com/stretchr/testify/require"
14)
15
16func TestLoadConfig(t *testing.T) {
17 // Create a temporary YAML file
18 content := `
19- id: opennlp-mapper
20 foundryA: opennlp
21 layerA: p
22 foundryB: upos
23 layerB: p
24 mappings:
25 - "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]"
26 - "[PAV] <> [ADV & PronType:Dem]"
27
28- id: simple-mapper
29 mappings:
30 - "[A] <> [B]"
31`
32 tmpfile, err := os.CreateTemp("", "config-*.yaml")
33 require.NoError(t, err)
34 defer os.Remove(tmpfile.Name())
35
36 _, err = tmpfile.WriteString(content)
37 require.NoError(t, err)
38 err = tmpfile.Close()
39 require.NoError(t, err)
40
41 // Test loading the configuration
Akron585f50f2025-07-03 13:55:47 +020042 config, err := LoadFromSources(tmpfile.Name(), nil)
Akron57ee5582025-05-21 15:25:13 +020043 require.NoError(t, err)
44
45 // Verify the configuration
46 require.Len(t, config.Lists, 2)
47
48 // Check first mapping list
49 list1 := config.Lists[0]
50 assert.Equal(t, "opennlp-mapper", list1.ID)
51 assert.Equal(t, "opennlp", list1.FoundryA)
52 assert.Equal(t, "p", list1.LayerA)
53 assert.Equal(t, "upos", list1.FoundryB)
54 assert.Equal(t, "p", list1.LayerB)
55 require.Len(t, list1.Mappings, 2)
56 assert.Equal(t, "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]", string(list1.Mappings[0]))
57 assert.Equal(t, "[PAV] <> [ADV & PronType:Dem]", string(list1.Mappings[1]))
58
59 // Check second mapping list
60 list2 := config.Lists[1]
61 assert.Equal(t, "simple-mapper", list2.ID)
62 assert.Empty(t, list2.FoundryA)
63 assert.Empty(t, list2.LayerA)
64 assert.Empty(t, list2.FoundryB)
65 assert.Empty(t, list2.LayerB)
66 require.Len(t, list2.Mappings, 1)
67 assert.Equal(t, "[A] <> [B]", string(list2.Mappings[0]))
68}
69
70func TestParseMappings(t *testing.T) {
71 list := &MappingList{
72 ID: "test-mapper",
73 FoundryA: "opennlp",
74 LayerA: "p",
75 FoundryB: "upos",
76 LayerB: "p",
77 Mappings: []MappingRule{
78 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
79 },
80 }
81
82 results, err := list.ParseMappings()
83 require.NoError(t, err)
84 require.Len(t, results, 1)
85
86 // Check the parsed upper pattern
87 upper := results[0].Upper
88 require.NotNil(t, upper)
89 require.IsType(t, &ast.Token{}, upper)
90 upperTerm := upper.Wrap.(*ast.Term)
91 assert.Equal(t, "opennlp", upperTerm.Foundry)
92 assert.Equal(t, "p", upperTerm.Layer)
93 assert.Equal(t, "PIDAT", upperTerm.Key)
94
95 // Check the parsed lower pattern
96 lower := results[0].Lower
97 require.NotNil(t, lower)
98 require.IsType(t, &ast.Token{}, lower)
99 lowerGroup := lower.Wrap.(*ast.TermGroup)
100 require.Len(t, lowerGroup.Operands, 2)
101 assert.Equal(t, ast.AndRelation, lowerGroup.Relation)
102
103 // Check first operand
104 term1 := lowerGroup.Operands[0].(*ast.Term)
105 assert.Equal(t, "opennlp", term1.Foundry)
106 assert.Equal(t, "p", term1.Layer)
107 assert.Equal(t, "PIDAT", term1.Key)
108
109 // Check second operand
110 term2 := lowerGroup.Operands[1].(*ast.Term)
111 assert.Equal(t, "opennlp", term2.Foundry)
112 assert.Equal(t, "p", term2.Layer)
113 assert.Equal(t, "AdjType", term2.Key)
114 assert.Equal(t, "Pdt", term2.Value)
115}
116
117func TestLoadConfigValidation(t *testing.T) {
118 tests := []struct {
119 name string
120 content string
121 wantErr string
122 }{
123 {
124 name: "Missing ID",
125 content: `
126- foundryA: opennlp
127 mappings:
128 - "[A] <> [B]"
129`,
130 wantErr: "mapping list at index 0 is missing an ID",
131 },
132 {
133 name: "Empty mappings",
134 content: `
135- id: test
136 foundryA: opennlp
137 mappings: []
138`,
139 wantErr: "mapping list 'test' has no mapping rules",
140 },
141 {
142 name: "Empty rule",
143 content: `
144- id: test
145 mappings:
146 - ""
147`,
148 wantErr: "mapping list 'test' rule at index 0 is empty",
149 },
150 }
151
152 for _, tt := range tests {
153 t.Run(tt.name, func(t *testing.T) {
154 tmpfile, err := os.CreateTemp("", "config-*.yaml")
155 require.NoError(t, err)
156 defer os.Remove(tmpfile.Name())
157
158 _, err = tmpfile.WriteString(tt.content)
159 require.NoError(t, err)
160 err = tmpfile.Close()
161 require.NoError(t, err)
162
Akron585f50f2025-07-03 13:55:47 +0200163 _, err = LoadFromSources(tmpfile.Name(), nil)
Akron57ee5582025-05-21 15:25:13 +0200164 require.Error(t, err)
165 assert.Contains(t, err.Error(), tt.wantErr)
166 })
167 }
168}
Akrona5d88142025-05-22 14:42:09 +0200169
170func TestLoadConfigEdgeCases(t *testing.T) {
171 tests := []struct {
172 name string
173 content string
174 wantErr string
175 }{
176 {
177 name: "Duplicate mapping list IDs",
178 content: `
179- id: test
180 mappings:
181 - "[A] <> [B]"
182- id: test
183 mappings:
184 - "[C] <> [D]"`,
185 wantErr: "duplicate mapping list ID found: test",
186 },
187 {
188 name: "Invalid YAML syntax",
189 content: `
190- id: test
191 mappings:
192 - [A] <> [B] # Unquoted special characters
193`,
194 wantErr: "yaml",
195 },
196 {
197 name: "Empty file",
198 content: "",
199 wantErr: "EOF",
200 },
201 {
202 name: "Non-list YAML",
203 content: `
204id: test
205mappings:
206 - "[A] <> [B]"`,
Akron813780f2025-06-05 15:44:28 +0200207 wantErr: "no mapping lists found",
Akrona5d88142025-05-22 14:42:09 +0200208 },
209 {
210 name: "Missing required fields",
211 content: `
212- mappings:
213 - "[A] <> [B]"
214- id: test2
215 foundryA: opennlp`,
216 wantErr: "missing an ID",
217 },
218 {
219 name: "Empty mappings list",
220 content: `
221- id: test
222 foundryA: opennlp
223 mappings: []`,
224 wantErr: "has no mapping rules",
225 },
226 {
227 name: "Null values in optional fields",
228 content: `
229- id: test
230 foundryA: null
231 layerA: null
232 foundryB: null
233 layerB: null
234 mappings:
235 - "[A] <> [B]"`,
236 wantErr: "",
237 },
238 {
239 name: "Special characters in IDs",
240 content: `
241- id: "test/special@chars#1"
242 mappings:
243 - "[A] <> [B]"`,
244 wantErr: "",
245 },
246 {
247 name: "Unicode characters in mappings",
248 content: `
249- id: test
250 mappings:
251 - "[ß] <> [ss]"
252 - "[é] <> [e]"`,
253 wantErr: "",
254 },
255 }
256
257 for _, tt := range tests {
258 t.Run(tt.name, func(t *testing.T) {
259 tmpfile, err := os.CreateTemp("", "config-*.yaml")
260 require.NoError(t, err)
261 defer os.Remove(tmpfile.Name())
262
263 _, err = tmpfile.WriteString(tt.content)
264 require.NoError(t, err)
265 err = tmpfile.Close()
266 require.NoError(t, err)
267
Akron585f50f2025-07-03 13:55:47 +0200268 config, err := LoadFromSources(tmpfile.Name(), nil)
Akrona5d88142025-05-22 14:42:09 +0200269 if tt.wantErr != "" {
270 require.Error(t, err)
271 assert.Contains(t, err.Error(), tt.wantErr)
272 return
273 }
274 require.NoError(t, err)
275 require.NotNil(t, config)
276 })
277 }
278}
279
280func TestParseMappingsEdgeCases(t *testing.T) {
281 tests := []struct {
282 name string
283 list *MappingList
284 wantErr bool
285 errCheck func(t *testing.T, err error)
286 }{
287 {
288 name: "Empty mapping rule",
289 list: &MappingList{
290 ID: "test",
291 Mappings: []MappingRule{""},
292 },
293 wantErr: true,
294 errCheck: func(t *testing.T, err error) {
295 assert.Contains(t, err.Error(), "empty")
296 },
297 },
298 {
299 name: "Invalid mapping syntax",
300 list: &MappingList{
301 ID: "test",
302 Mappings: []MappingRule{"[A] -> [B]"},
303 },
304 wantErr: true,
305 errCheck: func(t *testing.T, err error) {
306 assert.Contains(t, err.Error(), "failed to parse")
307 },
308 },
309 {
310 name: "Missing brackets",
311 list: &MappingList{
312 ID: "test",
313 Mappings: []MappingRule{"A <> B"},
314 },
315 wantErr: true,
316 errCheck: func(t *testing.T, err error) {
317 assert.Contains(t, err.Error(), "failed to parse")
318 },
319 },
320 {
321 name: "Complex nested expressions",
322 list: &MappingList{
323 ID: "test",
324 Mappings: []MappingRule{
325 "[A & (B | C) & (D | (E & F))] <> [X & (Y | Z)]",
326 },
327 },
328 wantErr: false,
329 },
330 {
331 name: "Multiple foundry/layer combinations",
332 list: &MappingList{
333 ID: "test",
334 Mappings: []MappingRule{
335 "[foundry1/layer1=A & foundry2/layer2=B] <> [foundry3/layer3=C]",
336 },
337 },
338 wantErr: false,
339 },
340 {
341 name: "Default foundry/layer override",
342 list: &MappingList{
343 ID: "test",
344 FoundryA: "defaultFoundry",
345 LayerA: "defaultLayer",
346 Mappings: []MappingRule{
347 "[A] <> [B]", // Should use defaults
348 },
349 },
350 wantErr: false,
351 },
352 }
353
354 for _, tt := range tests {
355 t.Run(tt.name, func(t *testing.T) {
356 results, err := tt.list.ParseMappings()
357 if tt.wantErr {
358 require.Error(t, err)
359 if tt.errCheck != nil {
360 tt.errCheck(t, err)
361 }
362 return
363 }
364 require.NoError(t, err)
365 require.NotNil(t, results)
366 })
367 }
368}
Akroncc25e932025-06-02 19:39:43 +0200369
370func TestUserProvidedMappingRules(t *testing.T) {
371 // Test the exact YAML mapping rules provided by the user
372 content := `
373- id: stts-ud
374 foundryA: opennlp
375 layerA: p
376 foundryB: upos
377 layerB: p
378 mappings:
379 - "[$\\(] <> [PUNCT & PunctType=Brck]"
380 - "[$,] <> [PUNCT & PunctType=Comm]"
381 - "[$.] <> [PUNCT & PunctType=Peri]"
382 - "[ADJA] <> [ADJ]"
383 - "[ADJD] <> [ADJ & Variant=Short]"
384 - "[ADV] <> [ADV]"
385`
386 tmpfile, err := os.CreateTemp("", "user-config-*.yaml")
387 require.NoError(t, err)
388 defer os.Remove(tmpfile.Name())
389
390 _, err = tmpfile.WriteString(content)
391 require.NoError(t, err)
392 err = tmpfile.Close()
393 require.NoError(t, err)
394
395 // Test loading the configuration
Akron585f50f2025-07-03 13:55:47 +0200396 config, err := LoadFromSources(tmpfile.Name(), nil)
Akroncc25e932025-06-02 19:39:43 +0200397 require.NoError(t, err)
398
399 // Verify the configuration loaded correctly
400 require.Len(t, config.Lists, 1)
401 list := config.Lists[0]
402 assert.Equal(t, "stts-ud", list.ID)
403 assert.Equal(t, "opennlp", list.FoundryA)
404 assert.Equal(t, "p", list.LayerA)
405 assert.Equal(t, "upos", list.FoundryB)
406 assert.Equal(t, "p", list.LayerB)
407 require.Len(t, list.Mappings, 6)
408
409 // First, test individual mappings to isolate the issue
410 t.Run("parenthesis mapping", func(t *testing.T) {
411 singleRule := &MappingList{
412 ID: "test-paren",
413 FoundryA: "opennlp",
414 LayerA: "p",
415 FoundryB: "upos",
416 LayerB: "p",
417 Mappings: []MappingRule{"[$\\(] <> [PUNCT & PunctType=Brck]"},
418 }
419 results, err := singleRule.ParseMappings()
420 require.NoError(t, err)
421 require.Len(t, results, 1)
422
423 upperTerm := results[0].Upper.Wrap.(*ast.Term)
424 assert.Equal(t, "$(", upperTerm.Key)
425 })
426
427 t.Run("comma mapping", func(t *testing.T) {
428 singleRule := &MappingList{
429 ID: "test-comma",
430 FoundryA: "opennlp",
431 LayerA: "p",
432 FoundryB: "upos",
433 LayerB: "p",
434 Mappings: []MappingRule{"[$,] <> [PUNCT & PunctType=Comm]"},
435 }
436 results, err := singleRule.ParseMappings()
437 require.NoError(t, err)
438 require.Len(t, results, 1)
439
440 upperTerm := results[0].Upper.Wrap.(*ast.Term)
441 assert.Equal(t, "$,", upperTerm.Key)
442 })
443
444 t.Run("period mapping", func(t *testing.T) {
445 singleRule := &MappingList{
446 ID: "test-period",
447 FoundryA: "opennlp",
448 LayerA: "p",
449 FoundryB: "upos",
450 LayerB: "p",
451 Mappings: []MappingRule{"[$.] <> [PUNCT & PunctType=Peri]"},
452 }
453 results, err := singleRule.ParseMappings()
454 require.NoError(t, err)
455 require.Len(t, results, 1)
456
457 upperTerm := results[0].Upper.Wrap.(*ast.Term)
458 assert.Equal(t, "$.", upperTerm.Key)
459 })
460
461 // Test that all mapping rules can be parsed successfully
462 results, err := list.ParseMappings()
463 require.NoError(t, err)
464 require.Len(t, results, 6)
465
466 // Verify specific parsing of the special character mapping
467 // The first mapping "[$\\(] <> [PUNCT & PunctType=Brck]" should parse correctly
468 firstMapping := results[0]
469 require.NotNil(t, firstMapping.Upper)
470 upperTerm := firstMapping.Upper.Wrap.(*ast.Term)
471 assert.Equal(t, "$(", upperTerm.Key) // The actual parsed key should be "$("
472 assert.Equal(t, "opennlp", upperTerm.Foundry)
473 assert.Equal(t, "p", upperTerm.Layer)
474
475 require.NotNil(t, firstMapping.Lower)
476 lowerGroup := firstMapping.Lower.Wrap.(*ast.TermGroup)
477 require.Len(t, lowerGroup.Operands, 2)
478 assert.Equal(t, ast.AndRelation, lowerGroup.Relation)
479
480 // Check the PUNCT term
481 punctTerm := lowerGroup.Operands[0].(*ast.Term)
482 assert.Equal(t, "PUNCT", punctTerm.Key)
483 assert.Equal(t, "upos", punctTerm.Foundry)
484 assert.Equal(t, "p", punctTerm.Layer)
485
486 // Check the PunctType term
487 punctTypeTerm := lowerGroup.Operands[1].(*ast.Term)
488 assert.Equal(t, "PunctType", punctTypeTerm.Layer)
489 assert.Equal(t, "Brck", punctTypeTerm.Key)
490 assert.Equal(t, "upos", punctTypeTerm.Foundry)
491
492 // Verify the comma mapping as well
493 secondMapping := results[1]
494 upperTerm2 := secondMapping.Upper.Wrap.(*ast.Term)
495 assert.Equal(t, "$,", upperTerm2.Key)
496
497 // Verify the period mapping
498 thirdMapping := results[2]
499 upperTerm3 := thirdMapping.Upper.Wrap.(*ast.Term)
500 assert.Equal(t, "$.", upperTerm3.Key)
501
502 // Verify basic mappings without special characters
503 fourthMapping := results[3]
504 upperTerm4 := fourthMapping.Upper.Wrap.(*ast.Term)
505 assert.Equal(t, "ADJA", upperTerm4.Key)
506 lowerTerm4 := fourthMapping.Lower.Wrap.(*ast.Term)
507 assert.Equal(t, "ADJ", lowerTerm4.Key)
508}
509
Akron06d21f02025-06-04 14:36:07 +0200510func TestConfigWithSdkAndServer(t *testing.T) {
511 tests := []struct {
512 name string
513 content string
514 expectedSDK string
515 expectedServer string
516 wantErr bool
517 }{
518 {
519 name: "Configuration with SDK and Server values",
520 content: `
521sdk: "https://custom.example.com/sdk.js"
522server: "https://custom.example.com/"
523lists:
524- id: test-mapper
525 foundryA: opennlp
526 layerA: p
527 foundryB: upos
528 layerB: p
529 mappings:
530 - "[A] <> [B]"
531`,
532 expectedSDK: "https://custom.example.com/sdk.js",
533 expectedServer: "https://custom.example.com/",
534 wantErr: false,
535 },
536 {
537 name: "Configuration with only SDK value",
538 content: `
539sdk: "https://custom.example.com/sdk.js"
540lists:
541- id: test-mapper
542 mappings:
543 - "[A] <> [B]"
544`,
545 expectedSDK: "https://custom.example.com/sdk.js",
546 expectedServer: "https://korap.ids-mannheim.de/", // default applied
547 wantErr: false,
548 },
549 {
550 name: "Configuration with only Server value",
551 content: `
552server: "https://custom.example.com/"
553lists:
554- id: test-mapper
555 mappings:
556 - "[A] <> [B]"
557`,
558 expectedSDK: "https://korap.ids-mannheim.de/js/korap-plugin-latest.js", // default applied
559 expectedServer: "https://custom.example.com/",
560 wantErr: false,
561 },
562 {
563 name: "Configuration without SDK and Server (old format with defaults applied)",
564 content: `
565- id: test-mapper
566 mappings:
567 - "[A] <> [B]"
568`,
569 expectedSDK: "https://korap.ids-mannheim.de/js/korap-plugin-latest.js", // default applied
570 expectedServer: "https://korap.ids-mannheim.de/", // default applied
571 wantErr: false,
572 },
573 {
574 name: "Configuration with lists field explicitly",
575 content: `
576sdk: "https://custom.example.com/sdk.js"
577server: "https://custom.example.com/"
578lists:
579- id: test-mapper-1
580 mappings:
581 - "[A] <> [B]"
582- id: test-mapper-2
583 mappings:
584 - "[C] <> [D]"
585`,
586 expectedSDK: "https://custom.example.com/sdk.js",
587 expectedServer: "https://custom.example.com/",
588 wantErr: false,
589 },
590 }
591
592 for _, tt := range tests {
593 t.Run(tt.name, func(t *testing.T) {
594 tmpfile, err := os.CreateTemp("", "config-*.yaml")
595 require.NoError(t, err)
596 defer os.Remove(tmpfile.Name())
597
598 _, err = tmpfile.WriteString(tt.content)
599 require.NoError(t, err)
600 err = tmpfile.Close()
601 require.NoError(t, err)
602
Akron585f50f2025-07-03 13:55:47 +0200603 config, err := LoadFromSources(tmpfile.Name(), nil)
Akron06d21f02025-06-04 14:36:07 +0200604 if tt.wantErr {
605 require.Error(t, err)
606 return
607 }
608
609 require.NoError(t, err)
610 require.NotNil(t, config)
611
612 // Check SDK and Server values
613 assert.Equal(t, tt.expectedSDK, config.SDK)
614 assert.Equal(t, tt.expectedServer, config.Server)
615
616 // Ensure lists are still loaded correctly
617 require.Greater(t, len(config.Lists), 0)
618
619 // Verify first mapping list
620 firstList := config.Lists[0]
621 assert.NotEmpty(t, firstList.ID)
622 assert.Greater(t, len(firstList.Mappings), 0)
623 })
624 }
625}
Akrone1cff7c2025-06-04 18:43:32 +0200626
627func TestLoadFromSources(t *testing.T) {
628 // Create main config file
629 mainConfigContent := `
630sdk: "https://custom.example.com/sdk.js"
631server: "https://custom.example.com/"
632lists:
633- id: main-mapper
634 mappings:
635 - "[A] <> [B]"
636`
637 mainConfigFile, err := os.CreateTemp("", "main-config-*.yaml")
638 require.NoError(t, err)
639 defer os.Remove(mainConfigFile.Name())
640
641 _, err = mainConfigFile.WriteString(mainConfigContent)
642 require.NoError(t, err)
643 err = mainConfigFile.Close()
644 require.NoError(t, err)
645
646 // Create individual mapping files
647 mappingFile1Content := `
648id: mapper-1
649foundryA: opennlp
650layerA: p
651mappings:
652 - "[C] <> [D]"
653`
654 mappingFile1, err := os.CreateTemp("", "mapping1-*.yaml")
655 require.NoError(t, err)
656 defer os.Remove(mappingFile1.Name())
657
658 _, err = mappingFile1.WriteString(mappingFile1Content)
659 require.NoError(t, err)
660 err = mappingFile1.Close()
661 require.NoError(t, err)
662
663 mappingFile2Content := `
664id: mapper-2
665foundryB: upos
666layerB: p
667mappings:
668 - "[E] <> [F]"
669`
670 mappingFile2, err := os.CreateTemp("", "mapping2-*.yaml")
671 require.NoError(t, err)
672 defer os.Remove(mappingFile2.Name())
673
674 _, err = mappingFile2.WriteString(mappingFile2Content)
675 require.NoError(t, err)
676 err = mappingFile2.Close()
677 require.NoError(t, err)
678
679 tests := []struct {
680 name string
681 configFile string
682 mappingFiles []string
683 wantErr bool
684 expectedIDs []string
685 }{
686 {
687 name: "Main config only",
688 configFile: mainConfigFile.Name(),
689 mappingFiles: []string{},
690 wantErr: false,
691 expectedIDs: []string{"main-mapper"},
692 },
693 {
694 name: "Mapping files only",
695 configFile: "",
696 mappingFiles: []string{mappingFile1.Name(), mappingFile2.Name()},
697 wantErr: false,
698 expectedIDs: []string{"mapper-1", "mapper-2"},
699 },
700 {
701 name: "Main config and mapping files",
702 configFile: mainConfigFile.Name(),
703 mappingFiles: []string{mappingFile1.Name(), mappingFile2.Name()},
704 wantErr: false,
705 expectedIDs: []string{"main-mapper", "mapper-1", "mapper-2"},
706 },
707 {
708 name: "No configuration sources",
709 configFile: "",
710 mappingFiles: []string{},
711 wantErr: true,
712 },
713 }
714
715 for _, tt := range tests {
716 t.Run(tt.name, func(t *testing.T) {
717 config, err := LoadFromSources(tt.configFile, tt.mappingFiles)
718 if tt.wantErr {
719 require.Error(t, err)
720 return
721 }
722
723 require.NoError(t, err)
724 require.NotNil(t, config)
725
726 // Check that all expected mapping IDs are present
727 require.Len(t, config.Lists, len(tt.expectedIDs))
728 actualIDs := make([]string, len(config.Lists))
729 for i, list := range config.Lists {
730 actualIDs[i] = list.ID
731 }
732 for _, expectedID := range tt.expectedIDs {
733 assert.Contains(t, actualIDs, expectedID)
734 }
735
736 // Check that SDK and Server are set (either from config or defaults)
737 assert.NotEmpty(t, config.SDK)
Akron43fb1022026-02-20 11:38:49 +0100738 assert.NotEmpty(t, config.Stylesheet)
Akrone1cff7c2025-06-04 18:43:32 +0200739 assert.NotEmpty(t, config.Server)
740 })
741 }
742}
743
744func TestLoadFromSourcesWithDefaults(t *testing.T) {
745 // Test that defaults are applied when loading only mapping files
746 mappingFileContent := `
747id: test-mapper
748mappings:
749 - "[A] <> [B]"
750`
751 mappingFile, err := os.CreateTemp("", "mapping-*.yaml")
752 require.NoError(t, err)
753 defer os.Remove(mappingFile.Name())
754
755 _, err = mappingFile.WriteString(mappingFileContent)
756 require.NoError(t, err)
757 err = mappingFile.Close()
758 require.NoError(t, err)
759
760 config, err := LoadFromSources("", []string{mappingFile.Name()})
761 require.NoError(t, err)
762
763 // Check that defaults are applied
764 assert.Equal(t, defaultSDK, config.SDK)
Akron43fb1022026-02-20 11:38:49 +0100765 assert.Equal(t, defaultStylesheet, config.Stylesheet)
Akrone1cff7c2025-06-04 18:43:32 +0200766 assert.Equal(t, defaultServer, config.Server)
767 require.Len(t, config.Lists, 1)
768 assert.Equal(t, "test-mapper", config.Lists[0].ID)
769}
770
771func TestLoadFromSourcesDuplicateIDs(t *testing.T) {
Akron7e8da932025-07-01 11:56:46 +0200772 // Set up a buffer to capture log output
773 var buf bytes.Buffer
774 originalLogger := log.Logger
775 defer func() {
776 log.Logger = originalLogger
777 }()
778 log.Logger = log.Logger.Output(&buf)
779
Akrone1cff7c2025-06-04 18:43:32 +0200780 // Create config with duplicate IDs across sources
781 configContent := `
782lists:
783- id: duplicate-id
784 mappings:
785 - "[A] <> [B]"
786`
787 configFile, err := os.CreateTemp("", "config-*.yaml")
788 require.NoError(t, err)
789 defer os.Remove(configFile.Name())
790
791 _, err = configFile.WriteString(configContent)
792 require.NoError(t, err)
793 err = configFile.Close()
794 require.NoError(t, err)
795
796 mappingContent := `
797id: duplicate-id
798mappings:
799 - "[C] <> [D]"
800`
801 mappingFile, err := os.CreateTemp("", "mapping-*.yaml")
802 require.NoError(t, err)
803 defer os.Remove(mappingFile.Name())
804
805 _, err = mappingFile.WriteString(mappingContent)
806 require.NoError(t, err)
807 err = mappingFile.Close()
808 require.NoError(t, err)
809
Akron7e8da932025-07-01 11:56:46 +0200810 // The function should now succeed but log the duplicate ID error
811 config, err := LoadFromSources(configFile.Name(), []string{mappingFile.Name()})
812 require.NoError(t, err)
813 require.NotNil(t, config)
814
815 // Check that the duplicate ID error was logged
816 logOutput := buf.String()
817 assert.Contains(t, logOutput, "Duplicate mapping list ID found")
818 assert.Contains(t, logOutput, "duplicate-id")
819
820 // Only the first mapping list (from config file) should be loaded
821 require.Len(t, config.Lists, 1)
822 assert.Equal(t, "duplicate-id", config.Lists[0].ID)
823 // Check that it's the one from the config file (has mapping "[A] <> [B]")
824 assert.Equal(t, "[A] <> [B]", string(config.Lists[0].Mappings[0]))
Akrone1cff7c2025-06-04 18:43:32 +0200825}
Akron813780f2025-06-05 15:44:28 +0200826
827func TestLoadFromSourcesConfigWithOnlyPort(t *testing.T) {
828 // Create config file with only port (no lists)
829 configContent := `
830port: 8080
831loglevel: debug
832`
833 configFile, err := os.CreateTemp("", "config-*.yaml")
834 require.NoError(t, err)
835 defer os.Remove(configFile.Name())
836
837 _, err = configFile.WriteString(configContent)
838 require.NoError(t, err)
839 err = configFile.Close()
840 require.NoError(t, err)
841
842 // Create mapping file
843 mappingContent := `
844id: test-mapper
845mappings:
846 - "[A] <> [B]"
847`
848 mappingFile, err := os.CreateTemp("", "mapping-*.yaml")
849 require.NoError(t, err)
850 defer os.Remove(mappingFile.Name())
851
852 _, err = mappingFile.WriteString(mappingContent)
853 require.NoError(t, err)
854 err = mappingFile.Close()
855 require.NoError(t, err)
856
857 // This should work: config file has only port, mapping file provides the mapping list
858 config, err := LoadFromSources(configFile.Name(), []string{mappingFile.Name()})
859 require.NoError(t, err)
860 require.NotNil(t, config)
861
862 // Check that port and log level from config file are preserved
863 assert.Equal(t, 8080, config.Port)
864 assert.Equal(t, "debug", config.LogLevel)
865
866 // Check that mapping from mapping file is loaded
867 require.Len(t, config.Lists, 1)
868 assert.Equal(t, "test-mapper", config.Lists[0].ID)
869
870 // Check that defaults are applied for other fields
871 assert.Equal(t, defaultSDK, config.SDK)
Akron43fb1022026-02-20 11:38:49 +0100872 assert.Equal(t, defaultStylesheet, config.Stylesheet)
Akron813780f2025-06-05 15:44:28 +0200873 assert.Equal(t, defaultServer, config.Server)
874 assert.Equal(t, defaultServiceURL, config.ServiceURL)
875}
Akron2f93c582026-02-19 16:49:13 +0100876
877func TestCorpusMappingListType(t *testing.T) {
878 content := `
879lists:
880- id: corpus-class-mapping
881 type: corpus
882 desc: Maps textClass values to genre field
883 mappings:
884 - "textClass=novel <> genre=fiction"
885 - "textClass=science <> genre=nonfiction"
886- id: annotation-mapper
887 mappings:
888 - "[A] <> [B]"
889`
890 tmpfile, err := os.CreateTemp("", "config-corpus-*.yaml")
891 require.NoError(t, err)
892 defer os.Remove(tmpfile.Name())
893
894 _, err = tmpfile.WriteString(content)
895 require.NoError(t, err)
896 err = tmpfile.Close()
897 require.NoError(t, err)
898
899 config, err := LoadFromSources(tmpfile.Name(), nil)
900 require.NoError(t, err)
901 require.Len(t, config.Lists, 2)
902
903 assert.Equal(t, "corpus", config.Lists[0].Type)
904 assert.True(t, config.Lists[0].IsCorpus())
905
906 assert.Equal(t, "", config.Lists[1].Type)
907 assert.False(t, config.Lists[1].IsCorpus())
908}
909
910func TestParseCorpusMappings(t *testing.T) {
911 list := &MappingList{
912 ID: "test-corpus",
913 Type: "corpus",
914 Mappings: []MappingRule{
915 "textClass=novel <> genre=fiction",
916 "(textClass=novel & pubDate=2020:geq#date) <> genre=recentfiction",
917 },
918 }
919
920 results, err := list.ParseCorpusMappings()
921 require.NoError(t, err)
922 require.Len(t, results, 2)
923
924 // Verify simple field rule
925 require.NotNil(t, results[0].Upper)
926 require.NotNil(t, results[0].Lower)
927
928 // Verify group rule
929 require.NotNil(t, results[1].Upper)
930 require.NotNil(t, results[1].Lower)
931}
932
933func TestParseCorpusMappingsErrors(t *testing.T) {
934 list := &MappingList{
935 ID: "test-corpus",
936 Type: "corpus",
937 Mappings: []MappingRule{""},
938 }
939
940 _, err := list.ParseCorpusMappings()
941 assert.Error(t, err)
942 assert.Contains(t, err.Error(), "empty corpus mapping rule")
943
944 list2 := &MappingList{
945 ID: "test-corpus",
946 Type: "corpus",
947 Mappings: []MappingRule{"invalid rule without separator"},
948 }
949
950 _, err = list2.ParseCorpusMappings()
951 assert.Error(t, err)
952 assert.Contains(t, err.Error(), "failed to parse corpus mapping rule")
953}
Akrona67de8f2026-02-23 17:54:26 +0100954
Akronf98ba282026-02-24 11:13:30 +0100955func TestApplyEnvOverrides(t *testing.T) {
956 envKeys := []string{
957 "KORAL_MAPPER_SERVER",
958 "KORAL_MAPPER_SDK",
959 "KORAL_MAPPER_STYLESHEET",
960 "KORAL_MAPPER_SERVICE_URL",
961 "KORAL_MAPPER_COOKIE_NAME",
962 "KORAL_MAPPER_PORT",
963 "KORAL_MAPPER_LOG_LEVEL",
Akronf1ca8822026-05-20 15:44:00 +0200964 "KORAL_MAPPER_ALLOW_ORIGINS",
Akronf7bba072026-05-21 12:36:19 +0200965 "KORAL_MAPPER_REWRITES",
Akronf98ba282026-02-24 11:13:30 +0100966 }
967
968 clearEnv := func() {
969 for _, key := range envKeys {
970 os.Unsetenv(key)
971 }
972 }
973
974 t.Run("ENV overrides config values", func(t *testing.T) {
975 clearEnv()
976 defer clearEnv()
977
978 cfg := &MappingConfig{
979 Server: "from-config",
980 SDK: "from-config",
981 Stylesheet: "from-config",
982 ServiceURL: "from-config",
983 CookieName: "from-config",
984 Port: 1234,
985 LogLevel: "warn",
986 }
987
988 os.Setenv("KORAL_MAPPER_SERVER", "from-env-server")
989 os.Setenv("KORAL_MAPPER_SDK", "from-env-sdk")
990 os.Setenv("KORAL_MAPPER_STYLESHEET", "from-env-style")
991 os.Setenv("KORAL_MAPPER_SERVICE_URL", "from-env-url")
992 os.Setenv("KORAL_MAPPER_COOKIE_NAME", "from-env-cookie")
993 os.Setenv("KORAL_MAPPER_PORT", "9999")
994 os.Setenv("KORAL_MAPPER_LOG_LEVEL", "debug")
995
996 ApplyEnvOverrides(cfg)
997
998 assert.Equal(t, "from-env-server", cfg.Server)
999 assert.Equal(t, "from-env-sdk", cfg.SDK)
1000 assert.Equal(t, "from-env-style", cfg.Stylesheet)
1001 assert.Equal(t, "from-env-url", cfg.ServiceURL)
1002 assert.Equal(t, "from-env-cookie", cfg.CookieName)
1003 assert.Equal(t, 9999, cfg.Port)
1004 assert.Equal(t, "debug", cfg.LogLevel)
1005 })
1006
1007 t.Run("Empty ENV does not override", func(t *testing.T) {
1008 clearEnv()
1009 defer clearEnv()
1010
1011 cfg := &MappingConfig{
1012 Server: "original-server",
1013 SDK: "original-sdk",
1014 Stylesheet: "original-style",
1015 ServiceURL: "original-url",
1016 CookieName: "original-cookie",
1017 Port: 1234,
1018 LogLevel: "info",
1019 }
1020
1021 ApplyEnvOverrides(cfg)
1022
1023 assert.Equal(t, "original-server", cfg.Server)
1024 assert.Equal(t, "original-sdk", cfg.SDK)
1025 assert.Equal(t, "original-style", cfg.Stylesheet)
1026 assert.Equal(t, "original-url", cfg.ServiceURL)
1027 assert.Equal(t, "original-cookie", cfg.CookieName)
1028 assert.Equal(t, 1234, cfg.Port)
1029 assert.Equal(t, "info", cfg.LogLevel)
1030 })
1031
1032 t.Run("Invalid port ENV is ignored", func(t *testing.T) {
1033 clearEnv()
1034 defer clearEnv()
1035
1036 cfg := &MappingConfig{Port: 5725}
1037 os.Setenv("KORAL_MAPPER_PORT", "not-a-number")
1038
1039 ApplyEnvOverrides(cfg)
1040
1041 assert.Equal(t, 5725, cfg.Port)
1042 })
1043
1044 t.Run("Partial ENV overrides", func(t *testing.T) {
1045 clearEnv()
1046 defer clearEnv()
1047
1048 cfg := &MappingConfig{
1049 Server: "from-config",
1050 SDK: "from-config",
1051 Port: 1234,
1052 LogLevel: "warn",
1053 }
1054
1055 os.Setenv("KORAL_MAPPER_SERVER", "from-env")
1056 os.Setenv("KORAL_MAPPER_PORT", "8080")
1057
1058 ApplyEnvOverrides(cfg)
1059
1060 assert.Equal(t, "from-env", cfg.Server)
1061 assert.Equal(t, "from-config", cfg.SDK)
1062 assert.Equal(t, 8080, cfg.Port)
1063 assert.Equal(t, "warn", cfg.LogLevel)
1064 })
1065}
1066
Akroned787d02026-05-20 12:31:07 +02001067func TestBasePathEnvOverride(t *testing.T) {
1068 t.Setenv("KORAL_MAPPER_BASE_PATH", "/custom/base/path")
1069
1070 cfg := &MappingConfig{BasePath: "from-config"}
1071 ApplyEnvOverrides(cfg)
1072
1073 assert.Equal(t, "/custom/base/path", cfg.BasePath)
1074}
1075
1076func TestBasePathFromYAML(t *testing.T) {
1077 content := `
1078basePath: "/opt/koralmapper"
1079lists:
1080 - id: test-mapper
1081 mappings:
1082 - "[A] <> [B]"
1083`
1084 tmpfile, err := os.CreateTemp("", "config-basepath-*.yaml")
1085 require.NoError(t, err)
1086 defer os.Remove(tmpfile.Name())
1087
1088 _, err = tmpfile.WriteString(content)
1089 require.NoError(t, err)
1090 require.NoError(t, tmpfile.Close())
1091
1092 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1093 require.NoError(t, err)
1094 assert.Equal(t, "/opt/koralmapper", cfg.BasePath)
1095}
1096
Akronf98ba282026-02-24 11:13:30 +01001097func TestEnvOverridesInLoadFromSources(t *testing.T) {
1098 envKeys := []string{
1099 "KORAL_MAPPER_SERVER",
1100 "KORAL_MAPPER_SDK",
1101 "KORAL_MAPPER_PORT",
1102 "KORAL_MAPPER_LOG_LEVEL",
1103 "KORAL_MAPPER_STYLESHEET",
1104 "KORAL_MAPPER_SERVICE_URL",
1105 "KORAL_MAPPER_COOKIE_NAME",
Akronf1ca8822026-05-20 15:44:00 +02001106 "KORAL_MAPPER_ALLOW_ORIGINS",
Akronf7bba072026-05-21 12:36:19 +02001107 "KORAL_MAPPER_REWRITES",
Akronf98ba282026-02-24 11:13:30 +01001108 }
1109 clearEnv := func() {
1110 for _, key := range envKeys {
1111 os.Unsetenv(key)
1112 }
1113 }
1114 clearEnv()
1115 defer clearEnv()
1116
1117 configContent := `
1118sdk: "https://custom.example.com/sdk.js"
1119server: "https://custom.example.com/"
1120port: 3000
1121lists:
1122- id: test-mapper
1123 mappings:
1124 - "[A] <> [B]"
1125`
1126 tmpfile, err := os.CreateTemp("", "config-env-*.yaml")
1127 require.NoError(t, err)
1128 defer os.Remove(tmpfile.Name())
1129
1130 _, err = tmpfile.WriteString(configContent)
1131 require.NoError(t, err)
1132 require.NoError(t, tmpfile.Close())
1133
1134 os.Setenv("KORAL_MAPPER_SERVER", "https://env-override.example.com/")
1135 os.Setenv("KORAL_MAPPER_PORT", "7777")
1136
1137 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1138 require.NoError(t, err)
1139
1140 // ENV overrides YAML values
1141 assert.Equal(t, "https://env-override.example.com/", cfg.Server)
1142 assert.Equal(t, 7777, cfg.Port)
1143
1144 // Non-overridden values preserved from YAML
1145 assert.Equal(t, "https://custom.example.com/sdk.js", cfg.SDK)
1146
1147 // Defaults applied for unset fields
1148 assert.Equal(t, defaultStylesheet, cfg.Stylesheet)
1149 assert.Equal(t, defaultServiceURL, cfg.ServiceURL)
1150 assert.Equal(t, defaultCookieName, cfg.CookieName)
1151 assert.Equal(t, defaultLogLevel, cfg.LogLevel)
1152}
1153
Akron8414ae52026-05-19 13:31:14 +02001154func TestRewritesYAMLField(t *testing.T) {
1155 content := `
1156lists:
1157 - id: rewrite-on
1158 rewrites: true
1159 mappings:
1160 - "[A] <> [B]"
1161 - id: rewrite-off
1162 rewrites: false
1163 mappings:
1164 - "[C] <> [D]"
1165 - id: rewrite-default
1166 mappings:
1167 - "[E] <> [F]"
1168`
1169 tmpfile, err := os.CreateTemp("", "config-rewrites-*.yaml")
1170 require.NoError(t, err)
1171 defer os.Remove(tmpfile.Name())
1172
1173 _, err = tmpfile.WriteString(content)
1174 require.NoError(t, err)
1175 require.NoError(t, tmpfile.Close())
1176
1177 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1178 require.NoError(t, err)
1179 require.Len(t, cfg.Lists, 3)
1180
Akronf7bba072026-05-21 12:36:19 +02001181 require.NotNil(t, cfg.Lists[0].Rewrites, "rewrites should be set when specified as true")
1182 assert.True(t, *cfg.Lists[0].Rewrites, "rewrites should be true when set to true")
1183 require.NotNil(t, cfg.Lists[1].Rewrites, "rewrites should be set when specified as false")
1184 assert.False(t, *cfg.Lists[1].Rewrites, "rewrites should be false when set to false")
1185 assert.Nil(t, cfg.Lists[2].Rewrites, "rewrites should be nil when not specified")
1186}
1187
1188func TestEffectiveRewrites(t *testing.T) {
1189 trueVal := true
1190 falseVal := false
1191
1192 tests := []struct {
1193 name string
1194 listRewrites *bool
1195 globalDefault bool
1196 expected bool
1197 }{
1198 {
1199 name: "nil per-list, global false",
1200 listRewrites: nil,
1201 globalDefault: false,
1202 expected: false,
1203 },
1204 {
1205 name: "nil per-list, global true",
1206 listRewrites: nil,
1207 globalDefault: true,
1208 expected: true,
1209 },
1210 {
1211 name: "per-list true, global false",
1212 listRewrites: &trueVal,
1213 globalDefault: false,
1214 expected: true,
1215 },
1216 {
1217 name: "per-list false, global true",
1218 listRewrites: &falseVal,
1219 globalDefault: true,
1220 expected: false,
1221 },
1222 }
1223
1224 for _, tt := range tests {
1225 t.Run(tt.name, func(t *testing.T) {
1226 list := &MappingList{
1227 ID: "test",
1228 Rewrites: tt.listRewrites,
1229 Mappings: []MappingRule{"[A] <> [B]"},
1230 }
1231 assert.Equal(t, tt.expected, list.EffectiveRewrites(tt.globalDefault))
1232 })
1233 }
1234}
1235
1236func TestGlobalRewritesYAMLField(t *testing.T) {
1237 content := `
1238rewrites: true
1239lists:
1240 - id: inherits-global
1241 mappings:
1242 - "[A] <> [B]"
1243 - id: overrides-global
1244 rewrites: false
1245 mappings:
1246 - "[C] <> [D]"
1247`
1248 tmpfile, err := os.CreateTemp("", "config-global-rewrites-*.yaml")
1249 require.NoError(t, err)
1250 defer os.Remove(tmpfile.Name())
1251
1252 _, err = tmpfile.WriteString(content)
1253 require.NoError(t, err)
1254 require.NoError(t, tmpfile.Close())
1255
1256 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1257 require.NoError(t, err)
1258
1259 assert.True(t, cfg.Rewrites, "global rewrites should be true")
1260
1261 assert.Nil(t, cfg.Lists[0].Rewrites, "per-list rewrites should be nil when not specified")
1262 assert.True(t, cfg.Lists[0].EffectiveRewrites(cfg.Rewrites),
1263 "list should inherit global rewrites=true")
1264
1265 require.NotNil(t, cfg.Lists[1].Rewrites)
1266 assert.False(t, *cfg.Lists[1].Rewrites,
1267 "per-list rewrites should be false when explicitly set")
1268 assert.False(t, cfg.Lists[1].EffectiveRewrites(cfg.Rewrites),
1269 "list should override global rewrites=true with per-list false")
1270}
1271
1272func TestGlobalRewritesDefaultFalse(t *testing.T) {
1273 content := `
1274lists:
1275 - id: test-mapper
1276 mappings:
1277 - "[A] <> [B]"
1278`
1279 tmpfile, err := os.CreateTemp("", "config-global-rewrites-default-*.yaml")
1280 require.NoError(t, err)
1281 defer os.Remove(tmpfile.Name())
1282
1283 _, err = tmpfile.WriteString(content)
1284 require.NoError(t, err)
1285 require.NoError(t, tmpfile.Close())
1286
1287 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1288 require.NoError(t, err)
1289
1290 assert.False(t, cfg.Rewrites, "global rewrites should default to false")
1291}
1292
1293func TestGlobalRewritesEnvOverride(t *testing.T) {
1294 t.Setenv("KORAL_MAPPER_REWRITES", "true")
1295
1296 content := `
1297lists:
1298 - id: test-mapper
1299 mappings:
1300 - "[A] <> [B]"
1301`
1302 tmpfile, err := os.CreateTemp("", "config-rewrites-env-*.yaml")
1303 require.NoError(t, err)
1304 defer os.Remove(tmpfile.Name())
1305
1306 _, err = tmpfile.WriteString(content)
1307 require.NoError(t, err)
1308 require.NoError(t, tmpfile.Close())
1309
1310 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1311 require.NoError(t, err)
1312
1313 assert.True(t, cfg.Rewrites,
1314 "KORAL_MAPPER_REWRITES=true env var should override default")
1315}
1316
1317func TestGlobalRewritesEnvOverridesYAML(t *testing.T) {
1318 t.Setenv("KORAL_MAPPER_REWRITES", "false")
1319
1320 content := `
1321rewrites: true
1322lists:
1323 - id: test-mapper
1324 mappings:
1325 - "[A] <> [B]"
1326`
1327 tmpfile, err := os.CreateTemp("", "config-rewrites-env-yaml-*.yaml")
1328 require.NoError(t, err)
1329 defer os.Remove(tmpfile.Name())
1330
1331 _, err = tmpfile.WriteString(content)
1332 require.NoError(t, err)
1333 require.NoError(t, tmpfile.Close())
1334
1335 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1336 require.NoError(t, err)
1337
1338 assert.False(t, cfg.Rewrites,
1339 "KORAL_MAPPER_REWRITES=false env var should override YAML rewrites=true")
Akron8414ae52026-05-19 13:31:14 +02001340}
1341
Akrona67de8f2026-02-23 17:54:26 +01001342func TestParseCorpusMappingsWithFieldAFieldB(t *testing.T) {
1343 list := &MappingList{
1344 ID: "test-keyed",
1345 Type: "corpus",
1346 FieldA: "wikiCat",
1347 FieldB: "textClass",
1348 Mappings: []MappingRule{
1349 "Entertainment <> ((kultur & musik) | (kultur & film))",
1350 },
1351 }
1352
1353 results, err := list.ParseCorpusMappings()
1354 require.NoError(t, err)
1355 require.Len(t, results, 1)
1356
1357 upper := results[0].Upper.(*parser.CorpusField)
1358 assert.Equal(t, "wikiCat", upper.Key)
1359 assert.Equal(t, "Entertainment", upper.Value)
1360
1361 group := results[0].Lower.(*parser.CorpusGroup)
1362 assert.Equal(t, "or", group.Operation)
1363 require.Len(t, group.Operands, 2)
1364
1365 and1 := group.Operands[0].(*parser.CorpusGroup)
1366 assert.Equal(t, "textClass", and1.Operands[0].(*parser.CorpusField).Key)
1367 assert.Equal(t, "kultur", and1.Operands[0].(*parser.CorpusField).Value)
1368 assert.Equal(t, "textClass", and1.Operands[1].(*parser.CorpusField).Key)
1369 assert.Equal(t, "musik", and1.Operands[1].(*parser.CorpusField).Value)
1370}
Akrone6767de2026-05-20 10:06:24 +02001371
1372func TestRateLimitConfigField(t *testing.T) {
1373 content := `
1374rateLimit: 50
1375lists:
1376 - id: test-mapper
1377 mappings:
1378 - "[A] <> [B]"
1379`
1380 tmpfile, err := os.CreateTemp("", "config-ratelimit-*.yaml")
1381 require.NoError(t, err)
1382 defer os.Remove(tmpfile.Name())
1383
1384 _, err = tmpfile.WriteString(content)
1385 require.NoError(t, err)
1386 require.NoError(t, tmpfile.Close())
1387
1388 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1389 require.NoError(t, err)
1390 assert.Equal(t, 50, cfg.RateLimit, "rateLimit should be loaded from YAML")
1391}
1392
1393func TestRateLimitDefaultApplied(t *testing.T) {
1394 cfg := &MappingConfig{}
1395 ApplyDefaults(cfg)
1396 assert.Equal(t, defaultRateLimit, cfg.RateLimit,
1397 "default rate limit should be applied when not specified")
1398}
1399
1400func TestRateLimitEnvOverride(t *testing.T) {
1401 t.Setenv("KORAL_MAPPER_RATE_LIMIT", "200")
1402
1403 content := `
1404rateLimit: 50
1405lists:
1406 - id: test-mapper
1407 mappings:
1408 - "[A] <> [B]"
1409`
1410 tmpfile, err := os.CreateTemp("", "config-ratelimit-env-*.yaml")
1411 require.NoError(t, err)
1412 defer os.Remove(tmpfile.Name())
1413
1414 _, err = tmpfile.WriteString(content)
1415 require.NoError(t, err)
1416 require.NoError(t, tmpfile.Close())
1417
1418 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1419 require.NoError(t, err)
1420 assert.Equal(t, 200, cfg.RateLimit,
1421 "KORAL_MAPPER_RATE_LIMIT env var should override YAML value")
1422}
Akroned787d02026-05-20 12:31:07 +02001423
Akronf1ca8822026-05-20 15:44:00 +02001424func TestAllowOriginsDefault(t *testing.T) {
1425 cfg := &MappingConfig{}
1426 ApplyDefaults(cfg)
1427 // AllowOrigins should derive from the Server default (trailing slash stripped)
1428 assert.Equal(t, "https://korap.ids-mannheim.de", cfg.AllowOrigins,
1429 "default AllowOrigins should derive from defaultServer")
1430}
1431
1432func TestAllowOriginsDerivedFromCustomServer(t *testing.T) {
1433 cfg := &MappingConfig{
1434 Server: "https://custom.example.com/",
1435 }
1436 ApplyDefaults(cfg)
1437 assert.Equal(t, "https://custom.example.com", cfg.AllowOrigins,
1438 "AllowOrigins should derive from the configured Server (trailing slash stripped)")
1439}
1440
1441func TestAllowOriginsExplicitNotOverriddenByServer(t *testing.T) {
1442 cfg := &MappingConfig{
1443 Server: "https://custom.example.com/",
1444 AllowOrigins: "https://explicit-origin.example.com",
1445 }
1446 ApplyDefaults(cfg)
1447 assert.Equal(t, "https://explicit-origin.example.com", cfg.AllowOrigins,
1448 "explicit AllowOrigins should not be overridden by Server default")
1449}
1450
1451func TestAllowOriginsFromYAML(t *testing.T) {
1452 content := `
1453allowOrigins: "https://custom.example.com,https://other.example.com"
1454lists:
1455 - id: test-mapper
1456 mappings:
1457 - "[A] <> [B]"
1458`
1459 tmpfile, err := os.CreateTemp("", "config-cors-*.yaml")
1460 require.NoError(t, err)
1461 defer os.Remove(tmpfile.Name())
1462
1463 _, err = tmpfile.WriteString(content)
1464 require.NoError(t, err)
1465 require.NoError(t, tmpfile.Close())
1466
1467 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1468 require.NoError(t, err)
1469 assert.Equal(t, "https://custom.example.com,https://other.example.com",
1470 cfg.AllowOrigins)
1471}
1472
1473func TestAllowOriginsEnvOverride(t *testing.T) {
1474 t.Setenv("KORAL_MAPPER_ALLOW_ORIGINS", "https://env-origin.example.com")
1475
1476 content := `
1477allowOrigins: "https://yaml-origin.example.com"
1478lists:
1479 - id: test-mapper
1480 mappings:
1481 - "[A] <> [B]"
1482`
1483 tmpfile, err := os.CreateTemp("", "config-cors-env-*.yaml")
1484 require.NoError(t, err)
1485 defer os.Remove(tmpfile.Name())
1486
1487 _, err = tmpfile.WriteString(content)
1488 require.NoError(t, err)
1489 require.NoError(t, tmpfile.Close())
1490
1491 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1492 require.NoError(t, err)
1493 assert.Equal(t, "https://env-origin.example.com", cfg.AllowOrigins,
1494 "KORAL_MAPPER_ALLOW_ORIGINS env var should override YAML value")
1495}
1496
Akroned787d02026-05-20 12:31:07 +02001497func TestSanitizeFilePathRejectsOutsideBase(t *testing.T) {
1498 // Set base to a specific directory and verify paths outside are rejected
1499 tmpDir, err := os.MkdirTemp("", "koral-base-*")
1500 require.NoError(t, err)
1501 defer os.RemoveAll(tmpDir)
1502
1503 origBase := AllowedBasePath
1504 defer func() { AllowedBasePath = origBase }()
1505 AllowedBasePath = tmpDir
1506
1507 tests := []struct {
1508 name string
1509 input string
1510 wantErr bool
1511 }{
1512 {
1513 name: "Path within base is accepted",
1514 input: filepath.Join(tmpDir, "config.yaml"),
1515 wantErr: false,
1516 },
1517 {
1518 name: "Path outside base is rejected",
1519 input: "/etc/passwd",
1520 wantErr: true,
1521 },
1522 {
1523 name: "Traversal escaping base and tmp is rejected",
1524 input: "/etc/passwd",
1525 wantErr: true,
1526 },
1527 {
1528 name: "Empty path is rejected",
1529 input: "",
1530 wantErr: true,
1531 },
1532 {
1533 name: "Subdirectory within base is accepted",
1534 input: filepath.Join(tmpDir, "sub", "dir", "file.yaml"),
1535 wantErr: false,
1536 },
1537 {
1538 name: "Relative path within base is rejected when CWD differs",
1539 input: "config.yaml",
1540 wantErr: true, // resolves against CWD, not base
1541 },
1542 }
1543
1544 for _, tt := range tests {
1545 t.Run(tt.name, func(t *testing.T) {
1546 result, err := sanitizeFilePath(tt.input)
1547 if tt.wantErr {
1548 require.Error(t, err)
1549 return
1550 }
1551 require.NoError(t, err)
1552 assert.True(t, filepath.IsAbs(result),
1553 "sanitized path should be absolute, got: %s", result)
1554 assert.NotContains(t, result, "..")
1555 })
1556 }
1557}
1558
1559func TestSanitizeFilePathTraversalToPasswd(t *testing.T) {
1560 // Verify /etc/passwd cannot be accessed via traversal
1561 cwd, err := os.Getwd()
1562 require.NoError(t, err)
1563
1564 origBase := AllowedBasePath
1565 defer func() { AllowedBasePath = origBase }()
1566 AllowedBasePath = cwd
1567
1568 _, err = sanitizeFilePath("../../../etc/passwd")
1569 require.Error(t, err)
1570 assert.Contains(t, err.Error(), "path traversal detected")
1571}
1572
1573func TestSanitizeFilePathWithDockerRoot(t *testing.T) {
1574 // In Docker the WORKDIR is "/" -- all absolute paths should be valid
1575 origBase := AllowedBasePath
1576 defer func() { AllowedBasePath = origBase }()
1577 AllowedBasePath = "/"
1578
1579 result, err := sanitizeFilePath("/mappings/stts-upos.yaml")
1580 require.NoError(t, err)
1581 assert.Equal(t, "/mappings/stts-upos.yaml", result)
1582
1583 // Even deeply nested paths work when base is /
1584 result, err = sanitizeFilePath("/etc/ssl/certs/ca-certificates.crt")
1585 require.NoError(t, err)
1586 assert.Equal(t, "/etc/ssl/certs/ca-certificates.crt", result)
1587}
1588
1589func TestSanitizeFilePathPrefixFalsePositive(t *testing.T) {
1590 // Ensure /home/user does not match /home/username
1591 origBase := AllowedBasePath
1592 defer func() { AllowedBasePath = origBase }()
1593 AllowedBasePath = "/home/user"
1594
1595 _, err := sanitizeFilePath("/home/username/secret.yaml")
1596 require.Error(t, err)
1597 assert.Contains(t, err.Error(), "path traversal detected")
1598}
1599
1600func TestLoadFromSourcesRejectsTraversal(t *testing.T) {
1601 origBase := AllowedBasePath
1602 defer func() { AllowedBasePath = origBase }()
1603
1604 cwd, err := os.Getwd()
1605 require.NoError(t, err)
1606 AllowedBasePath = cwd
1607
1608 // Config file traversal should be rejected
1609 _, err = LoadFromSources("../../../etc/passwd", nil)
1610 require.Error(t, err)
1611 assert.Contains(t, err.Error(), "path traversal detected")
1612
1613 // Mapping file traversal should be rejected
1614 _, err = LoadFromSources("", []string{"../../../etc/passwd"})
1615 require.Error(t, err)
1616 assert.Contains(t, err.Error(), "path traversal detected")
1617}
1618
1619func TestValidPathsStillWork(t *testing.T) {
1620 content := `
1621id: test-mapper
1622mappings:
1623 - "[A] <> [B]"
1624`
1625 tmpDir, err := os.MkdirTemp("", "koral-test-*")
1626 require.NoError(t, err)
1627 defer os.RemoveAll(tmpDir)
1628
1629 origBase := AllowedBasePath
1630 defer func() { AllowedBasePath = origBase }()
1631 AllowedBasePath = tmpDir
1632
1633 subDir := filepath.Join(tmpDir, "subdir")
1634 require.NoError(t, os.Mkdir(subDir, 0755))
1635
1636 tmpfile, err := os.CreateTemp(subDir, "mapping-*.yaml")
1637 require.NoError(t, err)
1638
1639 _, err = tmpfile.WriteString(content)
1640 require.NoError(t, err)
1641 require.NoError(t, tmpfile.Close())
1642
1643 cfg, err := LoadFromSources("", []string{tmpfile.Name()})
1644 require.NoError(t, err)
1645 require.Len(t, cfg.Lists, 1)
1646 assert.Equal(t, "test-mapper", cfg.Lists[0].ID)
1647}
1648
1649func TestRelativePathWithTraversalWithinBase(t *testing.T) {
1650 // Paths with ".." that still resolve within the base should work
1651 content := `
1652id: traversal-test-mapper
1653mappings:
1654 - "[A] <> [B]"
1655`
1656 tmpDir, err := os.MkdirTemp("", "koral-traversal-*")
1657 require.NoError(t, err)
1658 defer os.RemoveAll(tmpDir)
1659
1660 origBase := AllowedBasePath
1661 defer func() { AllowedBasePath = origBase }()
1662 AllowedBasePath = tmpDir
1663
1664 // Create file at tmpDir/config.yaml
1665 configPath := filepath.Join(tmpDir, "config.yaml")
1666 require.NoError(t, os.WriteFile(configPath, []byte(content), 0644))
1667
1668 // Reference via a traversal path: tmpDir/subdir/../config.yaml
1669 // This resolves to tmpDir/config.yaml which is within the base
1670 subDir := filepath.Join(tmpDir, "subdir")
1671 require.NoError(t, os.Mkdir(subDir, 0755))
1672 traversalPath := filepath.Join(subDir, "..", "config.yaml")
1673
1674 cfg, err := LoadFromSources("", []string{traversalPath})
1675 require.NoError(t, err)
1676 require.Len(t, cfg.Lists, 1)
1677 assert.Equal(t, "traversal-test-mapper", cfg.Lists[0].ID)
1678}