blob: bac6df039faf21c8a831799ce98da17813c6ed4c [file] [log] [blame]
Akron57ee5582025-05-21 15:25:13 +02001package config
2
3import (
Akron7e8da932025-07-01 11:56:46 +02004 "bytes"
Akron57ee5582025-05-21 15:25:13 +02005 "os"
Akroned787d02026-05-20 12:31:07 +02006 "path/filepath"
Akron57ee5582025-05-21 15:25:13 +02007 "testing"
8
Akron2ef703c2025-07-03 15:57:42 +02009 "github.com/KorAP/Koral-Mapper/ast"
Akrona67de8f2026-02-23 17:54:26 +010010 "github.com/KorAP/Koral-Mapper/parser"
Akron7e8da932025-07-01 11:56:46 +020011 "github.com/rs/zerolog/log"
Akron57ee5582025-05-21 15:25:13 +020012 "github.com/stretchr/testify/assert"
13 "github.com/stretchr/testify/require"
14)
15
16func TestLoadConfig(t *testing.T) {
17 // Create a temporary YAML file
18 content := `
19- id: opennlp-mapper
20 foundryA: opennlp
21 layerA: p
22 foundryB: upos
23 layerB: p
24 mappings:
25 - "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]"
26 - "[PAV] <> [ADV & PronType:Dem]"
27
28- id: simple-mapper
29 mappings:
30 - "[A] <> [B]"
31`
32 tmpfile, err := os.CreateTemp("", "config-*.yaml")
33 require.NoError(t, err)
34 defer os.Remove(tmpfile.Name())
35
36 _, err = tmpfile.WriteString(content)
37 require.NoError(t, err)
38 err = tmpfile.Close()
39 require.NoError(t, err)
40
41 // Test loading the configuration
Akron585f50f2025-07-03 13:55:47 +020042 config, err := LoadFromSources(tmpfile.Name(), nil)
Akron57ee5582025-05-21 15:25:13 +020043 require.NoError(t, err)
44
45 // Verify the configuration
46 require.Len(t, config.Lists, 2)
47
48 // Check first mapping list
49 list1 := config.Lists[0]
50 assert.Equal(t, "opennlp-mapper", list1.ID)
51 assert.Equal(t, "opennlp", list1.FoundryA)
52 assert.Equal(t, "p", list1.LayerA)
53 assert.Equal(t, "upos", list1.FoundryB)
54 assert.Equal(t, "p", list1.LayerB)
55 require.Len(t, list1.Mappings, 2)
56 assert.Equal(t, "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]", string(list1.Mappings[0]))
57 assert.Equal(t, "[PAV] <> [ADV & PronType:Dem]", string(list1.Mappings[1]))
58
59 // Check second mapping list
60 list2 := config.Lists[1]
61 assert.Equal(t, "simple-mapper", list2.ID)
62 assert.Empty(t, list2.FoundryA)
63 assert.Empty(t, list2.LayerA)
64 assert.Empty(t, list2.FoundryB)
65 assert.Empty(t, list2.LayerB)
66 require.Len(t, list2.Mappings, 1)
67 assert.Equal(t, "[A] <> [B]", string(list2.Mappings[0]))
68}
69
70func TestParseMappings(t *testing.T) {
71 list := &MappingList{
72 ID: "test-mapper",
73 FoundryA: "opennlp",
74 LayerA: "p",
75 FoundryB: "upos",
76 LayerB: "p",
77 Mappings: []MappingRule{
78 "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
79 },
80 }
81
82 results, err := list.ParseMappings()
83 require.NoError(t, err)
84 require.Len(t, results, 1)
85
86 // Check the parsed upper pattern
87 upper := results[0].Upper
88 require.NotNil(t, upper)
89 require.IsType(t, &ast.Token{}, upper)
90 upperTerm := upper.Wrap.(*ast.Term)
91 assert.Equal(t, "opennlp", upperTerm.Foundry)
92 assert.Equal(t, "p", upperTerm.Layer)
93 assert.Equal(t, "PIDAT", upperTerm.Key)
94
95 // Check the parsed lower pattern
96 lower := results[0].Lower
97 require.NotNil(t, lower)
98 require.IsType(t, &ast.Token{}, lower)
99 lowerGroup := lower.Wrap.(*ast.TermGroup)
100 require.Len(t, lowerGroup.Operands, 2)
101 assert.Equal(t, ast.AndRelation, lowerGroup.Relation)
102
103 // Check first operand
104 term1 := lowerGroup.Operands[0].(*ast.Term)
105 assert.Equal(t, "opennlp", term1.Foundry)
106 assert.Equal(t, "p", term1.Layer)
107 assert.Equal(t, "PIDAT", term1.Key)
108
109 // Check second operand
110 term2 := lowerGroup.Operands[1].(*ast.Term)
111 assert.Equal(t, "opennlp", term2.Foundry)
112 assert.Equal(t, "p", term2.Layer)
113 assert.Equal(t, "AdjType", term2.Key)
114 assert.Equal(t, "Pdt", term2.Value)
115}
116
117func TestLoadConfigValidation(t *testing.T) {
118 tests := []struct {
119 name string
120 content string
121 wantErr string
122 }{
123 {
124 name: "Missing ID",
125 content: `
126- foundryA: opennlp
127 mappings:
128 - "[A] <> [B]"
129`,
130 wantErr: "mapping list at index 0 is missing an ID",
131 },
132 {
133 name: "Empty mappings",
134 content: `
135- id: test
136 foundryA: opennlp
137 mappings: []
138`,
139 wantErr: "mapping list 'test' has no mapping rules",
140 },
141 {
142 name: "Empty rule",
143 content: `
144- id: test
145 mappings:
146 - ""
147`,
148 wantErr: "mapping list 'test' rule at index 0 is empty",
149 },
150 }
151
152 for _, tt := range tests {
153 t.Run(tt.name, func(t *testing.T) {
154 tmpfile, err := os.CreateTemp("", "config-*.yaml")
155 require.NoError(t, err)
156 defer os.Remove(tmpfile.Name())
157
158 _, err = tmpfile.WriteString(tt.content)
159 require.NoError(t, err)
160 err = tmpfile.Close()
161 require.NoError(t, err)
162
Akron585f50f2025-07-03 13:55:47 +0200163 _, err = LoadFromSources(tmpfile.Name(), nil)
Akron57ee5582025-05-21 15:25:13 +0200164 require.Error(t, err)
165 assert.Contains(t, err.Error(), tt.wantErr)
166 })
167 }
168}
Akrona5d88142025-05-22 14:42:09 +0200169
170func TestLoadConfigEdgeCases(t *testing.T) {
171 tests := []struct {
172 name string
173 content string
174 wantErr string
175 }{
176 {
177 name: "Duplicate mapping list IDs",
178 content: `
179- id: test
180 mappings:
181 - "[A] <> [B]"
182- id: test
183 mappings:
184 - "[C] <> [D]"`,
185 wantErr: "duplicate mapping list ID found: test",
186 },
187 {
188 name: "Invalid YAML syntax",
189 content: `
190- id: test
191 mappings:
192 - [A] <> [B] # Unquoted special characters
193`,
194 wantErr: "yaml",
195 },
196 {
197 name: "Empty file",
198 content: "",
199 wantErr: "EOF",
200 },
201 {
202 name: "Non-list YAML",
203 content: `
204id: test
205mappings:
206 - "[A] <> [B]"`,
Akron813780f2025-06-05 15:44:28 +0200207 wantErr: "no mapping lists found",
Akrona5d88142025-05-22 14:42:09 +0200208 },
209 {
210 name: "Missing required fields",
211 content: `
212- mappings:
213 - "[A] <> [B]"
214- id: test2
215 foundryA: opennlp`,
216 wantErr: "missing an ID",
217 },
218 {
219 name: "Empty mappings list",
220 content: `
221- id: test
222 foundryA: opennlp
223 mappings: []`,
224 wantErr: "has no mapping rules",
225 },
226 {
227 name: "Null values in optional fields",
228 content: `
229- id: test
230 foundryA: null
231 layerA: null
232 foundryB: null
233 layerB: null
234 mappings:
235 - "[A] <> [B]"`,
236 wantErr: "",
237 },
238 {
239 name: "Special characters in IDs",
240 content: `
241- id: "test/special@chars#1"
242 mappings:
243 - "[A] <> [B]"`,
244 wantErr: "",
245 },
246 {
247 name: "Unicode characters in mappings",
248 content: `
249- id: test
250 mappings:
251 - "[ß] <> [ss]"
252 - "[é] <> [e]"`,
253 wantErr: "",
254 },
255 }
256
257 for _, tt := range tests {
258 t.Run(tt.name, func(t *testing.T) {
259 tmpfile, err := os.CreateTemp("", "config-*.yaml")
260 require.NoError(t, err)
261 defer os.Remove(tmpfile.Name())
262
263 _, err = tmpfile.WriteString(tt.content)
264 require.NoError(t, err)
265 err = tmpfile.Close()
266 require.NoError(t, err)
267
Akron585f50f2025-07-03 13:55:47 +0200268 config, err := LoadFromSources(tmpfile.Name(), nil)
Akrona5d88142025-05-22 14:42:09 +0200269 if tt.wantErr != "" {
270 require.Error(t, err)
271 assert.Contains(t, err.Error(), tt.wantErr)
272 return
273 }
274 require.NoError(t, err)
275 require.NotNil(t, config)
276 })
277 }
278}
279
280func TestParseMappingsEdgeCases(t *testing.T) {
281 tests := []struct {
282 name string
283 list *MappingList
284 wantErr bool
285 errCheck func(t *testing.T, err error)
286 }{
287 {
288 name: "Empty mapping rule",
289 list: &MappingList{
290 ID: "test",
291 Mappings: []MappingRule{""},
292 },
293 wantErr: true,
294 errCheck: func(t *testing.T, err error) {
295 assert.Contains(t, err.Error(), "empty")
296 },
297 },
298 {
299 name: "Invalid mapping syntax",
300 list: &MappingList{
301 ID: "test",
302 Mappings: []MappingRule{"[A] -> [B]"},
303 },
304 wantErr: true,
305 errCheck: func(t *testing.T, err error) {
306 assert.Contains(t, err.Error(), "failed to parse")
307 },
308 },
309 {
310 name: "Missing brackets",
311 list: &MappingList{
312 ID: "test",
313 Mappings: []MappingRule{"A <> B"},
314 },
315 wantErr: true,
316 errCheck: func(t *testing.T, err error) {
317 assert.Contains(t, err.Error(), "failed to parse")
318 },
319 },
320 {
321 name: "Complex nested expressions",
322 list: &MappingList{
323 ID: "test",
324 Mappings: []MappingRule{
325 "[A & (B | C) & (D | (E & F))] <> [X & (Y | Z)]",
326 },
327 },
328 wantErr: false,
329 },
330 {
331 name: "Multiple foundry/layer combinations",
332 list: &MappingList{
333 ID: "test",
334 Mappings: []MappingRule{
335 "[foundry1/layer1=A & foundry2/layer2=B] <> [foundry3/layer3=C]",
336 },
337 },
338 wantErr: false,
339 },
340 {
341 name: "Default foundry/layer override",
342 list: &MappingList{
343 ID: "test",
344 FoundryA: "defaultFoundry",
345 LayerA: "defaultLayer",
346 Mappings: []MappingRule{
347 "[A] <> [B]", // Should use defaults
348 },
349 },
350 wantErr: false,
351 },
352 }
353
354 for _, tt := range tests {
355 t.Run(tt.name, func(t *testing.T) {
356 results, err := tt.list.ParseMappings()
357 if tt.wantErr {
358 require.Error(t, err)
359 if tt.errCheck != nil {
360 tt.errCheck(t, err)
361 }
362 return
363 }
364 require.NoError(t, err)
365 require.NotNil(t, results)
366 })
367 }
368}
Akroncc25e932025-06-02 19:39:43 +0200369
370func TestUserProvidedMappingRules(t *testing.T) {
371 // Test the exact YAML mapping rules provided by the user
372 content := `
373- id: stts-ud
374 foundryA: opennlp
375 layerA: p
376 foundryB: upos
377 layerB: p
378 mappings:
379 - "[$\\(] <> [PUNCT & PunctType=Brck]"
380 - "[$,] <> [PUNCT & PunctType=Comm]"
381 - "[$.] <> [PUNCT & PunctType=Peri]"
382 - "[ADJA] <> [ADJ]"
383 - "[ADJD] <> [ADJ & Variant=Short]"
384 - "[ADV] <> [ADV]"
385`
386 tmpfile, err := os.CreateTemp("", "user-config-*.yaml")
387 require.NoError(t, err)
388 defer os.Remove(tmpfile.Name())
389
390 _, err = tmpfile.WriteString(content)
391 require.NoError(t, err)
392 err = tmpfile.Close()
393 require.NoError(t, err)
394
395 // Test loading the configuration
Akron585f50f2025-07-03 13:55:47 +0200396 config, err := LoadFromSources(tmpfile.Name(), nil)
Akroncc25e932025-06-02 19:39:43 +0200397 require.NoError(t, err)
398
399 // Verify the configuration loaded correctly
400 require.Len(t, config.Lists, 1)
401 list := config.Lists[0]
402 assert.Equal(t, "stts-ud", list.ID)
403 assert.Equal(t, "opennlp", list.FoundryA)
404 assert.Equal(t, "p", list.LayerA)
405 assert.Equal(t, "upos", list.FoundryB)
406 assert.Equal(t, "p", list.LayerB)
407 require.Len(t, list.Mappings, 6)
408
409 // First, test individual mappings to isolate the issue
410 t.Run("parenthesis mapping", func(t *testing.T) {
411 singleRule := &MappingList{
412 ID: "test-paren",
413 FoundryA: "opennlp",
414 LayerA: "p",
415 FoundryB: "upos",
416 LayerB: "p",
417 Mappings: []MappingRule{"[$\\(] <> [PUNCT & PunctType=Brck]"},
418 }
419 results, err := singleRule.ParseMappings()
420 require.NoError(t, err)
421 require.Len(t, results, 1)
422
423 upperTerm := results[0].Upper.Wrap.(*ast.Term)
424 assert.Equal(t, "$(", upperTerm.Key)
425 })
426
427 t.Run("comma mapping", func(t *testing.T) {
428 singleRule := &MappingList{
429 ID: "test-comma",
430 FoundryA: "opennlp",
431 LayerA: "p",
432 FoundryB: "upos",
433 LayerB: "p",
434 Mappings: []MappingRule{"[$,] <> [PUNCT & PunctType=Comm]"},
435 }
436 results, err := singleRule.ParseMappings()
437 require.NoError(t, err)
438 require.Len(t, results, 1)
439
440 upperTerm := results[0].Upper.Wrap.(*ast.Term)
441 assert.Equal(t, "$,", upperTerm.Key)
442 })
443
444 t.Run("period mapping", func(t *testing.T) {
445 singleRule := &MappingList{
446 ID: "test-period",
447 FoundryA: "opennlp",
448 LayerA: "p",
449 FoundryB: "upos",
450 LayerB: "p",
451 Mappings: []MappingRule{"[$.] <> [PUNCT & PunctType=Peri]"},
452 }
453 results, err := singleRule.ParseMappings()
454 require.NoError(t, err)
455 require.Len(t, results, 1)
456
457 upperTerm := results[0].Upper.Wrap.(*ast.Term)
458 assert.Equal(t, "$.", upperTerm.Key)
459 })
460
461 // Test that all mapping rules can be parsed successfully
462 results, err := list.ParseMappings()
463 require.NoError(t, err)
464 require.Len(t, results, 6)
465
466 // Verify specific parsing of the special character mapping
467 // The first mapping "[$\\(] <> [PUNCT & PunctType=Brck]" should parse correctly
468 firstMapping := results[0]
469 require.NotNil(t, firstMapping.Upper)
470 upperTerm := firstMapping.Upper.Wrap.(*ast.Term)
471 assert.Equal(t, "$(", upperTerm.Key) // The actual parsed key should be "$("
472 assert.Equal(t, "opennlp", upperTerm.Foundry)
473 assert.Equal(t, "p", upperTerm.Layer)
474
475 require.NotNil(t, firstMapping.Lower)
476 lowerGroup := firstMapping.Lower.Wrap.(*ast.TermGroup)
477 require.Len(t, lowerGroup.Operands, 2)
478 assert.Equal(t, ast.AndRelation, lowerGroup.Relation)
479
480 // Check the PUNCT term
481 punctTerm := lowerGroup.Operands[0].(*ast.Term)
482 assert.Equal(t, "PUNCT", punctTerm.Key)
483 assert.Equal(t, "upos", punctTerm.Foundry)
484 assert.Equal(t, "p", punctTerm.Layer)
485
486 // Check the PunctType term
487 punctTypeTerm := lowerGroup.Operands[1].(*ast.Term)
488 assert.Equal(t, "PunctType", punctTypeTerm.Layer)
489 assert.Equal(t, "Brck", punctTypeTerm.Key)
490 assert.Equal(t, "upos", punctTypeTerm.Foundry)
491
492 // Verify the comma mapping as well
493 secondMapping := results[1]
494 upperTerm2 := secondMapping.Upper.Wrap.(*ast.Term)
495 assert.Equal(t, "$,", upperTerm2.Key)
496
497 // Verify the period mapping
498 thirdMapping := results[2]
499 upperTerm3 := thirdMapping.Upper.Wrap.(*ast.Term)
500 assert.Equal(t, "$.", upperTerm3.Key)
501
502 // Verify basic mappings without special characters
503 fourthMapping := results[3]
504 upperTerm4 := fourthMapping.Upper.Wrap.(*ast.Term)
505 assert.Equal(t, "ADJA", upperTerm4.Key)
506 lowerTerm4 := fourthMapping.Lower.Wrap.(*ast.Term)
507 assert.Equal(t, "ADJ", lowerTerm4.Key)
508}
509
Akron06d21f02025-06-04 14:36:07 +0200510func TestConfigWithSdkAndServer(t *testing.T) {
511 tests := []struct {
512 name string
513 content string
514 expectedSDK string
515 expectedServer string
516 wantErr bool
517 }{
518 {
519 name: "Configuration with SDK and Server values",
520 content: `
521sdk: "https://custom.example.com/sdk.js"
522server: "https://custom.example.com/"
523lists:
524- id: test-mapper
525 foundryA: opennlp
526 layerA: p
527 foundryB: upos
528 layerB: p
529 mappings:
530 - "[A] <> [B]"
531`,
532 expectedSDK: "https://custom.example.com/sdk.js",
533 expectedServer: "https://custom.example.com/",
534 wantErr: false,
535 },
536 {
537 name: "Configuration with only SDK value",
538 content: `
539sdk: "https://custom.example.com/sdk.js"
540lists:
541- id: test-mapper
542 mappings:
543 - "[A] <> [B]"
544`,
545 expectedSDK: "https://custom.example.com/sdk.js",
546 expectedServer: "https://korap.ids-mannheim.de/", // default applied
547 wantErr: false,
548 },
549 {
550 name: "Configuration with only Server value",
551 content: `
552server: "https://custom.example.com/"
553lists:
554- id: test-mapper
555 mappings:
556 - "[A] <> [B]"
557`,
558 expectedSDK: "https://korap.ids-mannheim.de/js/korap-plugin-latest.js", // default applied
559 expectedServer: "https://custom.example.com/",
560 wantErr: false,
561 },
562 {
563 name: "Configuration without SDK and Server (old format with defaults applied)",
564 content: `
565- id: test-mapper
566 mappings:
567 - "[A] <> [B]"
568`,
569 expectedSDK: "https://korap.ids-mannheim.de/js/korap-plugin-latest.js", // default applied
570 expectedServer: "https://korap.ids-mannheim.de/", // default applied
571 wantErr: false,
572 },
573 {
574 name: "Configuration with lists field explicitly",
575 content: `
576sdk: "https://custom.example.com/sdk.js"
577server: "https://custom.example.com/"
578lists:
579- id: test-mapper-1
580 mappings:
581 - "[A] <> [B]"
582- id: test-mapper-2
583 mappings:
584 - "[C] <> [D]"
585`,
586 expectedSDK: "https://custom.example.com/sdk.js",
587 expectedServer: "https://custom.example.com/",
588 wantErr: false,
589 },
590 }
591
592 for _, tt := range tests {
593 t.Run(tt.name, func(t *testing.T) {
594 tmpfile, err := os.CreateTemp("", "config-*.yaml")
595 require.NoError(t, err)
596 defer os.Remove(tmpfile.Name())
597
598 _, err = tmpfile.WriteString(tt.content)
599 require.NoError(t, err)
600 err = tmpfile.Close()
601 require.NoError(t, err)
602
Akron585f50f2025-07-03 13:55:47 +0200603 config, err := LoadFromSources(tmpfile.Name(), nil)
Akron06d21f02025-06-04 14:36:07 +0200604 if tt.wantErr {
605 require.Error(t, err)
606 return
607 }
608
609 require.NoError(t, err)
610 require.NotNil(t, config)
611
612 // Check SDK and Server values
613 assert.Equal(t, tt.expectedSDK, config.SDK)
614 assert.Equal(t, tt.expectedServer, config.Server)
615
616 // Ensure lists are still loaded correctly
617 require.Greater(t, len(config.Lists), 0)
618
619 // Verify first mapping list
620 firstList := config.Lists[0]
621 assert.NotEmpty(t, firstList.ID)
622 assert.Greater(t, len(firstList.Mappings), 0)
623 })
624 }
625}
Akrone1cff7c2025-06-04 18:43:32 +0200626
627func TestLoadFromSources(t *testing.T) {
628 // Create main config file
629 mainConfigContent := `
630sdk: "https://custom.example.com/sdk.js"
631server: "https://custom.example.com/"
632lists:
633- id: main-mapper
634 mappings:
635 - "[A] <> [B]"
636`
637 mainConfigFile, err := os.CreateTemp("", "main-config-*.yaml")
638 require.NoError(t, err)
639 defer os.Remove(mainConfigFile.Name())
640
641 _, err = mainConfigFile.WriteString(mainConfigContent)
642 require.NoError(t, err)
643 err = mainConfigFile.Close()
644 require.NoError(t, err)
645
646 // Create individual mapping files
647 mappingFile1Content := `
648id: mapper-1
649foundryA: opennlp
650layerA: p
651mappings:
652 - "[C] <> [D]"
653`
654 mappingFile1, err := os.CreateTemp("", "mapping1-*.yaml")
655 require.NoError(t, err)
656 defer os.Remove(mappingFile1.Name())
657
658 _, err = mappingFile1.WriteString(mappingFile1Content)
659 require.NoError(t, err)
660 err = mappingFile1.Close()
661 require.NoError(t, err)
662
663 mappingFile2Content := `
664id: mapper-2
665foundryB: upos
666layerB: p
667mappings:
668 - "[E] <> [F]"
669`
670 mappingFile2, err := os.CreateTemp("", "mapping2-*.yaml")
671 require.NoError(t, err)
672 defer os.Remove(mappingFile2.Name())
673
674 _, err = mappingFile2.WriteString(mappingFile2Content)
675 require.NoError(t, err)
676 err = mappingFile2.Close()
677 require.NoError(t, err)
678
679 tests := []struct {
680 name string
681 configFile string
682 mappingFiles []string
683 wantErr bool
684 expectedIDs []string
685 }{
686 {
687 name: "Main config only",
688 configFile: mainConfigFile.Name(),
689 mappingFiles: []string{},
690 wantErr: false,
691 expectedIDs: []string{"main-mapper"},
692 },
693 {
694 name: "Mapping files only",
695 configFile: "",
696 mappingFiles: []string{mappingFile1.Name(), mappingFile2.Name()},
697 wantErr: false,
698 expectedIDs: []string{"mapper-1", "mapper-2"},
699 },
700 {
701 name: "Main config and mapping files",
702 configFile: mainConfigFile.Name(),
703 mappingFiles: []string{mappingFile1.Name(), mappingFile2.Name()},
704 wantErr: false,
705 expectedIDs: []string{"main-mapper", "mapper-1", "mapper-2"},
706 },
707 {
708 name: "No configuration sources",
709 configFile: "",
710 mappingFiles: []string{},
711 wantErr: true,
712 },
713 }
714
715 for _, tt := range tests {
716 t.Run(tt.name, func(t *testing.T) {
717 config, err := LoadFromSources(tt.configFile, tt.mappingFiles)
718 if tt.wantErr {
719 require.Error(t, err)
720 return
721 }
722
723 require.NoError(t, err)
724 require.NotNil(t, config)
725
726 // Check that all expected mapping IDs are present
727 require.Len(t, config.Lists, len(tt.expectedIDs))
728 actualIDs := make([]string, len(config.Lists))
729 for i, list := range config.Lists {
730 actualIDs[i] = list.ID
731 }
732 for _, expectedID := range tt.expectedIDs {
733 assert.Contains(t, actualIDs, expectedID)
734 }
735
736 // Check that SDK and Server are set (either from config or defaults)
737 assert.NotEmpty(t, config.SDK)
Akron43fb1022026-02-20 11:38:49 +0100738 assert.NotEmpty(t, config.Stylesheet)
Akrone1cff7c2025-06-04 18:43:32 +0200739 assert.NotEmpty(t, config.Server)
740 })
741 }
742}
743
744func TestLoadFromSourcesWithDefaults(t *testing.T) {
745 // Test that defaults are applied when loading only mapping files
746 mappingFileContent := `
747id: test-mapper
748mappings:
749 - "[A] <> [B]"
750`
751 mappingFile, err := os.CreateTemp("", "mapping-*.yaml")
752 require.NoError(t, err)
753 defer os.Remove(mappingFile.Name())
754
755 _, err = mappingFile.WriteString(mappingFileContent)
756 require.NoError(t, err)
757 err = mappingFile.Close()
758 require.NoError(t, err)
759
760 config, err := LoadFromSources("", []string{mappingFile.Name()})
761 require.NoError(t, err)
762
763 // Check that defaults are applied
764 assert.Equal(t, defaultSDK, config.SDK)
Akron43fb1022026-02-20 11:38:49 +0100765 assert.Equal(t, defaultStylesheet, config.Stylesheet)
Akrone1cff7c2025-06-04 18:43:32 +0200766 assert.Equal(t, defaultServer, config.Server)
767 require.Len(t, config.Lists, 1)
768 assert.Equal(t, "test-mapper", config.Lists[0].ID)
769}
770
771func TestLoadFromSourcesDuplicateIDs(t *testing.T) {
Akron7e8da932025-07-01 11:56:46 +0200772 // Set up a buffer to capture log output
773 var buf bytes.Buffer
774 originalLogger := log.Logger
775 defer func() {
776 log.Logger = originalLogger
777 }()
778 log.Logger = log.Logger.Output(&buf)
779
Akrone1cff7c2025-06-04 18:43:32 +0200780 // Create config with duplicate IDs across sources
781 configContent := `
782lists:
783- id: duplicate-id
784 mappings:
785 - "[A] <> [B]"
786`
787 configFile, err := os.CreateTemp("", "config-*.yaml")
788 require.NoError(t, err)
789 defer os.Remove(configFile.Name())
790
791 _, err = configFile.WriteString(configContent)
792 require.NoError(t, err)
793 err = configFile.Close()
794 require.NoError(t, err)
795
796 mappingContent := `
797id: duplicate-id
798mappings:
799 - "[C] <> [D]"
800`
801 mappingFile, err := os.CreateTemp("", "mapping-*.yaml")
802 require.NoError(t, err)
803 defer os.Remove(mappingFile.Name())
804
805 _, err = mappingFile.WriteString(mappingContent)
806 require.NoError(t, err)
807 err = mappingFile.Close()
808 require.NoError(t, err)
809
Akron7e8da932025-07-01 11:56:46 +0200810 // The function should now succeed but log the duplicate ID error
811 config, err := LoadFromSources(configFile.Name(), []string{mappingFile.Name()})
812 require.NoError(t, err)
813 require.NotNil(t, config)
814
815 // Check that the duplicate ID error was logged
816 logOutput := buf.String()
817 assert.Contains(t, logOutput, "Duplicate mapping list ID found")
818 assert.Contains(t, logOutput, "duplicate-id")
819
820 // Only the first mapping list (from config file) should be loaded
821 require.Len(t, config.Lists, 1)
822 assert.Equal(t, "duplicate-id", config.Lists[0].ID)
823 // Check that it's the one from the config file (has mapping "[A] <> [B]")
824 assert.Equal(t, "[A] <> [B]", string(config.Lists[0].Mappings[0]))
Akrone1cff7c2025-06-04 18:43:32 +0200825}
Akron813780f2025-06-05 15:44:28 +0200826
827func TestLoadFromSourcesConfigWithOnlyPort(t *testing.T) {
828 // Create config file with only port (no lists)
829 configContent := `
830port: 8080
831loglevel: debug
832`
833 configFile, err := os.CreateTemp("", "config-*.yaml")
834 require.NoError(t, err)
835 defer os.Remove(configFile.Name())
836
837 _, err = configFile.WriteString(configContent)
838 require.NoError(t, err)
839 err = configFile.Close()
840 require.NoError(t, err)
841
842 // Create mapping file
843 mappingContent := `
844id: test-mapper
845mappings:
846 - "[A] <> [B]"
847`
848 mappingFile, err := os.CreateTemp("", "mapping-*.yaml")
849 require.NoError(t, err)
850 defer os.Remove(mappingFile.Name())
851
852 _, err = mappingFile.WriteString(mappingContent)
853 require.NoError(t, err)
854 err = mappingFile.Close()
855 require.NoError(t, err)
856
857 // This should work: config file has only port, mapping file provides the mapping list
858 config, err := LoadFromSources(configFile.Name(), []string{mappingFile.Name()})
859 require.NoError(t, err)
860 require.NotNil(t, config)
861
862 // Check that port and log level from config file are preserved
863 assert.Equal(t, 8080, config.Port)
864 assert.Equal(t, "debug", config.LogLevel)
865
866 // Check that mapping from mapping file is loaded
867 require.Len(t, config.Lists, 1)
868 assert.Equal(t, "test-mapper", config.Lists[0].ID)
869
870 // Check that defaults are applied for other fields
871 assert.Equal(t, defaultSDK, config.SDK)
Akron43fb1022026-02-20 11:38:49 +0100872 assert.Equal(t, defaultStylesheet, config.Stylesheet)
Akron813780f2025-06-05 15:44:28 +0200873 assert.Equal(t, defaultServer, config.Server)
874 assert.Equal(t, defaultServiceURL, config.ServiceURL)
875}
Akron2f93c582026-02-19 16:49:13 +0100876
877func TestCorpusMappingListType(t *testing.T) {
878 content := `
879lists:
880- id: corpus-class-mapping
881 type: corpus
882 desc: Maps textClass values to genre field
883 mappings:
884 - "textClass=novel <> genre=fiction"
885 - "textClass=science <> genre=nonfiction"
886- id: annotation-mapper
887 mappings:
888 - "[A] <> [B]"
889`
890 tmpfile, err := os.CreateTemp("", "config-corpus-*.yaml")
891 require.NoError(t, err)
892 defer os.Remove(tmpfile.Name())
893
894 _, err = tmpfile.WriteString(content)
895 require.NoError(t, err)
896 err = tmpfile.Close()
897 require.NoError(t, err)
898
899 config, err := LoadFromSources(tmpfile.Name(), nil)
900 require.NoError(t, err)
901 require.Len(t, config.Lists, 2)
902
903 assert.Equal(t, "corpus", config.Lists[0].Type)
904 assert.True(t, config.Lists[0].IsCorpus())
905
906 assert.Equal(t, "", config.Lists[1].Type)
907 assert.False(t, config.Lists[1].IsCorpus())
908}
909
910func TestParseCorpusMappings(t *testing.T) {
911 list := &MappingList{
912 ID: "test-corpus",
913 Type: "corpus",
914 Mappings: []MappingRule{
915 "textClass=novel <> genre=fiction",
916 "(textClass=novel & pubDate=2020:geq#date) <> genre=recentfiction",
917 },
918 }
919
920 results, err := list.ParseCorpusMappings()
921 require.NoError(t, err)
922 require.Len(t, results, 2)
923
924 // Verify simple field rule
925 require.NotNil(t, results[0].Upper)
926 require.NotNil(t, results[0].Lower)
927
928 // Verify group rule
929 require.NotNil(t, results[1].Upper)
930 require.NotNil(t, results[1].Lower)
931}
932
933func TestParseCorpusMappingsErrors(t *testing.T) {
934 list := &MappingList{
935 ID: "test-corpus",
936 Type: "corpus",
937 Mappings: []MappingRule{""},
938 }
939
940 _, err := list.ParseCorpusMappings()
941 assert.Error(t, err)
942 assert.Contains(t, err.Error(), "empty corpus mapping rule")
943
944 list2 := &MappingList{
945 ID: "test-corpus",
946 Type: "corpus",
947 Mappings: []MappingRule{"invalid rule without separator"},
948 }
949
950 _, err = list2.ParseCorpusMappings()
951 assert.Error(t, err)
952 assert.Contains(t, err.Error(), "failed to parse corpus mapping rule")
953}
Akrona67de8f2026-02-23 17:54:26 +0100954
Akronf98ba282026-02-24 11:13:30 +0100955func TestApplyEnvOverrides(t *testing.T) {
956 envKeys := []string{
957 "KORAL_MAPPER_SERVER",
958 "KORAL_MAPPER_SDK",
959 "KORAL_MAPPER_STYLESHEET",
960 "KORAL_MAPPER_SERVICE_URL",
961 "KORAL_MAPPER_COOKIE_NAME",
962 "KORAL_MAPPER_PORT",
963 "KORAL_MAPPER_LOG_LEVEL",
Akronf1ca8822026-05-20 15:44:00 +0200964 "KORAL_MAPPER_ALLOW_ORIGINS",
Akronf98ba282026-02-24 11:13:30 +0100965 }
966
967 clearEnv := func() {
968 for _, key := range envKeys {
969 os.Unsetenv(key)
970 }
971 }
972
973 t.Run("ENV overrides config values", func(t *testing.T) {
974 clearEnv()
975 defer clearEnv()
976
977 cfg := &MappingConfig{
978 Server: "from-config",
979 SDK: "from-config",
980 Stylesheet: "from-config",
981 ServiceURL: "from-config",
982 CookieName: "from-config",
983 Port: 1234,
984 LogLevel: "warn",
985 }
986
987 os.Setenv("KORAL_MAPPER_SERVER", "from-env-server")
988 os.Setenv("KORAL_MAPPER_SDK", "from-env-sdk")
989 os.Setenv("KORAL_MAPPER_STYLESHEET", "from-env-style")
990 os.Setenv("KORAL_MAPPER_SERVICE_URL", "from-env-url")
991 os.Setenv("KORAL_MAPPER_COOKIE_NAME", "from-env-cookie")
992 os.Setenv("KORAL_MAPPER_PORT", "9999")
993 os.Setenv("KORAL_MAPPER_LOG_LEVEL", "debug")
994
995 ApplyEnvOverrides(cfg)
996
997 assert.Equal(t, "from-env-server", cfg.Server)
998 assert.Equal(t, "from-env-sdk", cfg.SDK)
999 assert.Equal(t, "from-env-style", cfg.Stylesheet)
1000 assert.Equal(t, "from-env-url", cfg.ServiceURL)
1001 assert.Equal(t, "from-env-cookie", cfg.CookieName)
1002 assert.Equal(t, 9999, cfg.Port)
1003 assert.Equal(t, "debug", cfg.LogLevel)
1004 })
1005
1006 t.Run("Empty ENV does not override", func(t *testing.T) {
1007 clearEnv()
1008 defer clearEnv()
1009
1010 cfg := &MappingConfig{
1011 Server: "original-server",
1012 SDK: "original-sdk",
1013 Stylesheet: "original-style",
1014 ServiceURL: "original-url",
1015 CookieName: "original-cookie",
1016 Port: 1234,
1017 LogLevel: "info",
1018 }
1019
1020 ApplyEnvOverrides(cfg)
1021
1022 assert.Equal(t, "original-server", cfg.Server)
1023 assert.Equal(t, "original-sdk", cfg.SDK)
1024 assert.Equal(t, "original-style", cfg.Stylesheet)
1025 assert.Equal(t, "original-url", cfg.ServiceURL)
1026 assert.Equal(t, "original-cookie", cfg.CookieName)
1027 assert.Equal(t, 1234, cfg.Port)
1028 assert.Equal(t, "info", cfg.LogLevel)
1029 })
1030
1031 t.Run("Invalid port ENV is ignored", func(t *testing.T) {
1032 clearEnv()
1033 defer clearEnv()
1034
1035 cfg := &MappingConfig{Port: 5725}
1036 os.Setenv("KORAL_MAPPER_PORT", "not-a-number")
1037
1038 ApplyEnvOverrides(cfg)
1039
1040 assert.Equal(t, 5725, cfg.Port)
1041 })
1042
1043 t.Run("Partial ENV overrides", func(t *testing.T) {
1044 clearEnv()
1045 defer clearEnv()
1046
1047 cfg := &MappingConfig{
1048 Server: "from-config",
1049 SDK: "from-config",
1050 Port: 1234,
1051 LogLevel: "warn",
1052 }
1053
1054 os.Setenv("KORAL_MAPPER_SERVER", "from-env")
1055 os.Setenv("KORAL_MAPPER_PORT", "8080")
1056
1057 ApplyEnvOverrides(cfg)
1058
1059 assert.Equal(t, "from-env", cfg.Server)
1060 assert.Equal(t, "from-config", cfg.SDK)
1061 assert.Equal(t, 8080, cfg.Port)
1062 assert.Equal(t, "warn", cfg.LogLevel)
1063 })
1064}
1065
Akroned787d02026-05-20 12:31:07 +02001066func TestBasePathEnvOverride(t *testing.T) {
1067 t.Setenv("KORAL_MAPPER_BASE_PATH", "/custom/base/path")
1068
1069 cfg := &MappingConfig{BasePath: "from-config"}
1070 ApplyEnvOverrides(cfg)
1071
1072 assert.Equal(t, "/custom/base/path", cfg.BasePath)
1073}
1074
1075func TestBasePathFromYAML(t *testing.T) {
1076 content := `
1077basePath: "/opt/koralmapper"
1078lists:
1079 - id: test-mapper
1080 mappings:
1081 - "[A] <> [B]"
1082`
1083 tmpfile, err := os.CreateTemp("", "config-basepath-*.yaml")
1084 require.NoError(t, err)
1085 defer os.Remove(tmpfile.Name())
1086
1087 _, err = tmpfile.WriteString(content)
1088 require.NoError(t, err)
1089 require.NoError(t, tmpfile.Close())
1090
1091 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1092 require.NoError(t, err)
1093 assert.Equal(t, "/opt/koralmapper", cfg.BasePath)
1094}
1095
Akronf98ba282026-02-24 11:13:30 +01001096func TestEnvOverridesInLoadFromSources(t *testing.T) {
1097 envKeys := []string{
1098 "KORAL_MAPPER_SERVER",
1099 "KORAL_MAPPER_SDK",
1100 "KORAL_MAPPER_PORT",
1101 "KORAL_MAPPER_LOG_LEVEL",
1102 "KORAL_MAPPER_STYLESHEET",
1103 "KORAL_MAPPER_SERVICE_URL",
1104 "KORAL_MAPPER_COOKIE_NAME",
Akronf1ca8822026-05-20 15:44:00 +02001105 "KORAL_MAPPER_ALLOW_ORIGINS",
Akronf98ba282026-02-24 11:13:30 +01001106 }
1107 clearEnv := func() {
1108 for _, key := range envKeys {
1109 os.Unsetenv(key)
1110 }
1111 }
1112 clearEnv()
1113 defer clearEnv()
1114
1115 configContent := `
1116sdk: "https://custom.example.com/sdk.js"
1117server: "https://custom.example.com/"
1118port: 3000
1119lists:
1120- id: test-mapper
1121 mappings:
1122 - "[A] <> [B]"
1123`
1124 tmpfile, err := os.CreateTemp("", "config-env-*.yaml")
1125 require.NoError(t, err)
1126 defer os.Remove(tmpfile.Name())
1127
1128 _, err = tmpfile.WriteString(configContent)
1129 require.NoError(t, err)
1130 require.NoError(t, tmpfile.Close())
1131
1132 os.Setenv("KORAL_MAPPER_SERVER", "https://env-override.example.com/")
1133 os.Setenv("KORAL_MAPPER_PORT", "7777")
1134
1135 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1136 require.NoError(t, err)
1137
1138 // ENV overrides YAML values
1139 assert.Equal(t, "https://env-override.example.com/", cfg.Server)
1140 assert.Equal(t, 7777, cfg.Port)
1141
1142 // Non-overridden values preserved from YAML
1143 assert.Equal(t, "https://custom.example.com/sdk.js", cfg.SDK)
1144
1145 // Defaults applied for unset fields
1146 assert.Equal(t, defaultStylesheet, cfg.Stylesheet)
1147 assert.Equal(t, defaultServiceURL, cfg.ServiceURL)
1148 assert.Equal(t, defaultCookieName, cfg.CookieName)
1149 assert.Equal(t, defaultLogLevel, cfg.LogLevel)
1150}
1151
Akron8414ae52026-05-19 13:31:14 +02001152func TestRewritesYAMLField(t *testing.T) {
1153 content := `
1154lists:
1155 - id: rewrite-on
1156 rewrites: true
1157 mappings:
1158 - "[A] <> [B]"
1159 - id: rewrite-off
1160 rewrites: false
1161 mappings:
1162 - "[C] <> [D]"
1163 - id: rewrite-default
1164 mappings:
1165 - "[E] <> [F]"
1166`
1167 tmpfile, err := os.CreateTemp("", "config-rewrites-*.yaml")
1168 require.NoError(t, err)
1169 defer os.Remove(tmpfile.Name())
1170
1171 _, err = tmpfile.WriteString(content)
1172 require.NoError(t, err)
1173 require.NoError(t, tmpfile.Close())
1174
1175 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1176 require.NoError(t, err)
1177 require.Len(t, cfg.Lists, 3)
1178
1179 assert.True(t, cfg.Lists[0].Rewrites, "rewrites should be true when set to true")
1180 assert.False(t, cfg.Lists[1].Rewrites, "rewrites should be false when set to false")
1181 assert.False(t, cfg.Lists[2].Rewrites, "rewrites should default to false")
1182}
1183
Akrona67de8f2026-02-23 17:54:26 +01001184func TestParseCorpusMappingsWithFieldAFieldB(t *testing.T) {
1185 list := &MappingList{
1186 ID: "test-keyed",
1187 Type: "corpus",
1188 FieldA: "wikiCat",
1189 FieldB: "textClass",
1190 Mappings: []MappingRule{
1191 "Entertainment <> ((kultur & musik) | (kultur & film))",
1192 },
1193 }
1194
1195 results, err := list.ParseCorpusMappings()
1196 require.NoError(t, err)
1197 require.Len(t, results, 1)
1198
1199 upper := results[0].Upper.(*parser.CorpusField)
1200 assert.Equal(t, "wikiCat", upper.Key)
1201 assert.Equal(t, "Entertainment", upper.Value)
1202
1203 group := results[0].Lower.(*parser.CorpusGroup)
1204 assert.Equal(t, "or", group.Operation)
1205 require.Len(t, group.Operands, 2)
1206
1207 and1 := group.Operands[0].(*parser.CorpusGroup)
1208 assert.Equal(t, "textClass", and1.Operands[0].(*parser.CorpusField).Key)
1209 assert.Equal(t, "kultur", and1.Operands[0].(*parser.CorpusField).Value)
1210 assert.Equal(t, "textClass", and1.Operands[1].(*parser.CorpusField).Key)
1211 assert.Equal(t, "musik", and1.Operands[1].(*parser.CorpusField).Value)
1212}
Akrone6767de2026-05-20 10:06:24 +02001213
1214func TestRateLimitConfigField(t *testing.T) {
1215 content := `
1216rateLimit: 50
1217lists:
1218 - id: test-mapper
1219 mappings:
1220 - "[A] <> [B]"
1221`
1222 tmpfile, err := os.CreateTemp("", "config-ratelimit-*.yaml")
1223 require.NoError(t, err)
1224 defer os.Remove(tmpfile.Name())
1225
1226 _, err = tmpfile.WriteString(content)
1227 require.NoError(t, err)
1228 require.NoError(t, tmpfile.Close())
1229
1230 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1231 require.NoError(t, err)
1232 assert.Equal(t, 50, cfg.RateLimit, "rateLimit should be loaded from YAML")
1233}
1234
1235func TestRateLimitDefaultApplied(t *testing.T) {
1236 cfg := &MappingConfig{}
1237 ApplyDefaults(cfg)
1238 assert.Equal(t, defaultRateLimit, cfg.RateLimit,
1239 "default rate limit should be applied when not specified")
1240}
1241
1242func TestRateLimitEnvOverride(t *testing.T) {
1243 t.Setenv("KORAL_MAPPER_RATE_LIMIT", "200")
1244
1245 content := `
1246rateLimit: 50
1247lists:
1248 - id: test-mapper
1249 mappings:
1250 - "[A] <> [B]"
1251`
1252 tmpfile, err := os.CreateTemp("", "config-ratelimit-env-*.yaml")
1253 require.NoError(t, err)
1254 defer os.Remove(tmpfile.Name())
1255
1256 _, err = tmpfile.WriteString(content)
1257 require.NoError(t, err)
1258 require.NoError(t, tmpfile.Close())
1259
1260 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1261 require.NoError(t, err)
1262 assert.Equal(t, 200, cfg.RateLimit,
1263 "KORAL_MAPPER_RATE_LIMIT env var should override YAML value")
1264}
Akroned787d02026-05-20 12:31:07 +02001265
Akronf1ca8822026-05-20 15:44:00 +02001266func TestAllowOriginsDefault(t *testing.T) {
1267 cfg := &MappingConfig{}
1268 ApplyDefaults(cfg)
1269 // AllowOrigins should derive from the Server default (trailing slash stripped)
1270 assert.Equal(t, "https://korap.ids-mannheim.de", cfg.AllowOrigins,
1271 "default AllowOrigins should derive from defaultServer")
1272}
1273
1274func TestAllowOriginsDerivedFromCustomServer(t *testing.T) {
1275 cfg := &MappingConfig{
1276 Server: "https://custom.example.com/",
1277 }
1278 ApplyDefaults(cfg)
1279 assert.Equal(t, "https://custom.example.com", cfg.AllowOrigins,
1280 "AllowOrigins should derive from the configured Server (trailing slash stripped)")
1281}
1282
1283func TestAllowOriginsExplicitNotOverriddenByServer(t *testing.T) {
1284 cfg := &MappingConfig{
1285 Server: "https://custom.example.com/",
1286 AllowOrigins: "https://explicit-origin.example.com",
1287 }
1288 ApplyDefaults(cfg)
1289 assert.Equal(t, "https://explicit-origin.example.com", cfg.AllowOrigins,
1290 "explicit AllowOrigins should not be overridden by Server default")
1291}
1292
1293func TestAllowOriginsFromYAML(t *testing.T) {
1294 content := `
1295allowOrigins: "https://custom.example.com,https://other.example.com"
1296lists:
1297 - id: test-mapper
1298 mappings:
1299 - "[A] <> [B]"
1300`
1301 tmpfile, err := os.CreateTemp("", "config-cors-*.yaml")
1302 require.NoError(t, err)
1303 defer os.Remove(tmpfile.Name())
1304
1305 _, err = tmpfile.WriteString(content)
1306 require.NoError(t, err)
1307 require.NoError(t, tmpfile.Close())
1308
1309 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1310 require.NoError(t, err)
1311 assert.Equal(t, "https://custom.example.com,https://other.example.com",
1312 cfg.AllowOrigins)
1313}
1314
1315func TestAllowOriginsEnvOverride(t *testing.T) {
1316 t.Setenv("KORAL_MAPPER_ALLOW_ORIGINS", "https://env-origin.example.com")
1317
1318 content := `
1319allowOrigins: "https://yaml-origin.example.com"
1320lists:
1321 - id: test-mapper
1322 mappings:
1323 - "[A] <> [B]"
1324`
1325 tmpfile, err := os.CreateTemp("", "config-cors-env-*.yaml")
1326 require.NoError(t, err)
1327 defer os.Remove(tmpfile.Name())
1328
1329 _, err = tmpfile.WriteString(content)
1330 require.NoError(t, err)
1331 require.NoError(t, tmpfile.Close())
1332
1333 cfg, err := LoadFromSources(tmpfile.Name(), nil)
1334 require.NoError(t, err)
1335 assert.Equal(t, "https://env-origin.example.com", cfg.AllowOrigins,
1336 "KORAL_MAPPER_ALLOW_ORIGINS env var should override YAML value")
1337}
1338
Akroned787d02026-05-20 12:31:07 +02001339func TestSanitizeFilePathRejectsOutsideBase(t *testing.T) {
1340 // Set base to a specific directory and verify paths outside are rejected
1341 tmpDir, err := os.MkdirTemp("", "koral-base-*")
1342 require.NoError(t, err)
1343 defer os.RemoveAll(tmpDir)
1344
1345 origBase := AllowedBasePath
1346 defer func() { AllowedBasePath = origBase }()
1347 AllowedBasePath = tmpDir
1348
1349 tests := []struct {
1350 name string
1351 input string
1352 wantErr bool
1353 }{
1354 {
1355 name: "Path within base is accepted",
1356 input: filepath.Join(tmpDir, "config.yaml"),
1357 wantErr: false,
1358 },
1359 {
1360 name: "Path outside base is rejected",
1361 input: "/etc/passwd",
1362 wantErr: true,
1363 },
1364 {
1365 name: "Traversal escaping base and tmp is rejected",
1366 input: "/etc/passwd",
1367 wantErr: true,
1368 },
1369 {
1370 name: "Empty path is rejected",
1371 input: "",
1372 wantErr: true,
1373 },
1374 {
1375 name: "Subdirectory within base is accepted",
1376 input: filepath.Join(tmpDir, "sub", "dir", "file.yaml"),
1377 wantErr: false,
1378 },
1379 {
1380 name: "Relative path within base is rejected when CWD differs",
1381 input: "config.yaml",
1382 wantErr: true, // resolves against CWD, not base
1383 },
1384 }
1385
1386 for _, tt := range tests {
1387 t.Run(tt.name, func(t *testing.T) {
1388 result, err := sanitizeFilePath(tt.input)
1389 if tt.wantErr {
1390 require.Error(t, err)
1391 return
1392 }
1393 require.NoError(t, err)
1394 assert.True(t, filepath.IsAbs(result),
1395 "sanitized path should be absolute, got: %s", result)
1396 assert.NotContains(t, result, "..")
1397 })
1398 }
1399}
1400
1401func TestSanitizeFilePathTraversalToPasswd(t *testing.T) {
1402 // Verify /etc/passwd cannot be accessed via traversal
1403 cwd, err := os.Getwd()
1404 require.NoError(t, err)
1405
1406 origBase := AllowedBasePath
1407 defer func() { AllowedBasePath = origBase }()
1408 AllowedBasePath = cwd
1409
1410 _, err = sanitizeFilePath("../../../etc/passwd")
1411 require.Error(t, err)
1412 assert.Contains(t, err.Error(), "path traversal detected")
1413}
1414
1415func TestSanitizeFilePathWithDockerRoot(t *testing.T) {
1416 // In Docker the WORKDIR is "/" -- all absolute paths should be valid
1417 origBase := AllowedBasePath
1418 defer func() { AllowedBasePath = origBase }()
1419 AllowedBasePath = "/"
1420
1421 result, err := sanitizeFilePath("/mappings/stts-upos.yaml")
1422 require.NoError(t, err)
1423 assert.Equal(t, "/mappings/stts-upos.yaml", result)
1424
1425 // Even deeply nested paths work when base is /
1426 result, err = sanitizeFilePath("/etc/ssl/certs/ca-certificates.crt")
1427 require.NoError(t, err)
1428 assert.Equal(t, "/etc/ssl/certs/ca-certificates.crt", result)
1429}
1430
1431func TestSanitizeFilePathPrefixFalsePositive(t *testing.T) {
1432 // Ensure /home/user does not match /home/username
1433 origBase := AllowedBasePath
1434 defer func() { AllowedBasePath = origBase }()
1435 AllowedBasePath = "/home/user"
1436
1437 _, err := sanitizeFilePath("/home/username/secret.yaml")
1438 require.Error(t, err)
1439 assert.Contains(t, err.Error(), "path traversal detected")
1440}
1441
1442func TestLoadFromSourcesRejectsTraversal(t *testing.T) {
1443 origBase := AllowedBasePath
1444 defer func() { AllowedBasePath = origBase }()
1445
1446 cwd, err := os.Getwd()
1447 require.NoError(t, err)
1448 AllowedBasePath = cwd
1449
1450 // Config file traversal should be rejected
1451 _, err = LoadFromSources("../../../etc/passwd", nil)
1452 require.Error(t, err)
1453 assert.Contains(t, err.Error(), "path traversal detected")
1454
1455 // Mapping file traversal should be rejected
1456 _, err = LoadFromSources("", []string{"../../../etc/passwd"})
1457 require.Error(t, err)
1458 assert.Contains(t, err.Error(), "path traversal detected")
1459}
1460
1461func TestValidPathsStillWork(t *testing.T) {
1462 content := `
1463id: test-mapper
1464mappings:
1465 - "[A] <> [B]"
1466`
1467 tmpDir, err := os.MkdirTemp("", "koral-test-*")
1468 require.NoError(t, err)
1469 defer os.RemoveAll(tmpDir)
1470
1471 origBase := AllowedBasePath
1472 defer func() { AllowedBasePath = origBase }()
1473 AllowedBasePath = tmpDir
1474
1475 subDir := filepath.Join(tmpDir, "subdir")
1476 require.NoError(t, os.Mkdir(subDir, 0755))
1477
1478 tmpfile, err := os.CreateTemp(subDir, "mapping-*.yaml")
1479 require.NoError(t, err)
1480
1481 _, err = tmpfile.WriteString(content)
1482 require.NoError(t, err)
1483 require.NoError(t, tmpfile.Close())
1484
1485 cfg, err := LoadFromSources("", []string{tmpfile.Name()})
1486 require.NoError(t, err)
1487 require.Len(t, cfg.Lists, 1)
1488 assert.Equal(t, "test-mapper", cfg.Lists[0].ID)
1489}
1490
1491func TestRelativePathWithTraversalWithinBase(t *testing.T) {
1492 // Paths with ".." that still resolve within the base should work
1493 content := `
1494id: traversal-test-mapper
1495mappings:
1496 - "[A] <> [B]"
1497`
1498 tmpDir, err := os.MkdirTemp("", "koral-traversal-*")
1499 require.NoError(t, err)
1500 defer os.RemoveAll(tmpDir)
1501
1502 origBase := AllowedBasePath
1503 defer func() { AllowedBasePath = origBase }()
1504 AllowedBasePath = tmpDir
1505
1506 // Create file at tmpDir/config.yaml
1507 configPath := filepath.Join(tmpDir, "config.yaml")
1508 require.NoError(t, os.WriteFile(configPath, []byte(content), 0644))
1509
1510 // Reference via a traversal path: tmpDir/subdir/../config.yaml
1511 // This resolves to tmpDir/config.yaml which is within the base
1512 subDir := filepath.Join(tmpDir, "subdir")
1513 require.NoError(t, os.Mkdir(subDir, 0755))
1514 traversalPath := filepath.Join(subDir, "..", "config.yaml")
1515
1516 cfg, err := LoadFromSources("", []string{traversalPath})
1517 require.NoError(t, err)
1518 require.Len(t, cfg.Lists, 1)
1519 assert.Equal(t, "traversal-test-mapper", cfg.Lists[0].ID)
1520}