blob: b9c2678a95511c593e73a7f7c55b46f2d8c8e2c0 [file] [log] [blame]
Akron57ee5582025-05-21 15:25:13 +02001package config
2
3import (
4 "fmt"
5 "os"
6
Akronfa55bb22025-05-26 15:10:42 +02007 "github.com/KorAP/KoralPipe-TermMapper/ast"
8 "github.com/KorAP/KoralPipe-TermMapper/parser"
Akron7e8da932025-07-01 11:56:46 +02009 "github.com/rs/zerolog/log"
Akron57ee5582025-05-21 15:25:13 +020010 "gopkg.in/yaml.v3"
11)
12
// Fallback values that ApplyDefaults writes into a MappingConfig whenever the
// corresponding field was left at its zero value.
const (
	// defaultSDK is used when MappingConfig.SDK is empty.
	defaultSDK = "https://korap.ids-mannheim.de/js/korap-plugin-latest.js"
	// defaultServer is used when MappingConfig.Server is empty.
	defaultServer = "https://korap.ids-mannheim.de/"
	// defaultServiceURL is used when MappingConfig.ServiceURL is empty.
	defaultServiceURL = "https://korap.ids-mannheim.de/plugin/termmapper"
	// defaultPort is used when MappingConfig.Port is 0.
	defaultPort = 5725
	// defaultLogLevel is used when MappingConfig.LogLevel is empty.
	defaultLogLevel = "warn"
)
20
// MappingRule is one mapping rule of a configuration, kept in its textual
// form. ParseMappings hands the text to the grammar parser.
type MappingRule string
23
24// MappingList represents a list of mapping rules with metadata
25type MappingList struct {
Akrondab27112025-06-05 13:52:43 +020026 ID string `yaml:"id"`
27 Description string `yaml:"desc,omitempty"`
28 FoundryA string `yaml:"foundryA,omitempty"`
29 LayerA string `yaml:"layerA,omitempty"`
30 FoundryB string `yaml:"foundryB,omitempty"`
31 LayerB string `yaml:"layerB,omitempty"`
32 Mappings []MappingRule `yaml:"mappings"`
Akron57ee5582025-05-21 15:25:13 +020033}
34
Akron06d21f02025-06-04 14:36:07 +020035// MappingConfig represents the root configuration containing multiple mapping lists
36type MappingConfig struct {
Akron2ac2ec02025-06-05 15:26:42 +020037 SDK string `yaml:"sdk,omitempty"`
38 Server string `yaml:"server,omitempty"`
39 ServiceURL string `yaml:"serviceURL,omitempty"`
40 Port int `yaml:"port,omitempty"`
41 LogLevel string `yaml:"loglevel,omitempty"`
42 Lists []MappingList `yaml:"lists,omitempty"`
Akron57ee5582025-05-21 15:25:13 +020043}
44
Akrone1cff7c2025-06-04 18:43:32 +020045// LoadFromSources loads configuration from multiple sources and merges them:
46// - A main configuration file (optional) containing global settings and lists
47// - Individual mapping files (optional) containing single mapping lists each
48// At least one source must be provided
49func LoadFromSources(configFile string, mappingFiles []string) (*MappingConfig, error) {
50 var allLists []MappingList
51 var globalConfig MappingConfig
Akron57ee5582025-05-21 15:25:13 +020052
Akrone1cff7c2025-06-04 18:43:32 +020053 // Track seen IDs across all sources to detect duplicates
54 seenIDs := make(map[string]bool)
Akrona5d88142025-05-22 14:42:09 +020055
Akrone1cff7c2025-06-04 18:43:32 +020056 // Load main configuration file if provided
57 if configFile != "" {
58 data, err := os.ReadFile(configFile)
59 if err != nil {
60 return nil, fmt.Errorf("failed to read config file '%s': %w", configFile, err)
Akron06d21f02025-06-04 14:36:07 +020061 }
Akrone1cff7c2025-06-04 18:43:32 +020062
63 if len(data) == 0 {
64 return nil, fmt.Errorf("EOF: config file '%s' is empty", configFile)
65 }
66
67 // Try to unmarshal as new format first (object with optional sdk/server and lists)
Akron813780f2025-06-05 15:44:28 +020068 if err := yaml.Unmarshal(data, &globalConfig); err == nil {
69 // Successfully parsed as new format - accept it regardless of whether it has lists
Akrone1cff7c2025-06-04 18:43:32 +020070 for _, list := range globalConfig.Lists {
71 if seenIDs[list.ID] {
72 return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
73 }
74 seenIDs[list.ID] = true
75 }
76 allLists = append(allLists, globalConfig.Lists...)
77 } else {
78 // Fall back to old format (direct list)
79 var lists []MappingList
80 if err := yaml.Unmarshal(data, &lists); err != nil {
81 return nil, fmt.Errorf("failed to parse YAML config file '%s': %w", configFile, err)
82 }
83
84 for _, list := range lists {
85 if seenIDs[list.ID] {
86 return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
87 }
88 seenIDs[list.ID] = true
89 }
90 allLists = append(allLists, lists...)
91 // Clear the lists from globalConfig since we got them from the old format
92 globalConfig.Lists = nil
93 }
Akron06d21f02025-06-04 14:36:07 +020094 }
95
Akrone1cff7c2025-06-04 18:43:32 +020096 // Load individual mapping files
97 for _, file := range mappingFiles {
98 data, err := os.ReadFile(file)
99 if err != nil {
Akron7e8da932025-07-01 11:56:46 +0200100 log.Error().Err(err).Str("file", file).Msg("Failed to read mapping file")
101 continue
Akrone1cff7c2025-06-04 18:43:32 +0200102 }
103
104 if len(data) == 0 {
Akron7e8da932025-07-01 11:56:46 +0200105 log.Error().Err(err).Str("file", file).Msg("EOF: mapping file is empty")
106 continue
Akrone1cff7c2025-06-04 18:43:32 +0200107 }
108
109 var list MappingList
110 if err := yaml.Unmarshal(data, &list); err != nil {
Akron7e8da932025-07-01 11:56:46 +0200111 log.Error().Err(err).Str("file", file).Msg("Failed to parse YAML mapping file")
112 continue
Akrone1cff7c2025-06-04 18:43:32 +0200113 }
114
115 if seenIDs[list.ID] {
Akron7e8da932025-07-01 11:56:46 +0200116 log.Error().Err(err).Str("file", file).Str("list-id", list.ID).Msg("Duplicate mapping list ID found")
117 continue
Akrone1cff7c2025-06-04 18:43:32 +0200118 }
119 seenIDs[list.ID] = true
120 allLists = append(allLists, list)
Akron57ee5582025-05-21 15:25:13 +0200121 }
122
Akrone1cff7c2025-06-04 18:43:32 +0200123 // Ensure we have at least some configuration
124 if len(allLists) == 0 {
125 return nil, fmt.Errorf("no mapping lists found: provide either a config file (-c) with lists or mapping files (-m)")
126 }
127
Akron585f50f2025-07-03 13:55:47 +0200128 // Validate all mapping lists (skip duplicate ID check since we already did it)
Akrone1cff7c2025-06-04 18:43:32 +0200129 if err := validateMappingLists(allLists); err != nil {
Akron06d21f02025-06-04 14:36:07 +0200130 return nil, err
131 }
132
Akrone1cff7c2025-06-04 18:43:32 +0200133 // Create final configuration
134 result := &MappingConfig{
Akron2ac2ec02025-06-05 15:26:42 +0200135 SDK: globalConfig.SDK,
136 Server: globalConfig.Server,
137 ServiceURL: globalConfig.ServiceURL,
138 Port: globalConfig.Port,
139 LogLevel: globalConfig.LogLevel,
140 Lists: allLists,
Akrone1cff7c2025-06-04 18:43:32 +0200141 }
142
Akron06d21f02025-06-04 14:36:07 +0200143 // Apply defaults if not specified
Akron2ac2ec02025-06-05 15:26:42 +0200144 ApplyDefaults(result)
Akrone1cff7c2025-06-04 18:43:32 +0200145
146 return result, nil
147}
148
Akron585f50f2025-07-03 13:55:47 +0200149// ApplyDefaults sets default values for configuration fields if they are empty
Akron2ac2ec02025-06-05 15:26:42 +0200150func ApplyDefaults(config *MappingConfig) {
Akron585f50f2025-07-03 13:55:47 +0200151 defaults := map[*string]string{
152 &config.SDK: defaultSDK,
153 &config.Server: defaultServer,
154 &config.ServiceURL: defaultServiceURL,
155 &config.LogLevel: defaultLogLevel,
Akron06d21f02025-06-04 14:36:07 +0200156 }
Akron585f50f2025-07-03 13:55:47 +0200157
158 for field, defaultValue := range defaults {
159 if *field == "" {
160 *field = defaultValue
161 }
Akron06d21f02025-06-04 14:36:07 +0200162 }
Akron585f50f2025-07-03 13:55:47 +0200163
Akrona8a66ce2025-06-05 10:50:17 +0200164 if config.Port == 0 {
165 config.Port = defaultPort
166 }
Akron06d21f02025-06-04 14:36:07 +0200167}
168
Akron585f50f2025-07-03 13:55:47 +0200169// validateMappingLists validates a slice of mapping lists (without duplicate ID checking)
Akron06d21f02025-06-04 14:36:07 +0200170func validateMappingLists(lists []MappingList) error {
Akron57ee5582025-05-21 15:25:13 +0200171 for i, list := range lists {
172 if list.ID == "" {
Akron06d21f02025-06-04 14:36:07 +0200173 return fmt.Errorf("mapping list at index %d is missing an ID", i)
Akron57ee5582025-05-21 15:25:13 +0200174 }
Akrona5d88142025-05-22 14:42:09 +0200175
Akron57ee5582025-05-21 15:25:13 +0200176 if len(list.Mappings) == 0 {
Akron06d21f02025-06-04 14:36:07 +0200177 return fmt.Errorf("mapping list '%s' has no mapping rules", list.ID)
Akron57ee5582025-05-21 15:25:13 +0200178 }
179
180 // Validate each mapping rule
181 for j, rule := range list.Mappings {
182 if rule == "" {
Akron06d21f02025-06-04 14:36:07 +0200183 return fmt.Errorf("mapping list '%s' rule at index %d is empty", list.ID, j)
Akron57ee5582025-05-21 15:25:13 +0200184 }
185 }
186 }
Akron06d21f02025-06-04 14:36:07 +0200187 return nil
Akron57ee5582025-05-21 15:25:13 +0200188}
189
190// ParseMappings parses all mapping rules in a list and returns a slice of parsed rules
191func (list *MappingList) ParseMappings() ([]*parser.MappingResult, error) {
192 // Create a grammar parser with the list's default foundries and layers
193 grammarParser, err := parser.NewGrammarParser("", "")
194 if err != nil {
195 return nil, fmt.Errorf("failed to create grammar parser: %w", err)
196 }
197
198 results := make([]*parser.MappingResult, len(list.Mappings))
199 for i, rule := range list.Mappings {
Akrona5d88142025-05-22 14:42:09 +0200200 // Check for empty rules first
201 if rule == "" {
202 return nil, fmt.Errorf("empty mapping rule at index %d in list '%s'", i, list.ID)
203 }
204
Akron57ee5582025-05-21 15:25:13 +0200205 // Parse the mapping rule
206 result, err := grammarParser.ParseMapping(string(rule))
207 if err != nil {
208 return nil, fmt.Errorf("failed to parse mapping rule %d in list '%s': %w", i, list.ID, err)
209 }
210
211 // Apply default foundries and layers if not specified in the rule
212 if list.FoundryA != "" {
213 applyDefaultFoundryAndLayer(result.Upper.Wrap, list.FoundryA, list.LayerA)
214 }
215 if list.FoundryB != "" {
216 applyDefaultFoundryAndLayer(result.Lower.Wrap, list.FoundryB, list.LayerB)
217 }
218
219 results[i] = result
220 }
221
222 return results, nil
223}
224
225// applyDefaultFoundryAndLayer recursively applies default foundry and layer to terms that don't have them specified
226func applyDefaultFoundryAndLayer(node ast.Node, defaultFoundry, defaultLayer string) {
227 switch n := node.(type) {
228 case *ast.Term:
Akron585f50f2025-07-03 13:55:47 +0200229 if n.Foundry == "" && defaultFoundry != "" {
Akron57ee5582025-05-21 15:25:13 +0200230 n.Foundry = defaultFoundry
231 }
Akron585f50f2025-07-03 13:55:47 +0200232 if n.Layer == "" && defaultLayer != "" {
Akron57ee5582025-05-21 15:25:13 +0200233 n.Layer = defaultLayer
234 }
235 case *ast.TermGroup:
236 for _, op := range n.Operands {
237 applyDefaultFoundryAndLayer(op, defaultFoundry, defaultLayer)
238 }
239 }
240}