blob: 2ad6c43996a84b323867e7e0c446481fc036e6c6 [file] [log] [blame]
Akron57ee5582025-05-21 15:25:13 +02001package config
2
3import (
4 "fmt"
5 "os"
Akronf98ba282026-02-24 11:13:30 +01006 "strconv"
Akron57ee5582025-05-21 15:25:13 +02007
Akron2ef703c2025-07-03 15:57:42 +02008 "github.com/KorAP/Koral-Mapper/ast"
9 "github.com/KorAP/Koral-Mapper/parser"
Akron7e8da932025-07-01 11:56:46 +020010 "github.com/rs/zerolog/log"
Akron57ee5582025-05-21 15:25:13 +020011 "gopkg.in/yaml.v3"
12)
13
// Fallback values used by ApplyDefaults for settings that are still at their
// zero value after the configuration file and the environment have been
// consulted:
//
//   - defaultServer backs MappingConfig.Server
//   - defaultSDK backs MappingConfig.SDK
//   - defaultStylesheet backs MappingConfig.Stylesheet
//   - defaultServiceURL backs MappingConfig.ServiceURL
//   - defaultCookieName backs MappingConfig.CookieName
//   - defaultPort backs MappingConfig.Port
//   - defaultLogLevel backs MappingConfig.LogLevel
//   - defaultRateLimit backs MappingConfig.RateLimit
const (
	defaultServer     = "https://korap.ids-mannheim.de/"
	defaultSDK        = "https://korap.ids-mannheim.de/js/korap-plugin-latest.js"
	defaultStylesheet = "https://korap.ids-mannheim.de/css/kalamar-plugin-latest.css"
	defaultServiceURL = "https://korap.ids-mannheim.de/plugin/koralmapper"
	defaultCookieName = "km-config"
	defaultPort       = 5725
	defaultLogLevel   = "warn"
	defaultRateLimit  = 100
)
24
// MappingRule is the raw, unparsed text of a single mapping rule as it
// appears in the "mappings" sequence of a mapping list. It is converted to a
// plain string before being handed to one of the rule parsers.
type MappingRule string
27
28// MappingList represents a list of mapping rules with metadata
29type MappingList struct {
Akrondab27112025-06-05 13:52:43 +020030 ID string `yaml:"id"`
Akron2f93c582026-02-19 16:49:13 +010031 Type string `yaml:"type,omitempty"` // "annotation" (default) or "corpus"
Akrondab27112025-06-05 13:52:43 +020032 Description string `yaml:"desc,omitempty"`
33 FoundryA string `yaml:"foundryA,omitempty"`
34 LayerA string `yaml:"layerA,omitempty"`
35 FoundryB string `yaml:"foundryB,omitempty"`
36 LayerB string `yaml:"layerB,omitempty"`
Akrona67de8f2026-02-23 17:54:26 +010037 FieldA string `yaml:"fieldA,omitempty"`
38 FieldB string `yaml:"fieldB,omitempty"`
Akron8414ae52026-05-19 13:31:14 +020039 Rewrites bool `yaml:"rewrites,omitempty"`
Akrondab27112025-06-05 13:52:43 +020040 Mappings []MappingRule `yaml:"mappings"`
Akron57ee5582025-05-21 15:25:13 +020041}
42
Akron2f93c582026-02-19 16:49:13 +010043// IsCorpus returns true if the mapping list type is "corpus".
44func (list *MappingList) IsCorpus() bool {
45 return list.Type == "corpus"
46}
47
48// ParseCorpusMappings parses all mapping rules as corpus rules.
Akrona67de8f2026-02-23 17:54:26 +010049// Bare values (without key=) are always allowed and receive the default
50// field name from the mapping list header (FieldA/FieldB) when set.
Akron2f93c582026-02-19 16:49:13 +010051func (list *MappingList) ParseCorpusMappings() ([]*parser.CorpusMappingResult, error) {
52 corpusParser := parser.NewCorpusParser()
Akrona67de8f2026-02-23 17:54:26 +010053 corpusParser.AllowBareValues = true
54
Akron2f93c582026-02-19 16:49:13 +010055 results := make([]*parser.CorpusMappingResult, len(list.Mappings))
56 for i, rule := range list.Mappings {
57 if rule == "" {
58 return nil, fmt.Errorf("empty corpus mapping rule at index %d in list '%s'", i, list.ID)
59 }
60 result, err := corpusParser.ParseMapping(string(rule))
61 if err != nil {
62 return nil, fmt.Errorf("failed to parse corpus mapping rule %d in list '%s': %w", i, list.ID, err)
63 }
Akrona67de8f2026-02-23 17:54:26 +010064
65 if list.FieldA != "" {
66 applyDefaultCorpusKey(result.Upper, list.FieldA)
67 }
68 if list.FieldB != "" {
69 applyDefaultCorpusKey(result.Lower, list.FieldB)
70 }
71
Akron2f93c582026-02-19 16:49:13 +010072 results[i] = result
73 }
74 return results, nil
75}
76
Akrona67de8f2026-02-23 17:54:26 +010077// applyDefaultCorpusKey recursively fills in empty keys on CorpusField nodes.
78func applyDefaultCorpusKey(node parser.CorpusNode, defaultKey string) {
79 switch n := node.(type) {
80 case *parser.CorpusField:
81 if n.Key == "" {
82 n.Key = defaultKey
83 }
84 case *parser.CorpusGroup:
85 for _, op := range n.Operands {
86 applyDefaultCorpusKey(op, defaultKey)
87 }
88 }
89}
90
Akron06d21f02025-06-04 14:36:07 +020091// MappingConfig represents the root configuration containing multiple mapping lists
92type MappingConfig struct {
Akron2ac2ec02025-06-05 15:26:42 +020093 SDK string `yaml:"sdk,omitempty"`
Akron43fb1022026-02-20 11:38:49 +010094 Stylesheet string `yaml:"stylesheet,omitempty"`
Akron2ac2ec02025-06-05 15:26:42 +020095 Server string `yaml:"server,omitempty"`
96 ServiceURL string `yaml:"serviceURL,omitempty"`
Akron43fb1022026-02-20 11:38:49 +010097 CookieName string `yaml:"cookieName,omitempty"`
Akron2ac2ec02025-06-05 15:26:42 +020098 Port int `yaml:"port,omitempty"`
99 LogLevel string `yaml:"loglevel,omitempty"`
Akrone6767de2026-05-20 10:06:24 +0200100 RateLimit int `yaml:"rateLimit,omitempty"` // max requests per minute per IP (0 = use default 100)
Akron2ac2ec02025-06-05 15:26:42 +0200101 Lists []MappingList `yaml:"lists,omitempty"`
Akron57ee5582025-05-21 15:25:13 +0200102}
103
Akrone1cff7c2025-06-04 18:43:32 +0200104// LoadFromSources loads configuration from multiple sources and merges them:
105// - A main configuration file (optional) containing global settings and lists
106// - Individual mapping files (optional) containing single mapping lists each
107// At least one source must be provided
108func LoadFromSources(configFile string, mappingFiles []string) (*MappingConfig, error) {
109 var allLists []MappingList
110 var globalConfig MappingConfig
Akron57ee5582025-05-21 15:25:13 +0200111
Akrone1cff7c2025-06-04 18:43:32 +0200112 // Track seen IDs across all sources to detect duplicates
113 seenIDs := make(map[string]bool)
Akrona5d88142025-05-22 14:42:09 +0200114
Akrone1cff7c2025-06-04 18:43:32 +0200115 // Load main configuration file if provided
116 if configFile != "" {
117 data, err := os.ReadFile(configFile)
118 if err != nil {
119 return nil, fmt.Errorf("failed to read config file '%s': %w", configFile, err)
Akron06d21f02025-06-04 14:36:07 +0200120 }
Akrone1cff7c2025-06-04 18:43:32 +0200121
122 if len(data) == 0 {
123 return nil, fmt.Errorf("EOF: config file '%s' is empty", configFile)
124 }
125
126 // Try to unmarshal as new format first (object with optional sdk/server and lists)
Akron813780f2025-06-05 15:44:28 +0200127 if err := yaml.Unmarshal(data, &globalConfig); err == nil {
128 // Successfully parsed as new format - accept it regardless of whether it has lists
Akrone1cff7c2025-06-04 18:43:32 +0200129 for _, list := range globalConfig.Lists {
130 if seenIDs[list.ID] {
131 return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
132 }
133 seenIDs[list.ID] = true
134 }
135 allLists = append(allLists, globalConfig.Lists...)
136 } else {
137 // Fall back to old format (direct list)
138 var lists []MappingList
139 if err := yaml.Unmarshal(data, &lists); err != nil {
140 return nil, fmt.Errorf("failed to parse YAML config file '%s': %w", configFile, err)
141 }
142
143 for _, list := range lists {
144 if seenIDs[list.ID] {
145 return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
146 }
147 seenIDs[list.ID] = true
148 }
149 allLists = append(allLists, lists...)
150 // Clear the lists from globalConfig since we got them from the old format
151 globalConfig.Lists = nil
152 }
Akron06d21f02025-06-04 14:36:07 +0200153 }
154
Akrone1cff7c2025-06-04 18:43:32 +0200155 // Load individual mapping files
156 for _, file := range mappingFiles {
157 data, err := os.ReadFile(file)
158 if err != nil {
Akron7e8da932025-07-01 11:56:46 +0200159 log.Error().Err(err).Str("file", file).Msg("Failed to read mapping file")
160 continue
Akrone1cff7c2025-06-04 18:43:32 +0200161 }
162
163 if len(data) == 0 {
Akron7e8da932025-07-01 11:56:46 +0200164 log.Error().Err(err).Str("file", file).Msg("EOF: mapping file is empty")
165 continue
Akrone1cff7c2025-06-04 18:43:32 +0200166 }
167
168 var list MappingList
169 if err := yaml.Unmarshal(data, &list); err != nil {
Akron7e8da932025-07-01 11:56:46 +0200170 log.Error().Err(err).Str("file", file).Msg("Failed to parse YAML mapping file")
171 continue
Akrone1cff7c2025-06-04 18:43:32 +0200172 }
173
174 if seenIDs[list.ID] {
Akron7e8da932025-07-01 11:56:46 +0200175 log.Error().Err(err).Str("file", file).Str("list-id", list.ID).Msg("Duplicate mapping list ID found")
176 continue
Akrone1cff7c2025-06-04 18:43:32 +0200177 }
178 seenIDs[list.ID] = true
179 allLists = append(allLists, list)
Akron57ee5582025-05-21 15:25:13 +0200180 }
181
Akrone1cff7c2025-06-04 18:43:32 +0200182 // Ensure we have at least some configuration
183 if len(allLists) == 0 {
184 return nil, fmt.Errorf("no mapping lists found: provide either a config file (-c) with lists or mapping files (-m)")
185 }
186
Akron585f50f2025-07-03 13:55:47 +0200187 // Validate all mapping lists (skip duplicate ID check since we already did it)
Akrone1cff7c2025-06-04 18:43:32 +0200188 if err := validateMappingLists(allLists); err != nil {
Akron06d21f02025-06-04 14:36:07 +0200189 return nil, err
190 }
191
Akrone1cff7c2025-06-04 18:43:32 +0200192 // Create final configuration
193 result := &MappingConfig{
Akron2ac2ec02025-06-05 15:26:42 +0200194 SDK: globalConfig.SDK,
Akron43fb1022026-02-20 11:38:49 +0100195 Stylesheet: globalConfig.Stylesheet,
Akron2ac2ec02025-06-05 15:26:42 +0200196 Server: globalConfig.Server,
197 ServiceURL: globalConfig.ServiceURL,
198 Port: globalConfig.Port,
199 LogLevel: globalConfig.LogLevel,
Akrone6767de2026-05-20 10:06:24 +0200200 RateLimit: globalConfig.RateLimit,
Akron2ac2ec02025-06-05 15:26:42 +0200201 Lists: allLists,
Akrone1cff7c2025-06-04 18:43:32 +0200202 }
203
Akronf98ba282026-02-24 11:13:30 +0100204 // Apply environment variable overrides (ENV > config file)
205 ApplyEnvOverrides(result)
206
Akron06d21f02025-06-04 14:36:07 +0200207 // Apply defaults if not specified
Akron2ac2ec02025-06-05 15:26:42 +0200208 ApplyDefaults(result)
Akrone1cff7c2025-06-04 18:43:32 +0200209
210 return result, nil
211}
212
Akron585f50f2025-07-03 13:55:47 +0200213// ApplyDefaults sets default values for configuration fields if they are empty
Akron2ac2ec02025-06-05 15:26:42 +0200214func ApplyDefaults(config *MappingConfig) {
Akron585f50f2025-07-03 13:55:47 +0200215 defaults := map[*string]string{
216 &config.SDK: defaultSDK,
Akron43fb1022026-02-20 11:38:49 +0100217 &config.Stylesheet: defaultStylesheet,
Akron585f50f2025-07-03 13:55:47 +0200218 &config.Server: defaultServer,
219 &config.ServiceURL: defaultServiceURL,
Akron43fb1022026-02-20 11:38:49 +0100220 &config.CookieName: defaultCookieName,
Akron585f50f2025-07-03 13:55:47 +0200221 &config.LogLevel: defaultLogLevel,
Akron06d21f02025-06-04 14:36:07 +0200222 }
Akron585f50f2025-07-03 13:55:47 +0200223
224 for field, defaultValue := range defaults {
225 if *field == "" {
226 *field = defaultValue
227 }
Akron06d21f02025-06-04 14:36:07 +0200228 }
Akron585f50f2025-07-03 13:55:47 +0200229
Akrona8a66ce2025-06-05 10:50:17 +0200230 if config.Port == 0 {
231 config.Port = defaultPort
232 }
Akrone6767de2026-05-20 10:06:24 +0200233 if config.RateLimit == 0 {
234 config.RateLimit = defaultRateLimit
235 }
Akron06d21f02025-06-04 14:36:07 +0200236}
237
Akronf98ba282026-02-24 11:13:30 +0100238// ApplyEnvOverrides overrides configuration fields from environment variables.
239// All environment variables are uppercase and prefixed with KORAL_MAPPER_.
240// Non-empty environment values override any previously loaded config values.
241func ApplyEnvOverrides(config *MappingConfig) {
242 envMappings := map[string]*string{
243 "KORAL_MAPPER_SERVER": &config.Server,
244 "KORAL_MAPPER_SDK": &config.SDK,
245 "KORAL_MAPPER_STYLESHEET": &config.Stylesheet,
246 "KORAL_MAPPER_SERVICE_URL": &config.ServiceURL,
247 "KORAL_MAPPER_COOKIE_NAME": &config.CookieName,
248 "KORAL_MAPPER_LOG_LEVEL": &config.LogLevel,
249 }
250
251 for envKey, field := range envMappings {
252 if val := os.Getenv(envKey); val != "" {
253 *field = val
254 }
255 }
256
257 if val := os.Getenv("KORAL_MAPPER_PORT"); val != "" {
258 if port, err := strconv.Atoi(val); err == nil {
259 config.Port = port
260 }
261 }
Akrone6767de2026-05-20 10:06:24 +0200262
263 if val := os.Getenv("KORAL_MAPPER_RATE_LIMIT"); val != "" {
264 if rl, err := strconv.Atoi(val); err == nil {
265 config.RateLimit = rl
266 }
267 }
Akronf98ba282026-02-24 11:13:30 +0100268}
269
Akron585f50f2025-07-03 13:55:47 +0200270// validateMappingLists validates a slice of mapping lists (without duplicate ID checking)
Akron06d21f02025-06-04 14:36:07 +0200271func validateMappingLists(lists []MappingList) error {
Akron57ee5582025-05-21 15:25:13 +0200272 for i, list := range lists {
273 if list.ID == "" {
Akron06d21f02025-06-04 14:36:07 +0200274 return fmt.Errorf("mapping list at index %d is missing an ID", i)
Akron57ee5582025-05-21 15:25:13 +0200275 }
Akrona5d88142025-05-22 14:42:09 +0200276
Akron57ee5582025-05-21 15:25:13 +0200277 if len(list.Mappings) == 0 {
Akron06d21f02025-06-04 14:36:07 +0200278 return fmt.Errorf("mapping list '%s' has no mapping rules", list.ID)
Akron57ee5582025-05-21 15:25:13 +0200279 }
280
281 // Validate each mapping rule
282 for j, rule := range list.Mappings {
283 if rule == "" {
Akron06d21f02025-06-04 14:36:07 +0200284 return fmt.Errorf("mapping list '%s' rule at index %d is empty", list.ID, j)
Akron57ee5582025-05-21 15:25:13 +0200285 }
286 }
287 }
Akron06d21f02025-06-04 14:36:07 +0200288 return nil
Akron57ee5582025-05-21 15:25:13 +0200289}
290
291// ParseMappings parses all mapping rules in a list and returns a slice of parsed rules
292func (list *MappingList) ParseMappings() ([]*parser.MappingResult, error) {
293 // Create a grammar parser with the list's default foundries and layers
294 grammarParser, err := parser.NewGrammarParser("", "")
295 if err != nil {
296 return nil, fmt.Errorf("failed to create grammar parser: %w", err)
297 }
298
299 results := make([]*parser.MappingResult, len(list.Mappings))
300 for i, rule := range list.Mappings {
Akrona5d88142025-05-22 14:42:09 +0200301 // Check for empty rules first
302 if rule == "" {
303 return nil, fmt.Errorf("empty mapping rule at index %d in list '%s'", i, list.ID)
304 }
305
Akron57ee5582025-05-21 15:25:13 +0200306 // Parse the mapping rule
307 result, err := grammarParser.ParseMapping(string(rule))
308 if err != nil {
309 return nil, fmt.Errorf("failed to parse mapping rule %d in list '%s': %w", i, list.ID, err)
310 }
311
312 // Apply default foundries and layers if not specified in the rule
313 if list.FoundryA != "" {
314 applyDefaultFoundryAndLayer(result.Upper.Wrap, list.FoundryA, list.LayerA)
315 }
316 if list.FoundryB != "" {
317 applyDefaultFoundryAndLayer(result.Lower.Wrap, list.FoundryB, list.LayerB)
318 }
319
320 results[i] = result
321 }
322
323 return results, nil
324}
325
326// applyDefaultFoundryAndLayer recursively applies default foundry and layer to terms that don't have them specified
327func applyDefaultFoundryAndLayer(node ast.Node, defaultFoundry, defaultLayer string) {
328 switch n := node.(type) {
329 case *ast.Term:
Akron585f50f2025-07-03 13:55:47 +0200330 if n.Foundry == "" && defaultFoundry != "" {
Akron57ee5582025-05-21 15:25:13 +0200331 n.Foundry = defaultFoundry
332 }
Akron585f50f2025-07-03 13:55:47 +0200333 if n.Layer == "" && defaultLayer != "" {
Akron57ee5582025-05-21 15:25:13 +0200334 n.Layer = defaultLayer
335 }
336 case *ast.TermGroup:
337 for _, op := range n.Operands {
338 applyDefaultFoundryAndLayer(op, defaultFoundry, defaultLayer)
339 }
340 }
341}