blob: a4f9f77bb649e0efa18cda7690666316e4763f92 [file] [log] [blame]
Akron57ee5582025-05-21 15:25:13 +02001package config
2
3import (
4 "fmt"
5 "os"
6
Akron2ef703c2025-07-03 15:57:42 +02007 "github.com/KorAP/Koral-Mapper/ast"
8 "github.com/KorAP/Koral-Mapper/parser"
Akron7e8da932025-07-01 11:56:46 +02009 "github.com/rs/zerolog/log"
Akron57ee5582025-05-21 15:25:13 +020010 "gopkg.in/yaml.v3"
11)
12
// Built-in fallback values. ApplyDefaults assigns them to configuration
// settings that were left empty (or zero, in the case of the port).
const (
	// Remote endpoints and assets.
	defaultServer     = "https://korap.ids-mannheim.de/"
	defaultServiceURL = "https://korap.ids-mannheim.de/plugin/koralmapper"
	defaultSDK        = "https://korap.ids-mannheim.de/js/korap-plugin-latest.js"
	defaultStylesheet = "https://korap.ids-mannheim.de/css/kalamar-plugin-latest.css"

	// Settings of the service itself.
	defaultPort       = 5725
	defaultLogLevel   = "warn"
	defaultCookieName = "km-config"
)
22
// MappingRule represents a single mapping rule in the configuration.
// It holds the raw, unparsed rule text; MappingList.ParseMappings and
// MappingList.ParseCorpusMappings convert it to a string and hand it to
// the respective parser.
type MappingRule string
25
// MappingList represents a list of mapping rules with metadata.
//
// ID must be non-empty (checked by validateMappingLists) and unique across
// all loaded sources (checked by LoadFromSources). Mappings must contain at
// least one non-empty rule.
//
// FoundryA/LayerA and FoundryB/LayerB are defaults that ParseMappings
// applies to the upper and lower side of each parsed rule. FieldA and
// FieldB are default keys that ParseCorpusMappings applies to the upper and
// lower side of each parsed corpus rule.
type MappingList struct {
	ID          string        `yaml:"id"`
	Type        string        `yaml:"type,omitempty"` // "annotation" (default) or "corpus"
	Description string        `yaml:"desc,omitempty"`
	FoundryA    string        `yaml:"foundryA,omitempty"`
	LayerA      string        `yaml:"layerA,omitempty"`
	FoundryB    string        `yaml:"foundryB,omitempty"`
	LayerB      string        `yaml:"layerB,omitempty"`
	FieldA      string        `yaml:"fieldA,omitempty"`
	FieldB      string        `yaml:"fieldB,omitempty"`
	Mappings    []MappingRule `yaml:"mappings"`
}
39
Akron2f93c582026-02-19 16:49:13 +010040// IsCorpus returns true if the mapping list type is "corpus".
41func (list *MappingList) IsCorpus() bool {
42 return list.Type == "corpus"
43}
44
45// ParseCorpusMappings parses all mapping rules as corpus rules.
Akrona67de8f2026-02-23 17:54:26 +010046// Bare values (without key=) are always allowed and receive the default
47// field name from the mapping list header (FieldA/FieldB) when set.
Akron2f93c582026-02-19 16:49:13 +010048func (list *MappingList) ParseCorpusMappings() ([]*parser.CorpusMappingResult, error) {
49 corpusParser := parser.NewCorpusParser()
Akrona67de8f2026-02-23 17:54:26 +010050 corpusParser.AllowBareValues = true
51
Akron2f93c582026-02-19 16:49:13 +010052 results := make([]*parser.CorpusMappingResult, len(list.Mappings))
53 for i, rule := range list.Mappings {
54 if rule == "" {
55 return nil, fmt.Errorf("empty corpus mapping rule at index %d in list '%s'", i, list.ID)
56 }
57 result, err := corpusParser.ParseMapping(string(rule))
58 if err != nil {
59 return nil, fmt.Errorf("failed to parse corpus mapping rule %d in list '%s': %w", i, list.ID, err)
60 }
Akrona67de8f2026-02-23 17:54:26 +010061
62 if list.FieldA != "" {
63 applyDefaultCorpusKey(result.Upper, list.FieldA)
64 }
65 if list.FieldB != "" {
66 applyDefaultCorpusKey(result.Lower, list.FieldB)
67 }
68
Akron2f93c582026-02-19 16:49:13 +010069 results[i] = result
70 }
71 return results, nil
72}
73
Akrona67de8f2026-02-23 17:54:26 +010074// applyDefaultCorpusKey recursively fills in empty keys on CorpusField nodes.
75func applyDefaultCorpusKey(node parser.CorpusNode, defaultKey string) {
76 switch n := node.(type) {
77 case *parser.CorpusField:
78 if n.Key == "" {
79 n.Key = defaultKey
80 }
81 case *parser.CorpusGroup:
82 for _, op := range n.Operands {
83 applyDefaultCorpusKey(op, defaultKey)
84 }
85 }
86}
87
// MappingConfig represents the root configuration containing global
// settings and multiple mapping lists.
//
// All settings are optional in the YAML source. ApplyDefaults replaces
// empty string settings and a zero Port with the package's built-in
// defaults.
type MappingConfig struct {
	SDK        string        `yaml:"sdk,omitempty"`
	Stylesheet string        `yaml:"stylesheet,omitempty"`
	Server     string        `yaml:"server,omitempty"`
	ServiceURL string        `yaml:"serviceURL,omitempty"`
	CookieName string        `yaml:"cookieName,omitempty"`
	Port       int           `yaml:"port,omitempty"`
	LogLevel   string        `yaml:"loglevel,omitempty"`
	Lists      []MappingList `yaml:"lists,omitempty"`
}
99
Akrone1cff7c2025-06-04 18:43:32 +0200100// LoadFromSources loads configuration from multiple sources and merges them:
101// - A main configuration file (optional) containing global settings and lists
102// - Individual mapping files (optional) containing single mapping lists each
103// At least one source must be provided
104func LoadFromSources(configFile string, mappingFiles []string) (*MappingConfig, error) {
105 var allLists []MappingList
106 var globalConfig MappingConfig
Akron57ee5582025-05-21 15:25:13 +0200107
Akrone1cff7c2025-06-04 18:43:32 +0200108 // Track seen IDs across all sources to detect duplicates
109 seenIDs := make(map[string]bool)
Akrona5d88142025-05-22 14:42:09 +0200110
Akrone1cff7c2025-06-04 18:43:32 +0200111 // Load main configuration file if provided
112 if configFile != "" {
113 data, err := os.ReadFile(configFile)
114 if err != nil {
115 return nil, fmt.Errorf("failed to read config file '%s': %w", configFile, err)
Akron06d21f02025-06-04 14:36:07 +0200116 }
Akrone1cff7c2025-06-04 18:43:32 +0200117
118 if len(data) == 0 {
119 return nil, fmt.Errorf("EOF: config file '%s' is empty", configFile)
120 }
121
122 // Try to unmarshal as new format first (object with optional sdk/server and lists)
Akron813780f2025-06-05 15:44:28 +0200123 if err := yaml.Unmarshal(data, &globalConfig); err == nil {
124 // Successfully parsed as new format - accept it regardless of whether it has lists
Akrone1cff7c2025-06-04 18:43:32 +0200125 for _, list := range globalConfig.Lists {
126 if seenIDs[list.ID] {
127 return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
128 }
129 seenIDs[list.ID] = true
130 }
131 allLists = append(allLists, globalConfig.Lists...)
132 } else {
133 // Fall back to old format (direct list)
134 var lists []MappingList
135 if err := yaml.Unmarshal(data, &lists); err != nil {
136 return nil, fmt.Errorf("failed to parse YAML config file '%s': %w", configFile, err)
137 }
138
139 for _, list := range lists {
140 if seenIDs[list.ID] {
141 return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
142 }
143 seenIDs[list.ID] = true
144 }
145 allLists = append(allLists, lists...)
146 // Clear the lists from globalConfig since we got them from the old format
147 globalConfig.Lists = nil
148 }
Akron06d21f02025-06-04 14:36:07 +0200149 }
150
Akrone1cff7c2025-06-04 18:43:32 +0200151 // Load individual mapping files
152 for _, file := range mappingFiles {
153 data, err := os.ReadFile(file)
154 if err != nil {
Akron7e8da932025-07-01 11:56:46 +0200155 log.Error().Err(err).Str("file", file).Msg("Failed to read mapping file")
156 continue
Akrone1cff7c2025-06-04 18:43:32 +0200157 }
158
159 if len(data) == 0 {
Akron7e8da932025-07-01 11:56:46 +0200160 log.Error().Err(err).Str("file", file).Msg("EOF: mapping file is empty")
161 continue
Akrone1cff7c2025-06-04 18:43:32 +0200162 }
163
164 var list MappingList
165 if err := yaml.Unmarshal(data, &list); err != nil {
Akron7e8da932025-07-01 11:56:46 +0200166 log.Error().Err(err).Str("file", file).Msg("Failed to parse YAML mapping file")
167 continue
Akrone1cff7c2025-06-04 18:43:32 +0200168 }
169
170 if seenIDs[list.ID] {
Akron7e8da932025-07-01 11:56:46 +0200171 log.Error().Err(err).Str("file", file).Str("list-id", list.ID).Msg("Duplicate mapping list ID found")
172 continue
Akrone1cff7c2025-06-04 18:43:32 +0200173 }
174 seenIDs[list.ID] = true
175 allLists = append(allLists, list)
Akron57ee5582025-05-21 15:25:13 +0200176 }
177
Akrone1cff7c2025-06-04 18:43:32 +0200178 // Ensure we have at least some configuration
179 if len(allLists) == 0 {
180 return nil, fmt.Errorf("no mapping lists found: provide either a config file (-c) with lists or mapping files (-m)")
181 }
182
Akron585f50f2025-07-03 13:55:47 +0200183 // Validate all mapping lists (skip duplicate ID check since we already did it)
Akrone1cff7c2025-06-04 18:43:32 +0200184 if err := validateMappingLists(allLists); err != nil {
Akron06d21f02025-06-04 14:36:07 +0200185 return nil, err
186 }
187
Akrone1cff7c2025-06-04 18:43:32 +0200188 // Create final configuration
189 result := &MappingConfig{
Akron2ac2ec02025-06-05 15:26:42 +0200190 SDK: globalConfig.SDK,
Akron43fb1022026-02-20 11:38:49 +0100191 Stylesheet: globalConfig.Stylesheet,
Akron2ac2ec02025-06-05 15:26:42 +0200192 Server: globalConfig.Server,
193 ServiceURL: globalConfig.ServiceURL,
194 Port: globalConfig.Port,
195 LogLevel: globalConfig.LogLevel,
196 Lists: allLists,
Akrone1cff7c2025-06-04 18:43:32 +0200197 }
198
Akron06d21f02025-06-04 14:36:07 +0200199 // Apply defaults if not specified
Akron2ac2ec02025-06-05 15:26:42 +0200200 ApplyDefaults(result)
Akrone1cff7c2025-06-04 18:43:32 +0200201
202 return result, nil
203}
204
Akron585f50f2025-07-03 13:55:47 +0200205// ApplyDefaults sets default values for configuration fields if they are empty
Akron2ac2ec02025-06-05 15:26:42 +0200206func ApplyDefaults(config *MappingConfig) {
Akron585f50f2025-07-03 13:55:47 +0200207 defaults := map[*string]string{
208 &config.SDK: defaultSDK,
Akron43fb1022026-02-20 11:38:49 +0100209 &config.Stylesheet: defaultStylesheet,
Akron585f50f2025-07-03 13:55:47 +0200210 &config.Server: defaultServer,
211 &config.ServiceURL: defaultServiceURL,
Akron43fb1022026-02-20 11:38:49 +0100212 &config.CookieName: defaultCookieName,
Akron585f50f2025-07-03 13:55:47 +0200213 &config.LogLevel: defaultLogLevel,
Akron06d21f02025-06-04 14:36:07 +0200214 }
Akron585f50f2025-07-03 13:55:47 +0200215
216 for field, defaultValue := range defaults {
217 if *field == "" {
218 *field = defaultValue
219 }
Akron06d21f02025-06-04 14:36:07 +0200220 }
Akron585f50f2025-07-03 13:55:47 +0200221
Akrona8a66ce2025-06-05 10:50:17 +0200222 if config.Port == 0 {
223 config.Port = defaultPort
224 }
Akron06d21f02025-06-04 14:36:07 +0200225}
226
Akron585f50f2025-07-03 13:55:47 +0200227// validateMappingLists validates a slice of mapping lists (without duplicate ID checking)
Akron06d21f02025-06-04 14:36:07 +0200228func validateMappingLists(lists []MappingList) error {
Akron57ee5582025-05-21 15:25:13 +0200229 for i, list := range lists {
230 if list.ID == "" {
Akron06d21f02025-06-04 14:36:07 +0200231 return fmt.Errorf("mapping list at index %d is missing an ID", i)
Akron57ee5582025-05-21 15:25:13 +0200232 }
Akrona5d88142025-05-22 14:42:09 +0200233
Akron57ee5582025-05-21 15:25:13 +0200234 if len(list.Mappings) == 0 {
Akron06d21f02025-06-04 14:36:07 +0200235 return fmt.Errorf("mapping list '%s' has no mapping rules", list.ID)
Akron57ee5582025-05-21 15:25:13 +0200236 }
237
238 // Validate each mapping rule
239 for j, rule := range list.Mappings {
240 if rule == "" {
Akron06d21f02025-06-04 14:36:07 +0200241 return fmt.Errorf("mapping list '%s' rule at index %d is empty", list.ID, j)
Akron57ee5582025-05-21 15:25:13 +0200242 }
243 }
244 }
Akron06d21f02025-06-04 14:36:07 +0200245 return nil
Akron57ee5582025-05-21 15:25:13 +0200246}
247
248// ParseMappings parses all mapping rules in a list and returns a slice of parsed rules
249func (list *MappingList) ParseMappings() ([]*parser.MappingResult, error) {
250 // Create a grammar parser with the list's default foundries and layers
251 grammarParser, err := parser.NewGrammarParser("", "")
252 if err != nil {
253 return nil, fmt.Errorf("failed to create grammar parser: %w", err)
254 }
255
256 results := make([]*parser.MappingResult, len(list.Mappings))
257 for i, rule := range list.Mappings {
Akrona5d88142025-05-22 14:42:09 +0200258 // Check for empty rules first
259 if rule == "" {
260 return nil, fmt.Errorf("empty mapping rule at index %d in list '%s'", i, list.ID)
261 }
262
Akron57ee5582025-05-21 15:25:13 +0200263 // Parse the mapping rule
264 result, err := grammarParser.ParseMapping(string(rule))
265 if err != nil {
266 return nil, fmt.Errorf("failed to parse mapping rule %d in list '%s': %w", i, list.ID, err)
267 }
268
269 // Apply default foundries and layers if not specified in the rule
270 if list.FoundryA != "" {
271 applyDefaultFoundryAndLayer(result.Upper.Wrap, list.FoundryA, list.LayerA)
272 }
273 if list.FoundryB != "" {
274 applyDefaultFoundryAndLayer(result.Lower.Wrap, list.FoundryB, list.LayerB)
275 }
276
277 results[i] = result
278 }
279
280 return results, nil
281}
282
283// applyDefaultFoundryAndLayer recursively applies default foundry and layer to terms that don't have them specified
284func applyDefaultFoundryAndLayer(node ast.Node, defaultFoundry, defaultLayer string) {
285 switch n := node.(type) {
286 case *ast.Term:
Akron585f50f2025-07-03 13:55:47 +0200287 if n.Foundry == "" && defaultFoundry != "" {
Akron57ee5582025-05-21 15:25:13 +0200288 n.Foundry = defaultFoundry
289 }
Akron585f50f2025-07-03 13:55:47 +0200290 if n.Layer == "" && defaultLayer != "" {
Akron57ee5582025-05-21 15:25:13 +0200291 n.Layer = defaultLayer
292 }
293 case *ast.TermGroup:
294 for _, op := range n.Operands {
295 applyDefaultFoundryAndLayer(op, defaultFoundry, defaultLayer)
296 }
297 }
298}