blob: 73e5697c18d146faf7963f1de6a10f49ca20fd7c [file] [log] [blame]
Akron57ee5582025-05-21 15:25:13 +02001package config
2
3import (
4 "fmt"
5 "os"
6
Akron2ef703c2025-07-03 15:57:42 +02007 "github.com/KorAP/Koral-Mapper/ast"
8 "github.com/KorAP/Koral-Mapper/parser"
Akron7e8da932025-07-01 11:56:46 +02009 "github.com/rs/zerolog/log"
Akron57ee5582025-05-21 15:25:13 +020010 "gopkg.in/yaml.v3"
11)
12
// Fallback values used by ApplyDefaults for configuration fields that were
// left unset (empty string, or 0 for the port).
const (
	// defaultServer is the fallback for MappingConfig.Server.
	defaultServer = "https://korap.ids-mannheim.de/"
	// defaultSDK is the fallback for MappingConfig.SDK.
	defaultSDK = "https://korap.ids-mannheim.de/js/korap-plugin-latest.js"
	// defaultStylesheet is the fallback for MappingConfig.Stylesheet.
	defaultStylesheet = "https://korap.ids-mannheim.de/css/kalamar-plugin-latest.css"
	// defaultServiceURL is the fallback for MappingConfig.ServiceURL.
	defaultServiceURL = "https://korap.ids-mannheim.de/plugin/koralmapper"
	// defaultCookieName is the fallback for MappingConfig.CookieName.
	defaultCookieName = "km-config"
	// defaultPort is the fallback for MappingConfig.Port.
	defaultPort = 5725
	// defaultLogLevel is the fallback for MappingConfig.LogLevel.
	defaultLogLevel = "warn"
)
22
// MappingRule represents a single mapping rule in the configuration.
// It holds the unparsed rule text; the MappingList parse methods convert it
// to a plain string and hand it to the corresponding parser.
type MappingRule string
25
// MappingList represents a list of mapping rules with metadata.
type MappingList struct {
	// ID identifies the list. It must be non-empty (see validateMappingLists)
	// and is used by LoadFromSources to detect duplicates across sources.
	ID string `yaml:"id"`
	// Type selects how the rules are interpreted; see IsCorpus.
	Type string `yaml:"type,omitempty"` // "annotation" (default) or "corpus"
	// Description is free-form text read from the "desc" key.
	Description string `yaml:"desc,omitempty"`
	// FoundryA and LayerA are defaults that ParseMappings applies to terms on
	// the upper side of each rule when FoundryA is non-empty.
	FoundryA string `yaml:"foundryA,omitempty"`
	LayerA   string `yaml:"layerA,omitempty"`
	// FoundryB and LayerB are defaults that ParseMappings applies to terms on
	// the lower side of each rule when FoundryB is non-empty.
	FoundryB string `yaml:"foundryB,omitempty"`
	LayerB   string `yaml:"layerB,omitempty"`
	// Mappings holds the unparsed rules. validateMappingLists rejects lists
	// with no rules and lists containing an empty rule.
	Mappings []MappingRule `yaml:"mappings"`
}
37
Akron2f93c582026-02-19 16:49:13 +010038// IsCorpus returns true if the mapping list type is "corpus".
39func (list *MappingList) IsCorpus() bool {
40 return list.Type == "corpus"
41}
42
43// ParseCorpusMappings parses all mapping rules as corpus rules.
44func (list *MappingList) ParseCorpusMappings() ([]*parser.CorpusMappingResult, error) {
45 corpusParser := parser.NewCorpusParser()
46 results := make([]*parser.CorpusMappingResult, len(list.Mappings))
47 for i, rule := range list.Mappings {
48 if rule == "" {
49 return nil, fmt.Errorf("empty corpus mapping rule at index %d in list '%s'", i, list.ID)
50 }
51 result, err := corpusParser.ParseMapping(string(rule))
52 if err != nil {
53 return nil, fmt.Errorf("failed to parse corpus mapping rule %d in list '%s': %w", i, list.ID, err)
54 }
55 results[i] = result
56 }
57 return results, nil
58}
59
// MappingConfig represents the root configuration containing multiple mapping lists.
//
// All scalar fields are optional in the YAML input; ApplyDefaults fills in the
// package defaults for any that are left at their zero value.
type MappingConfig struct {
	// SDK falls back to defaultSDK when empty.
	SDK string `yaml:"sdk,omitempty"`
	// Stylesheet falls back to defaultStylesheet when empty.
	Stylesheet string `yaml:"stylesheet,omitempty"`
	// Server falls back to defaultServer when empty.
	Server string `yaml:"server,omitempty"`
	// ServiceURL falls back to defaultServiceURL when empty.
	ServiceURL string `yaml:"serviceURL,omitempty"`
	// CookieName falls back to defaultCookieName when empty.
	CookieName string `yaml:"cookieName,omitempty"`
	// Port falls back to defaultPort when 0.
	Port int `yaml:"port,omitempty"`
	// LogLevel falls back to defaultLogLevel when empty.
	LogLevel string `yaml:"loglevel,omitempty"`
	// Lists holds the mapping lists gathered from all configuration sources.
	Lists []MappingList `yaml:"lists,omitempty"`
}
71
Akrone1cff7c2025-06-04 18:43:32 +020072// LoadFromSources loads configuration from multiple sources and merges them:
73// - A main configuration file (optional) containing global settings and lists
74// - Individual mapping files (optional) containing single mapping lists each
75// At least one source must be provided
76func LoadFromSources(configFile string, mappingFiles []string) (*MappingConfig, error) {
77 var allLists []MappingList
78 var globalConfig MappingConfig
Akron57ee5582025-05-21 15:25:13 +020079
Akrone1cff7c2025-06-04 18:43:32 +020080 // Track seen IDs across all sources to detect duplicates
81 seenIDs := make(map[string]bool)
Akrona5d88142025-05-22 14:42:09 +020082
Akrone1cff7c2025-06-04 18:43:32 +020083 // Load main configuration file if provided
84 if configFile != "" {
85 data, err := os.ReadFile(configFile)
86 if err != nil {
87 return nil, fmt.Errorf("failed to read config file '%s': %w", configFile, err)
Akron06d21f02025-06-04 14:36:07 +020088 }
Akrone1cff7c2025-06-04 18:43:32 +020089
90 if len(data) == 0 {
91 return nil, fmt.Errorf("EOF: config file '%s' is empty", configFile)
92 }
93
94 // Try to unmarshal as new format first (object with optional sdk/server and lists)
Akron813780f2025-06-05 15:44:28 +020095 if err := yaml.Unmarshal(data, &globalConfig); err == nil {
96 // Successfully parsed as new format - accept it regardless of whether it has lists
Akrone1cff7c2025-06-04 18:43:32 +020097 for _, list := range globalConfig.Lists {
98 if seenIDs[list.ID] {
99 return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
100 }
101 seenIDs[list.ID] = true
102 }
103 allLists = append(allLists, globalConfig.Lists...)
104 } else {
105 // Fall back to old format (direct list)
106 var lists []MappingList
107 if err := yaml.Unmarshal(data, &lists); err != nil {
108 return nil, fmt.Errorf("failed to parse YAML config file '%s': %w", configFile, err)
109 }
110
111 for _, list := range lists {
112 if seenIDs[list.ID] {
113 return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
114 }
115 seenIDs[list.ID] = true
116 }
117 allLists = append(allLists, lists...)
118 // Clear the lists from globalConfig since we got them from the old format
119 globalConfig.Lists = nil
120 }
Akron06d21f02025-06-04 14:36:07 +0200121 }
122
Akrone1cff7c2025-06-04 18:43:32 +0200123 // Load individual mapping files
124 for _, file := range mappingFiles {
125 data, err := os.ReadFile(file)
126 if err != nil {
Akron7e8da932025-07-01 11:56:46 +0200127 log.Error().Err(err).Str("file", file).Msg("Failed to read mapping file")
128 continue
Akrone1cff7c2025-06-04 18:43:32 +0200129 }
130
131 if len(data) == 0 {
Akron7e8da932025-07-01 11:56:46 +0200132 log.Error().Err(err).Str("file", file).Msg("EOF: mapping file is empty")
133 continue
Akrone1cff7c2025-06-04 18:43:32 +0200134 }
135
136 var list MappingList
137 if err := yaml.Unmarshal(data, &list); err != nil {
Akron7e8da932025-07-01 11:56:46 +0200138 log.Error().Err(err).Str("file", file).Msg("Failed to parse YAML mapping file")
139 continue
Akrone1cff7c2025-06-04 18:43:32 +0200140 }
141
142 if seenIDs[list.ID] {
Akron7e8da932025-07-01 11:56:46 +0200143 log.Error().Err(err).Str("file", file).Str("list-id", list.ID).Msg("Duplicate mapping list ID found")
144 continue
Akrone1cff7c2025-06-04 18:43:32 +0200145 }
146 seenIDs[list.ID] = true
147 allLists = append(allLists, list)
Akron57ee5582025-05-21 15:25:13 +0200148 }
149
Akrone1cff7c2025-06-04 18:43:32 +0200150 // Ensure we have at least some configuration
151 if len(allLists) == 0 {
152 return nil, fmt.Errorf("no mapping lists found: provide either a config file (-c) with lists or mapping files (-m)")
153 }
154
Akron585f50f2025-07-03 13:55:47 +0200155 // Validate all mapping lists (skip duplicate ID check since we already did it)
Akrone1cff7c2025-06-04 18:43:32 +0200156 if err := validateMappingLists(allLists); err != nil {
Akron06d21f02025-06-04 14:36:07 +0200157 return nil, err
158 }
159
Akrone1cff7c2025-06-04 18:43:32 +0200160 // Create final configuration
161 result := &MappingConfig{
Akron2ac2ec02025-06-05 15:26:42 +0200162 SDK: globalConfig.SDK,
Akron43fb1022026-02-20 11:38:49 +0100163 Stylesheet: globalConfig.Stylesheet,
Akron2ac2ec02025-06-05 15:26:42 +0200164 Server: globalConfig.Server,
165 ServiceURL: globalConfig.ServiceURL,
166 Port: globalConfig.Port,
167 LogLevel: globalConfig.LogLevel,
168 Lists: allLists,
Akrone1cff7c2025-06-04 18:43:32 +0200169 }
170
Akron06d21f02025-06-04 14:36:07 +0200171 // Apply defaults if not specified
Akron2ac2ec02025-06-05 15:26:42 +0200172 ApplyDefaults(result)
Akrone1cff7c2025-06-04 18:43:32 +0200173
174 return result, nil
175}
176
Akron585f50f2025-07-03 13:55:47 +0200177// ApplyDefaults sets default values for configuration fields if they are empty
Akron2ac2ec02025-06-05 15:26:42 +0200178func ApplyDefaults(config *MappingConfig) {
Akron585f50f2025-07-03 13:55:47 +0200179 defaults := map[*string]string{
180 &config.SDK: defaultSDK,
Akron43fb1022026-02-20 11:38:49 +0100181 &config.Stylesheet: defaultStylesheet,
Akron585f50f2025-07-03 13:55:47 +0200182 &config.Server: defaultServer,
183 &config.ServiceURL: defaultServiceURL,
Akron43fb1022026-02-20 11:38:49 +0100184 &config.CookieName: defaultCookieName,
Akron585f50f2025-07-03 13:55:47 +0200185 &config.LogLevel: defaultLogLevel,
Akron06d21f02025-06-04 14:36:07 +0200186 }
Akron585f50f2025-07-03 13:55:47 +0200187
188 for field, defaultValue := range defaults {
189 if *field == "" {
190 *field = defaultValue
191 }
Akron06d21f02025-06-04 14:36:07 +0200192 }
Akron585f50f2025-07-03 13:55:47 +0200193
Akrona8a66ce2025-06-05 10:50:17 +0200194 if config.Port == 0 {
195 config.Port = defaultPort
196 }
Akron06d21f02025-06-04 14:36:07 +0200197}
198
Akron585f50f2025-07-03 13:55:47 +0200199// validateMappingLists validates a slice of mapping lists (without duplicate ID checking)
Akron06d21f02025-06-04 14:36:07 +0200200func validateMappingLists(lists []MappingList) error {
Akron57ee5582025-05-21 15:25:13 +0200201 for i, list := range lists {
202 if list.ID == "" {
Akron06d21f02025-06-04 14:36:07 +0200203 return fmt.Errorf("mapping list at index %d is missing an ID", i)
Akron57ee5582025-05-21 15:25:13 +0200204 }
Akrona5d88142025-05-22 14:42:09 +0200205
Akron57ee5582025-05-21 15:25:13 +0200206 if len(list.Mappings) == 0 {
Akron06d21f02025-06-04 14:36:07 +0200207 return fmt.Errorf("mapping list '%s' has no mapping rules", list.ID)
Akron57ee5582025-05-21 15:25:13 +0200208 }
209
210 // Validate each mapping rule
211 for j, rule := range list.Mappings {
212 if rule == "" {
Akron06d21f02025-06-04 14:36:07 +0200213 return fmt.Errorf("mapping list '%s' rule at index %d is empty", list.ID, j)
Akron57ee5582025-05-21 15:25:13 +0200214 }
215 }
216 }
Akron06d21f02025-06-04 14:36:07 +0200217 return nil
Akron57ee5582025-05-21 15:25:13 +0200218}
219
220// ParseMappings parses all mapping rules in a list and returns a slice of parsed rules
221func (list *MappingList) ParseMappings() ([]*parser.MappingResult, error) {
222 // Create a grammar parser with the list's default foundries and layers
223 grammarParser, err := parser.NewGrammarParser("", "")
224 if err != nil {
225 return nil, fmt.Errorf("failed to create grammar parser: %w", err)
226 }
227
228 results := make([]*parser.MappingResult, len(list.Mappings))
229 for i, rule := range list.Mappings {
Akrona5d88142025-05-22 14:42:09 +0200230 // Check for empty rules first
231 if rule == "" {
232 return nil, fmt.Errorf("empty mapping rule at index %d in list '%s'", i, list.ID)
233 }
234
Akron57ee5582025-05-21 15:25:13 +0200235 // Parse the mapping rule
236 result, err := grammarParser.ParseMapping(string(rule))
237 if err != nil {
238 return nil, fmt.Errorf("failed to parse mapping rule %d in list '%s': %w", i, list.ID, err)
239 }
240
241 // Apply default foundries and layers if not specified in the rule
242 if list.FoundryA != "" {
243 applyDefaultFoundryAndLayer(result.Upper.Wrap, list.FoundryA, list.LayerA)
244 }
245 if list.FoundryB != "" {
246 applyDefaultFoundryAndLayer(result.Lower.Wrap, list.FoundryB, list.LayerB)
247 }
248
249 results[i] = result
250 }
251
252 return results, nil
253}
254
255// applyDefaultFoundryAndLayer recursively applies default foundry and layer to terms that don't have them specified
256func applyDefaultFoundryAndLayer(node ast.Node, defaultFoundry, defaultLayer string) {
257 switch n := node.(type) {
258 case *ast.Term:
Akron585f50f2025-07-03 13:55:47 +0200259 if n.Foundry == "" && defaultFoundry != "" {
Akron57ee5582025-05-21 15:25:13 +0200260 n.Foundry = defaultFoundry
261 }
Akron585f50f2025-07-03 13:55:47 +0200262 if n.Layer == "" && defaultLayer != "" {
Akron57ee5582025-05-21 15:25:13 +0200263 n.Layer = defaultLayer
264 }
265 case *ast.TermGroup:
266 for _, op := range n.Operands {
267 applyDefaultFoundryAndLayer(op, defaultFoundry, defaultLayer)
268 }
269 }
270}