blob: cdc1e0f524621c616db0b027bd042ecfe9c49bb4 [file] [log] [blame]
Akron57ee5582025-05-21 15:25:13 +02001package config
2
3import (
4 "fmt"
5 "os"
6
Akron2ef703c2025-07-03 15:57:42 +02007 "github.com/KorAP/Koral-Mapper/ast"
8 "github.com/KorAP/Koral-Mapper/parser"
Akron7e8da932025-07-01 11:56:46 +02009 "github.com/rs/zerolog/log"
Akron57ee5582025-05-21 15:25:13 +020010 "gopkg.in/yaml.v3"
11)
12
// Fallback values that ApplyDefaults writes into a MappingConfig for
// settings that were left at their zero value.
const (
	// defaultServer is the fallback for MappingConfig.Server.
	defaultServer = "https://korap.ids-mannheim.de/"

	// defaultSDK is the fallback for MappingConfig.SDK.
	defaultSDK = "https://korap.ids-mannheim.de/js/korap-plugin-latest.js"

	// defaultServiceURL is the fallback for MappingConfig.ServiceURL.
	defaultServiceURL = "https://korap.ids-mannheim.de/plugin/koralmapper"

	// defaultPort is the fallback for MappingConfig.Port.
	defaultPort = 5725

	// defaultLogLevel is the fallback for MappingConfig.LogLevel.
	defaultLogLevel = "warn"
)
20
// MappingRule is the textual form of one mapping rule as it appears in a
// configuration file. It is kept as a plain string until one of the
// MappingList parse methods hands it to a parser.
type MappingRule string
23
24// MappingList represents a list of mapping rules with metadata
25type MappingList struct {
Akrondab27112025-06-05 13:52:43 +020026 ID string `yaml:"id"`
Akron2f93c582026-02-19 16:49:13 +010027 Type string `yaml:"type,omitempty"` // "annotation" (default) or "corpus"
Akrondab27112025-06-05 13:52:43 +020028 Description string `yaml:"desc,omitempty"`
29 FoundryA string `yaml:"foundryA,omitempty"`
30 LayerA string `yaml:"layerA,omitempty"`
31 FoundryB string `yaml:"foundryB,omitempty"`
32 LayerB string `yaml:"layerB,omitempty"`
33 Mappings []MappingRule `yaml:"mappings"`
Akron57ee5582025-05-21 15:25:13 +020034}
35
Akron2f93c582026-02-19 16:49:13 +010036// IsCorpus returns true if the mapping list type is "corpus".
37func (list *MappingList) IsCorpus() bool {
38 return list.Type == "corpus"
39}
40
41// ParseCorpusMappings parses all mapping rules as corpus rules.
42func (list *MappingList) ParseCorpusMappings() ([]*parser.CorpusMappingResult, error) {
43 corpusParser := parser.NewCorpusParser()
44 results := make([]*parser.CorpusMappingResult, len(list.Mappings))
45 for i, rule := range list.Mappings {
46 if rule == "" {
47 return nil, fmt.Errorf("empty corpus mapping rule at index %d in list '%s'", i, list.ID)
48 }
49 result, err := corpusParser.ParseMapping(string(rule))
50 if err != nil {
51 return nil, fmt.Errorf("failed to parse corpus mapping rule %d in list '%s': %w", i, list.ID, err)
52 }
53 results[i] = result
54 }
55 return results, nil
56}
57
Akron06d21f02025-06-04 14:36:07 +020058// MappingConfig represents the root configuration containing multiple mapping lists
59type MappingConfig struct {
Akron2ac2ec02025-06-05 15:26:42 +020060 SDK string `yaml:"sdk,omitempty"`
61 Server string `yaml:"server,omitempty"`
62 ServiceURL string `yaml:"serviceURL,omitempty"`
63 Port int `yaml:"port,omitempty"`
64 LogLevel string `yaml:"loglevel,omitempty"`
65 Lists []MappingList `yaml:"lists,omitempty"`
Akron57ee5582025-05-21 15:25:13 +020066}
67
Akrone1cff7c2025-06-04 18:43:32 +020068// LoadFromSources loads configuration from multiple sources and merges them:
69// - A main configuration file (optional) containing global settings and lists
70// - Individual mapping files (optional) containing single mapping lists each
71// At least one source must be provided
72func LoadFromSources(configFile string, mappingFiles []string) (*MappingConfig, error) {
73 var allLists []MappingList
74 var globalConfig MappingConfig
Akron57ee5582025-05-21 15:25:13 +020075
Akrone1cff7c2025-06-04 18:43:32 +020076 // Track seen IDs across all sources to detect duplicates
77 seenIDs := make(map[string]bool)
Akrona5d88142025-05-22 14:42:09 +020078
Akrone1cff7c2025-06-04 18:43:32 +020079 // Load main configuration file if provided
80 if configFile != "" {
81 data, err := os.ReadFile(configFile)
82 if err != nil {
83 return nil, fmt.Errorf("failed to read config file '%s': %w", configFile, err)
Akron06d21f02025-06-04 14:36:07 +020084 }
Akrone1cff7c2025-06-04 18:43:32 +020085
86 if len(data) == 0 {
87 return nil, fmt.Errorf("EOF: config file '%s' is empty", configFile)
88 }
89
90 // Try to unmarshal as new format first (object with optional sdk/server and lists)
Akron813780f2025-06-05 15:44:28 +020091 if err := yaml.Unmarshal(data, &globalConfig); err == nil {
92 // Successfully parsed as new format - accept it regardless of whether it has lists
Akrone1cff7c2025-06-04 18:43:32 +020093 for _, list := range globalConfig.Lists {
94 if seenIDs[list.ID] {
95 return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
96 }
97 seenIDs[list.ID] = true
98 }
99 allLists = append(allLists, globalConfig.Lists...)
100 } else {
101 // Fall back to old format (direct list)
102 var lists []MappingList
103 if err := yaml.Unmarshal(data, &lists); err != nil {
104 return nil, fmt.Errorf("failed to parse YAML config file '%s': %w", configFile, err)
105 }
106
107 for _, list := range lists {
108 if seenIDs[list.ID] {
109 return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
110 }
111 seenIDs[list.ID] = true
112 }
113 allLists = append(allLists, lists...)
114 // Clear the lists from globalConfig since we got them from the old format
115 globalConfig.Lists = nil
116 }
Akron06d21f02025-06-04 14:36:07 +0200117 }
118
Akrone1cff7c2025-06-04 18:43:32 +0200119 // Load individual mapping files
120 for _, file := range mappingFiles {
121 data, err := os.ReadFile(file)
122 if err != nil {
Akron7e8da932025-07-01 11:56:46 +0200123 log.Error().Err(err).Str("file", file).Msg("Failed to read mapping file")
124 continue
Akrone1cff7c2025-06-04 18:43:32 +0200125 }
126
127 if len(data) == 0 {
Akron7e8da932025-07-01 11:56:46 +0200128 log.Error().Err(err).Str("file", file).Msg("EOF: mapping file is empty")
129 continue
Akrone1cff7c2025-06-04 18:43:32 +0200130 }
131
132 var list MappingList
133 if err := yaml.Unmarshal(data, &list); err != nil {
Akron7e8da932025-07-01 11:56:46 +0200134 log.Error().Err(err).Str("file", file).Msg("Failed to parse YAML mapping file")
135 continue
Akrone1cff7c2025-06-04 18:43:32 +0200136 }
137
138 if seenIDs[list.ID] {
Akron7e8da932025-07-01 11:56:46 +0200139 log.Error().Err(err).Str("file", file).Str("list-id", list.ID).Msg("Duplicate mapping list ID found")
140 continue
Akrone1cff7c2025-06-04 18:43:32 +0200141 }
142 seenIDs[list.ID] = true
143 allLists = append(allLists, list)
Akron57ee5582025-05-21 15:25:13 +0200144 }
145
Akrone1cff7c2025-06-04 18:43:32 +0200146 // Ensure we have at least some configuration
147 if len(allLists) == 0 {
148 return nil, fmt.Errorf("no mapping lists found: provide either a config file (-c) with lists or mapping files (-m)")
149 }
150
Akron585f50f2025-07-03 13:55:47 +0200151 // Validate all mapping lists (skip duplicate ID check since we already did it)
Akrone1cff7c2025-06-04 18:43:32 +0200152 if err := validateMappingLists(allLists); err != nil {
Akron06d21f02025-06-04 14:36:07 +0200153 return nil, err
154 }
155
Akrone1cff7c2025-06-04 18:43:32 +0200156 // Create final configuration
157 result := &MappingConfig{
Akron2ac2ec02025-06-05 15:26:42 +0200158 SDK: globalConfig.SDK,
159 Server: globalConfig.Server,
160 ServiceURL: globalConfig.ServiceURL,
161 Port: globalConfig.Port,
162 LogLevel: globalConfig.LogLevel,
163 Lists: allLists,
Akrone1cff7c2025-06-04 18:43:32 +0200164 }
165
Akron06d21f02025-06-04 14:36:07 +0200166 // Apply defaults if not specified
Akron2ac2ec02025-06-05 15:26:42 +0200167 ApplyDefaults(result)
Akrone1cff7c2025-06-04 18:43:32 +0200168
169 return result, nil
170}
171
Akron585f50f2025-07-03 13:55:47 +0200172// ApplyDefaults sets default values for configuration fields if they are empty
Akron2ac2ec02025-06-05 15:26:42 +0200173func ApplyDefaults(config *MappingConfig) {
Akron585f50f2025-07-03 13:55:47 +0200174 defaults := map[*string]string{
175 &config.SDK: defaultSDK,
176 &config.Server: defaultServer,
177 &config.ServiceURL: defaultServiceURL,
178 &config.LogLevel: defaultLogLevel,
Akron06d21f02025-06-04 14:36:07 +0200179 }
Akron585f50f2025-07-03 13:55:47 +0200180
181 for field, defaultValue := range defaults {
182 if *field == "" {
183 *field = defaultValue
184 }
Akron06d21f02025-06-04 14:36:07 +0200185 }
Akron585f50f2025-07-03 13:55:47 +0200186
Akrona8a66ce2025-06-05 10:50:17 +0200187 if config.Port == 0 {
188 config.Port = defaultPort
189 }
Akron06d21f02025-06-04 14:36:07 +0200190}
191
Akron585f50f2025-07-03 13:55:47 +0200192// validateMappingLists validates a slice of mapping lists (without duplicate ID checking)
Akron06d21f02025-06-04 14:36:07 +0200193func validateMappingLists(lists []MappingList) error {
Akron57ee5582025-05-21 15:25:13 +0200194 for i, list := range lists {
195 if list.ID == "" {
Akron06d21f02025-06-04 14:36:07 +0200196 return fmt.Errorf("mapping list at index %d is missing an ID", i)
Akron57ee5582025-05-21 15:25:13 +0200197 }
Akrona5d88142025-05-22 14:42:09 +0200198
Akron57ee5582025-05-21 15:25:13 +0200199 if len(list.Mappings) == 0 {
Akron06d21f02025-06-04 14:36:07 +0200200 return fmt.Errorf("mapping list '%s' has no mapping rules", list.ID)
Akron57ee5582025-05-21 15:25:13 +0200201 }
202
203 // Validate each mapping rule
204 for j, rule := range list.Mappings {
205 if rule == "" {
Akron06d21f02025-06-04 14:36:07 +0200206 return fmt.Errorf("mapping list '%s' rule at index %d is empty", list.ID, j)
Akron57ee5582025-05-21 15:25:13 +0200207 }
208 }
209 }
Akron06d21f02025-06-04 14:36:07 +0200210 return nil
Akron57ee5582025-05-21 15:25:13 +0200211}
212
213// ParseMappings parses all mapping rules in a list and returns a slice of parsed rules
214func (list *MappingList) ParseMappings() ([]*parser.MappingResult, error) {
215 // Create a grammar parser with the list's default foundries and layers
216 grammarParser, err := parser.NewGrammarParser("", "")
217 if err != nil {
218 return nil, fmt.Errorf("failed to create grammar parser: %w", err)
219 }
220
221 results := make([]*parser.MappingResult, len(list.Mappings))
222 for i, rule := range list.Mappings {
Akrona5d88142025-05-22 14:42:09 +0200223 // Check for empty rules first
224 if rule == "" {
225 return nil, fmt.Errorf("empty mapping rule at index %d in list '%s'", i, list.ID)
226 }
227
Akron57ee5582025-05-21 15:25:13 +0200228 // Parse the mapping rule
229 result, err := grammarParser.ParseMapping(string(rule))
230 if err != nil {
231 return nil, fmt.Errorf("failed to parse mapping rule %d in list '%s': %w", i, list.ID, err)
232 }
233
234 // Apply default foundries and layers if not specified in the rule
235 if list.FoundryA != "" {
236 applyDefaultFoundryAndLayer(result.Upper.Wrap, list.FoundryA, list.LayerA)
237 }
238 if list.FoundryB != "" {
239 applyDefaultFoundryAndLayer(result.Lower.Wrap, list.FoundryB, list.LayerB)
240 }
241
242 results[i] = result
243 }
244
245 return results, nil
246}
247
248// applyDefaultFoundryAndLayer recursively applies default foundry and layer to terms that don't have them specified
249func applyDefaultFoundryAndLayer(node ast.Node, defaultFoundry, defaultLayer string) {
250 switch n := node.(type) {
251 case *ast.Term:
Akron585f50f2025-07-03 13:55:47 +0200252 if n.Foundry == "" && defaultFoundry != "" {
Akron57ee5582025-05-21 15:25:13 +0200253 n.Foundry = defaultFoundry
254 }
Akron585f50f2025-07-03 13:55:47 +0200255 if n.Layer == "" && defaultLayer != "" {
Akron57ee5582025-05-21 15:25:13 +0200256 n.Layer = defaultLayer
257 }
258 case *ast.TermGroup:
259 for _, op := range n.Operands {
260 applyDefaultFoundryAndLayer(op, defaultFoundry, defaultLayer)
261 }
262 }
263}