blob: 6c215000c1ccf0141ec7325d93546b0ab53f81fc [file] [log] [blame]
Akron32d53de2025-05-22 13:45:32 +02001package mapper
2
3import (
Akron32d53de2025-05-22 13:45:32 +02004 "fmt"
Akron496fc0a2026-05-20 09:07:07 +02005 "regexp"
Akron32d53de2025-05-22 13:45:32 +02006
Akron2ef703c2025-07-03 15:57:42 +02007 "github.com/KorAP/Koral-Mapper/config"
8 "github.com/KorAP/Koral-Mapper/parser"
Akron32d53de2025-05-22 13:45:32 +02009)
10
// Direction selects which way a mapping is applied: from side A to side B
// (AtoB) or from side B to side A (BtoA).
type Direction bool

const (
	// AtoB applies a mapping from side A to side B.
	AtoB Direction = true
	// BtoA applies a mapping from side B to side A.
	BtoA Direction = false

	// RewriteEditor is the fixed name "Koral-Mapper".
	// NOTE(review): presumably the editor recorded on rewrites produced by
	// this package; its users are not visible in this part of the file.
	RewriteEditor = "Koral-Mapper"
)

// String returns the textual form of the direction: "atob" for AtoB and
// "btoa" for BtoA.
func (d Direction) String() string {
	switch d {
	case AtoB:
		return "atob"
	default:
		return "btoa"
	}
}

// ParseDirection turns the textual form of a direction back into a
// Direction. Only the exact strings "atob" and "btoa" are accepted; any
// other input yields an error together with the zero Direction.
func ParseDirection(dir string) (Direction, error) {
	if dir == "atob" {
		return AtoB, nil
	}
	if dir == "btoa" {
		return BtoA, nil
	}
	return false, fmt.Errorf("invalid direction: %s", dir)
}
40
Akron32d53de2025-05-22 13:45:32 +020041// Mapper handles the application of mapping rules to JSON objects
42type Mapper struct {
Akron2f93c582026-02-19 16:49:13 +010043 mappingLists map[string]*config.MappingList
44 parsedQueryRules map[string][]*parser.MappingResult
45 parsedCorpusRules map[string][]*parser.CorpusMappingResult
Akron496fc0a2026-05-20 09:07:07 +020046 compiledRegexes map[string]*regexp.Regexp
Akron32d53de2025-05-22 13:45:32 +020047}
48
Akrona00d4752025-05-26 17:34:36 +020049// NewMapper creates a new Mapper instance from a list of MappingLists
50func NewMapper(lists []config.MappingList) (*Mapper, error) {
Akron32d53de2025-05-22 13:45:32 +020051 m := &Mapper{
Akron2f93c582026-02-19 16:49:13 +010052 mappingLists: make(map[string]*config.MappingList),
53 parsedQueryRules: make(map[string][]*parser.MappingResult),
54 parsedCorpusRules: make(map[string][]*parser.CorpusMappingResult),
Akron496fc0a2026-05-20 09:07:07 +020055 compiledRegexes: make(map[string]*regexp.Regexp),
Akron32d53de2025-05-22 13:45:32 +020056 }
57
Akrona00d4752025-05-26 17:34:36 +020058 for _, list := range lists {
59 if _, exists := m.mappingLists[list.ID]; exists {
60 return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
61 }
62
Akrona00d4752025-05-26 17:34:36 +020063 listCopy := list
64 m.mappingLists[list.ID] = &listCopy
65
Akron2f93c582026-02-19 16:49:13 +010066 if list.IsCorpus() {
67 corpusRules, err := list.ParseCorpusMappings()
68 if err != nil {
69 return nil, fmt.Errorf("failed to parse corpus mappings for list %s: %w", list.ID, err)
70 }
Akron496fc0a2026-05-20 09:07:07 +020071 for _, rule := range corpusRules {
72 if err := m.precompileCorpusRegexes(rule.Upper); err != nil {
73 return nil, fmt.Errorf("invalid regex in corpus mapping list %s: %w", list.ID, err)
74 }
75 if err := m.precompileCorpusRegexes(rule.Lower); err != nil {
76 return nil, fmt.Errorf("invalid regex in corpus mapping list %s: %w", list.ID, err)
77 }
78 }
Akron2f93c582026-02-19 16:49:13 +010079 m.parsedCorpusRules[list.ID] = corpusRules
80 } else {
81 queryRules, err := list.ParseMappings()
82 if err != nil {
83 return nil, fmt.Errorf("failed to parse mappings for list %s: %w", list.ID, err)
84 }
85 m.parsedQueryRules[list.ID] = queryRules
Akron32d53de2025-05-22 13:45:32 +020086 }
Akron32d53de2025-05-22 13:45:32 +020087 }
88
89 return m, nil
90}
91
Akron496fc0a2026-05-20 09:07:07 +020092// precompileCorpusRegexes walks a CorpusNode tree and pre-compiles any
93// regex-typed field patterns into the compiledRegexes cache.
94func (m *Mapper) precompileCorpusRegexes(node parser.CorpusNode) error {
95 switch n := node.(type) {
96 case *parser.CorpusField:
97 if n.Type == "regex" {
98 pattern := "^" + n.Value + "$"
99 if _, exists := m.compiledRegexes[pattern]; !exists {
100 re, err := regexp.Compile(pattern)
101 if err != nil {
102 return fmt.Errorf("failed to compile regex %q: %w", n.Value, err)
103 }
104 m.compiledRegexes[pattern] = re
105 }
106 }
107 case *parser.CorpusGroup:
108 for _, op := range n.Operands {
109 if err := m.precompileCorpusRegexes(op); err != nil {
110 return err
111 }
112 }
113 }
114 return nil
115}
116
Akron32d53de2025-05-22 13:45:32 +0200117// MappingOptions contains the options for applying mappings
118type MappingOptions struct {
Akron0d9117c2025-05-27 15:20:21 +0200119 FoundryA string
120 LayerA string
121 FoundryB string
122 LayerB string
Akron41310262026-02-23 18:58:53 +0100123 FieldA string
124 FieldB string
Akron0d9117c2025-05-27 15:20:21 +0200125 Direction Direction
126 AddRewrites bool
Akron32d53de2025-05-22 13:45:32 +0200127}
Akrone4f570d2026-02-20 08:18:06 +0100128
Akron422cd252026-05-19 16:31:19 +0200129// validateEffectiveOptions checks that the resolved source and target
130// identifiers are not identical, which would cause an infinite mapping loop.
131// For annotation mappings it compares the effective foundry+layer pair;
132// for corpus mappings it compares the effective field names.
133// The effective value is: query-parameter override if non-empty, otherwise
134// the YAML list default.
135func (m *Mapper) validateEffectiveOptions(mappingID string, opts MappingOptions) error {
136 list, exists := m.mappingLists[mappingID]
137 if !exists {
138 return nil // will be caught later
139 }
140
141 if list.IsCorpus() {
142 effFieldA := opts.FieldA
143 if effFieldA == "" {
144 effFieldA = list.FieldA
145 }
146 effFieldB := opts.FieldB
147 if effFieldB == "" {
148 effFieldB = list.FieldB
149 }
150 if effFieldA != "" && effFieldA == effFieldB {
151 return fmt.Errorf("identical source and target field (fieldA == fieldB == %q) in mapping list '%s': this would cause an infinite mapping loop", effFieldA, mappingID)
152 }
153 return nil
154 }
155
156 effFoundryA := opts.FoundryA
157 if effFoundryA == "" {
158 effFoundryA = list.FoundryA
159 }
160 effLayerA := opts.LayerA
161 if effLayerA == "" {
162 effLayerA = list.LayerA
163 }
164 effFoundryB := opts.FoundryB
165 if effFoundryB == "" {
166 effFoundryB = list.FoundryB
167 }
168 effLayerB := opts.LayerB
169 if effLayerB == "" {
170 effLayerB = list.LayerB
171 }
172
173 if effFoundryA != "" && effFoundryA == effFoundryB && effLayerA == effLayerB {
174 return fmt.Errorf("identical source and target (foundryA/layerA == foundryB/layerB == %q/%q) in mapping list '%s': this would cause an infinite mapping loop", effFoundryA, effLayerA, mappingID)
175 }
176
177 return nil
178}
179
Akrone4f570d2026-02-20 08:18:06 +0100180// CascadeQueryMappings applies multiple mapping lists sequentially,
181// feeding the output of each into the next. orderedIDs and
182// perMappingOpts must have the same length. An empty list returns
183// jsonData unchanged.
184func (m *Mapper) CascadeQueryMappings(orderedIDs []string, perMappingOpts []MappingOptions, jsonData any) (any, error) {
185 if len(orderedIDs) != len(perMappingOpts) {
186 return nil, fmt.Errorf("orderedIDs length (%d) must match perMappingOpts length (%d)", len(orderedIDs), len(perMappingOpts))
187 }
188
189 result := jsonData
190 for i, id := range orderedIDs {
191 var err error
192 result, err = m.ApplyQueryMappings(id, perMappingOpts[i], result)
193 if err != nil {
194 return nil, fmt.Errorf("cascade step %d (mapping %q): %w", i, id, err)
195 }
196 }
197 return result, nil
198}
199
200// CascadeResponseMappings applies multiple mapping lists sequentially
201// to a response object, feeding the output of each into the next.
202// orderedIDs and perMappingOpts must have the same length. An empty
203// list returns jsonData unchanged.
204func (m *Mapper) CascadeResponseMappings(orderedIDs []string, perMappingOpts []MappingOptions, jsonData any) (any, error) {
205 if len(orderedIDs) != len(perMappingOpts) {
206 return nil, fmt.Errorf("orderedIDs length (%d) must match perMappingOpts length (%d)", len(orderedIDs), len(perMappingOpts))
207 }
208
209 result := jsonData
210 for i, id := range orderedIDs {
211 var err error
212 result, err = m.ApplyResponseMappings(id, perMappingOpts[i], result)
213 if err != nil {
214 return nil, fmt.Errorf("cascade step %d (mapping %q): %w", i, id, err)
215 }
216 }
217 return result, nil
218}