| Akron | 32d53de | 2025-05-22 13:45:32 +0200 | [diff] [blame] | 1 | package mapper |
| 2 | |
| 3 | import ( |
| Akron | 32d53de | 2025-05-22 13:45:32 +0200 | [diff] [blame] | 4 | "fmt" |
| Akron | 496fc0a | 2026-05-20 09:07:07 +0200 | [diff] [blame^] | 5 | "regexp" |
| Akron | 32d53de | 2025-05-22 13:45:32 +0200 | [diff] [blame] | 6 | |
| Akron | 2ef703c | 2025-07-03 15:57:42 +0200 | [diff] [blame] | 7 | "github.com/KorAP/Koral-Mapper/config" |
| 8 | "github.com/KorAP/Koral-Mapper/parser" |
| Akron | 32d53de | 2025-05-22 13:45:32 +0200 | [diff] [blame] | 9 | ) |
| 10 | |
// Direction identifies which way a mapping is applied: from side A to
// side B, or from side B to side A.
type Direction bool

const (
	// AtoB selects the A-to-B mapping direction.
	AtoB = Direction(true)
	// BtoA selects the B-to-A mapping direction.
	BtoA = Direction(false)

	// RewriteEditor is the fixed name "Koral-Mapper".
	// NOTE(review): presumably written as the editor of rewrites emitted
	// by this package — confirm against the rewrite-producing code.
	RewriteEditor = "Koral-Mapper"
)
| 20 | |
| Akron | a1a183f | 2025-05-26 17:47:33 +0200 | [diff] [blame] | 21 | // String converts the Direction to its string representation |
| 22 | func (d Direction) String() string { |
| 23 | if d { |
| 24 | return "atob" |
| 25 | } |
| 26 | return "btoa" |
| 27 | } |
| 28 | |
| 29 | // ParseDirection converts a string direction to Direction type |
| 30 | func ParseDirection(dir string) (Direction, error) { |
| 31 | switch dir { |
| 32 | case "atob": |
| 33 | return AtoB, nil |
| 34 | case "btoa": |
| 35 | return BtoA, nil |
| 36 | default: |
| 37 | return false, fmt.Errorf("invalid direction: %s", dir) |
| 38 | } |
| 39 | } |
| 40 | |
| Akron | 32d53de | 2025-05-22 13:45:32 +0200 | [diff] [blame] | 41 | // Mapper handles the application of mapping rules to JSON objects |
| 42 | type Mapper struct { |
| Akron | 2f93c58 | 2026-02-19 16:49:13 +0100 | [diff] [blame] | 43 | mappingLists map[string]*config.MappingList |
| 44 | parsedQueryRules map[string][]*parser.MappingResult |
| 45 | parsedCorpusRules map[string][]*parser.CorpusMappingResult |
| Akron | 496fc0a | 2026-05-20 09:07:07 +0200 | [diff] [blame^] | 46 | compiledRegexes map[string]*regexp.Regexp |
| Akron | 32d53de | 2025-05-22 13:45:32 +0200 | [diff] [blame] | 47 | } |
| 48 | |
| Akron | a00d475 | 2025-05-26 17:34:36 +0200 | [diff] [blame] | 49 | // NewMapper creates a new Mapper instance from a list of MappingLists |
| 50 | func NewMapper(lists []config.MappingList) (*Mapper, error) { |
| Akron | 32d53de | 2025-05-22 13:45:32 +0200 | [diff] [blame] | 51 | m := &Mapper{ |
| Akron | 2f93c58 | 2026-02-19 16:49:13 +0100 | [diff] [blame] | 52 | mappingLists: make(map[string]*config.MappingList), |
| 53 | parsedQueryRules: make(map[string][]*parser.MappingResult), |
| 54 | parsedCorpusRules: make(map[string][]*parser.CorpusMappingResult), |
| Akron | 496fc0a | 2026-05-20 09:07:07 +0200 | [diff] [blame^] | 55 | compiledRegexes: make(map[string]*regexp.Regexp), |
| Akron | 32d53de | 2025-05-22 13:45:32 +0200 | [diff] [blame] | 56 | } |
| 57 | |
| Akron | a00d475 | 2025-05-26 17:34:36 +0200 | [diff] [blame] | 58 | for _, list := range lists { |
| 59 | if _, exists := m.mappingLists[list.ID]; exists { |
| 60 | return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID) |
| 61 | } |
| 62 | |
| Akron | a00d475 | 2025-05-26 17:34:36 +0200 | [diff] [blame] | 63 | listCopy := list |
| 64 | m.mappingLists[list.ID] = &listCopy |
| 65 | |
| Akron | 2f93c58 | 2026-02-19 16:49:13 +0100 | [diff] [blame] | 66 | if list.IsCorpus() { |
| 67 | corpusRules, err := list.ParseCorpusMappings() |
| 68 | if err != nil { |
| 69 | return nil, fmt.Errorf("failed to parse corpus mappings for list %s: %w", list.ID, err) |
| 70 | } |
| Akron | 496fc0a | 2026-05-20 09:07:07 +0200 | [diff] [blame^] | 71 | for _, rule := range corpusRules { |
| 72 | if err := m.precompileCorpusRegexes(rule.Upper); err != nil { |
| 73 | return nil, fmt.Errorf("invalid regex in corpus mapping list %s: %w", list.ID, err) |
| 74 | } |
| 75 | if err := m.precompileCorpusRegexes(rule.Lower); err != nil { |
| 76 | return nil, fmt.Errorf("invalid regex in corpus mapping list %s: %w", list.ID, err) |
| 77 | } |
| 78 | } |
| Akron | 2f93c58 | 2026-02-19 16:49:13 +0100 | [diff] [blame] | 79 | m.parsedCorpusRules[list.ID] = corpusRules |
| 80 | } else { |
| 81 | queryRules, err := list.ParseMappings() |
| 82 | if err != nil { |
| 83 | return nil, fmt.Errorf("failed to parse mappings for list %s: %w", list.ID, err) |
| 84 | } |
| 85 | m.parsedQueryRules[list.ID] = queryRules |
| Akron | 32d53de | 2025-05-22 13:45:32 +0200 | [diff] [blame] | 86 | } |
| Akron | 32d53de | 2025-05-22 13:45:32 +0200 | [diff] [blame] | 87 | } |
| 88 | |
| 89 | return m, nil |
| 90 | } |
| 91 | |
| Akron | 496fc0a | 2026-05-20 09:07:07 +0200 | [diff] [blame^] | 92 | // precompileCorpusRegexes walks a CorpusNode tree and pre-compiles any |
| 93 | // regex-typed field patterns into the compiledRegexes cache. |
| 94 | func (m *Mapper) precompileCorpusRegexes(node parser.CorpusNode) error { |
| 95 | switch n := node.(type) { |
| 96 | case *parser.CorpusField: |
| 97 | if n.Type == "regex" { |
| 98 | pattern := "^" + n.Value + "$" |
| 99 | if _, exists := m.compiledRegexes[pattern]; !exists { |
| 100 | re, err := regexp.Compile(pattern) |
| 101 | if err != nil { |
| 102 | return fmt.Errorf("failed to compile regex %q: %w", n.Value, err) |
| 103 | } |
| 104 | m.compiledRegexes[pattern] = re |
| 105 | } |
| 106 | } |
| 107 | case *parser.CorpusGroup: |
| 108 | for _, op := range n.Operands { |
| 109 | if err := m.precompileCorpusRegexes(op); err != nil { |
| 110 | return err |
| 111 | } |
| 112 | } |
| 113 | } |
| 114 | return nil |
| 115 | } |
| 116 | |
| Akron | 32d53de | 2025-05-22 13:45:32 +0200 | [diff] [blame] | 117 | // MappingOptions contains the options for applying mappings |
| 118 | type MappingOptions struct { |
| Akron | 0d9117c | 2025-05-27 15:20:21 +0200 | [diff] [blame] | 119 | FoundryA string |
| 120 | LayerA string |
| 121 | FoundryB string |
| 122 | LayerB string |
| Akron | 4131026 | 2026-02-23 18:58:53 +0100 | [diff] [blame] | 123 | FieldA string |
| 124 | FieldB string |
| Akron | 0d9117c | 2025-05-27 15:20:21 +0200 | [diff] [blame] | 125 | Direction Direction |
| 126 | AddRewrites bool |
| Akron | 32d53de | 2025-05-22 13:45:32 +0200 | [diff] [blame] | 127 | } |
| Akron | e4f570d | 2026-02-20 08:18:06 +0100 | [diff] [blame] | 128 | |
| Akron | 422cd25 | 2026-05-19 16:31:19 +0200 | [diff] [blame] | 129 | // validateEffectiveOptions checks that the resolved source and target |
| 130 | // identifiers are not identical, which would cause an infinite mapping loop. |
| 131 | // For annotation mappings it compares the effective foundry+layer pair; |
| 132 | // for corpus mappings it compares the effective field names. |
| 133 | // The effective value is: query-parameter override if non-empty, otherwise |
| 134 | // the YAML list default. |
| 135 | func (m *Mapper) validateEffectiveOptions(mappingID string, opts MappingOptions) error { |
| 136 | list, exists := m.mappingLists[mappingID] |
| 137 | if !exists { |
| 138 | return nil // will be caught later |
| 139 | } |
| 140 | |
| 141 | if list.IsCorpus() { |
| 142 | effFieldA := opts.FieldA |
| 143 | if effFieldA == "" { |
| 144 | effFieldA = list.FieldA |
| 145 | } |
| 146 | effFieldB := opts.FieldB |
| 147 | if effFieldB == "" { |
| 148 | effFieldB = list.FieldB |
| 149 | } |
| 150 | if effFieldA != "" && effFieldA == effFieldB { |
| 151 | return fmt.Errorf("identical source and target field (fieldA == fieldB == %q) in mapping list '%s': this would cause an infinite mapping loop", effFieldA, mappingID) |
| 152 | } |
| 153 | return nil |
| 154 | } |
| 155 | |
| 156 | effFoundryA := opts.FoundryA |
| 157 | if effFoundryA == "" { |
| 158 | effFoundryA = list.FoundryA |
| 159 | } |
| 160 | effLayerA := opts.LayerA |
| 161 | if effLayerA == "" { |
| 162 | effLayerA = list.LayerA |
| 163 | } |
| 164 | effFoundryB := opts.FoundryB |
| 165 | if effFoundryB == "" { |
| 166 | effFoundryB = list.FoundryB |
| 167 | } |
| 168 | effLayerB := opts.LayerB |
| 169 | if effLayerB == "" { |
| 170 | effLayerB = list.LayerB |
| 171 | } |
| 172 | |
| 173 | if effFoundryA != "" && effFoundryA == effFoundryB && effLayerA == effLayerB { |
| 174 | return fmt.Errorf("identical source and target (foundryA/layerA == foundryB/layerB == %q/%q) in mapping list '%s': this would cause an infinite mapping loop", effFoundryA, effLayerA, mappingID) |
| 175 | } |
| 176 | |
| 177 | return nil |
| 178 | } |
| 179 | |
| Akron | e4f570d | 2026-02-20 08:18:06 +0100 | [diff] [blame] | 180 | // CascadeQueryMappings applies multiple mapping lists sequentially, |
| 181 | // feeding the output of each into the next. orderedIDs and |
| 182 | // perMappingOpts must have the same length. An empty list returns |
| 183 | // jsonData unchanged. |
| 184 | func (m *Mapper) CascadeQueryMappings(orderedIDs []string, perMappingOpts []MappingOptions, jsonData any) (any, error) { |
| 185 | if len(orderedIDs) != len(perMappingOpts) { |
| 186 | return nil, fmt.Errorf("orderedIDs length (%d) must match perMappingOpts length (%d)", len(orderedIDs), len(perMappingOpts)) |
| 187 | } |
| 188 | |
| 189 | result := jsonData |
| 190 | for i, id := range orderedIDs { |
| 191 | var err error |
| 192 | result, err = m.ApplyQueryMappings(id, perMappingOpts[i], result) |
| 193 | if err != nil { |
| 194 | return nil, fmt.Errorf("cascade step %d (mapping %q): %w", i, id, err) |
| 195 | } |
| 196 | } |
| 197 | return result, nil |
| 198 | } |
| 199 | |
| 200 | // CascadeResponseMappings applies multiple mapping lists sequentially |
| 201 | // to a response object, feeding the output of each into the next. |
| 202 | // orderedIDs and perMappingOpts must have the same length. An empty |
| 203 | // list returns jsonData unchanged. |
| 204 | func (m *Mapper) CascadeResponseMappings(orderedIDs []string, perMappingOpts []MappingOptions, jsonData any) (any, error) { |
| 205 | if len(orderedIDs) != len(perMappingOpts) { |
| 206 | return nil, fmt.Errorf("orderedIDs length (%d) must match perMappingOpts length (%d)", len(orderedIDs), len(perMappingOpts)) |
| 207 | } |
| 208 | |
| 209 | result := jsonData |
| 210 | for i, id := range orderedIDs { |
| 211 | var err error |
| 212 | result, err = m.ApplyResponseMappings(id, perMappingOpts[i], result) |
| 213 | if err != nil { |
| 214 | return nil, fmt.Errorf("cascade step %d (mapping %q): %w", i, id, err) |
| 215 | } |
| 216 | } |
| 217 | return result, nil |
| 218 | } |