blob: 62022e5904adc6300bf44f1f4e118c04f8f1f42e [file] [log] [blame]
Akron8414ae52026-05-19 13:31:14 +02001package mapper
Akron4de47a92025-06-27 11:58:11 +02002
3import (
4 "encoding/json"
5 "fmt"
6
Akron2ef703c2025-07-03 15:57:42 +02007 "github.com/KorAP/Koral-Mapper/ast"
8 "github.com/KorAP/Koral-Mapper/matcher"
9 "github.com/KorAP/Koral-Mapper/parser"
Akron4de47a92025-06-27 11:58:11 +020010)
11
Akron8414ae52026-05-19 13:31:14 +020012// ApplyQueryMappings transforms a JSON query object using the mapping rules
13// identified by mappingID. The input may be a bare query node or a wrapper
14// object containing a "query" field; both forms are accepted.
Akron4de47a92025-06-27 11:58:11 +020015func (m *Mapper) ApplyQueryMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
Akron4de47a92025-06-27 11:58:11 +020016 if _, exists := m.mappingLists[mappingID]; !exists {
17 return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
18 }
19
Akron422cd252026-05-19 16:31:19 +020020 if err := m.validateEffectiveOptions(mappingID, opts); err != nil {
21 return nil, err
22 }
23
Akron2f93c582026-02-19 16:49:13 +010024 if m.mappingLists[mappingID].IsCorpus() {
25 return m.applyCorpusQueryMappings(mappingID, opts, jsonData)
26 }
27
Akron2f93c582026-02-19 16:49:13 +010028 rules := m.parsedQueryRules[mappingID]
Akron4de47a92025-06-27 11:58:11 +020029
Akron8414ae52026-05-19 13:31:14 +020030 // Detect wrapper: input may be {"query": ...} or a bare koral:token
Akron4de47a92025-06-27 11:58:11 +020031 var queryData any
32 var hasQueryWrapper bool
33
34 if jsonMap, ok := jsonData.(map[string]any); ok {
35 if query, exists := jsonMap["query"]; exists {
36 queryData = query
37 hasQueryWrapper = true
38 }
39 }
40
Akron4de47a92025-06-27 11:58:11 +020041 if !hasQueryWrapper {
Akron4de47a92025-06-27 11:58:11 +020042 if !isValidQueryObject(jsonData) {
43 return jsonData, nil
44 }
45 queryData = jsonData
46 } else if queryData == nil || !isValidQueryObject(queryData) {
Akron4de47a92025-06-27 11:58:11 +020047 return jsonData, nil
48 }
49
Akron8414ae52026-05-19 13:31:14 +020050 // Strip pre-existing rewrites before AST conversion so they do not
51 // interfere with matching. They are restored after transformation.
Akron4de47a92025-06-27 11:58:11 +020052 var oldRewrites any
53 if queryMap, ok := queryData.(map[string]any); ok {
54 if rewrites, exists := queryMap["rewrites"]; exists {
55 oldRewrites = rewrites
56 delete(queryMap, "rewrites")
57 }
58 }
59
Akron4de47a92025-06-27 11:58:11 +020060 jsonBytes, err := json.Marshal(queryData)
61 if err != nil {
62 return nil, fmt.Errorf("failed to marshal input JSON: %w", err)
63 }
64
65 node, err := parser.ParseJSON(jsonBytes)
66 if err != nil {
67 return nil, fmt.Errorf("failed to parse JSON into AST: %w", err)
68 }
69
Akron8414ae52026-05-19 13:31:14 +020070 // Unwrap Token so matching operates on the inner node; re-wrapped later.
Akron4de47a92025-06-27 11:58:11 +020071 isToken := false
72 var tokenWrap ast.Node
73 if token, ok := node.(*ast.Token); ok {
74 isToken = true
75 tokenWrap = token.Wrap
76 node = tokenWrap
77 }
78
Akron8414ae52026-05-19 13:31:14 +020079 // Resolve foundry/layer overrides per direction once, before the rule loop.
Akron4de47a92025-06-27 11:58:11 +020080 var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
Akron8414ae52026-05-19 13:31:14 +020081 if opts.Direction {
Akron4de47a92025-06-27 11:58:11 +020082 patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
83 replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
84 } else {
85 patternFoundry, patternLayer = opts.FoundryB, opts.LayerB
86 replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
87 }
88
Akron8414ae52026-05-19 13:31:14 +020089 // patternCache avoids redundant Clone+Override for the same rule index
90 // and foundry/layer combination across repeated calls.
Akron4de47a92025-06-27 11:58:11 +020091 type patternCacheKey struct {
92 ruleIndex int
93 foundry string
94 layer string
95 isReplacement bool
96 }
97 patternCache := make(map[patternCacheKey]ast.Node)
98
Akron4de47a92025-06-27 11:58:11 +020099 for i, rule := range rules {
Akron4de47a92025-06-27 11:58:11 +0200100 var pattern, replacement ast.Node
Akron8414ae52026-05-19 13:31:14 +0200101 if opts.Direction {
Akron4de47a92025-06-27 11:58:11 +0200102 pattern = rule.Upper
103 replacement = rule.Lower
104 } else {
105 pattern = rule.Lower
106 replacement = rule.Upper
107 }
108
Akron4de47a92025-06-27 11:58:11 +0200109 if token, ok := pattern.(*ast.Token); ok {
110 pattern = token.Wrap
111 }
112 if token, ok := replacement.(*ast.Token); ok {
113 replacement = token.Wrap
114 }
115
Akron4de47a92025-06-27 11:58:11 +0200116 patternKey := patternCacheKey{ruleIndex: i, foundry: patternFoundry, layer: patternLayer, isReplacement: false}
117 processedPattern, exists := patternCache[patternKey]
118 if !exists {
Akron4de47a92025-06-27 11:58:11 +0200119 processedPattern = pattern.Clone()
Akron4de47a92025-06-27 11:58:11 +0200120 if patternFoundry != "" || patternLayer != "" {
121 ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
122 }
123 patternCache[patternKey] = processedPattern
124 }
125
Akron8414ae52026-05-19 13:31:14 +0200126 // Probe for a match before cloning the replacement (lazy evaluation)
Akron4de47a92025-06-27 11:58:11 +0200127 tempMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: &ast.Term{}})
128 if err != nil {
129 return nil, fmt.Errorf("failed to create temporary matcher: %w", err)
130 }
Akron4de47a92025-06-27 11:58:11 +0200131 if !tempMatcher.Match(node) {
132 continue
133 }
134
Akron4de47a92025-06-27 11:58:11 +0200135 replacementKey := patternCacheKey{ruleIndex: i, foundry: replacementFoundry, layer: replacementLayer, isReplacement: true}
136 processedReplacement, exists := patternCache[replacementKey]
137 if !exists {
Akron4de47a92025-06-27 11:58:11 +0200138 processedReplacement = replacement.Clone()
Akron4de47a92025-06-27 11:58:11 +0200139 if replacementFoundry != "" || replacementLayer != "" {
140 ast.ApplyFoundryAndLayerOverrides(processedReplacement, replacementFoundry, replacementLayer)
141 }
142 patternCache[replacementKey] = processedReplacement
143 }
144
Akron8414ae52026-05-19 13:31:14 +0200145 var beforeNode ast.Node
146 if opts.AddRewrites {
147 beforeNode = node.Clone()
148 }
149
Akron330c8212026-05-19 14:12:39 +0200150 // Collect pre-existing rewrites before replacement so they
151 // survive when the matcher creates a fresh replacement node.
152 existingRewrites := collectRewrites(node)
153
Akron4de47a92025-06-27 11:58:11 +0200154 actualMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: processedReplacement})
155 if err != nil {
156 return nil, fmt.Errorf("failed to create matcher: %w", err)
157 }
158 node = actualMatcher.Replace(node)
Akron8414ae52026-05-19 13:31:14 +0200159
Akron330c8212026-05-19 14:12:39 +0200160 // Carry forward pre-existing rewrites from earlier cascade steps.
161 if len(existingRewrites) > 0 {
162 prependRewrites(node, existingRewrites)
163 }
164
Akron8414ae52026-05-19 13:31:14 +0200165 if opts.AddRewrites {
166 recordRewrites(node, beforeNode)
167 }
Akron4de47a92025-06-27 11:58:11 +0200168 }
169
Akron4de47a92025-06-27 11:58:11 +0200170 var result ast.Node
171 if isToken {
172 result = &ast.Token{Wrap: node}
173 } else {
174 result = node
175 }
176
Akron4de47a92025-06-27 11:58:11 +0200177 resultBytes, err := parser.SerializeToJSON(result)
178 if err != nil {
179 return nil, fmt.Errorf("failed to serialize AST to JSON: %w", err)
180 }
181
Akron4de47a92025-06-27 11:58:11 +0200182 var resultData any
183 if err := json.Unmarshal(resultBytes, &resultData); err != nil {
184 return nil, fmt.Errorf("failed to parse result JSON: %w", err)
185 }
186
Akron8414ae52026-05-19 13:31:14 +0200187 // Restore pre-existing rewrites. The round-trip through ast.Rewrite
188 // normalizes legacy field names (e.g. "source" -> "editor") so the
189 // output always uses the modern schema.
Akron4de47a92025-06-27 11:58:11 +0200190 if oldRewrites != nil {
Akron4de47a92025-06-27 11:58:11 +0200191 if rewritesList, ok := oldRewrites.([]any); ok {
192 processedRewrites := make([]any, len(rewritesList))
193 for i, rewriteData := range rewritesList {
Akron4de47a92025-06-27 11:58:11 +0200194 rewriteBytes, err := json.Marshal(rewriteData)
195 if err != nil {
196 return nil, fmt.Errorf("failed to marshal old rewrite %d: %w", i, err)
197 }
198 var rewrite ast.Rewrite
199 if err := json.Unmarshal(rewriteBytes, &rewrite); err != nil {
200 return nil, fmt.Errorf("failed to unmarshal old rewrite %d: %w", i, err)
201 }
Akron4de47a92025-06-27 11:58:11 +0200202 transformedBytes, err := json.Marshal(&rewrite)
203 if err != nil {
204 return nil, fmt.Errorf("failed to marshal transformed rewrite %d: %w", i, err)
205 }
206 var transformedRewrite any
207 if err := json.Unmarshal(transformedBytes, &transformedRewrite); err != nil {
208 return nil, fmt.Errorf("failed to unmarshal transformed rewrite %d: %w", i, err)
209 }
210 processedRewrites[i] = transformedRewrite
211 }
212 if resultMap, ok := resultData.(map[string]any); ok {
213 resultMap["rewrites"] = processedRewrites
214 }
215 } else {
Akron4de47a92025-06-27 11:58:11 +0200216 if resultMap, ok := resultData.(map[string]any); ok {
217 resultMap["rewrites"] = oldRewrites
218 }
219 }
220 }
221
Akron4de47a92025-06-27 11:58:11 +0200222 if hasQueryWrapper {
223 if wrapper, ok := jsonData.(map[string]any); ok {
224 wrapper["query"] = resultData
225 return wrapper, nil
226 }
227 }
228
229 return resultData, nil
230}
231
Akron8414ae52026-05-19 13:31:14 +0200232// recordRewrites compares the new node against the before-snapshot and
233// attaches rewrite entries to any changed nodes. It handles both simple
234// nodes (Term, TermGroup) and container nodes (CatchallNode with operands).
235func recordRewrites(newNode, beforeNode ast.Node) {
236 if ast.NodesEqual(newNode, beforeNode) {
237 return
238 }
239
240 // For CatchallNodes with operands (e.g. token sequences), attach
241 // per-operand rewrites so each changed token gets its own annotation.
242 if newCatchall, ok := newNode.(*ast.CatchallNode); ok {
243 if oldCatchall, ok := beforeNode.(*ast.CatchallNode); ok && len(newCatchall.Operands) > 0 {
244 for i, newOp := range newCatchall.Operands {
245 if i >= len(oldCatchall.Operands) {
246 break
247 }
248 oldOp := oldCatchall.Operands[i]
249 recordRewritesForOperand(newOp, oldOp)
250 }
251 return
252 }
253 }
254
255 addRewriteToNode(newNode, beforeNode)
256}
257
258// recordRewritesForOperand handles rewrite recording for a single operand,
259// unwrapping Token nodes so the rewrite attaches to the inner term/termGroup
260// rather than the token wrapper.
261func recordRewritesForOperand(newOp, oldOp ast.Node) {
262 if ast.NodesEqual(newOp, oldOp) {
263 return
264 }
265
266 newInner := newOp
267 oldInner := oldOp
268 if tok, ok := newOp.(*ast.Token); ok {
269 newInner = tok.Wrap
270 }
271 if tok, ok := oldOp.(*ast.Token); ok {
272 oldInner = tok.Wrap
273 }
274
275 if newInner == nil || ast.NodesEqual(newInner, oldInner) {
276 return
277 }
278
279 addRewriteToNode(newInner, oldInner)
280}
281
Akron958fc472026-05-19 13:58:52 +0200282// addRewriteToNode creates and attaches rewrite entries to a node,
Akron8414ae52026-05-19 13:31:14 +0200283// recording what the node looked like before the change.
284func addRewriteToNode(newNode, originalNode ast.Node) {
Akron958fc472026-05-19 13:58:52 +0200285 for _, rw := range buildRewrites(originalNode, newNode) {
286 ast.AppendRewrite(newNode, rw)
287 }
Akron8414ae52026-05-19 13:31:14 +0200288}
289
Akron958fc472026-05-19 13:58:52 +0200290// buildRewrites creates Rewrite entries describing what changed between
291// originalNode and newNode. For term-level changes it emits one scoped
292// rewrite per changed field so the transformation is fully reversible.
293// For structural changes it stores the full original as an object.
294func buildRewrites(originalNode, newNode ast.Node) []ast.Rewrite {
Akron2f93c582026-02-19 16:49:13 +0100295 if term, ok := originalNode.(*ast.Term); ok && ast.IsTermNode(newNode) && originalNode.Type() == newNode.Type() {
296 newTerm := newNode.(*ast.Term)
Akron958fc472026-05-19 13:58:52 +0200297 var rewrites []ast.Rewrite
298
Akron2f93c582026-02-19 16:49:13 +0100299 if term.Foundry != newTerm.Foundry {
Akron958fc472026-05-19 13:58:52 +0200300 rw := ast.Rewrite{Editor: RewriteEditor, Scope: "foundry"}
301 if term.Foundry != "" {
302 rw.Original = term.Foundry
303 }
304 rewrites = append(rewrites, rw)
Akron2f93c582026-02-19 16:49:13 +0100305 }
306 if term.Layer != newTerm.Layer {
Akron958fc472026-05-19 13:58:52 +0200307 rw := ast.Rewrite{Editor: RewriteEditor, Scope: "layer"}
308 if term.Layer != "" {
309 rw.Original = term.Layer
310 }
311 rewrites = append(rewrites, rw)
Akron2f93c582026-02-19 16:49:13 +0100312 }
313 if term.Key != newTerm.Key {
Akron958fc472026-05-19 13:58:52 +0200314 rw := ast.Rewrite{Editor: RewriteEditor, Scope: "key"}
315 if term.Key != "" {
316 rw.Original = term.Key
317 }
318 rewrites = append(rewrites, rw)
Akron2f93c582026-02-19 16:49:13 +0100319 }
320 if term.Value != newTerm.Value {
Akron958fc472026-05-19 13:58:52 +0200321 rw := ast.Rewrite{Editor: RewriteEditor, Scope: "value"}
322 if term.Value != "" {
323 rw.Original = term.Value
324 }
325 rewrites = append(rewrites, rw)
326 }
327
328 if len(rewrites) > 0 {
329 return rewrites
Akron2f93c582026-02-19 16:49:13 +0100330 }
331 }
332
Akron8414ae52026-05-19 13:31:14 +0200333 // Structural change: serialize the original as the rewrite value
Akron2f93c582026-02-19 16:49:13 +0100334 originalBytes, err := parser.SerializeToJSON(originalNode)
335 if err != nil {
Akron958fc472026-05-19 13:58:52 +0200336 return []ast.Rewrite{{Editor: RewriteEditor}}
Akron2f93c582026-02-19 16:49:13 +0100337 }
338 var originalJSON any
339 if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
Akron958fc472026-05-19 13:58:52 +0200340 return []ast.Rewrite{{Editor: RewriteEditor}}
Akron2f93c582026-02-19 16:49:13 +0100341 }
Akron958fc472026-05-19 13:58:52 +0200342 return []ast.Rewrite{{Editor: RewriteEditor, Original: originalJSON}}
Akron2f93c582026-02-19 16:49:13 +0100343}
344
Akron330c8212026-05-19 14:12:39 +0200345// collectRewrites returns the rewrites from the deepest rewritable node.
346// For a Token wrapping a Term, it returns the Term's rewrites.
347// This captures rewrites added by previous cascade steps.
348func collectRewrites(node ast.Node) []ast.Rewrite {
349 if node == nil {
350 return nil
351 }
352 // Unwrap Token to reach the inner node that carries rewrites
353 if tok, ok := node.(*ast.Token); ok && tok.Wrap != nil {
354 return collectRewrites(tok.Wrap)
355 }
356 if r, ok := node.(ast.Rewriteable); ok {
357 return r.GetRewrites()
358 }
359 return nil
360}
361
362// prependRewrites inserts existing rewrites at the front of the node's
363// rewrite list so they appear before any rewrites added by the current step.
364func prependRewrites(node ast.Node, rewrites []ast.Rewrite) {
365 if node == nil || len(rewrites) == 0 {
366 return
367 }
368 // Unwrap Token to reach the inner rewritable node
369 if tok, ok := node.(*ast.Token); ok && tok.Wrap != nil {
370 prependRewrites(tok.Wrap, rewrites)
371 return
372 }
373 if r, ok := node.(ast.Rewriteable); ok {
374 current := r.GetRewrites()
375 // Prepend old rewrites before any newly added ones
376 combined := make([]ast.Rewrite, 0, len(rewrites)+len(current))
377 combined = append(combined, rewrites...)
378 combined = append(combined, current...)
379 r.SetRewrites(combined)
380 }
381}
382
// isValidQueryObject reports whether data is a JSON object (decoded as
// map[string]any) that contains an "@type" key. Only the presence of the key
// is checked; its value is not inspected.
func isValidQueryObject(data any) bool {
	if object, isObject := data.(map[string]any); isObject {
		_, hasType := object["@type"]
		return hasType
	}
	return false
}