blob: 72d1398836f4a79d9f16ed4a0187156deaa3099f [file] [log] [blame]
Akron8414ae52026-05-19 13:31:14 +02001package mapper
Akron4de47a92025-06-27 11:58:11 +02002
3import (
4 "encoding/json"
5 "fmt"
6
Akron2ef703c2025-07-03 15:57:42 +02007 "github.com/KorAP/Koral-Mapper/ast"
8 "github.com/KorAP/Koral-Mapper/matcher"
9 "github.com/KorAP/Koral-Mapper/parser"
Akron4de47a92025-06-27 11:58:11 +020010)
11
Akron8414ae52026-05-19 13:31:14 +020012// ApplyQueryMappings transforms a JSON query object using the mapping rules
13// identified by mappingID. The input may be a bare query node or a wrapper
14// object containing a "query" field; both forms are accepted.
Akron4de47a92025-06-27 11:58:11 +020015func (m *Mapper) ApplyQueryMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
Akron4de47a92025-06-27 11:58:11 +020016 if _, exists := m.mappingLists[mappingID]; !exists {
17 return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
18 }
19
Akron2f93c582026-02-19 16:49:13 +010020 if m.mappingLists[mappingID].IsCorpus() {
21 return m.applyCorpusQueryMappings(mappingID, opts, jsonData)
22 }
23
Akron2f93c582026-02-19 16:49:13 +010024 rules := m.parsedQueryRules[mappingID]
Akron4de47a92025-06-27 11:58:11 +020025
Akron8414ae52026-05-19 13:31:14 +020026 // Detect wrapper: input may be {"query": ...} or a bare koral:token
Akron4de47a92025-06-27 11:58:11 +020027 var queryData any
28 var hasQueryWrapper bool
29
30 if jsonMap, ok := jsonData.(map[string]any); ok {
31 if query, exists := jsonMap["query"]; exists {
32 queryData = query
33 hasQueryWrapper = true
34 }
35 }
36
Akron4de47a92025-06-27 11:58:11 +020037 if !hasQueryWrapper {
Akron4de47a92025-06-27 11:58:11 +020038 if !isValidQueryObject(jsonData) {
39 return jsonData, nil
40 }
41 queryData = jsonData
42 } else if queryData == nil || !isValidQueryObject(queryData) {
Akron4de47a92025-06-27 11:58:11 +020043 return jsonData, nil
44 }
45
Akron8414ae52026-05-19 13:31:14 +020046 // Strip pre-existing rewrites before AST conversion so they do not
47 // interfere with matching. They are restored after transformation.
Akron4de47a92025-06-27 11:58:11 +020048 var oldRewrites any
49 if queryMap, ok := queryData.(map[string]any); ok {
50 if rewrites, exists := queryMap["rewrites"]; exists {
51 oldRewrites = rewrites
52 delete(queryMap, "rewrites")
53 }
54 }
55
Akron4de47a92025-06-27 11:58:11 +020056 jsonBytes, err := json.Marshal(queryData)
57 if err != nil {
58 return nil, fmt.Errorf("failed to marshal input JSON: %w", err)
59 }
60
61 node, err := parser.ParseJSON(jsonBytes)
62 if err != nil {
63 return nil, fmt.Errorf("failed to parse JSON into AST: %w", err)
64 }
65
Akron8414ae52026-05-19 13:31:14 +020066 // Unwrap Token so matching operates on the inner node; re-wrapped later.
Akron4de47a92025-06-27 11:58:11 +020067 isToken := false
68 var tokenWrap ast.Node
69 if token, ok := node.(*ast.Token); ok {
70 isToken = true
71 tokenWrap = token.Wrap
72 node = tokenWrap
73 }
74
Akron8414ae52026-05-19 13:31:14 +020075 // Resolve foundry/layer overrides per direction once, before the rule loop.
Akron4de47a92025-06-27 11:58:11 +020076 var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
Akron8414ae52026-05-19 13:31:14 +020077 if opts.Direction {
Akron4de47a92025-06-27 11:58:11 +020078 patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
79 replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
80 } else {
81 patternFoundry, patternLayer = opts.FoundryB, opts.LayerB
82 replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
83 }
84
Akron8414ae52026-05-19 13:31:14 +020085 // patternCache avoids redundant Clone+Override for the same rule index
86 // and foundry/layer combination across repeated calls.
Akron4de47a92025-06-27 11:58:11 +020087 type patternCacheKey struct {
88 ruleIndex int
89 foundry string
90 layer string
91 isReplacement bool
92 }
93 patternCache := make(map[patternCacheKey]ast.Node)
94
Akron4de47a92025-06-27 11:58:11 +020095 for i, rule := range rules {
Akron4de47a92025-06-27 11:58:11 +020096 var pattern, replacement ast.Node
Akron8414ae52026-05-19 13:31:14 +020097 if opts.Direction {
Akron4de47a92025-06-27 11:58:11 +020098 pattern = rule.Upper
99 replacement = rule.Lower
100 } else {
101 pattern = rule.Lower
102 replacement = rule.Upper
103 }
104
Akron4de47a92025-06-27 11:58:11 +0200105 if token, ok := pattern.(*ast.Token); ok {
106 pattern = token.Wrap
107 }
108 if token, ok := replacement.(*ast.Token); ok {
109 replacement = token.Wrap
110 }
111
Akron4de47a92025-06-27 11:58:11 +0200112 patternKey := patternCacheKey{ruleIndex: i, foundry: patternFoundry, layer: patternLayer, isReplacement: false}
113 processedPattern, exists := patternCache[patternKey]
114 if !exists {
Akron4de47a92025-06-27 11:58:11 +0200115 processedPattern = pattern.Clone()
Akron4de47a92025-06-27 11:58:11 +0200116 if patternFoundry != "" || patternLayer != "" {
117 ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
118 }
119 patternCache[patternKey] = processedPattern
120 }
121
Akron8414ae52026-05-19 13:31:14 +0200122 // Probe for a match before cloning the replacement (lazy evaluation)
Akron4de47a92025-06-27 11:58:11 +0200123 tempMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: &ast.Term{}})
124 if err != nil {
125 return nil, fmt.Errorf("failed to create temporary matcher: %w", err)
126 }
Akron4de47a92025-06-27 11:58:11 +0200127 if !tempMatcher.Match(node) {
128 continue
129 }
130
Akron4de47a92025-06-27 11:58:11 +0200131 replacementKey := patternCacheKey{ruleIndex: i, foundry: replacementFoundry, layer: replacementLayer, isReplacement: true}
132 processedReplacement, exists := patternCache[replacementKey]
133 if !exists {
Akron4de47a92025-06-27 11:58:11 +0200134 processedReplacement = replacement.Clone()
Akron4de47a92025-06-27 11:58:11 +0200135 if replacementFoundry != "" || replacementLayer != "" {
136 ast.ApplyFoundryAndLayerOverrides(processedReplacement, replacementFoundry, replacementLayer)
137 }
138 patternCache[replacementKey] = processedReplacement
139 }
140
Akron8414ae52026-05-19 13:31:14 +0200141 var beforeNode ast.Node
142 if opts.AddRewrites {
143 beforeNode = node.Clone()
144 }
145
Akron330c8212026-05-19 14:12:39 +0200146 // Collect pre-existing rewrites before replacement so they
147 // survive when the matcher creates a fresh replacement node.
148 existingRewrites := collectRewrites(node)
149
Akron4de47a92025-06-27 11:58:11 +0200150 actualMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: processedReplacement})
151 if err != nil {
152 return nil, fmt.Errorf("failed to create matcher: %w", err)
153 }
154 node = actualMatcher.Replace(node)
Akron8414ae52026-05-19 13:31:14 +0200155
Akron330c8212026-05-19 14:12:39 +0200156 // Carry forward pre-existing rewrites from earlier cascade steps.
157 if len(existingRewrites) > 0 {
158 prependRewrites(node, existingRewrites)
159 }
160
Akron8414ae52026-05-19 13:31:14 +0200161 if opts.AddRewrites {
162 recordRewrites(node, beforeNode)
163 }
Akron4de47a92025-06-27 11:58:11 +0200164 }
165
Akron4de47a92025-06-27 11:58:11 +0200166 var result ast.Node
167 if isToken {
168 result = &ast.Token{Wrap: node}
169 } else {
170 result = node
171 }
172
Akron4de47a92025-06-27 11:58:11 +0200173 resultBytes, err := parser.SerializeToJSON(result)
174 if err != nil {
175 return nil, fmt.Errorf("failed to serialize AST to JSON: %w", err)
176 }
177
Akron4de47a92025-06-27 11:58:11 +0200178 var resultData any
179 if err := json.Unmarshal(resultBytes, &resultData); err != nil {
180 return nil, fmt.Errorf("failed to parse result JSON: %w", err)
181 }
182
Akron8414ae52026-05-19 13:31:14 +0200183 // Restore pre-existing rewrites. The round-trip through ast.Rewrite
184 // normalizes legacy field names (e.g. "source" -> "editor") so the
185 // output always uses the modern schema.
Akron4de47a92025-06-27 11:58:11 +0200186 if oldRewrites != nil {
Akron4de47a92025-06-27 11:58:11 +0200187 if rewritesList, ok := oldRewrites.([]any); ok {
188 processedRewrites := make([]any, len(rewritesList))
189 for i, rewriteData := range rewritesList {
Akron4de47a92025-06-27 11:58:11 +0200190 rewriteBytes, err := json.Marshal(rewriteData)
191 if err != nil {
192 return nil, fmt.Errorf("failed to marshal old rewrite %d: %w", i, err)
193 }
194 var rewrite ast.Rewrite
195 if err := json.Unmarshal(rewriteBytes, &rewrite); err != nil {
196 return nil, fmt.Errorf("failed to unmarshal old rewrite %d: %w", i, err)
197 }
Akron4de47a92025-06-27 11:58:11 +0200198 transformedBytes, err := json.Marshal(&rewrite)
199 if err != nil {
200 return nil, fmt.Errorf("failed to marshal transformed rewrite %d: %w", i, err)
201 }
202 var transformedRewrite any
203 if err := json.Unmarshal(transformedBytes, &transformedRewrite); err != nil {
204 return nil, fmt.Errorf("failed to unmarshal transformed rewrite %d: %w", i, err)
205 }
206 processedRewrites[i] = transformedRewrite
207 }
208 if resultMap, ok := resultData.(map[string]any); ok {
209 resultMap["rewrites"] = processedRewrites
210 }
211 } else {
Akron4de47a92025-06-27 11:58:11 +0200212 if resultMap, ok := resultData.(map[string]any); ok {
213 resultMap["rewrites"] = oldRewrites
214 }
215 }
216 }
217
Akron4de47a92025-06-27 11:58:11 +0200218 if hasQueryWrapper {
219 if wrapper, ok := jsonData.(map[string]any); ok {
220 wrapper["query"] = resultData
221 return wrapper, nil
222 }
223 }
224
225 return resultData, nil
226}
227
Akron8414ae52026-05-19 13:31:14 +0200228// recordRewrites compares the new node against the before-snapshot and
229// attaches rewrite entries to any changed nodes. It handles both simple
230// nodes (Term, TermGroup) and container nodes (CatchallNode with operands).
231func recordRewrites(newNode, beforeNode ast.Node) {
232 if ast.NodesEqual(newNode, beforeNode) {
233 return
234 }
235
236 // For CatchallNodes with operands (e.g. token sequences), attach
237 // per-operand rewrites so each changed token gets its own annotation.
238 if newCatchall, ok := newNode.(*ast.CatchallNode); ok {
239 if oldCatchall, ok := beforeNode.(*ast.CatchallNode); ok && len(newCatchall.Operands) > 0 {
240 for i, newOp := range newCatchall.Operands {
241 if i >= len(oldCatchall.Operands) {
242 break
243 }
244 oldOp := oldCatchall.Operands[i]
245 recordRewritesForOperand(newOp, oldOp)
246 }
247 return
248 }
249 }
250
251 addRewriteToNode(newNode, beforeNode)
252}
253
254// recordRewritesForOperand handles rewrite recording for a single operand,
255// unwrapping Token nodes so the rewrite attaches to the inner term/termGroup
256// rather than the token wrapper.
257func recordRewritesForOperand(newOp, oldOp ast.Node) {
258 if ast.NodesEqual(newOp, oldOp) {
259 return
260 }
261
262 newInner := newOp
263 oldInner := oldOp
264 if tok, ok := newOp.(*ast.Token); ok {
265 newInner = tok.Wrap
266 }
267 if tok, ok := oldOp.(*ast.Token); ok {
268 oldInner = tok.Wrap
269 }
270
271 if newInner == nil || ast.NodesEqual(newInner, oldInner) {
272 return
273 }
274
275 addRewriteToNode(newInner, oldInner)
276}
277
Akron958fc472026-05-19 13:58:52 +0200278// addRewriteToNode creates and attaches rewrite entries to a node,
Akron8414ae52026-05-19 13:31:14 +0200279// recording what the node looked like before the change.
280func addRewriteToNode(newNode, originalNode ast.Node) {
Akron958fc472026-05-19 13:58:52 +0200281 for _, rw := range buildRewrites(originalNode, newNode) {
282 ast.AppendRewrite(newNode, rw)
283 }
Akron8414ae52026-05-19 13:31:14 +0200284}
285
Akron958fc472026-05-19 13:58:52 +0200286// buildRewrites creates Rewrite entries describing what changed between
287// originalNode and newNode. For term-level changes it emits one scoped
288// rewrite per changed field so the transformation is fully reversible.
289// For structural changes it stores the full original as an object.
290func buildRewrites(originalNode, newNode ast.Node) []ast.Rewrite {
Akron2f93c582026-02-19 16:49:13 +0100291 if term, ok := originalNode.(*ast.Term); ok && ast.IsTermNode(newNode) && originalNode.Type() == newNode.Type() {
292 newTerm := newNode.(*ast.Term)
Akron958fc472026-05-19 13:58:52 +0200293 var rewrites []ast.Rewrite
294
Akron2f93c582026-02-19 16:49:13 +0100295 if term.Foundry != newTerm.Foundry {
Akron958fc472026-05-19 13:58:52 +0200296 rw := ast.Rewrite{Editor: RewriteEditor, Scope: "foundry"}
297 if term.Foundry != "" {
298 rw.Original = term.Foundry
299 }
300 rewrites = append(rewrites, rw)
Akron2f93c582026-02-19 16:49:13 +0100301 }
302 if term.Layer != newTerm.Layer {
Akron958fc472026-05-19 13:58:52 +0200303 rw := ast.Rewrite{Editor: RewriteEditor, Scope: "layer"}
304 if term.Layer != "" {
305 rw.Original = term.Layer
306 }
307 rewrites = append(rewrites, rw)
Akron2f93c582026-02-19 16:49:13 +0100308 }
309 if term.Key != newTerm.Key {
Akron958fc472026-05-19 13:58:52 +0200310 rw := ast.Rewrite{Editor: RewriteEditor, Scope: "key"}
311 if term.Key != "" {
312 rw.Original = term.Key
313 }
314 rewrites = append(rewrites, rw)
Akron2f93c582026-02-19 16:49:13 +0100315 }
316 if term.Value != newTerm.Value {
Akron958fc472026-05-19 13:58:52 +0200317 rw := ast.Rewrite{Editor: RewriteEditor, Scope: "value"}
318 if term.Value != "" {
319 rw.Original = term.Value
320 }
321 rewrites = append(rewrites, rw)
322 }
323
324 if len(rewrites) > 0 {
325 return rewrites
Akron2f93c582026-02-19 16:49:13 +0100326 }
327 }
328
Akron8414ae52026-05-19 13:31:14 +0200329 // Structural change: serialize the original as the rewrite value
Akron2f93c582026-02-19 16:49:13 +0100330 originalBytes, err := parser.SerializeToJSON(originalNode)
331 if err != nil {
Akron958fc472026-05-19 13:58:52 +0200332 return []ast.Rewrite{{Editor: RewriteEditor}}
Akron2f93c582026-02-19 16:49:13 +0100333 }
334 var originalJSON any
335 if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
Akron958fc472026-05-19 13:58:52 +0200336 return []ast.Rewrite{{Editor: RewriteEditor}}
Akron2f93c582026-02-19 16:49:13 +0100337 }
Akron958fc472026-05-19 13:58:52 +0200338 return []ast.Rewrite{{Editor: RewriteEditor, Original: originalJSON}}
Akron2f93c582026-02-19 16:49:13 +0100339}
340
Akron330c8212026-05-19 14:12:39 +0200341// collectRewrites returns the rewrites from the deepest rewritable node.
342// For a Token wrapping a Term, it returns the Term's rewrites.
343// This captures rewrites added by previous cascade steps.
344func collectRewrites(node ast.Node) []ast.Rewrite {
345 if node == nil {
346 return nil
347 }
348 // Unwrap Token to reach the inner node that carries rewrites
349 if tok, ok := node.(*ast.Token); ok && tok.Wrap != nil {
350 return collectRewrites(tok.Wrap)
351 }
352 if r, ok := node.(ast.Rewriteable); ok {
353 return r.GetRewrites()
354 }
355 return nil
356}
357
358// prependRewrites inserts existing rewrites at the front of the node's
359// rewrite list so they appear before any rewrites added by the current step.
360func prependRewrites(node ast.Node, rewrites []ast.Rewrite) {
361 if node == nil || len(rewrites) == 0 {
362 return
363 }
364 // Unwrap Token to reach the inner rewritable node
365 if tok, ok := node.(*ast.Token); ok && tok.Wrap != nil {
366 prependRewrites(tok.Wrap, rewrites)
367 return
368 }
369 if r, ok := node.(ast.Rewriteable); ok {
370 current := r.GetRewrites()
371 // Prepend old rewrites before any newly added ones
372 combined := make([]ast.Rewrite, 0, len(rewrites)+len(current))
373 combined = append(combined, rewrites...)
374 combined = append(combined, current...)
375 r.SetRewrites(combined)
376 }
377}
378
// isValidQueryObject reports whether data is a JSON object (map[string]any)
// that has an "@type" key. Only the presence of the key is checked, not its
// value.
func isValidQueryObject(data any) bool {
	if obj, isObject := data.(map[string]any); isObject {
		_, hasType := obj["@type"]
		return hasType
	}
	return false
}
Akron4de47a92025-06-27 11:58:11 +0200387}