blob: ec52da3bfdc38ede1d91632f90e8e96080f3fb3a [file] [log] [blame]
Akron8414ae52026-05-19 13:31:14 +02001package mapper
Akron4de47a92025-06-27 11:58:11 +02002
3import (
4 "encoding/json"
5 "fmt"
6
Akron2ef703c2025-07-03 15:57:42 +02007 "github.com/KorAP/Koral-Mapper/ast"
8 "github.com/KorAP/Koral-Mapper/matcher"
9 "github.com/KorAP/Koral-Mapper/parser"
Akron4de47a92025-06-27 11:58:11 +020010)
11
Akron8414ae52026-05-19 13:31:14 +020012// ApplyQueryMappings transforms a JSON query object using the mapping rules
13// identified by mappingID. The input may be a bare query node or a wrapper
14// object containing a "query" field; both forms are accepted.
Akron4de47a92025-06-27 11:58:11 +020015func (m *Mapper) ApplyQueryMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
Akron4de47a92025-06-27 11:58:11 +020016 if _, exists := m.mappingLists[mappingID]; !exists {
17 return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
18 }
19
Akron422cd252026-05-19 16:31:19 +020020 if err := m.validateEffectiveOptions(mappingID, opts); err != nil {
21 return nil, err
22 }
23
Akron2f93c582026-02-19 16:49:13 +010024 if m.mappingLists[mappingID].IsCorpus() {
25 return m.applyCorpusQueryMappings(mappingID, opts, jsonData)
26 }
27
Akron2f93c582026-02-19 16:49:13 +010028 rules := m.parsedQueryRules[mappingID]
Akron4de47a92025-06-27 11:58:11 +020029
Akron8414ae52026-05-19 13:31:14 +020030 // Detect wrapper: input may be {"query": ...} or a bare koral:token
Akron4de47a92025-06-27 11:58:11 +020031 var queryData any
32 var hasQueryWrapper bool
33
34 if jsonMap, ok := jsonData.(map[string]any); ok {
35 if query, exists := jsonMap["query"]; exists {
36 queryData = query
37 hasQueryWrapper = true
38 }
39 }
40
Akron4de47a92025-06-27 11:58:11 +020041 if !hasQueryWrapper {
Akron4de47a92025-06-27 11:58:11 +020042 if !isValidQueryObject(jsonData) {
43 return jsonData, nil
44 }
45 queryData = jsonData
46 } else if queryData == nil || !isValidQueryObject(queryData) {
Akron4de47a92025-06-27 11:58:11 +020047 return jsonData, nil
48 }
49
Akron8414ae52026-05-19 13:31:14 +020050 // Strip pre-existing rewrites before AST conversion so they do not
51 // interfere with matching. They are restored after transformation.
Akron4de47a92025-06-27 11:58:11 +020052 var oldRewrites any
53 if queryMap, ok := queryData.(map[string]any); ok {
54 if rewrites, exists := queryMap["rewrites"]; exists {
55 oldRewrites = rewrites
56 delete(queryMap, "rewrites")
57 }
58 }
59
Akron4de47a92025-06-27 11:58:11 +020060 jsonBytes, err := json.Marshal(queryData)
61 if err != nil {
62 return nil, fmt.Errorf("failed to marshal input JSON: %w", err)
63 }
64
65 node, err := parser.ParseJSON(jsonBytes)
66 if err != nil {
67 return nil, fmt.Errorf("failed to parse JSON into AST: %w", err)
68 }
69
Akron8414ae52026-05-19 13:31:14 +020070 // Unwrap Token so matching operates on the inner node; re-wrapped later.
Akron4de47a92025-06-27 11:58:11 +020071 isToken := false
72 var tokenWrap ast.Node
73 if token, ok := node.(*ast.Token); ok {
74 isToken = true
75 tokenWrap = token.Wrap
76 node = tokenWrap
77 }
78
Akron8414ae52026-05-19 13:31:14 +020079 // Resolve foundry/layer overrides per direction once, before the rule loop.
Akron4de47a92025-06-27 11:58:11 +020080 var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
Akron8414ae52026-05-19 13:31:14 +020081 if opts.Direction {
Akron4de47a92025-06-27 11:58:11 +020082 patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
83 replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
84 } else {
85 patternFoundry, patternLayer = opts.FoundryB, opts.LayerB
86 replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
87 }
88
Akron8414ae52026-05-19 13:31:14 +020089 // patternCache avoids redundant Clone+Override for the same rule index
90 // and foundry/layer combination across repeated calls.
Akron4de47a92025-06-27 11:58:11 +020091 type patternCacheKey struct {
92 ruleIndex int
93 foundry string
94 layer string
95 isReplacement bool
96 }
97 patternCache := make(map[patternCacheKey]ast.Node)
98
Akronb4e36f62026-05-21 11:44:25 +020099 // getProcessedPattern returns a cached, override-applied clone of a rule's pattern.
100 getProcessedPattern := func(i int, rule *parser.MappingResult) (ast.Node, ast.Node, ast.Node, error) {
Akron4de47a92025-06-27 11:58:11 +0200101 var pattern, replacement ast.Node
Akron8414ae52026-05-19 13:31:14 +0200102 if opts.Direction {
Akron4de47a92025-06-27 11:58:11 +0200103 pattern = rule.Upper
104 replacement = rule.Lower
105 } else {
106 pattern = rule.Lower
107 replacement = rule.Upper
108 }
Akron4de47a92025-06-27 11:58:11 +0200109 if token, ok := pattern.(*ast.Token); ok {
110 pattern = token.Wrap
111 }
112 if token, ok := replacement.(*ast.Token); ok {
113 replacement = token.Wrap
114 }
115
Akron4de47a92025-06-27 11:58:11 +0200116 patternKey := patternCacheKey{ruleIndex: i, foundry: patternFoundry, layer: patternLayer, isReplacement: false}
117 processedPattern, exists := patternCache[patternKey]
118 if !exists {
Akron4de47a92025-06-27 11:58:11 +0200119 processedPattern = pattern.Clone()
Akron4de47a92025-06-27 11:58:11 +0200120 if patternFoundry != "" || patternLayer != "" {
121 ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
122 }
123 patternCache[patternKey] = processedPattern
124 }
Akronb4e36f62026-05-21 11:44:25 +0200125 return processedPattern, replacement, pattern, nil
126 }
Akron4de47a92025-06-27 11:58:11 +0200127
Akronb4e36f62026-05-21 11:44:25 +0200128 // applyBestRule applies the best-matching rule (by specificity) to a single node.
129 applyBestRule := func(target ast.Node) (ast.Node, error) {
130 var candidates []matchCandidate
131 for i, rule := range rules {
132 processedPattern, replacement, _, err := getProcessedPattern(i, rule)
133 if err != nil {
134 return nil, err
135 }
136 tempMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: &ast.Term{}})
137 if err != nil {
138 return nil, fmt.Errorf("failed to create temporary matcher: %w", err)
139 }
140 if !tempMatcher.Match(target) {
141 continue
142 }
143 candidates = append(candidates, matchCandidate{
144 ruleIndex: i,
145 patternSpecificity: ast.Specificity(processedPattern),
146 replacementSpecificity: ast.Specificity(replacement),
147 })
Akron4de47a92025-06-27 11:58:11 +0200148 }
Akronb4e36f62026-05-21 11:44:25 +0200149 if len(candidates) == 0 {
150 return target, nil
Akron4de47a92025-06-27 11:58:11 +0200151 }
152
Akronb4e36f62026-05-21 11:44:25 +0200153 best := selectBestCandidate(candidates)
154
155 rule := rules[best.ruleIndex]
156 processedPattern, replacement, _, _ := getProcessedPattern(best.ruleIndex, rule)
157
158 replacementKey := patternCacheKey{ruleIndex: best.ruleIndex, foundry: replacementFoundry, layer: replacementLayer, isReplacement: true}
Akron4de47a92025-06-27 11:58:11 +0200159 processedReplacement, exists := patternCache[replacementKey]
160 if !exists {
Akron4de47a92025-06-27 11:58:11 +0200161 processedReplacement = replacement.Clone()
Akron4de47a92025-06-27 11:58:11 +0200162 if replacementFoundry != "" || replacementLayer != "" {
163 ast.ApplyFoundryAndLayerOverrides(processedReplacement, replacementFoundry, replacementLayer)
164 }
165 patternCache[replacementKey] = processedReplacement
166 }
167
Akron8414ae52026-05-19 13:31:14 +0200168 var beforeNode ast.Node
169 if opts.AddRewrites {
Akronb4e36f62026-05-21 11:44:25 +0200170 beforeNode = target.Clone()
Akron8414ae52026-05-19 13:31:14 +0200171 }
172
Akron330c8212026-05-19 14:12:39 +0200173 // Collect pre-existing rewrites before replacement so they
174 // survive when the matcher creates a fresh replacement node.
175 existingRewrites := collectRewrites(node)
176
Akron4de47a92025-06-27 11:58:11 +0200177 actualMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: processedReplacement})
178 if err != nil {
179 return nil, fmt.Errorf("failed to create matcher: %w", err)
180 }
Akronb4e36f62026-05-21 11:44:25 +0200181 result := actualMatcher.Replace(target)
Akron8414ae52026-05-19 13:31:14 +0200182
Akron330c8212026-05-19 14:12:39 +0200183 if len(existingRewrites) > 0 {
Akronb4e36f62026-05-21 11:44:25 +0200184 prependRewrites(result, existingRewrites)
Akron330c8212026-05-19 14:12:39 +0200185 }
186
Akron8414ae52026-05-19 13:31:14 +0200187 if opts.AddRewrites {
Akronb4e36f62026-05-21 11:44:25 +0200188 recordRewrites(result, beforeNode)
189 }
190 return result, nil
191 }
192
193 // For CatchallNodes (any complex KoralQuery operation like sequence,
194 // disjunction, or position), apply best-rule selection per operand
195 // so each token gets its own best-matching rule.
196 if catchall, ok := node.(*ast.CatchallNode); ok && len(catchall.Operands) > 0 {
197 newOperands := make([]ast.Node, len(catchall.Operands))
198 for i, op := range catchall.Operands {
199 replaced, err := applyBestRule(op)
200 if err != nil {
201 return nil, err
202 }
203 newOperands[i] = replaced
204 }
205 node = &ast.CatchallNode{
206 NodeType: catchall.NodeType,
207 RawContent: catchall.RawContent,
208 Wrap: catchall.Wrap,
209 Operands: newOperands,
210 }
211 } else {
212 var err error
213 node, err = applyBestRule(node)
214 if err != nil {
215 return nil, err
Akron8414ae52026-05-19 13:31:14 +0200216 }
Akron4de47a92025-06-27 11:58:11 +0200217 }
218
Akron4de47a92025-06-27 11:58:11 +0200219 var result ast.Node
220 if isToken {
221 result = &ast.Token{Wrap: node}
222 } else {
223 result = node
224 }
225
Akron4de47a92025-06-27 11:58:11 +0200226 resultBytes, err := parser.SerializeToJSON(result)
227 if err != nil {
228 return nil, fmt.Errorf("failed to serialize AST to JSON: %w", err)
229 }
230
Akron4de47a92025-06-27 11:58:11 +0200231 var resultData any
232 if err := json.Unmarshal(resultBytes, &resultData); err != nil {
233 return nil, fmt.Errorf("failed to parse result JSON: %w", err)
234 }
235
Akron8414ae52026-05-19 13:31:14 +0200236 // Restore pre-existing rewrites. The round-trip through ast.Rewrite
237 // normalizes legacy field names (e.g. "source" -> "editor") so the
238 // output always uses the modern schema.
Akron4de47a92025-06-27 11:58:11 +0200239 if oldRewrites != nil {
Akron4de47a92025-06-27 11:58:11 +0200240 if rewritesList, ok := oldRewrites.([]any); ok {
241 processedRewrites := make([]any, len(rewritesList))
242 for i, rewriteData := range rewritesList {
Akron4de47a92025-06-27 11:58:11 +0200243 rewriteBytes, err := json.Marshal(rewriteData)
244 if err != nil {
245 return nil, fmt.Errorf("failed to marshal old rewrite %d: %w", i, err)
246 }
247 var rewrite ast.Rewrite
248 if err := json.Unmarshal(rewriteBytes, &rewrite); err != nil {
249 return nil, fmt.Errorf("failed to unmarshal old rewrite %d: %w", i, err)
250 }
Akron4de47a92025-06-27 11:58:11 +0200251 transformedBytes, err := json.Marshal(&rewrite)
252 if err != nil {
253 return nil, fmt.Errorf("failed to marshal transformed rewrite %d: %w", i, err)
254 }
255 var transformedRewrite any
256 if err := json.Unmarshal(transformedBytes, &transformedRewrite); err != nil {
257 return nil, fmt.Errorf("failed to unmarshal transformed rewrite %d: %w", i, err)
258 }
259 processedRewrites[i] = transformedRewrite
260 }
261 if resultMap, ok := resultData.(map[string]any); ok {
262 resultMap["rewrites"] = processedRewrites
263 }
264 } else {
Akron4de47a92025-06-27 11:58:11 +0200265 if resultMap, ok := resultData.(map[string]any); ok {
266 resultMap["rewrites"] = oldRewrites
267 }
268 }
269 }
270
Akron4de47a92025-06-27 11:58:11 +0200271 if hasQueryWrapper {
272 if wrapper, ok := jsonData.(map[string]any); ok {
273 wrapper["query"] = resultData
274 return wrapper, nil
275 }
276 }
277
278 return resultData, nil
279}
280
// matchCandidate describes one rule that matched a node: the rule's index
// in the rule list plus the specificity scores used to rank it against the
// other matching rules.
type matchCandidate struct {
	ruleIndex              int
	patternSpecificity     int
	replacementSpecificity int
}

// selectBestCandidate returns the winning entry of candidates. Ranking:
//  1. the highest patternSpecificity wins (most features matched);
//  2. on a tie, the lowest replacementSpecificity wins (broadest output);
//  3. if still tied, the entry appearing earliest in candidates is kept.
//     ApplyQueryMappings appends candidates in rule order, so this is the
//     lowest ruleIndex.
//
// candidates must contain at least one element.
func selectBestCandidate(candidates []matchCandidate) matchCandidate {
	winner := 0
	for i := 1; i < len(candidates); i++ {
		challenger, leader := candidates[i], candidates[winner]
		switch {
		case challenger.patternSpecificity > leader.patternSpecificity:
			winner = i
		case challenger.patternSpecificity == leader.patternSpecificity &&
			challenger.replacementSpecificity < leader.replacementSpecificity:
			winner = i
		}
	}
	return candidates[winner]
}
305
Akron8414ae52026-05-19 13:31:14 +0200306// recordRewrites compares the new node against the before-snapshot and
307// attaches rewrite entries to any changed nodes. It handles both simple
308// nodes (Term, TermGroup) and container nodes (CatchallNode with operands).
309func recordRewrites(newNode, beforeNode ast.Node) {
310 if ast.NodesEqual(newNode, beforeNode) {
311 return
312 }
313
Akronb4e36f62026-05-21 11:44:25 +0200314 // For CatchallNodes with operands (e.g. any complex KoralQuery
315 // operation), attach per-operand rewrites so each changed token
316 // gets its own annotation.
Akron8414ae52026-05-19 13:31:14 +0200317 if newCatchall, ok := newNode.(*ast.CatchallNode); ok {
318 if oldCatchall, ok := beforeNode.(*ast.CatchallNode); ok && len(newCatchall.Operands) > 0 {
319 for i, newOp := range newCatchall.Operands {
320 if i >= len(oldCatchall.Operands) {
321 break
322 }
323 oldOp := oldCatchall.Operands[i]
324 recordRewritesForOperand(newOp, oldOp)
325 }
326 return
327 }
328 }
329
330 addRewriteToNode(newNode, beforeNode)
331}
332
333// recordRewritesForOperand handles rewrite recording for a single operand,
334// unwrapping Token nodes so the rewrite attaches to the inner term/termGroup
335// rather than the token wrapper.
336func recordRewritesForOperand(newOp, oldOp ast.Node) {
337 if ast.NodesEqual(newOp, oldOp) {
338 return
339 }
340
341 newInner := newOp
342 oldInner := oldOp
343 if tok, ok := newOp.(*ast.Token); ok {
344 newInner = tok.Wrap
345 }
346 if tok, ok := oldOp.(*ast.Token); ok {
347 oldInner = tok.Wrap
348 }
349
350 if newInner == nil || ast.NodesEqual(newInner, oldInner) {
351 return
352 }
353
354 addRewriteToNode(newInner, oldInner)
355}
356
Akron958fc472026-05-19 13:58:52 +0200357// addRewriteToNode creates and attaches rewrite entries to a node,
Akron8414ae52026-05-19 13:31:14 +0200358// recording what the node looked like before the change.
359func addRewriteToNode(newNode, originalNode ast.Node) {
Akron958fc472026-05-19 13:58:52 +0200360 for _, rw := range buildRewrites(originalNode, newNode) {
361 ast.AppendRewrite(newNode, rw)
362 }
Akron8414ae52026-05-19 13:31:14 +0200363}
364
Akrona0174352026-05-19 17:04:42 +0200365// buildRewrites creates a single Rewrite entry describing what changed between
366// originalNode and newNode. One rule application on one object always produces
367// exactly one koral:rewrite with the full original serialized in `original`.
368// Rewrites from previous cascade steps are stripped from the original so the
369// serialized value only contains the node's own content.
Akron958fc472026-05-19 13:58:52 +0200370func buildRewrites(originalNode, newNode ast.Node) []ast.Rewrite {
Akrona0174352026-05-19 17:04:42 +0200371 clean := originalNode.Clone()
372 ast.StripRewrites(clean)
373 originalBytes, err := parser.SerializeToJSON(clean)
Akron2f93c582026-02-19 16:49:13 +0100374 if err != nil {
Akron958fc472026-05-19 13:58:52 +0200375 return []ast.Rewrite{{Editor: RewriteEditor}}
Akron2f93c582026-02-19 16:49:13 +0100376 }
377 var originalJSON any
378 if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
Akron958fc472026-05-19 13:58:52 +0200379 return []ast.Rewrite{{Editor: RewriteEditor}}
Akron2f93c582026-02-19 16:49:13 +0100380 }
Akron958fc472026-05-19 13:58:52 +0200381 return []ast.Rewrite{{Editor: RewriteEditor, Original: originalJSON}}
Akron2f93c582026-02-19 16:49:13 +0100382}
383
Akron330c8212026-05-19 14:12:39 +0200384// collectRewrites returns the rewrites from the deepest rewritable node.
385// For a Token wrapping a Term, it returns the Term's rewrites.
386// This captures rewrites added by previous cascade steps.
387func collectRewrites(node ast.Node) []ast.Rewrite {
388 if node == nil {
389 return nil
390 }
391 // Unwrap Token to reach the inner node that carries rewrites
392 if tok, ok := node.(*ast.Token); ok && tok.Wrap != nil {
393 return collectRewrites(tok.Wrap)
394 }
395 if r, ok := node.(ast.Rewriteable); ok {
396 return r.GetRewrites()
397 }
398 return nil
399}
400
401// prependRewrites inserts existing rewrites at the front of the node's
402// rewrite list so they appear before any rewrites added by the current step.
403func prependRewrites(node ast.Node, rewrites []ast.Rewrite) {
404 if node == nil || len(rewrites) == 0 {
405 return
406 }
407 // Unwrap Token to reach the inner rewritable node
408 if tok, ok := node.(*ast.Token); ok && tok.Wrap != nil {
409 prependRewrites(tok.Wrap, rewrites)
410 return
411 }
412 if r, ok := node.(ast.Rewriteable); ok {
413 current := r.GetRewrites()
414 // Prepend old rewrites before any newly added ones
415 combined := make([]ast.Rewrite, 0, len(rewrites)+len(current))
416 combined = append(combined, rewrites...)
417 combined = append(combined, current...)
418 r.SetRewrites(combined)
419 }
420}
421
// isValidQueryObject reports whether data is a JSON object (map[string]any)
// that has an "@type" key. Only the key's presence is checked; its value is
// not inspected.
func isValidQueryObject(data any) bool {
	if obj, isObject := data.(map[string]any); isObject {
		_, hasType := obj["@type"]
		return hasType
	}
	return false
}