blob: 9aed2db97c869c3012a504820813575806b3df24 [file] [log] [blame]
Akron8414ae52026-05-19 13:31:14 +02001package mapper
Akron4de47a92025-06-27 11:58:11 +02002
3import (
4 "encoding/json"
5 "fmt"
6
Akron2ef703c2025-07-03 15:57:42 +02007 "github.com/KorAP/Koral-Mapper/ast"
8 "github.com/KorAP/Koral-Mapper/matcher"
9 "github.com/KorAP/Koral-Mapper/parser"
Akron4de47a92025-06-27 11:58:11 +020010)
11
Akron8414ae52026-05-19 13:31:14 +020012// ApplyQueryMappings transforms a JSON query object using the mapping rules
13// identified by mappingID. The input may be a bare query node or a wrapper
14// object containing a "query" field; both forms are accepted.
Akron4de47a92025-06-27 11:58:11 +020015func (m *Mapper) ApplyQueryMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
Akron4de47a92025-06-27 11:58:11 +020016 if _, exists := m.mappingLists[mappingID]; !exists {
17 return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
18 }
19
Akron2f93c582026-02-19 16:49:13 +010020 if m.mappingLists[mappingID].IsCorpus() {
21 return m.applyCorpusQueryMappings(mappingID, opts, jsonData)
22 }
23
Akron2f93c582026-02-19 16:49:13 +010024 rules := m.parsedQueryRules[mappingID]
Akron4de47a92025-06-27 11:58:11 +020025
Akron8414ae52026-05-19 13:31:14 +020026 // Detect wrapper: input may be {"query": ...} or a bare koral:token
Akron4de47a92025-06-27 11:58:11 +020027 var queryData any
28 var hasQueryWrapper bool
29
30 if jsonMap, ok := jsonData.(map[string]any); ok {
31 if query, exists := jsonMap["query"]; exists {
32 queryData = query
33 hasQueryWrapper = true
34 }
35 }
36
Akron4de47a92025-06-27 11:58:11 +020037 if !hasQueryWrapper {
Akron4de47a92025-06-27 11:58:11 +020038 if !isValidQueryObject(jsonData) {
39 return jsonData, nil
40 }
41 queryData = jsonData
42 } else if queryData == nil || !isValidQueryObject(queryData) {
Akron4de47a92025-06-27 11:58:11 +020043 return jsonData, nil
44 }
45
Akron8414ae52026-05-19 13:31:14 +020046 // Strip pre-existing rewrites before AST conversion so they do not
47 // interfere with matching. They are restored after transformation.
Akron4de47a92025-06-27 11:58:11 +020048 var oldRewrites any
49 if queryMap, ok := queryData.(map[string]any); ok {
50 if rewrites, exists := queryMap["rewrites"]; exists {
51 oldRewrites = rewrites
52 delete(queryMap, "rewrites")
53 }
54 }
55
Akron4de47a92025-06-27 11:58:11 +020056 jsonBytes, err := json.Marshal(queryData)
57 if err != nil {
58 return nil, fmt.Errorf("failed to marshal input JSON: %w", err)
59 }
60
61 node, err := parser.ParseJSON(jsonBytes)
62 if err != nil {
63 return nil, fmt.Errorf("failed to parse JSON into AST: %w", err)
64 }
65
Akron8414ae52026-05-19 13:31:14 +020066 // Unwrap Token so matching operates on the inner node; re-wrapped later.
Akron4de47a92025-06-27 11:58:11 +020067 isToken := false
68 var tokenWrap ast.Node
69 if token, ok := node.(*ast.Token); ok {
70 isToken = true
71 tokenWrap = token.Wrap
72 node = tokenWrap
73 }
74
Akron8414ae52026-05-19 13:31:14 +020075 // Resolve foundry/layer overrides per direction once, before the rule loop.
Akron4de47a92025-06-27 11:58:11 +020076 var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
Akron8414ae52026-05-19 13:31:14 +020077 if opts.Direction {
Akron4de47a92025-06-27 11:58:11 +020078 patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
79 replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
80 } else {
81 patternFoundry, patternLayer = opts.FoundryB, opts.LayerB
82 replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
83 }
84
Akron8414ae52026-05-19 13:31:14 +020085 // patternCache avoids redundant Clone+Override for the same rule index
86 // and foundry/layer combination across repeated calls.
Akron4de47a92025-06-27 11:58:11 +020087 type patternCacheKey struct {
88 ruleIndex int
89 foundry string
90 layer string
91 isReplacement bool
92 }
93 patternCache := make(map[patternCacheKey]ast.Node)
94
Akron4de47a92025-06-27 11:58:11 +020095 for i, rule := range rules {
Akron4de47a92025-06-27 11:58:11 +020096 var pattern, replacement ast.Node
Akron8414ae52026-05-19 13:31:14 +020097 if opts.Direction {
Akron4de47a92025-06-27 11:58:11 +020098 pattern = rule.Upper
99 replacement = rule.Lower
100 } else {
101 pattern = rule.Lower
102 replacement = rule.Upper
103 }
104
Akron4de47a92025-06-27 11:58:11 +0200105 if token, ok := pattern.(*ast.Token); ok {
106 pattern = token.Wrap
107 }
108 if token, ok := replacement.(*ast.Token); ok {
109 replacement = token.Wrap
110 }
111
Akron4de47a92025-06-27 11:58:11 +0200112 patternKey := patternCacheKey{ruleIndex: i, foundry: patternFoundry, layer: patternLayer, isReplacement: false}
113 processedPattern, exists := patternCache[patternKey]
114 if !exists {
Akron4de47a92025-06-27 11:58:11 +0200115 processedPattern = pattern.Clone()
Akron4de47a92025-06-27 11:58:11 +0200116 if patternFoundry != "" || patternLayer != "" {
117 ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
118 }
119 patternCache[patternKey] = processedPattern
120 }
121
Akron8414ae52026-05-19 13:31:14 +0200122 // Probe for a match before cloning the replacement (lazy evaluation)
Akron4de47a92025-06-27 11:58:11 +0200123 tempMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: &ast.Term{}})
124 if err != nil {
125 return nil, fmt.Errorf("failed to create temporary matcher: %w", err)
126 }
Akron4de47a92025-06-27 11:58:11 +0200127 if !tempMatcher.Match(node) {
128 continue
129 }
130
Akron4de47a92025-06-27 11:58:11 +0200131 replacementKey := patternCacheKey{ruleIndex: i, foundry: replacementFoundry, layer: replacementLayer, isReplacement: true}
132 processedReplacement, exists := patternCache[replacementKey]
133 if !exists {
Akron4de47a92025-06-27 11:58:11 +0200134 processedReplacement = replacement.Clone()
Akron4de47a92025-06-27 11:58:11 +0200135 if replacementFoundry != "" || replacementLayer != "" {
136 ast.ApplyFoundryAndLayerOverrides(processedReplacement, replacementFoundry, replacementLayer)
137 }
138 patternCache[replacementKey] = processedReplacement
139 }
140
Akron8414ae52026-05-19 13:31:14 +0200141 var beforeNode ast.Node
142 if opts.AddRewrites {
143 beforeNode = node.Clone()
144 }
145
Akron4de47a92025-06-27 11:58:11 +0200146 actualMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: processedReplacement})
147 if err != nil {
148 return nil, fmt.Errorf("failed to create matcher: %w", err)
149 }
150 node = actualMatcher.Replace(node)
Akron8414ae52026-05-19 13:31:14 +0200151
152 if opts.AddRewrites {
153 recordRewrites(node, beforeNode)
154 }
Akron4de47a92025-06-27 11:58:11 +0200155 }
156
Akron4de47a92025-06-27 11:58:11 +0200157 var result ast.Node
158 if isToken {
159 result = &ast.Token{Wrap: node}
160 } else {
161 result = node
162 }
163
Akron4de47a92025-06-27 11:58:11 +0200164 resultBytes, err := parser.SerializeToJSON(result)
165 if err != nil {
166 return nil, fmt.Errorf("failed to serialize AST to JSON: %w", err)
167 }
168
Akron4de47a92025-06-27 11:58:11 +0200169 var resultData any
170 if err := json.Unmarshal(resultBytes, &resultData); err != nil {
171 return nil, fmt.Errorf("failed to parse result JSON: %w", err)
172 }
173
Akron8414ae52026-05-19 13:31:14 +0200174 // Restore pre-existing rewrites. The round-trip through ast.Rewrite
175 // normalizes legacy field names (e.g. "source" -> "editor") so the
176 // output always uses the modern schema.
Akron4de47a92025-06-27 11:58:11 +0200177 if oldRewrites != nil {
Akron4de47a92025-06-27 11:58:11 +0200178 if rewritesList, ok := oldRewrites.([]any); ok {
179 processedRewrites := make([]any, len(rewritesList))
180 for i, rewriteData := range rewritesList {
Akron4de47a92025-06-27 11:58:11 +0200181 rewriteBytes, err := json.Marshal(rewriteData)
182 if err != nil {
183 return nil, fmt.Errorf("failed to marshal old rewrite %d: %w", i, err)
184 }
185 var rewrite ast.Rewrite
186 if err := json.Unmarshal(rewriteBytes, &rewrite); err != nil {
187 return nil, fmt.Errorf("failed to unmarshal old rewrite %d: %w", i, err)
188 }
Akron4de47a92025-06-27 11:58:11 +0200189 transformedBytes, err := json.Marshal(&rewrite)
190 if err != nil {
191 return nil, fmt.Errorf("failed to marshal transformed rewrite %d: %w", i, err)
192 }
193 var transformedRewrite any
194 if err := json.Unmarshal(transformedBytes, &transformedRewrite); err != nil {
195 return nil, fmt.Errorf("failed to unmarshal transformed rewrite %d: %w", i, err)
196 }
197 processedRewrites[i] = transformedRewrite
198 }
199 if resultMap, ok := resultData.(map[string]any); ok {
200 resultMap["rewrites"] = processedRewrites
201 }
202 } else {
Akron4de47a92025-06-27 11:58:11 +0200203 if resultMap, ok := resultData.(map[string]any); ok {
204 resultMap["rewrites"] = oldRewrites
205 }
206 }
207 }
208
Akron4de47a92025-06-27 11:58:11 +0200209 if hasQueryWrapper {
210 if wrapper, ok := jsonData.(map[string]any); ok {
211 wrapper["query"] = resultData
212 return wrapper, nil
213 }
214 }
215
216 return resultData, nil
217}
218
Akron8414ae52026-05-19 13:31:14 +0200219// recordRewrites compares the new node against the before-snapshot and
220// attaches rewrite entries to any changed nodes. It handles both simple
221// nodes (Term, TermGroup) and container nodes (CatchallNode with operands).
222func recordRewrites(newNode, beforeNode ast.Node) {
223 if ast.NodesEqual(newNode, beforeNode) {
224 return
225 }
226
227 // For CatchallNodes with operands (e.g. token sequences), attach
228 // per-operand rewrites so each changed token gets its own annotation.
229 if newCatchall, ok := newNode.(*ast.CatchallNode); ok {
230 if oldCatchall, ok := beforeNode.(*ast.CatchallNode); ok && len(newCatchall.Operands) > 0 {
231 for i, newOp := range newCatchall.Operands {
232 if i >= len(oldCatchall.Operands) {
233 break
234 }
235 oldOp := oldCatchall.Operands[i]
236 recordRewritesForOperand(newOp, oldOp)
237 }
238 return
239 }
240 }
241
242 addRewriteToNode(newNode, beforeNode)
243}
244
245// recordRewritesForOperand handles rewrite recording for a single operand,
246// unwrapping Token nodes so the rewrite attaches to the inner term/termGroup
247// rather than the token wrapper.
248func recordRewritesForOperand(newOp, oldOp ast.Node) {
249 if ast.NodesEqual(newOp, oldOp) {
250 return
251 }
252
253 newInner := newOp
254 oldInner := oldOp
255 if tok, ok := newOp.(*ast.Token); ok {
256 newInner = tok.Wrap
257 }
258 if tok, ok := oldOp.(*ast.Token); ok {
259 oldInner = tok.Wrap
260 }
261
262 if newInner == nil || ast.NodesEqual(newInner, oldInner) {
263 return
264 }
265
266 addRewriteToNode(newInner, oldInner)
267}
268
269// addRewriteToNode creates and attaches a rewrite entry to a node,
270// recording what the node looked like before the change.
271func addRewriteToNode(newNode, originalNode ast.Node) {
272 rw := buildRewrite(originalNode, newNode)
273 ast.AppendRewrite(newNode, rw)
274}
275
276// buildRewrite creates a Rewrite describing what changed between
277// originalNode and newNode. For simple term-level changes (just foundry,
278// layer, key, or value), it uses a scoped rewrite. For structural changes,
279// it stores the full original as an object.
280func buildRewrite(originalNode, newNode ast.Node) ast.Rewrite {
Akron2f93c582026-02-19 16:49:13 +0100281 if term, ok := originalNode.(*ast.Term); ok && ast.IsTermNode(newNode) && originalNode.Type() == newNode.Type() {
282 newTerm := newNode.(*ast.Term)
283 if term.Foundry != newTerm.Foundry {
Akron8414ae52026-05-19 13:31:14 +0200284 return ast.Rewrite{Editor: RewriteEditor, Scope: "foundry", Original: term.Foundry}
Akron2f93c582026-02-19 16:49:13 +0100285 }
286 if term.Layer != newTerm.Layer {
Akron8414ae52026-05-19 13:31:14 +0200287 return ast.Rewrite{Editor: RewriteEditor, Scope: "layer", Original: term.Layer}
Akron2f93c582026-02-19 16:49:13 +0100288 }
289 if term.Key != newTerm.Key {
Akron8414ae52026-05-19 13:31:14 +0200290 return ast.Rewrite{Editor: RewriteEditor, Scope: "key", Original: term.Key}
Akron2f93c582026-02-19 16:49:13 +0100291 }
292 if term.Value != newTerm.Value {
Akron8414ae52026-05-19 13:31:14 +0200293 return ast.Rewrite{Editor: RewriteEditor, Scope: "value", Original: term.Value}
Akron2f93c582026-02-19 16:49:13 +0100294 }
295 }
296
Akron8414ae52026-05-19 13:31:14 +0200297 // Structural change: serialize the original as the rewrite value
Akron2f93c582026-02-19 16:49:13 +0100298 originalBytes, err := parser.SerializeToJSON(originalNode)
299 if err != nil {
Akron8414ae52026-05-19 13:31:14 +0200300 return ast.Rewrite{Editor: RewriteEditor}
Akron2f93c582026-02-19 16:49:13 +0100301 }
302 var originalJSON any
303 if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
Akron8414ae52026-05-19 13:31:14 +0200304 return ast.Rewrite{Editor: RewriteEditor}
Akron2f93c582026-02-19 16:49:13 +0100305 }
Akron8414ae52026-05-19 13:31:14 +0200306 return ast.Rewrite{Editor: RewriteEditor, Original: originalJSON}
Akron2f93c582026-02-19 16:49:13 +0100307}
308
// isValidQueryObject reports whether data is a JSON object (decoded as
// map[string]any) that has an "@type" key. The key's value is not inspected.
func isValidQueryObject(data any) bool {
	if obj, isObject := data.(map[string]any); isObject {
		_, hasType := obj["@type"]
		return hasType
	}
	return false
}