blob: 57de91286fd0f9f75e5402d9c9d7ce3f544d9a3f [file] [log] [blame]
Akron8414ae52026-05-19 13:31:14 +02001package mapper
Akron4de47a92025-06-27 11:58:11 +02002
3import (
4 "encoding/json"
5 "fmt"
6
Akron2ef703c2025-07-03 15:57:42 +02007 "github.com/KorAP/Koral-Mapper/ast"
8 "github.com/KorAP/Koral-Mapper/matcher"
9 "github.com/KorAP/Koral-Mapper/parser"
Akron4de47a92025-06-27 11:58:11 +020010)
11
Akron8414ae52026-05-19 13:31:14 +020012// ApplyQueryMappings transforms a JSON query object using the mapping rules
13// identified by mappingID. The input may be a bare query node or a wrapper
14// object containing a "query" field; both forms are accepted.
Akron4de47a92025-06-27 11:58:11 +020015func (m *Mapper) ApplyQueryMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
Akron4de47a92025-06-27 11:58:11 +020016 if _, exists := m.mappingLists[mappingID]; !exists {
17 return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
18 }
19
Akron2f93c582026-02-19 16:49:13 +010020 if m.mappingLists[mappingID].IsCorpus() {
21 return m.applyCorpusQueryMappings(mappingID, opts, jsonData)
22 }
23
Akron2f93c582026-02-19 16:49:13 +010024 rules := m.parsedQueryRules[mappingID]
Akron4de47a92025-06-27 11:58:11 +020025
Akron8414ae52026-05-19 13:31:14 +020026 // Detect wrapper: input may be {"query": ...} or a bare koral:token
Akron4de47a92025-06-27 11:58:11 +020027 var queryData any
28 var hasQueryWrapper bool
29
30 if jsonMap, ok := jsonData.(map[string]any); ok {
31 if query, exists := jsonMap["query"]; exists {
32 queryData = query
33 hasQueryWrapper = true
34 }
35 }
36
Akron4de47a92025-06-27 11:58:11 +020037 if !hasQueryWrapper {
Akron4de47a92025-06-27 11:58:11 +020038 if !isValidQueryObject(jsonData) {
39 return jsonData, nil
40 }
41 queryData = jsonData
42 } else if queryData == nil || !isValidQueryObject(queryData) {
Akron4de47a92025-06-27 11:58:11 +020043 return jsonData, nil
44 }
45
Akron8414ae52026-05-19 13:31:14 +020046 // Strip pre-existing rewrites before AST conversion so they do not
47 // interfere with matching. They are restored after transformation.
Akron4de47a92025-06-27 11:58:11 +020048 var oldRewrites any
49 if queryMap, ok := queryData.(map[string]any); ok {
50 if rewrites, exists := queryMap["rewrites"]; exists {
51 oldRewrites = rewrites
52 delete(queryMap, "rewrites")
53 }
54 }
55
Akron4de47a92025-06-27 11:58:11 +020056 jsonBytes, err := json.Marshal(queryData)
57 if err != nil {
58 return nil, fmt.Errorf("failed to marshal input JSON: %w", err)
59 }
60
61 node, err := parser.ParseJSON(jsonBytes)
62 if err != nil {
63 return nil, fmt.Errorf("failed to parse JSON into AST: %w", err)
64 }
65
Akron8414ae52026-05-19 13:31:14 +020066 // Unwrap Token so matching operates on the inner node; re-wrapped later.
Akron4de47a92025-06-27 11:58:11 +020067 isToken := false
68 var tokenWrap ast.Node
69 if token, ok := node.(*ast.Token); ok {
70 isToken = true
71 tokenWrap = token.Wrap
72 node = tokenWrap
73 }
74
Akron8414ae52026-05-19 13:31:14 +020075 // Resolve foundry/layer overrides per direction once, before the rule loop.
Akron4de47a92025-06-27 11:58:11 +020076 var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
Akron8414ae52026-05-19 13:31:14 +020077 if opts.Direction {
Akron4de47a92025-06-27 11:58:11 +020078 patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
79 replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
80 } else {
81 patternFoundry, patternLayer = opts.FoundryB, opts.LayerB
82 replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
83 }
84
Akron8414ae52026-05-19 13:31:14 +020085 // patternCache avoids redundant Clone+Override for the same rule index
86 // and foundry/layer combination across repeated calls.
Akron4de47a92025-06-27 11:58:11 +020087 type patternCacheKey struct {
88 ruleIndex int
89 foundry string
90 layer string
91 isReplacement bool
92 }
93 patternCache := make(map[patternCacheKey]ast.Node)
94
Akron4de47a92025-06-27 11:58:11 +020095 for i, rule := range rules {
Akron4de47a92025-06-27 11:58:11 +020096 var pattern, replacement ast.Node
Akron8414ae52026-05-19 13:31:14 +020097 if opts.Direction {
Akron4de47a92025-06-27 11:58:11 +020098 pattern = rule.Upper
99 replacement = rule.Lower
100 } else {
101 pattern = rule.Lower
102 replacement = rule.Upper
103 }
104
Akron4de47a92025-06-27 11:58:11 +0200105 if token, ok := pattern.(*ast.Token); ok {
106 pattern = token.Wrap
107 }
108 if token, ok := replacement.(*ast.Token); ok {
109 replacement = token.Wrap
110 }
111
Akron4de47a92025-06-27 11:58:11 +0200112 patternKey := patternCacheKey{ruleIndex: i, foundry: patternFoundry, layer: patternLayer, isReplacement: false}
113 processedPattern, exists := patternCache[patternKey]
114 if !exists {
Akron4de47a92025-06-27 11:58:11 +0200115 processedPattern = pattern.Clone()
Akron4de47a92025-06-27 11:58:11 +0200116 if patternFoundry != "" || patternLayer != "" {
117 ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
118 }
119 patternCache[patternKey] = processedPattern
120 }
121
Akron8414ae52026-05-19 13:31:14 +0200122 // Probe for a match before cloning the replacement (lazy evaluation)
Akron4de47a92025-06-27 11:58:11 +0200123 tempMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: &ast.Term{}})
124 if err != nil {
125 return nil, fmt.Errorf("failed to create temporary matcher: %w", err)
126 }
Akron4de47a92025-06-27 11:58:11 +0200127 if !tempMatcher.Match(node) {
128 continue
129 }
130
Akron4de47a92025-06-27 11:58:11 +0200131 replacementKey := patternCacheKey{ruleIndex: i, foundry: replacementFoundry, layer: replacementLayer, isReplacement: true}
132 processedReplacement, exists := patternCache[replacementKey]
133 if !exists {
Akron4de47a92025-06-27 11:58:11 +0200134 processedReplacement = replacement.Clone()
Akron4de47a92025-06-27 11:58:11 +0200135 if replacementFoundry != "" || replacementLayer != "" {
136 ast.ApplyFoundryAndLayerOverrides(processedReplacement, replacementFoundry, replacementLayer)
137 }
138 patternCache[replacementKey] = processedReplacement
139 }
140
Akron8414ae52026-05-19 13:31:14 +0200141 var beforeNode ast.Node
142 if opts.AddRewrites {
143 beforeNode = node.Clone()
144 }
145
Akron4de47a92025-06-27 11:58:11 +0200146 actualMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: processedReplacement})
147 if err != nil {
148 return nil, fmt.Errorf("failed to create matcher: %w", err)
149 }
150 node = actualMatcher.Replace(node)
Akron8414ae52026-05-19 13:31:14 +0200151
152 if opts.AddRewrites {
153 recordRewrites(node, beforeNode)
154 }
Akron4de47a92025-06-27 11:58:11 +0200155 }
156
Akron4de47a92025-06-27 11:58:11 +0200157 var result ast.Node
158 if isToken {
159 result = &ast.Token{Wrap: node}
160 } else {
161 result = node
162 }
163
Akron4de47a92025-06-27 11:58:11 +0200164 resultBytes, err := parser.SerializeToJSON(result)
165 if err != nil {
166 return nil, fmt.Errorf("failed to serialize AST to JSON: %w", err)
167 }
168
Akron4de47a92025-06-27 11:58:11 +0200169 var resultData any
170 if err := json.Unmarshal(resultBytes, &resultData); err != nil {
171 return nil, fmt.Errorf("failed to parse result JSON: %w", err)
172 }
173
Akron8414ae52026-05-19 13:31:14 +0200174 // Restore pre-existing rewrites. The round-trip through ast.Rewrite
175 // normalizes legacy field names (e.g. "source" -> "editor") so the
176 // output always uses the modern schema.
Akron4de47a92025-06-27 11:58:11 +0200177 if oldRewrites != nil {
Akron4de47a92025-06-27 11:58:11 +0200178 if rewritesList, ok := oldRewrites.([]any); ok {
179 processedRewrites := make([]any, len(rewritesList))
180 for i, rewriteData := range rewritesList {
Akron4de47a92025-06-27 11:58:11 +0200181 rewriteBytes, err := json.Marshal(rewriteData)
182 if err != nil {
183 return nil, fmt.Errorf("failed to marshal old rewrite %d: %w", i, err)
184 }
185 var rewrite ast.Rewrite
186 if err := json.Unmarshal(rewriteBytes, &rewrite); err != nil {
187 return nil, fmt.Errorf("failed to unmarshal old rewrite %d: %w", i, err)
188 }
Akron4de47a92025-06-27 11:58:11 +0200189 transformedBytes, err := json.Marshal(&rewrite)
190 if err != nil {
191 return nil, fmt.Errorf("failed to marshal transformed rewrite %d: %w", i, err)
192 }
193 var transformedRewrite any
194 if err := json.Unmarshal(transformedBytes, &transformedRewrite); err != nil {
195 return nil, fmt.Errorf("failed to unmarshal transformed rewrite %d: %w", i, err)
196 }
197 processedRewrites[i] = transformedRewrite
198 }
199 if resultMap, ok := resultData.(map[string]any); ok {
200 resultMap["rewrites"] = processedRewrites
201 }
202 } else {
Akron4de47a92025-06-27 11:58:11 +0200203 if resultMap, ok := resultData.(map[string]any); ok {
204 resultMap["rewrites"] = oldRewrites
205 }
206 }
207 }
208
Akron4de47a92025-06-27 11:58:11 +0200209 if hasQueryWrapper {
210 if wrapper, ok := jsonData.(map[string]any); ok {
211 wrapper["query"] = resultData
212 return wrapper, nil
213 }
214 }
215
216 return resultData, nil
217}
218
Akron8414ae52026-05-19 13:31:14 +0200219// recordRewrites compares the new node against the before-snapshot and
220// attaches rewrite entries to any changed nodes. It handles both simple
221// nodes (Term, TermGroup) and container nodes (CatchallNode with operands).
222func recordRewrites(newNode, beforeNode ast.Node) {
223 if ast.NodesEqual(newNode, beforeNode) {
224 return
225 }
226
227 // For CatchallNodes with operands (e.g. token sequences), attach
228 // per-operand rewrites so each changed token gets its own annotation.
229 if newCatchall, ok := newNode.(*ast.CatchallNode); ok {
230 if oldCatchall, ok := beforeNode.(*ast.CatchallNode); ok && len(newCatchall.Operands) > 0 {
231 for i, newOp := range newCatchall.Operands {
232 if i >= len(oldCatchall.Operands) {
233 break
234 }
235 oldOp := oldCatchall.Operands[i]
236 recordRewritesForOperand(newOp, oldOp)
237 }
238 return
239 }
240 }
241
242 addRewriteToNode(newNode, beforeNode)
243}
244
245// recordRewritesForOperand handles rewrite recording for a single operand,
246// unwrapping Token nodes so the rewrite attaches to the inner term/termGroup
247// rather than the token wrapper.
248func recordRewritesForOperand(newOp, oldOp ast.Node) {
249 if ast.NodesEqual(newOp, oldOp) {
250 return
251 }
252
253 newInner := newOp
254 oldInner := oldOp
255 if tok, ok := newOp.(*ast.Token); ok {
256 newInner = tok.Wrap
257 }
258 if tok, ok := oldOp.(*ast.Token); ok {
259 oldInner = tok.Wrap
260 }
261
262 if newInner == nil || ast.NodesEqual(newInner, oldInner) {
263 return
264 }
265
266 addRewriteToNode(newInner, oldInner)
267}
268
Akron958fc472026-05-19 13:58:52 +0200269// addRewriteToNode creates and attaches rewrite entries to a node,
Akron8414ae52026-05-19 13:31:14 +0200270// recording what the node looked like before the change.
271func addRewriteToNode(newNode, originalNode ast.Node) {
Akron958fc472026-05-19 13:58:52 +0200272 for _, rw := range buildRewrites(originalNode, newNode) {
273 ast.AppendRewrite(newNode, rw)
274 }
Akron8414ae52026-05-19 13:31:14 +0200275}
276
Akron958fc472026-05-19 13:58:52 +0200277// buildRewrites creates Rewrite entries describing what changed between
278// originalNode and newNode. For term-level changes it emits one scoped
279// rewrite per changed field so the transformation is fully reversible.
280// For structural changes it stores the full original as an object.
281func buildRewrites(originalNode, newNode ast.Node) []ast.Rewrite {
Akron2f93c582026-02-19 16:49:13 +0100282 if term, ok := originalNode.(*ast.Term); ok && ast.IsTermNode(newNode) && originalNode.Type() == newNode.Type() {
283 newTerm := newNode.(*ast.Term)
Akron958fc472026-05-19 13:58:52 +0200284 var rewrites []ast.Rewrite
285
Akron2f93c582026-02-19 16:49:13 +0100286 if term.Foundry != newTerm.Foundry {
Akron958fc472026-05-19 13:58:52 +0200287 rw := ast.Rewrite{Editor: RewriteEditor, Scope: "foundry"}
288 if term.Foundry != "" {
289 rw.Original = term.Foundry
290 }
291 rewrites = append(rewrites, rw)
Akron2f93c582026-02-19 16:49:13 +0100292 }
293 if term.Layer != newTerm.Layer {
Akron958fc472026-05-19 13:58:52 +0200294 rw := ast.Rewrite{Editor: RewriteEditor, Scope: "layer"}
295 if term.Layer != "" {
296 rw.Original = term.Layer
297 }
298 rewrites = append(rewrites, rw)
Akron2f93c582026-02-19 16:49:13 +0100299 }
300 if term.Key != newTerm.Key {
Akron958fc472026-05-19 13:58:52 +0200301 rw := ast.Rewrite{Editor: RewriteEditor, Scope: "key"}
302 if term.Key != "" {
303 rw.Original = term.Key
304 }
305 rewrites = append(rewrites, rw)
Akron2f93c582026-02-19 16:49:13 +0100306 }
307 if term.Value != newTerm.Value {
Akron958fc472026-05-19 13:58:52 +0200308 rw := ast.Rewrite{Editor: RewriteEditor, Scope: "value"}
309 if term.Value != "" {
310 rw.Original = term.Value
311 }
312 rewrites = append(rewrites, rw)
313 }
314
315 if len(rewrites) > 0 {
316 return rewrites
Akron2f93c582026-02-19 16:49:13 +0100317 }
318 }
319
Akron8414ae52026-05-19 13:31:14 +0200320 // Structural change: serialize the original as the rewrite value
Akron2f93c582026-02-19 16:49:13 +0100321 originalBytes, err := parser.SerializeToJSON(originalNode)
322 if err != nil {
Akron958fc472026-05-19 13:58:52 +0200323 return []ast.Rewrite{{Editor: RewriteEditor}}
Akron2f93c582026-02-19 16:49:13 +0100324 }
325 var originalJSON any
326 if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
Akron958fc472026-05-19 13:58:52 +0200327 return []ast.Rewrite{{Editor: RewriteEditor}}
Akron2f93c582026-02-19 16:49:13 +0100328 }
Akron958fc472026-05-19 13:58:52 +0200329 return []ast.Rewrite{{Editor: RewriteEditor, Original: originalJSON}}
Akron2f93c582026-02-19 16:49:13 +0100330}
331
// isValidQueryObject reports whether data is a JSON object (map[string]any)
// that has an "@type" key. Only the presence of the key is checked, not its
// value.
func isValidQueryObject(data any) bool {
	if obj, isObject := data.(map[string]any); isObject {
		_, hasType := obj["@type"]
		return hasType
	}
	return false
}