blob: 0bbe39f68eb9921ac36b8f18ba1ffeae4e3fdba7 [file] [log] [blame]
Akron32d53de2025-05-22 13:45:32 +02001package mapper
2
3import (
4 "encoding/json"
5 "fmt"
6
Akronfa55bb22025-05-26 15:10:42 +02007 "github.com/KorAP/KoralPipe-TermMapper/ast"
8 "github.com/KorAP/KoralPipe-TermMapper/config"
9 "github.com/KorAP/KoralPipe-TermMapper/matcher"
10 "github.com/KorAP/KoralPipe-TermMapper/parser"
Akron32d53de2025-05-22 13:45:32 +020011)
12
// Direction represents the mapping direction (A to B or B to A).
type Direction bool

const (
	// AtoB selects the A-to-B direction; its string form is "atob".
	AtoB Direction = true
	// BtoA selects the B-to-A direction; its string form is "btoa".
	BtoA Direction = false
)

// String converts the Direction to its string representation:
// "atob" for AtoB and "btoa" for BtoA.
func (d Direction) String() string {
	if d == AtoB {
		return "atob"
	}
	return "btoa"
}

// ParseDirection converts a string direction to the Direction type.
//
// Only the exact strings "atob" and "btoa" are accepted (the comparison is
// case-sensitive). Any other input yields an error; the Direction returned
// together with an error is the zero value and carries no meaning.
func ParseDirection(dir string) (Direction, error) {
	switch dir {
	case "atob":
		return AtoB, nil
	case "btoa":
		return BtoA, nil
	default:
		return BtoA, fmt.Errorf("invalid direction: %s", dir)
	}
}
40
Akron32d53de2025-05-22 13:45:32 +020041// Mapper handles the application of mapping rules to JSON objects
42type Mapper struct {
43 mappingLists map[string]*config.MappingList
44 parsedRules map[string][]*parser.MappingResult
45}
46
Akrona00d4752025-05-26 17:34:36 +020047// NewMapper creates a new Mapper instance from a list of MappingLists
48func NewMapper(lists []config.MappingList) (*Mapper, error) {
Akron32d53de2025-05-22 13:45:32 +020049 m := &Mapper{
50 mappingLists: make(map[string]*config.MappingList),
51 parsedRules: make(map[string][]*parser.MappingResult),
52 }
53
Akrona00d4752025-05-26 17:34:36 +020054 // Store mapping lists by ID
55 for _, list := range lists {
56 if _, exists := m.mappingLists[list.ID]; exists {
57 return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
58 }
59
60 // Create a copy of the list to store
61 listCopy := list
62 m.mappingLists[list.ID] = &listCopy
63
64 // Parse the rules immediately
65 parsedRules, err := list.ParseMappings()
Akron32d53de2025-05-22 13:45:32 +020066 if err != nil {
Akrona00d4752025-05-26 17:34:36 +020067 return nil, fmt.Errorf("failed to parse mappings for list %s: %w", list.ID, err)
Akron32d53de2025-05-22 13:45:32 +020068 }
Akrona00d4752025-05-26 17:34:36 +020069 m.parsedRules[list.ID] = parsedRules
Akron32d53de2025-05-22 13:45:32 +020070 }
71
72 return m, nil
73}
74
75// MappingOptions contains the options for applying mappings
76type MappingOptions struct {
Akron0d9117c2025-05-27 15:20:21 +020077 FoundryA string
78 LayerA string
79 FoundryB string
80 LayerB string
81 Direction Direction
82 AddRewrites bool
Akron32d53de2025-05-22 13:45:32 +020083}
84
Akron7b4984e2025-05-26 19:12:20 +020085// ApplyQueryMappings applies the specified mapping rules to a JSON object
86func (m *Mapper) ApplyQueryMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
Akron32d53de2025-05-22 13:45:32 +020087 // Validate mapping ID
88 if _, exists := m.mappingLists[mappingID]; !exists {
89 return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
90 }
91
Akron32d53de2025-05-22 13:45:32 +020092 // Get the parsed rules
93 rules := m.parsedRules[mappingID]
94
Akron7b4984e2025-05-26 19:12:20 +020095 // Check if we have a wrapper object with a "query" field
96 var queryData any
97 var hasQueryWrapper bool
98
99 if jsonMap, ok := jsonData.(map[string]any); ok {
100 if query, exists := jsonMap["query"]; exists {
101 queryData = query
102 hasQueryWrapper = true
103 }
104 }
105
106 // If no query wrapper was found, use the entire input
107 if !hasQueryWrapper {
108 // If the input itself is not a valid query object, return it as is
109 if !isValidQueryObject(jsonData) {
110 return jsonData, nil
111 }
112 queryData = jsonData
113 } else if queryData == nil || !isValidQueryObject(queryData) {
114 // If we have a query wrapper but the query is nil or not a valid object,
115 // return the original data
116 return jsonData, nil
117 }
118
Akroncc83eb52025-05-27 14:39:12 +0200119 // Store rewrites if they exist
120 var oldRewrites any
121 if queryMap, ok := queryData.(map[string]any); ok {
122 if rewrites, exists := queryMap["rewrites"]; exists {
123 oldRewrites = rewrites
124 delete(queryMap, "rewrites")
125 }
126 }
127
Akron32d53de2025-05-22 13:45:32 +0200128 // Convert input JSON to AST
Akron7b4984e2025-05-26 19:12:20 +0200129 jsonBytes, err := json.Marshal(queryData)
Akron32d53de2025-05-22 13:45:32 +0200130 if err != nil {
131 return nil, fmt.Errorf("failed to marshal input JSON: %w", err)
132 }
133
134 node, err := parser.ParseJSON(jsonBytes)
135 if err != nil {
136 return nil, fmt.Errorf("failed to parse JSON into AST: %w", err)
137 }
138
Akrond5850f82025-05-23 16:44:44 +0200139 // Store whether the input was a Token
140 isToken := false
141 var tokenWrap ast.Node
Akron32d53de2025-05-22 13:45:32 +0200142 if token, ok := node.(*ast.Token); ok {
Akrond5850f82025-05-23 16:44:44 +0200143 isToken = true
144 tokenWrap = token.Wrap
145 node = tokenWrap
Akron32d53de2025-05-22 13:45:32 +0200146 }
147
Akron0d9117c2025-05-27 15:20:21 +0200148 // Store original node for rewrite if needed
149 var originalNode ast.Node
150 if opts.AddRewrites {
Akron441bd122025-05-30 14:19:50 +0200151 originalNode = node.Clone()
Akron0d9117c2025-05-27 15:20:21 +0200152 }
153
Akron441bd122025-05-30 14:19:50 +0200154 // Pre-check foundry/layer overrides to optimize processing
155 var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
156 if opts.Direction { // true means AtoB
157 patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
158 replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
159 } else {
160 patternFoundry, patternLayer = opts.FoundryB, opts.LayerB
161 replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
162 }
163
164 // Create a pattern cache key for memoization
165 type patternCacheKey struct {
166 ruleIndex int
167 foundry string
168 layer string
169 isReplacement bool
170 }
171 patternCache := make(map[patternCacheKey]ast.Node)
172
Akron32d53de2025-05-22 13:45:32 +0200173 // Apply each rule to the AST
Akron441bd122025-05-30 14:19:50 +0200174 for i, rule := range rules {
Akron32d53de2025-05-22 13:45:32 +0200175 // Create pattern and replacement based on direction
176 var pattern, replacement ast.Node
Akrona1a183f2025-05-26 17:47:33 +0200177 if opts.Direction { // true means AtoB
Akron32d53de2025-05-22 13:45:32 +0200178 pattern = rule.Upper
179 replacement = rule.Lower
180 } else {
181 pattern = rule.Lower
182 replacement = rule.Upper
183 }
184
185 // Extract the inner nodes from the pattern and replacement tokens
186 if token, ok := pattern.(*ast.Token); ok {
187 pattern = token.Wrap
188 }
189 if token, ok := replacement.(*ast.Token); ok {
190 replacement = token.Wrap
191 }
192
Akron441bd122025-05-30 14:19:50 +0200193 // First, quickly check if the pattern could match without creating a full matcher
194 // This is a lightweight pre-check to avoid expensive operations
195 if !m.couldPatternMatch(node, pattern) {
196 continue
Akron32d53de2025-05-22 13:45:32 +0200197 }
198
Akron441bd122025-05-30 14:19:50 +0200199 // Get or create pattern with overrides
200 patternKey := patternCacheKey{ruleIndex: i, foundry: patternFoundry, layer: patternLayer, isReplacement: false}
201 processedPattern, exists := patternCache[patternKey]
202 if !exists {
203 // Clone pattern only when needed
204 processedPattern = pattern.Clone()
205 // Apply foundry and layer overrides only if they're non-empty
206 if patternFoundry != "" || patternLayer != "" {
207 ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
208 }
209 patternCache[patternKey] = processedPattern
Akron8f1970f2025-05-30 12:52:03 +0200210 }
211
Akron441bd122025-05-30 14:19:50 +0200212 // Create a temporary matcher to check for actual matches
213 tempMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: &ast.Term{}})
214 if err != nil {
215 return nil, fmt.Errorf("failed to create temporary matcher: %w", err)
Akron8f1970f2025-05-30 12:52:03 +0200216 }
217
Akron441bd122025-05-30 14:19:50 +0200218 // Only proceed if there's an actual match
219 if !tempMatcher.Match(node) {
220 continue
221 }
222
223 // Get or create replacement with overrides (lazy evaluation)
224 replacementKey := patternCacheKey{ruleIndex: i, foundry: replacementFoundry, layer: replacementLayer, isReplacement: true}
225 processedReplacement, exists := patternCache[replacementKey]
226 if !exists {
227 // Clone replacement only when we have a match
228 processedReplacement = replacement.Clone()
229 // Apply foundry and layer overrides only if they're non-empty
230 if replacementFoundry != "" || replacementLayer != "" {
231 ast.ApplyFoundryAndLayerOverrides(processedReplacement, replacementFoundry, replacementLayer)
232 }
233 patternCache[replacementKey] = processedReplacement
234 }
235
236 // Create the actual matcher and apply replacement
237 actualMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: processedReplacement})
Akrond5850f82025-05-23 16:44:44 +0200238 if err != nil {
239 return nil, fmt.Errorf("failed to create matcher: %w", err)
240 }
Akron441bd122025-05-30 14:19:50 +0200241 node = actualMatcher.Replace(node)
Akron32d53de2025-05-22 13:45:32 +0200242 }
243
Akrond5850f82025-05-23 16:44:44 +0200244 // Wrap the result in a token if the input was a token
245 var result ast.Node
246 if isToken {
247 result = &ast.Token{Wrap: node}
248 } else {
249 result = node
250 }
Akron32d53de2025-05-22 13:45:32 +0200251
252 // Convert AST back to JSON
253 resultBytes, err := parser.SerializeToJSON(result)
254 if err != nil {
255 return nil, fmt.Errorf("failed to serialize AST to JSON: %w", err)
256 }
257
Akron6f455152025-05-27 09:03:00 +0200258 // Parse the JSON string back into
259 var resultData any
Akron32d53de2025-05-22 13:45:32 +0200260 if err := json.Unmarshal(resultBytes, &resultData); err != nil {
261 return nil, fmt.Errorf("failed to parse result JSON: %w", err)
262 }
263
Akron0d9117c2025-05-27 15:20:21 +0200264 // Add rewrites if enabled and node was changed
265 if opts.AddRewrites && !ast.NodesEqual(node, originalNode) {
266 // Create rewrite object
267 rewrite := map[string]any{
268 "@type": "koral:rewrite",
269 "editor": "termMapper",
270 }
271
Akron8a87d9a2025-05-27 15:30:48 +0200272 // Check if the node types are different (structural change)
273 if originalNode.Type() != node.Type() {
274 // Full node replacement
275 originalBytes, err := parser.SerializeToJSON(originalNode)
276 if err != nil {
277 return nil, fmt.Errorf("failed to serialize original node for rewrite: %w", err)
278 }
279 var originalJSON any
280 if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
281 return nil, fmt.Errorf("failed to parse original node JSON for rewrite: %w", err)
282 }
283 rewrite["original"] = originalJSON
284 } else if term, ok := originalNode.(*ast.Term); ok && ast.IsTermNode(node) {
285 // Check which attributes changed
286 newTerm := node.(*ast.Term)
287 if term.Foundry != newTerm.Foundry {
288 rewrite["scope"] = "foundry"
289 rewrite["original"] = term.Foundry
290 } else if term.Layer != newTerm.Layer {
291 rewrite["scope"] = "layer"
292 rewrite["original"] = term.Layer
293 } else if term.Key != newTerm.Key {
294 rewrite["scope"] = "key"
295 rewrite["original"] = term.Key
296 } else if term.Value != newTerm.Value {
297 rewrite["scope"] = "value"
298 rewrite["original"] = term.Value
299 } else {
300 // No specific attribute changed, use full node replacement
301 originalBytes, err := parser.SerializeToJSON(originalNode)
302 if err != nil {
303 return nil, fmt.Errorf("failed to serialize original node for rewrite: %w", err)
Akron0d9117c2025-05-27 15:20:21 +0200304 }
Akron8a87d9a2025-05-27 15:30:48 +0200305 var originalJSON any
306 if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
307 return nil, fmt.Errorf("failed to parse original node JSON for rewrite: %w", err)
Akron0d9117c2025-05-27 15:20:21 +0200308 }
Akron8a87d9a2025-05-27 15:30:48 +0200309 rewrite["original"] = originalJSON
Akron0d9117c2025-05-27 15:20:21 +0200310 }
311 } else {
312 // Full node replacement
313 originalBytes, err := parser.SerializeToJSON(originalNode)
314 if err != nil {
315 return nil, fmt.Errorf("failed to serialize original node for rewrite: %w", err)
316 }
317 var originalJSON any
318 if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
319 return nil, fmt.Errorf("failed to parse original node JSON for rewrite: %w", err)
320 }
Akron8a87d9a2025-05-27 15:30:48 +0200321 rewrite["original"] = originalJSON
Akron0d9117c2025-05-27 15:20:21 +0200322 }
323
324 // Add rewrite to the node
325 if resultMap, ok := resultData.(map[string]any); ok {
326 if wrapMap, ok := resultMap["wrap"].(map[string]any); ok {
327 rewrites, exists := wrapMap["rewrites"]
328 if !exists {
329 rewrites = []any{}
330 }
331 if rewritesList, ok := rewrites.([]any); ok {
332 wrapMap["rewrites"] = append(rewritesList, rewrite)
333 } else {
334 wrapMap["rewrites"] = []any{rewrite}
335 }
336 }
337 }
338 }
339
Akroncc83eb52025-05-27 14:39:12 +0200340 // Restore rewrites if they existed
341 if oldRewrites != nil {
Akron8f1970f2025-05-30 12:52:03 +0200342 // Process old rewrites through AST to ensure backward compatibility
343 if rewritesList, ok := oldRewrites.([]any); ok {
344 processedRewrites := make([]any, len(rewritesList))
345 for i, rewriteData := range rewritesList {
346 // Marshal and unmarshal each rewrite to apply backward compatibility
347 rewriteBytes, err := json.Marshal(rewriteData)
348 if err != nil {
349 return nil, fmt.Errorf("failed to marshal old rewrite %d: %w", i, err)
350 }
351 var rewrite ast.Rewrite
352 if err := json.Unmarshal(rewriteBytes, &rewrite); err != nil {
353 return nil, fmt.Errorf("failed to unmarshal old rewrite %d: %w", i, err)
354 }
355 // Marshal back to get the transformed version
356 transformedBytes, err := json.Marshal(&rewrite)
357 if err != nil {
358 return nil, fmt.Errorf("failed to marshal transformed rewrite %d: %w", i, err)
359 }
360 var transformedRewrite any
361 if err := json.Unmarshal(transformedBytes, &transformedRewrite); err != nil {
362 return nil, fmt.Errorf("failed to unmarshal transformed rewrite %d: %w", i, err)
363 }
364 processedRewrites[i] = transformedRewrite
365 }
366 if resultMap, ok := resultData.(map[string]any); ok {
367 resultMap["rewrites"] = processedRewrites
368 }
369 } else {
370 // If it's not a list, restore as-is
371 if resultMap, ok := resultData.(map[string]any); ok {
372 resultMap["rewrites"] = oldRewrites
373 }
Akroncc83eb52025-05-27 14:39:12 +0200374 }
375 }
376
Akron7b4984e2025-05-26 19:12:20 +0200377 // If we had a query wrapper, put the transformed data back in it
378 if hasQueryWrapper {
379 if wrapper, ok := jsonData.(map[string]any); ok {
380 wrapper["query"] = resultData
381 return wrapper, nil
382 }
383 }
384
Akron32d53de2025-05-22 13:45:32 +0200385 return resultData, nil
386}
387
// isValidQueryObject checks if the query data is a valid object that can be
// processed: it must be a map[string]any that contains an "@type" key. Only
// the presence of the key is checked, not its value.
func isValidQueryObject(data any) bool {
	queryMap, isMap := data.(map[string]any)
	if !isMap {
		return false
	}

	_, hasType := queryMap["@type"]
	return hasType
}
403
Akron441bd122025-05-30 14:19:50 +0200404// couldPatternMatch performs a lightweight check to see if a pattern could potentially match a node
405// This is an optimization to avoid expensive operations when there's clearly no match possible
406func (m *Mapper) couldPatternMatch(node, pattern ast.Node) bool {
407 if pattern == nil {
408 return true
409 }
Akron32d53de2025-05-22 13:45:32 +0200410 if node == nil {
Akron441bd122025-05-30 14:19:50 +0200411 return false
412 }
413
414 // Handle Token wrappers
415 if token, ok := pattern.(*ast.Token); ok {
416 pattern = token.Wrap
417 }
418 if token, ok := node.(*ast.Token); ok {
419 node = token.Wrap
420 }
421
422 // For simple terms, check basic compatibility
423 if patternTerm, ok := pattern.(*ast.Term); ok {
424 // Check if there's any term in the node structure that could match
425 return m.hasMatchingTerm(node, patternTerm)
426 }
427
428 // For TermGroups, we need to check all possible matches
429 if patternGroup, ok := pattern.(*ast.TermGroup); ok {
430 if patternGroup.Relation == ast.OrRelation {
431 // For OR relations, any operand could match
432 for _, op := range patternGroup.Operands {
433 if m.couldPatternMatch(node, op) {
434 return true
435 }
436 }
437 return false
438 } else {
439 // For AND relations, all operands must have potential matches
440 for _, op := range patternGroup.Operands {
441 if !m.couldPatternMatch(node, op) {
442 return false
443 }
444 }
445 return true
446 }
447 }
448
449 // For other cases, assume they could match (conservative approach)
450 return true
451}
452
453// hasMatchingTerm checks if there's any term in the node structure that could match the pattern term
454func (m *Mapper) hasMatchingTerm(node ast.Node, patternTerm *ast.Term) bool {
455 if node == nil {
456 return false
Akron32d53de2025-05-22 13:45:32 +0200457 }
458
459 switch n := node.(type) {
460 case *ast.Term:
Akron441bd122025-05-30 14:19:50 +0200461 // Check if this term could match the pattern
462 // We only check key as that's the most distinctive attribute
463 return n.Key == patternTerm.Key
Akron32d53de2025-05-22 13:45:32 +0200464 case *ast.TermGroup:
Akron441bd122025-05-30 14:19:50 +0200465 // Check all operands
Akron32d53de2025-05-22 13:45:32 +0200466 for _, op := range n.Operands {
Akron441bd122025-05-30 14:19:50 +0200467 if m.hasMatchingTerm(op, patternTerm) {
468 return true
469 }
Akron32d53de2025-05-22 13:45:32 +0200470 }
Akron441bd122025-05-30 14:19:50 +0200471 return false
Akron32d53de2025-05-22 13:45:32 +0200472 case *ast.Token:
Akron441bd122025-05-30 14:19:50 +0200473 return m.hasMatchingTerm(n.Wrap, patternTerm)
Akron32d53de2025-05-22 13:45:32 +0200474 case *ast.CatchallNode:
Akron441bd122025-05-30 14:19:50 +0200475 if n.Wrap != nil && m.hasMatchingTerm(n.Wrap, patternTerm) {
476 return true
Akron32d53de2025-05-22 13:45:32 +0200477 }
478 for _, op := range n.Operands {
Akron441bd122025-05-30 14:19:50 +0200479 if m.hasMatchingTerm(op, patternTerm) {
480 return true
481 }
Akron32d53de2025-05-22 13:45:32 +0200482 }
Akron441bd122025-05-30 14:19:50 +0200483 return false
484 default:
485 return false
Akron32d53de2025-05-22 13:45:32 +0200486 }
487}