blob: ced97f339a39ba8e9d2af11b5e16442df8ae8b89 [file] [log] [blame]
Akrona3675e92025-06-26 17:46:59 +02001package mapper
2
3import (
4 "fmt"
5 "strings"
6
Akron2ef703c2025-07-03 15:57:42 +02007 "github.com/KorAP/Koral-Mapper/ast"
8 "github.com/KorAP/Koral-Mapper/matcher"
9 "github.com/KorAP/Koral-Mapper/parser"
Akrona1337ef2025-07-01 12:28:03 +020010 "github.com/rs/zerolog/log"
Akrona3675e92025-06-26 17:46:59 +020011)
12
13// ApplyResponseMappings applies the specified mapping rules to a JSON object
14func (m *Mapper) ApplyResponseMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
15 // Validate mapping ID
16 if _, exists := m.mappingLists[mappingID]; !exists {
17 return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
18 }
19
Akron2f93c582026-02-19 16:49:13 +010020 if m.mappingLists[mappingID].IsCorpus() {
21 return m.applyCorpusResponseMappings(mappingID, opts, jsonData)
22 }
23
Akrona3675e92025-06-26 17:46:59 +020024 // Get the parsed rules
Akron2f93c582026-02-19 16:49:13 +010025 rules := m.parsedQueryRules[mappingID]
Akrona3675e92025-06-26 17:46:59 +020026
27 // Check if we have a snippet to process
28 jsonMap, ok := jsonData.(map[string]any)
29 if !ok {
30 return jsonData, nil
31 }
32
33 snippetValue, exists := jsonMap["snippet"]
34 if !exists {
35 return jsonData, nil
36 }
37
38 snippet, ok := snippetValue.(string)
39 if !ok {
40 return jsonData, nil
41 }
42
43 // Process the snippet with each rule
44 processedSnippet := snippet
Akron497cfe82025-07-03 13:26:54 +020045 for ruleIndex, rule := range rules {
Akrona3675e92025-06-26 17:46:59 +020046 // Create pattern and replacement based on direction
47 var pattern, replacement ast.Node
48 if opts.Direction { // true means AtoB
49 pattern = rule.Upper
50 replacement = rule.Lower
51 } else {
52 pattern = rule.Lower
53 replacement = rule.Upper
54 }
55
56 // Extract the inner nodes from the pattern and replacement tokens
57 if token, ok := pattern.(*ast.Token); ok {
58 pattern = token.Wrap
59 }
60 if token, ok := replacement.(*ast.Token); ok {
61 replacement = token.Wrap
62 }
63
Akron497cfe82025-07-03 13:26:54 +020064 // Apply foundry and layer overrides with proper precedence
65 mappingList := m.mappingLists[mappingID]
66
67 // Determine foundry and layer values based on direction
Akrona3675e92025-06-26 17:46:59 +020068 var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
Akron497cfe82025-07-03 13:26:54 +020069 if opts.Direction { // AtoB
Akrona3675e92025-06-26 17:46:59 +020070 patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
71 replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
Akron497cfe82025-07-03 13:26:54 +020072 // Apply mapping list defaults if not specified
73 if replacementFoundry == "" {
74 replacementFoundry = mappingList.FoundryB
75 }
76 if replacementLayer == "" {
77 replacementLayer = mappingList.LayerB
78 }
79 } else { // BtoA
Akrona3675e92025-06-26 17:46:59 +020080 patternFoundry, patternLayer = opts.FoundryB, opts.LayerB
81 replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
Akron497cfe82025-07-03 13:26:54 +020082 // Apply mapping list defaults if not specified
83 if replacementFoundry == "" {
Akrona3675e92025-06-26 17:46:59 +020084 replacementFoundry = mappingList.FoundryA
Akron4de47a92025-06-27 11:58:11 +020085 }
Akron497cfe82025-07-03 13:26:54 +020086 if replacementLayer == "" {
Akrona3675e92025-06-26 17:46:59 +020087 replacementLayer = mappingList.LayerA
88 }
89 }
90
Akron497cfe82025-07-03 13:26:54 +020091 // Clone pattern and apply foundry and layer overrides
Akrona3675e92025-06-26 17:46:59 +020092 processedPattern := pattern.Clone()
93 if patternFoundry != "" || patternLayer != "" {
94 ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
95 }
96
Akrona3675e92025-06-26 17:46:59 +020097 // Create snippet matcher for this rule
98 snippetMatcher, err := matcher.NewSnippetMatcher(
99 ast.Pattern{Root: processedPattern},
100 ast.Replacement{Root: replacement},
101 )
102 if err != nil {
103 continue // Skip this rule if we can't create a matcher
104 }
105
106 // Find matching tokens in the snippet
107 matchingTokens, err := snippetMatcher.FindMatchingTokens(processedSnippet)
108 if err != nil {
109 continue // Skip this rule if parsing fails
110 }
111
112 if len(matchingTokens) == 0 {
113 continue // No matches, try next rule
114 }
115
Akron497cfe82025-07-03 13:26:54 +0200116 // Apply RestrictToObligatory with layer precedence logic
117 restrictedReplacement := m.applyReplacementWithLayerPrecedence(
118 replacement, replacementFoundry, replacementLayer,
119 mappingID, ruleIndex, bool(opts.Direction))
Akrona3675e92025-06-26 17:46:59 +0200120 if restrictedReplacement == nil {
121 continue // Nothing obligatory to add
122 }
123
124 // Generate annotation strings from the restricted replacement
125 annotationStrings, err := m.generateAnnotationStrings(restrictedReplacement)
126 if err != nil {
127 continue // Skip if we can't generate annotations
128 }
129
130 if len(annotationStrings) == 0 {
131 continue // Nothing to add
132 }
133
134 // Apply annotations to matching tokens in the snippet
135 processedSnippet, err = m.addAnnotationsToSnippet(processedSnippet, matchingTokens, annotationStrings)
136 if err != nil {
137 continue // Skip if we can't apply annotations
138 }
139 }
140
Akrona1337ef2025-07-01 12:28:03 +0200141 log.Debug().Str("snippet", processedSnippet).Msg("Processed snippet")
142
Akrona3675e92025-06-26 17:46:59 +0200143 // Create a copy of the input data and update the snippet
144 result := make(map[string]any)
145 for k, v := range jsonMap {
146 result[k] = v
147 }
148 result["snippet"] = processedSnippet
149
150 return result, nil
151}
152
153// generateAnnotationStrings converts a replacement AST node into annotation strings
154func (m *Mapper) generateAnnotationStrings(node ast.Node) ([]string, error) {
155 if node == nil {
156 return nil, nil
157 }
158
159 switch n := node.(type) {
160 case *ast.Term:
161 // Create annotation string in format "foundry/layer:key" or "foundry/layer:key:value"
162 annotation := n.Foundry + "/" + n.Layer + ":" + n.Key
163 if n.Value != "" {
164 annotation += ":" + n.Value
165 }
166 return []string{annotation}, nil
167
168 case *ast.TermGroup:
169 if n.Relation == ast.AndRelation {
170 // For AND groups, collect all annotations
171 var allAnnotations []string
172 for _, operand := range n.Operands {
173 annotations, err := m.generateAnnotationStrings(operand)
174 if err != nil {
175 return nil, err
176 }
177 allAnnotations = append(allAnnotations, annotations...)
178 }
179 return allAnnotations, nil
180 } else {
181 // For OR groups (should not happen with RestrictToObligatory, but handle gracefully)
182 return nil, nil
183 }
184
185 case *ast.Token:
186 // Handle wrapped tokens
187 if n.Wrap != nil {
188 return m.generateAnnotationStrings(n.Wrap)
189 }
190 return nil, nil
191
192 default:
193 return nil, nil
194 }
195}
196
197// addAnnotationsToSnippet adds new annotations to matching tokens in the snippet
198func (m *Mapper) addAnnotationsToSnippet(snippet string, matchingTokens []matcher.TokenSpan, annotationStrings []string) (string, error) {
199 if len(matchingTokens) == 0 || len(annotationStrings) == 0 {
200 return snippet, nil
201 }
202
203 result := snippet
204
205 // Process each matching token
206 for _, token := range matchingTokens {
207 // For nested span structure, we need to find the innermost text and wrap it
208 // Look for the actual token text within span tags
209 tokenText := token.Text
210
211 // Find all occurrences of the token text in the current snippet
212 // We need to be careful about which occurrence to replace
213 startPos := 0
214 for {
215 tokenStart := strings.Index(result[startPos:], tokenText)
216 if tokenStart == -1 {
217 break // No more occurrences
218 }
219 tokenStart += startPos
220 tokenEnd := tokenStart + len(tokenText)
221
222 // Check if this token text is within the expected context
223 // Look backwards and forwards to see if we're in the right span context
224 beforeContext := result[:tokenStart]
225 afterContext := result[tokenEnd:]
226
227 // Simple heuristic: if we're immediately preceded by a > and followed by a <
228 // then we're likely at the innermost text node
229 if strings.HasSuffix(beforeContext, ">") && (strings.HasPrefix(afterContext, "<") || len(afterContext) == 0 || afterContext[0] == ' ') {
230 // Build the replacement with nested spans for each annotation
231 replacement := tokenText
232 for i := len(annotationStrings) - 1; i >= 0; i-- {
233 replacement = fmt.Sprintf(`<span title="%s" class="notinindex">%s</span>`, annotationStrings[i], replacement)
234 }
235
236 // Replace this occurrence
237 result = result[:tokenStart] + replacement + result[tokenEnd:]
238 break // Only replace the first appropriate occurrence for this token
239 }
240
241 // Move past this occurrence
242 startPos = tokenEnd
243 }
244 }
245
246 return result, nil
247}
Akron497cfe82025-07-03 13:26:54 +0200248
249// applyReplacementWithLayerPrecedence applies RestrictToObligatory with proper layer precedence
250func (m *Mapper) applyReplacementWithLayerPrecedence(
251 replacement ast.Node, foundry, layerOverride string,
252 mappingID string, ruleIndex int, direction bool) ast.Node {
253
254 // First, apply RestrictToObligatory without layer override to preserve explicit layers
255 restricted := ast.RestrictToObligatory(replacement, foundry, "")
256 if restricted == nil {
257 return nil
258 }
259
260 // If no layer override is specified, we're done
261 if layerOverride == "" {
262 return restricted
263 }
264
265 // Apply layer override only to terms that didn't have explicit layers in the original rule
266 mappingList := m.mappingLists[mappingID]
267 if ruleIndex < len(mappingList.Mappings) {
268 originalRule := string(mappingList.Mappings[ruleIndex])
269 m.applySelectiveLayerOverrides(restricted, layerOverride, originalRule, direction)
270 }
271
272 return restricted
273}
274
275// applySelectiveLayerOverrides applies layer overrides only to terms without explicit layers
276func (m *Mapper) applySelectiveLayerOverrides(node ast.Node, layerOverride, originalRule string, direction bool) {
277 if node == nil {
278 return
279 }
280
281 // Parse the original rule without defaults to detect explicit layers
282 explicitTerms := m.getExplicitTerms(originalRule, direction)
283
284 // Apply overrides only to terms that weren't explicit in the original rule
285 termIndex := 0
286 m.applyLayerOverrideToImplicitTerms(node, layerOverride, explicitTerms, &termIndex)
287}
288
289// getExplicitTerms parses the original rule without defaults to identify terms with explicit layers
290func (m *Mapper) getExplicitTerms(originalRule string, direction bool) map[int]bool {
291 explicitTerms := make(map[int]bool)
292
293 // Parse without defaults to see what was explicitly specified
294 parser, err := parser.NewGrammarParser("", "")
295 if err != nil {
296 return explicitTerms
297 }
298
299 result, err := parser.ParseMapping(originalRule)
300 if err != nil {
301 return explicitTerms
302 }
303
304 // Get the replacement side based on direction
305 var replacement ast.Node
306 if direction { // AtoB
307 replacement = result.Lower.Wrap
308 } else { // BtoA
309 replacement = result.Upper.Wrap
310 }
311
312 // Extract terms and check which ones have explicit layers
313 termIndex := 0
314 m.markExplicitTerms(replacement, explicitTerms, &termIndex)
315 return explicitTerms
316}
317
318// markExplicitTerms recursively marks terms that have explicit layers
319func (m *Mapper) markExplicitTerms(node ast.Node, explicitTerms map[int]bool, termIndex *int) {
320 if node == nil {
321 return
322 }
323
324 switch n := node.(type) {
325 case *ast.Term:
326 // A term has an explicit layer if it was specified in the original rule
327 if n.Layer != "" {
328 explicitTerms[*termIndex] = true
329 }
330 *termIndex++
331
332 case *ast.TermGroup:
333 for _, operand := range n.Operands {
334 m.markExplicitTerms(operand, explicitTerms, termIndex)
335 }
336
337 case *ast.Token:
338 if n.Wrap != nil {
339 m.markExplicitTerms(n.Wrap, explicitTerms, termIndex)
340 }
341 }
342}
343
344// applyLayerOverrideToImplicitTerms applies layer override only to terms not marked as explicit
345func (m *Mapper) applyLayerOverrideToImplicitTerms(node ast.Node, layerOverride string, explicitTerms map[int]bool, termIndex *int) {
346 if node == nil {
347 return
348 }
349
350 switch n := node.(type) {
351 case *ast.Term:
352 // Apply override only if this term wasn't explicit in the original rule
353 if !explicitTerms[*termIndex] && n.Layer != "" {
354 n.Layer = layerOverride
355 }
356 *termIndex++
357
358 case *ast.TermGroup:
359 for _, operand := range n.Operands {
360 m.applyLayerOverrideToImplicitTerms(operand, layerOverride, explicitTerms, termIndex)
361 }
362
363 case *ast.Token:
364 if n.Wrap != nil {
365 m.applyLayerOverrideToImplicitTerms(n.Wrap, layerOverride, explicitTerms, termIndex)
366 }
367 }
368}