blob: 3ac6b0288942f656d4e58463eb9f047f5caa6946 [file] [log] [blame]
Akrona3675e92025-06-26 17:46:59 +02001package mapper
2
3import (
4 "fmt"
Akrona8b9fbc2026-03-05 16:43:05 +01005 "maps"
Akrona3675e92025-06-26 17:46:59 +02006 "strings"
7
Akron2ef703c2025-07-03 15:57:42 +02008 "github.com/KorAP/Koral-Mapper/ast"
9 "github.com/KorAP/Koral-Mapper/matcher"
10 "github.com/KorAP/Koral-Mapper/parser"
Akron9663af92026-02-20 13:45:08 +010011 "github.com/orisano/gosax"
Akrona1337ef2025-07-01 12:28:03 +020012 "github.com/rs/zerolog/log"
Akrona3675e92025-06-26 17:46:59 +020013)
14
15// ApplyResponseMappings applies the specified mapping rules to a JSON object
16func (m *Mapper) ApplyResponseMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
17 // Validate mapping ID
18 if _, exists := m.mappingLists[mappingID]; !exists {
19 return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
20 }
21
Akron422cd252026-05-19 16:31:19 +020022 if err := m.validateEffectiveOptions(mappingID, opts); err != nil {
23 return nil, err
24 }
25
Akron2f93c582026-02-19 16:49:13 +010026 if m.mappingLists[mappingID].IsCorpus() {
27 return m.applyCorpusResponseMappings(mappingID, opts, jsonData)
28 }
29
Akrona3675e92025-06-26 17:46:59 +020030 // Get the parsed rules
Akron2f93c582026-02-19 16:49:13 +010031 rules := m.parsedQueryRules[mappingID]
Akrona3675e92025-06-26 17:46:59 +020032
33 // Check if we have a snippet to process
34 jsonMap, ok := jsonData.(map[string]any)
35 if !ok {
36 return jsonData, nil
37 }
38
39 snippetValue, exists := jsonMap["snippet"]
40 if !exists {
41 return jsonData, nil
42 }
43
44 snippet, ok := snippetValue.(string)
45 if !ok {
46 return jsonData, nil
47 }
48
49 // Process the snippet with each rule
50 processedSnippet := snippet
Akron497cfe82025-07-03 13:26:54 +020051 for ruleIndex, rule := range rules {
Akrona3675e92025-06-26 17:46:59 +020052 // Create pattern and replacement based on direction
53 var pattern, replacement ast.Node
54 if opts.Direction { // true means AtoB
55 pattern = rule.Upper
56 replacement = rule.Lower
57 } else {
58 pattern = rule.Lower
59 replacement = rule.Upper
60 }
61
62 // Extract the inner nodes from the pattern and replacement tokens
63 if token, ok := pattern.(*ast.Token); ok {
64 pattern = token.Wrap
65 }
66 if token, ok := replacement.(*ast.Token); ok {
67 replacement = token.Wrap
68 }
69
Akron497cfe82025-07-03 13:26:54 +020070 // Apply foundry and layer overrides with proper precedence
71 mappingList := m.mappingLists[mappingID]
72
73 // Determine foundry and layer values based on direction
Akrona3675e92025-06-26 17:46:59 +020074 var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
Akron497cfe82025-07-03 13:26:54 +020075 if opts.Direction { // AtoB
Akrona3675e92025-06-26 17:46:59 +020076 patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
77 replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
Akron497cfe82025-07-03 13:26:54 +020078 // Apply mapping list defaults if not specified
79 if replacementFoundry == "" {
80 replacementFoundry = mappingList.FoundryB
81 }
82 if replacementLayer == "" {
83 replacementLayer = mappingList.LayerB
84 }
85 } else { // BtoA
Akrona3675e92025-06-26 17:46:59 +020086 patternFoundry, patternLayer = opts.FoundryB, opts.LayerB
87 replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
Akron497cfe82025-07-03 13:26:54 +020088 // Apply mapping list defaults if not specified
89 if replacementFoundry == "" {
Akrona3675e92025-06-26 17:46:59 +020090 replacementFoundry = mappingList.FoundryA
Akron4de47a92025-06-27 11:58:11 +020091 }
Akron497cfe82025-07-03 13:26:54 +020092 if replacementLayer == "" {
Akrona3675e92025-06-26 17:46:59 +020093 replacementLayer = mappingList.LayerA
94 }
95 }
96
Akron497cfe82025-07-03 13:26:54 +020097 // Clone pattern and apply foundry and layer overrides
Akrona3675e92025-06-26 17:46:59 +020098 processedPattern := pattern.Clone()
99 if patternFoundry != "" || patternLayer != "" {
100 ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
101 }
102
Akrona3675e92025-06-26 17:46:59 +0200103 // Create snippet matcher for this rule
104 snippetMatcher, err := matcher.NewSnippetMatcher(
105 ast.Pattern{Root: processedPattern},
106 ast.Replacement{Root: replacement},
107 )
108 if err != nil {
109 continue // Skip this rule if we can't create a matcher
110 }
111
112 // Find matching tokens in the snippet
113 matchingTokens, err := snippetMatcher.FindMatchingTokens(processedSnippet)
114 if err != nil {
115 continue // Skip this rule if parsing fails
116 }
117
118 if len(matchingTokens) == 0 {
119 continue // No matches, try next rule
120 }
121
Akron497cfe82025-07-03 13:26:54 +0200122 // Apply RestrictToObligatory with layer precedence logic
123 restrictedReplacement := m.applyReplacementWithLayerPrecedence(
124 replacement, replacementFoundry, replacementLayer,
125 mappingID, ruleIndex, bool(opts.Direction))
Akrona3675e92025-06-26 17:46:59 +0200126 if restrictedReplacement == nil {
127 continue // Nothing obligatory to add
128 }
129
130 // Generate annotation strings from the restricted replacement
131 annotationStrings, err := m.generateAnnotationStrings(restrictedReplacement)
132 if err != nil {
133 continue // Skip if we can't generate annotations
134 }
135
136 if len(annotationStrings) == 0 {
137 continue // Nothing to add
138 }
139
140 // Apply annotations to matching tokens in the snippet
141 processedSnippet, err = m.addAnnotationsToSnippet(processedSnippet, matchingTokens, annotationStrings)
142 if err != nil {
143 continue // Skip if we can't apply annotations
144 }
145 }
146
Akrona1337ef2025-07-01 12:28:03 +0200147 log.Debug().Str("snippet", processedSnippet).Msg("Processed snippet")
148
Akrona3675e92025-06-26 17:46:59 +0200149 // Create a copy of the input data and update the snippet
150 result := make(map[string]any)
Akrona8b9fbc2026-03-05 16:43:05 +0100151 maps.Copy(result, jsonMap)
Akrona3675e92025-06-26 17:46:59 +0200152 result["snippet"] = processedSnippet
153
154 return result, nil
155}
156
157// generateAnnotationStrings converts a replacement AST node into annotation strings
158func (m *Mapper) generateAnnotationStrings(node ast.Node) ([]string, error) {
159 if node == nil {
160 return nil, nil
161 }
162
163 switch n := node.(type) {
164 case *ast.Term:
165 // Create annotation string in format "foundry/layer:key" or "foundry/layer:key:value"
166 annotation := n.Foundry + "/" + n.Layer + ":" + n.Key
167 if n.Value != "" {
168 annotation += ":" + n.Value
169 }
170 return []string{annotation}, nil
171
172 case *ast.TermGroup:
173 if n.Relation == ast.AndRelation {
174 // For AND groups, collect all annotations
175 var allAnnotations []string
176 for _, operand := range n.Operands {
177 annotations, err := m.generateAnnotationStrings(operand)
178 if err != nil {
179 return nil, err
180 }
181 allAnnotations = append(allAnnotations, annotations...)
182 }
183 return allAnnotations, nil
184 } else {
185 // For OR groups (should not happen with RestrictToObligatory, but handle gracefully)
186 return nil, nil
187 }
188
189 case *ast.Token:
190 // Handle wrapped tokens
191 if n.Wrap != nil {
192 return m.generateAnnotationStrings(n.Wrap)
193 }
194 return nil, nil
195
196 default:
197 return nil, nil
198 }
199}
200
201// addAnnotationsToSnippet adds new annotations to matching tokens in the snippet
Akron9663af92026-02-20 13:45:08 +0100202// using SAX-based parsing for structural identification of text nodes.
Akrona3675e92025-06-26 17:46:59 +0200203func (m *Mapper) addAnnotationsToSnippet(snippet string, matchingTokens []matcher.TokenSpan, annotationStrings []string) (string, error) {
204 if len(matchingTokens) == 0 || len(annotationStrings) == 0 {
205 return snippet, nil
206 }
207
Akron9663af92026-02-20 13:45:08 +0100208 tokenByStartPos := make(map[int]matcher.TokenSpan)
209 for _, tok := range matchingTokens {
210 tokenByStartPos[tok.StartPos] = tok
211 }
Akrona3675e92025-06-26 17:46:59 +0200212
Akron9663af92026-02-20 13:45:08 +0100213 reader := strings.NewReader(snippet)
214 r := gosax.NewReader(reader)
Akrona3675e92025-06-26 17:46:59 +0200215
Akron9663af92026-02-20 13:45:08 +0100216 var result strings.Builder
217 result.Grow(len(snippet) + len(matchingTokens)*100)
218
219 var textPos int
220
221 for {
222 e, err := r.Event()
223 if err != nil {
224 return "", fmt.Errorf("failed to parse snippet for annotation: %w", err)
225 }
226 if e.Type() == gosax.EventEOF {
227 break
228 }
229
230 switch e.Type() {
231 case gosax.EventStart:
232 result.Write(e.Bytes)
233
234 case gosax.EventEnd:
235 result.Write(e.Bytes)
236
237 case gosax.EventText:
238 charData, err := gosax.CharData(e.Bytes)
239 if err != nil {
240 result.Write(e.Bytes)
241 break
Akrona3675e92025-06-26 17:46:59 +0200242 }
Akrona3675e92025-06-26 17:46:59 +0200243
Akron9663af92026-02-20 13:45:08 +0100244 text := string(charData)
245 trimmed := strings.TrimSpace(text)
Akrona3675e92025-06-26 17:46:59 +0200246
Akron9663af92026-02-20 13:45:08 +0100247 if token, ok := tokenByStartPos[textPos]; ok && trimmed != "" && trimmed == token.Text {
Akrona8b9fbc2026-03-05 16:43:05 +0100248 before, after, _ := strings.Cut(text, trimmed)
249 leadingWS := before
250 trailingWS := after
Akron9663af92026-02-20 13:45:08 +0100251
252 result.WriteString(leadingWS)
253
254 annotated := escapeXMLText(trimmed)
Akrona3675e92025-06-26 17:46:59 +0200255 for i := len(annotationStrings) - 1; i >= 0; i-- {
Akron9663af92026-02-20 13:45:08 +0100256 annotated = fmt.Sprintf(`<span title="%s" class="notinindex">%s</span>`, annotationStrings[i], annotated)
Akrona3675e92025-06-26 17:46:59 +0200257 }
Akron9663af92026-02-20 13:45:08 +0100258 result.WriteString(annotated)
259 result.WriteString(trailingWS)
260 } else {
261 result.Write(e.Bytes)
Akrona3675e92025-06-26 17:46:59 +0200262 }
263
Akron9663af92026-02-20 13:45:08 +0100264 textPos += len(text)
265
266 default:
267 result.Write(e.Bytes)
Akrona3675e92025-06-26 17:46:59 +0200268 }
269 }
270
Akron9663af92026-02-20 13:45:08 +0100271 return result.String(), nil
272}
273
// escapeXMLText replaces the markup characters '&', '<' and '>' in s with
// their entity references so the result can be embedded as XML element text.
// Quotes are left untouched.
func escapeXMLText(s string) string {
	if !strings.ContainsAny(s, "&<>") {
		return s
	}

	var escaped strings.Builder
	escaped.Grow(len(s) + 8)
	for i := 0; i < len(s); i++ {
		switch c := s[i]; c {
		case '&':
			escaped.WriteString("&amp;")
		case '<':
			escaped.WriteString("&lt;")
		case '>':
			escaped.WriteString("&gt;")
		default:
			escaped.WriteByte(c)
		}
	}
	return escaped.String()
}
Akron497cfe82025-07-03 13:26:54 +0200280
281// applyReplacementWithLayerPrecedence applies RestrictToObligatory with proper layer precedence
282func (m *Mapper) applyReplacementWithLayerPrecedence(
283 replacement ast.Node, foundry, layerOverride string,
284 mappingID string, ruleIndex int, direction bool) ast.Node {
285
286 // First, apply RestrictToObligatory without layer override to preserve explicit layers
287 restricted := ast.RestrictToObligatory(replacement, foundry, "")
288 if restricted == nil {
289 return nil
290 }
291
292 // If no layer override is specified, we're done
293 if layerOverride == "" {
294 return restricted
295 }
296
297 // Apply layer override only to terms that didn't have explicit layers in the original rule
298 mappingList := m.mappingLists[mappingID]
299 if ruleIndex < len(mappingList.Mappings) {
300 originalRule := string(mappingList.Mappings[ruleIndex])
301 m.applySelectiveLayerOverrides(restricted, layerOverride, originalRule, direction)
302 }
303
304 return restricted
305}
306
307// applySelectiveLayerOverrides applies layer overrides only to terms without explicit layers
308func (m *Mapper) applySelectiveLayerOverrides(node ast.Node, layerOverride, originalRule string, direction bool) {
309 if node == nil {
310 return
311 }
312
313 // Parse the original rule without defaults to detect explicit layers
314 explicitTerms := m.getExplicitTerms(originalRule, direction)
315
316 // Apply overrides only to terms that weren't explicit in the original rule
317 termIndex := 0
318 m.applyLayerOverrideToImplicitTerms(node, layerOverride, explicitTerms, &termIndex)
319}
320
321// getExplicitTerms parses the original rule without defaults to identify terms with explicit layers
322func (m *Mapper) getExplicitTerms(originalRule string, direction bool) map[int]bool {
323 explicitTerms := make(map[int]bool)
324
325 // Parse without defaults to see what was explicitly specified
326 parser, err := parser.NewGrammarParser("", "")
327 if err != nil {
328 return explicitTerms
329 }
330
331 result, err := parser.ParseMapping(originalRule)
332 if err != nil {
333 return explicitTerms
334 }
335
336 // Get the replacement side based on direction
337 var replacement ast.Node
338 if direction { // AtoB
339 replacement = result.Lower.Wrap
340 } else { // BtoA
341 replacement = result.Upper.Wrap
342 }
343
344 // Extract terms and check which ones have explicit layers
345 termIndex := 0
346 m.markExplicitTerms(replacement, explicitTerms, &termIndex)
347 return explicitTerms
348}
349
350// markExplicitTerms recursively marks terms that have explicit layers
351func (m *Mapper) markExplicitTerms(node ast.Node, explicitTerms map[int]bool, termIndex *int) {
352 if node == nil {
353 return
354 }
355
356 switch n := node.(type) {
357 case *ast.Term:
358 // A term has an explicit layer if it was specified in the original rule
359 if n.Layer != "" {
360 explicitTerms[*termIndex] = true
361 }
362 *termIndex++
363
364 case *ast.TermGroup:
365 for _, operand := range n.Operands {
366 m.markExplicitTerms(operand, explicitTerms, termIndex)
367 }
368
369 case *ast.Token:
370 if n.Wrap != nil {
371 m.markExplicitTerms(n.Wrap, explicitTerms, termIndex)
372 }
373 }
374}
375
376// applyLayerOverrideToImplicitTerms applies layer override only to terms not marked as explicit
377func (m *Mapper) applyLayerOverrideToImplicitTerms(node ast.Node, layerOverride string, explicitTerms map[int]bool, termIndex *int) {
378 if node == nil {
379 return
380 }
381
382 switch n := node.(type) {
383 case *ast.Term:
384 // Apply override only if this term wasn't explicit in the original rule
385 if !explicitTerms[*termIndex] && n.Layer != "" {
386 n.Layer = layerOverride
387 }
388 *termIndex++
389
390 case *ast.TermGroup:
391 for _, operand := range n.Operands {
392 m.applyLayerOverrideToImplicitTerms(operand, layerOverride, explicitTerms, termIndex)
393 }
394
395 case *ast.Token:
396 if n.Wrap != nil {
397 m.applyLayerOverrideToImplicitTerms(n.Wrap, layerOverride, explicitTerms, termIndex)
398 }
399 }
400}