blob: 14fd978e947a43ebf3020c4479931b5f949bac72 [file] [log] [blame]
Akrona3675e92025-06-26 17:46:59 +02001package mapper
2
3import (
4 "fmt"
5 "strings"
6
Akron2ef703c2025-07-03 15:57:42 +02007 "github.com/KorAP/Koral-Mapper/ast"
8 "github.com/KorAP/Koral-Mapper/matcher"
9 "github.com/KorAP/Koral-Mapper/parser"
Akron9663af92026-02-20 13:45:08 +010010 "github.com/orisano/gosax"
Akrona1337ef2025-07-01 12:28:03 +020011 "github.com/rs/zerolog/log"
Akrona3675e92025-06-26 17:46:59 +020012)
13
14// ApplyResponseMappings applies the specified mapping rules to a JSON object
15func (m *Mapper) ApplyResponseMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
16 // Validate mapping ID
17 if _, exists := m.mappingLists[mappingID]; !exists {
18 return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
19 }
20
Akron2f93c582026-02-19 16:49:13 +010021 if m.mappingLists[mappingID].IsCorpus() {
22 return m.applyCorpusResponseMappings(mappingID, opts, jsonData)
23 }
24
Akrona3675e92025-06-26 17:46:59 +020025 // Get the parsed rules
Akron2f93c582026-02-19 16:49:13 +010026 rules := m.parsedQueryRules[mappingID]
Akrona3675e92025-06-26 17:46:59 +020027
28 // Check if we have a snippet to process
29 jsonMap, ok := jsonData.(map[string]any)
30 if !ok {
31 return jsonData, nil
32 }
33
34 snippetValue, exists := jsonMap["snippet"]
35 if !exists {
36 return jsonData, nil
37 }
38
39 snippet, ok := snippetValue.(string)
40 if !ok {
41 return jsonData, nil
42 }
43
44 // Process the snippet with each rule
45 processedSnippet := snippet
Akron497cfe82025-07-03 13:26:54 +020046 for ruleIndex, rule := range rules {
Akrona3675e92025-06-26 17:46:59 +020047 // Create pattern and replacement based on direction
48 var pattern, replacement ast.Node
49 if opts.Direction { // true means AtoB
50 pattern = rule.Upper
51 replacement = rule.Lower
52 } else {
53 pattern = rule.Lower
54 replacement = rule.Upper
55 }
56
57 // Extract the inner nodes from the pattern and replacement tokens
58 if token, ok := pattern.(*ast.Token); ok {
59 pattern = token.Wrap
60 }
61 if token, ok := replacement.(*ast.Token); ok {
62 replacement = token.Wrap
63 }
64
Akron497cfe82025-07-03 13:26:54 +020065 // Apply foundry and layer overrides with proper precedence
66 mappingList := m.mappingLists[mappingID]
67
68 // Determine foundry and layer values based on direction
Akrona3675e92025-06-26 17:46:59 +020069 var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
Akron497cfe82025-07-03 13:26:54 +020070 if opts.Direction { // AtoB
Akrona3675e92025-06-26 17:46:59 +020071 patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
72 replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
Akron497cfe82025-07-03 13:26:54 +020073 // Apply mapping list defaults if not specified
74 if replacementFoundry == "" {
75 replacementFoundry = mappingList.FoundryB
76 }
77 if replacementLayer == "" {
78 replacementLayer = mappingList.LayerB
79 }
80 } else { // BtoA
Akrona3675e92025-06-26 17:46:59 +020081 patternFoundry, patternLayer = opts.FoundryB, opts.LayerB
82 replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
Akron497cfe82025-07-03 13:26:54 +020083 // Apply mapping list defaults if not specified
84 if replacementFoundry == "" {
Akrona3675e92025-06-26 17:46:59 +020085 replacementFoundry = mappingList.FoundryA
Akron4de47a92025-06-27 11:58:11 +020086 }
Akron497cfe82025-07-03 13:26:54 +020087 if replacementLayer == "" {
Akrona3675e92025-06-26 17:46:59 +020088 replacementLayer = mappingList.LayerA
89 }
90 }
91
Akron497cfe82025-07-03 13:26:54 +020092 // Clone pattern and apply foundry and layer overrides
Akrona3675e92025-06-26 17:46:59 +020093 processedPattern := pattern.Clone()
94 if patternFoundry != "" || patternLayer != "" {
95 ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
96 }
97
Akrona3675e92025-06-26 17:46:59 +020098 // Create snippet matcher for this rule
99 snippetMatcher, err := matcher.NewSnippetMatcher(
100 ast.Pattern{Root: processedPattern},
101 ast.Replacement{Root: replacement},
102 )
103 if err != nil {
104 continue // Skip this rule if we can't create a matcher
105 }
106
107 // Find matching tokens in the snippet
108 matchingTokens, err := snippetMatcher.FindMatchingTokens(processedSnippet)
109 if err != nil {
110 continue // Skip this rule if parsing fails
111 }
112
113 if len(matchingTokens) == 0 {
114 continue // No matches, try next rule
115 }
116
Akron497cfe82025-07-03 13:26:54 +0200117 // Apply RestrictToObligatory with layer precedence logic
118 restrictedReplacement := m.applyReplacementWithLayerPrecedence(
119 replacement, replacementFoundry, replacementLayer,
120 mappingID, ruleIndex, bool(opts.Direction))
Akrona3675e92025-06-26 17:46:59 +0200121 if restrictedReplacement == nil {
122 continue // Nothing obligatory to add
123 }
124
125 // Generate annotation strings from the restricted replacement
126 annotationStrings, err := m.generateAnnotationStrings(restrictedReplacement)
127 if err != nil {
128 continue // Skip if we can't generate annotations
129 }
130
131 if len(annotationStrings) == 0 {
132 continue // Nothing to add
133 }
134
135 // Apply annotations to matching tokens in the snippet
136 processedSnippet, err = m.addAnnotationsToSnippet(processedSnippet, matchingTokens, annotationStrings)
137 if err != nil {
138 continue // Skip if we can't apply annotations
139 }
140 }
141
Akrona1337ef2025-07-01 12:28:03 +0200142 log.Debug().Str("snippet", processedSnippet).Msg("Processed snippet")
143
Akrona3675e92025-06-26 17:46:59 +0200144 // Create a copy of the input data and update the snippet
145 result := make(map[string]any)
146 for k, v := range jsonMap {
147 result[k] = v
148 }
149 result["snippet"] = processedSnippet
150
151 return result, nil
152}
153
154// generateAnnotationStrings converts a replacement AST node into annotation strings
155func (m *Mapper) generateAnnotationStrings(node ast.Node) ([]string, error) {
156 if node == nil {
157 return nil, nil
158 }
159
160 switch n := node.(type) {
161 case *ast.Term:
162 // Create annotation string in format "foundry/layer:key" or "foundry/layer:key:value"
163 annotation := n.Foundry + "/" + n.Layer + ":" + n.Key
164 if n.Value != "" {
165 annotation += ":" + n.Value
166 }
167 return []string{annotation}, nil
168
169 case *ast.TermGroup:
170 if n.Relation == ast.AndRelation {
171 // For AND groups, collect all annotations
172 var allAnnotations []string
173 for _, operand := range n.Operands {
174 annotations, err := m.generateAnnotationStrings(operand)
175 if err != nil {
176 return nil, err
177 }
178 allAnnotations = append(allAnnotations, annotations...)
179 }
180 return allAnnotations, nil
181 } else {
182 // For OR groups (should not happen with RestrictToObligatory, but handle gracefully)
183 return nil, nil
184 }
185
186 case *ast.Token:
187 // Handle wrapped tokens
188 if n.Wrap != nil {
189 return m.generateAnnotationStrings(n.Wrap)
190 }
191 return nil, nil
192
193 default:
194 return nil, nil
195 }
196}
197
198// addAnnotationsToSnippet adds new annotations to matching tokens in the snippet
Akron9663af92026-02-20 13:45:08 +0100199// using SAX-based parsing for structural identification of text nodes.
Akrona3675e92025-06-26 17:46:59 +0200200func (m *Mapper) addAnnotationsToSnippet(snippet string, matchingTokens []matcher.TokenSpan, annotationStrings []string) (string, error) {
201 if len(matchingTokens) == 0 || len(annotationStrings) == 0 {
202 return snippet, nil
203 }
204
Akron9663af92026-02-20 13:45:08 +0100205 tokenByStartPos := make(map[int]matcher.TokenSpan)
206 for _, tok := range matchingTokens {
207 tokenByStartPos[tok.StartPos] = tok
208 }
Akrona3675e92025-06-26 17:46:59 +0200209
Akron9663af92026-02-20 13:45:08 +0100210 reader := strings.NewReader(snippet)
211 r := gosax.NewReader(reader)
Akrona3675e92025-06-26 17:46:59 +0200212
Akron9663af92026-02-20 13:45:08 +0100213 var result strings.Builder
214 result.Grow(len(snippet) + len(matchingTokens)*100)
215
216 var textPos int
217
218 for {
219 e, err := r.Event()
220 if err != nil {
221 return "", fmt.Errorf("failed to parse snippet for annotation: %w", err)
222 }
223 if e.Type() == gosax.EventEOF {
224 break
225 }
226
227 switch e.Type() {
228 case gosax.EventStart:
229 result.Write(e.Bytes)
230
231 case gosax.EventEnd:
232 result.Write(e.Bytes)
233
234 case gosax.EventText:
235 charData, err := gosax.CharData(e.Bytes)
236 if err != nil {
237 result.Write(e.Bytes)
238 break
Akrona3675e92025-06-26 17:46:59 +0200239 }
Akrona3675e92025-06-26 17:46:59 +0200240
Akron9663af92026-02-20 13:45:08 +0100241 text := string(charData)
242 trimmed := strings.TrimSpace(text)
Akrona3675e92025-06-26 17:46:59 +0200243
Akron9663af92026-02-20 13:45:08 +0100244 if token, ok := tokenByStartPos[textPos]; ok && trimmed != "" && trimmed == token.Text {
245 trimStart := strings.Index(text, trimmed)
246 leadingWS := text[:trimStart]
247 trailingWS := text[trimStart+len(trimmed):]
248
249 result.WriteString(leadingWS)
250
251 annotated := escapeXMLText(trimmed)
Akrona3675e92025-06-26 17:46:59 +0200252 for i := len(annotationStrings) - 1; i >= 0; i-- {
Akron9663af92026-02-20 13:45:08 +0100253 annotated = fmt.Sprintf(`<span title="%s" class="notinindex">%s</span>`, annotationStrings[i], annotated)
Akrona3675e92025-06-26 17:46:59 +0200254 }
Akron9663af92026-02-20 13:45:08 +0100255 result.WriteString(annotated)
256 result.WriteString(trailingWS)
257 } else {
258 result.Write(e.Bytes)
Akrona3675e92025-06-26 17:46:59 +0200259 }
260
Akron9663af92026-02-20 13:45:08 +0100261 textPos += len(text)
262
263 default:
264 result.Write(e.Bytes)
Akrona3675e92025-06-26 17:46:59 +0200265 }
266 }
267
Akron9663af92026-02-20 13:45:08 +0100268 return result.String(), nil
269}
270
// escapeXMLText replaces the characters &, < and > in s with their XML
// entities so that s can be embedded as element text. All other bytes are
// passed through unchanged.
func escapeXMLText(s string) string {
	return strings.NewReplacer(
		"&", "&amp;",
		"<", "&lt;",
		">", "&gt;",
	).Replace(s)
}
Akron497cfe82025-07-03 13:26:54 +0200277
278// applyReplacementWithLayerPrecedence applies RestrictToObligatory with proper layer precedence
279func (m *Mapper) applyReplacementWithLayerPrecedence(
280 replacement ast.Node, foundry, layerOverride string,
281 mappingID string, ruleIndex int, direction bool) ast.Node {
282
283 // First, apply RestrictToObligatory without layer override to preserve explicit layers
284 restricted := ast.RestrictToObligatory(replacement, foundry, "")
285 if restricted == nil {
286 return nil
287 }
288
289 // If no layer override is specified, we're done
290 if layerOverride == "" {
291 return restricted
292 }
293
294 // Apply layer override only to terms that didn't have explicit layers in the original rule
295 mappingList := m.mappingLists[mappingID]
296 if ruleIndex < len(mappingList.Mappings) {
297 originalRule := string(mappingList.Mappings[ruleIndex])
298 m.applySelectiveLayerOverrides(restricted, layerOverride, originalRule, direction)
299 }
300
301 return restricted
302}
303
304// applySelectiveLayerOverrides applies layer overrides only to terms without explicit layers
305func (m *Mapper) applySelectiveLayerOverrides(node ast.Node, layerOverride, originalRule string, direction bool) {
306 if node == nil {
307 return
308 }
309
310 // Parse the original rule without defaults to detect explicit layers
311 explicitTerms := m.getExplicitTerms(originalRule, direction)
312
313 // Apply overrides only to terms that weren't explicit in the original rule
314 termIndex := 0
315 m.applyLayerOverrideToImplicitTerms(node, layerOverride, explicitTerms, &termIndex)
316}
317
318// getExplicitTerms parses the original rule without defaults to identify terms with explicit layers
319func (m *Mapper) getExplicitTerms(originalRule string, direction bool) map[int]bool {
320 explicitTerms := make(map[int]bool)
321
322 // Parse without defaults to see what was explicitly specified
323 parser, err := parser.NewGrammarParser("", "")
324 if err != nil {
325 return explicitTerms
326 }
327
328 result, err := parser.ParseMapping(originalRule)
329 if err != nil {
330 return explicitTerms
331 }
332
333 // Get the replacement side based on direction
334 var replacement ast.Node
335 if direction { // AtoB
336 replacement = result.Lower.Wrap
337 } else { // BtoA
338 replacement = result.Upper.Wrap
339 }
340
341 // Extract terms and check which ones have explicit layers
342 termIndex := 0
343 m.markExplicitTerms(replacement, explicitTerms, &termIndex)
344 return explicitTerms
345}
346
347// markExplicitTerms recursively marks terms that have explicit layers
348func (m *Mapper) markExplicitTerms(node ast.Node, explicitTerms map[int]bool, termIndex *int) {
349 if node == nil {
350 return
351 }
352
353 switch n := node.(type) {
354 case *ast.Term:
355 // A term has an explicit layer if it was specified in the original rule
356 if n.Layer != "" {
357 explicitTerms[*termIndex] = true
358 }
359 *termIndex++
360
361 case *ast.TermGroup:
362 for _, operand := range n.Operands {
363 m.markExplicitTerms(operand, explicitTerms, termIndex)
364 }
365
366 case *ast.Token:
367 if n.Wrap != nil {
368 m.markExplicitTerms(n.Wrap, explicitTerms, termIndex)
369 }
370 }
371}
372
373// applyLayerOverrideToImplicitTerms applies layer override only to terms not marked as explicit
374func (m *Mapper) applyLayerOverrideToImplicitTerms(node ast.Node, layerOverride string, explicitTerms map[int]bool, termIndex *int) {
375 if node == nil {
376 return
377 }
378
379 switch n := node.(type) {
380 case *ast.Term:
381 // Apply override only if this term wasn't explicit in the original rule
382 if !explicitTerms[*termIndex] && n.Layer != "" {
383 n.Layer = layerOverride
384 }
385 *termIndex++
386
387 case *ast.TermGroup:
388 for _, operand := range n.Operands {
389 m.applyLayerOverrideToImplicitTerms(operand, layerOverride, explicitTerms, termIndex)
390 }
391
392 case *ast.Token:
393 if n.Wrap != nil {
394 m.applyLayerOverrideToImplicitTerms(n.Wrap, layerOverride, explicitTerms, termIndex)
395 }
396 }
397}