blob: e870a2991bbaf2d69b423577b12343ba6d8b04f7 [file] [log] [blame]
Akrona3675e92025-06-26 17:46:59 +02001package mapper
2
3import (
4 "fmt"
Akrona8b9fbc2026-03-05 16:43:05 +01005 "maps"
Akrona3675e92025-06-26 17:46:59 +02006 "strings"
7
Akron2ef703c2025-07-03 15:57:42 +02008 "github.com/KorAP/Koral-Mapper/ast"
9 "github.com/KorAP/Koral-Mapper/matcher"
10 "github.com/KorAP/Koral-Mapper/parser"
Akron9663af92026-02-20 13:45:08 +010011 "github.com/orisano/gosax"
Akrona1337ef2025-07-01 12:28:03 +020012 "github.com/rs/zerolog/log"
Akrona3675e92025-06-26 17:46:59 +020013)
14
15// ApplyResponseMappings applies the specified mapping rules to a JSON object
16func (m *Mapper) ApplyResponseMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
17 // Validate mapping ID
18 if _, exists := m.mappingLists[mappingID]; !exists {
19 return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
20 }
21
Akron2f93c582026-02-19 16:49:13 +010022 if m.mappingLists[mappingID].IsCorpus() {
23 return m.applyCorpusResponseMappings(mappingID, opts, jsonData)
24 }
25
Akrona3675e92025-06-26 17:46:59 +020026 // Get the parsed rules
Akron2f93c582026-02-19 16:49:13 +010027 rules := m.parsedQueryRules[mappingID]
Akrona3675e92025-06-26 17:46:59 +020028
29 // Check if we have a snippet to process
30 jsonMap, ok := jsonData.(map[string]any)
31 if !ok {
32 return jsonData, nil
33 }
34
35 snippetValue, exists := jsonMap["snippet"]
36 if !exists {
37 return jsonData, nil
38 }
39
40 snippet, ok := snippetValue.(string)
41 if !ok {
42 return jsonData, nil
43 }
44
45 // Process the snippet with each rule
46 processedSnippet := snippet
Akron497cfe82025-07-03 13:26:54 +020047 for ruleIndex, rule := range rules {
Akrona3675e92025-06-26 17:46:59 +020048 // Create pattern and replacement based on direction
49 var pattern, replacement ast.Node
50 if opts.Direction { // true means AtoB
51 pattern = rule.Upper
52 replacement = rule.Lower
53 } else {
54 pattern = rule.Lower
55 replacement = rule.Upper
56 }
57
58 // Extract the inner nodes from the pattern and replacement tokens
59 if token, ok := pattern.(*ast.Token); ok {
60 pattern = token.Wrap
61 }
62 if token, ok := replacement.(*ast.Token); ok {
63 replacement = token.Wrap
64 }
65
Akron497cfe82025-07-03 13:26:54 +020066 // Apply foundry and layer overrides with proper precedence
67 mappingList := m.mappingLists[mappingID]
68
69 // Determine foundry and layer values based on direction
Akrona3675e92025-06-26 17:46:59 +020070 var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
Akron497cfe82025-07-03 13:26:54 +020071 if opts.Direction { // AtoB
Akrona3675e92025-06-26 17:46:59 +020072 patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
73 replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
Akron497cfe82025-07-03 13:26:54 +020074 // Apply mapping list defaults if not specified
75 if replacementFoundry == "" {
76 replacementFoundry = mappingList.FoundryB
77 }
78 if replacementLayer == "" {
79 replacementLayer = mappingList.LayerB
80 }
81 } else { // BtoA
Akrona3675e92025-06-26 17:46:59 +020082 patternFoundry, patternLayer = opts.FoundryB, opts.LayerB
83 replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
Akron497cfe82025-07-03 13:26:54 +020084 // Apply mapping list defaults if not specified
85 if replacementFoundry == "" {
Akrona3675e92025-06-26 17:46:59 +020086 replacementFoundry = mappingList.FoundryA
Akron4de47a92025-06-27 11:58:11 +020087 }
Akron497cfe82025-07-03 13:26:54 +020088 if replacementLayer == "" {
Akrona3675e92025-06-26 17:46:59 +020089 replacementLayer = mappingList.LayerA
90 }
91 }
92
Akron497cfe82025-07-03 13:26:54 +020093 // Clone pattern and apply foundry and layer overrides
Akrona3675e92025-06-26 17:46:59 +020094 processedPattern := pattern.Clone()
95 if patternFoundry != "" || patternLayer != "" {
96 ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
97 }
98
Akrona3675e92025-06-26 17:46:59 +020099 // Create snippet matcher for this rule
100 snippetMatcher, err := matcher.NewSnippetMatcher(
101 ast.Pattern{Root: processedPattern},
102 ast.Replacement{Root: replacement},
103 )
104 if err != nil {
105 continue // Skip this rule if we can't create a matcher
106 }
107
108 // Find matching tokens in the snippet
109 matchingTokens, err := snippetMatcher.FindMatchingTokens(processedSnippet)
110 if err != nil {
111 continue // Skip this rule if parsing fails
112 }
113
114 if len(matchingTokens) == 0 {
115 continue // No matches, try next rule
116 }
117
Akron497cfe82025-07-03 13:26:54 +0200118 // Apply RestrictToObligatory with layer precedence logic
119 restrictedReplacement := m.applyReplacementWithLayerPrecedence(
120 replacement, replacementFoundry, replacementLayer,
121 mappingID, ruleIndex, bool(opts.Direction))
Akrona3675e92025-06-26 17:46:59 +0200122 if restrictedReplacement == nil {
123 continue // Nothing obligatory to add
124 }
125
126 // Generate annotation strings from the restricted replacement
127 annotationStrings, err := m.generateAnnotationStrings(restrictedReplacement)
128 if err != nil {
129 continue // Skip if we can't generate annotations
130 }
131
132 if len(annotationStrings) == 0 {
133 continue // Nothing to add
134 }
135
136 // Apply annotations to matching tokens in the snippet
137 processedSnippet, err = m.addAnnotationsToSnippet(processedSnippet, matchingTokens, annotationStrings)
138 if err != nil {
139 continue // Skip if we can't apply annotations
140 }
141 }
142
Akrona1337ef2025-07-01 12:28:03 +0200143 log.Debug().Str("snippet", processedSnippet).Msg("Processed snippet")
144
Akrona3675e92025-06-26 17:46:59 +0200145 // Create a copy of the input data and update the snippet
146 result := make(map[string]any)
Akrona8b9fbc2026-03-05 16:43:05 +0100147 maps.Copy(result, jsonMap)
Akrona3675e92025-06-26 17:46:59 +0200148 result["snippet"] = processedSnippet
149
150 return result, nil
151}
152
153// generateAnnotationStrings converts a replacement AST node into annotation strings
154func (m *Mapper) generateAnnotationStrings(node ast.Node) ([]string, error) {
155 if node == nil {
156 return nil, nil
157 }
158
159 switch n := node.(type) {
160 case *ast.Term:
161 // Create annotation string in format "foundry/layer:key" or "foundry/layer:key:value"
162 annotation := n.Foundry + "/" + n.Layer + ":" + n.Key
163 if n.Value != "" {
164 annotation += ":" + n.Value
165 }
166 return []string{annotation}, nil
167
168 case *ast.TermGroup:
169 if n.Relation == ast.AndRelation {
170 // For AND groups, collect all annotations
171 var allAnnotations []string
172 for _, operand := range n.Operands {
173 annotations, err := m.generateAnnotationStrings(operand)
174 if err != nil {
175 return nil, err
176 }
177 allAnnotations = append(allAnnotations, annotations...)
178 }
179 return allAnnotations, nil
180 } else {
181 // For OR groups (should not happen with RestrictToObligatory, but handle gracefully)
182 return nil, nil
183 }
184
185 case *ast.Token:
186 // Handle wrapped tokens
187 if n.Wrap != nil {
188 return m.generateAnnotationStrings(n.Wrap)
189 }
190 return nil, nil
191
192 default:
193 return nil, nil
194 }
195}
196
197// addAnnotationsToSnippet adds new annotations to matching tokens in the snippet
Akron9663af92026-02-20 13:45:08 +0100198// using SAX-based parsing for structural identification of text nodes.
Akrona3675e92025-06-26 17:46:59 +0200199func (m *Mapper) addAnnotationsToSnippet(snippet string, matchingTokens []matcher.TokenSpan, annotationStrings []string) (string, error) {
200 if len(matchingTokens) == 0 || len(annotationStrings) == 0 {
201 return snippet, nil
202 }
203
Akron9663af92026-02-20 13:45:08 +0100204 tokenByStartPos := make(map[int]matcher.TokenSpan)
205 for _, tok := range matchingTokens {
206 tokenByStartPos[tok.StartPos] = tok
207 }
Akrona3675e92025-06-26 17:46:59 +0200208
Akron9663af92026-02-20 13:45:08 +0100209 reader := strings.NewReader(snippet)
210 r := gosax.NewReader(reader)
Akrona3675e92025-06-26 17:46:59 +0200211
Akron9663af92026-02-20 13:45:08 +0100212 var result strings.Builder
213 result.Grow(len(snippet) + len(matchingTokens)*100)
214
215 var textPos int
216
217 for {
218 e, err := r.Event()
219 if err != nil {
220 return "", fmt.Errorf("failed to parse snippet for annotation: %w", err)
221 }
222 if e.Type() == gosax.EventEOF {
223 break
224 }
225
226 switch e.Type() {
227 case gosax.EventStart:
228 result.Write(e.Bytes)
229
230 case gosax.EventEnd:
231 result.Write(e.Bytes)
232
233 case gosax.EventText:
234 charData, err := gosax.CharData(e.Bytes)
235 if err != nil {
236 result.Write(e.Bytes)
237 break
Akrona3675e92025-06-26 17:46:59 +0200238 }
Akrona3675e92025-06-26 17:46:59 +0200239
Akron9663af92026-02-20 13:45:08 +0100240 text := string(charData)
241 trimmed := strings.TrimSpace(text)
Akrona3675e92025-06-26 17:46:59 +0200242
Akron9663af92026-02-20 13:45:08 +0100243 if token, ok := tokenByStartPos[textPos]; ok && trimmed != "" && trimmed == token.Text {
Akrona8b9fbc2026-03-05 16:43:05 +0100244 before, after, _ := strings.Cut(text, trimmed)
245 leadingWS := before
246 trailingWS := after
Akron9663af92026-02-20 13:45:08 +0100247
248 result.WriteString(leadingWS)
249
250 annotated := escapeXMLText(trimmed)
Akrona3675e92025-06-26 17:46:59 +0200251 for i := len(annotationStrings) - 1; i >= 0; i-- {
Akron9663af92026-02-20 13:45:08 +0100252 annotated = fmt.Sprintf(`<span title="%s" class="notinindex">%s</span>`, annotationStrings[i], annotated)
Akrona3675e92025-06-26 17:46:59 +0200253 }
Akron9663af92026-02-20 13:45:08 +0100254 result.WriteString(annotated)
255 result.WriteString(trailingWS)
256 } else {
257 result.Write(e.Bytes)
Akrona3675e92025-06-26 17:46:59 +0200258 }
259
Akron9663af92026-02-20 13:45:08 +0100260 textPos += len(text)
261
262 default:
263 result.Write(e.Bytes)
Akrona3675e92025-06-26 17:46:59 +0200264 }
265 }
266
Akron9663af92026-02-20 13:45:08 +0100267 return result.String(), nil
268}
269
// escapeXMLText returns s with the XML markup characters '&', '<' and '>'
// replaced by the entity references "&amp;", "&lt;" and "&gt;". All other
// bytes, including quotes, are copied through unchanged.
func escapeXMLText(s string) string {
	var escaped strings.Builder
	escaped.Grow(len(s))

	// Work on bytes: the three characters of interest are ASCII, and copying
	// the remaining bytes verbatim keeps multi-byte sequences intact.
	for i := 0; i < len(s); i++ {
		switch c := s[i]; c {
		case '&':
			escaped.WriteString("&amp;")
		case '<':
			escaped.WriteString("&lt;")
		case '>':
			escaped.WriteString("&gt;")
		default:
			escaped.WriteByte(c)
		}
	}

	return escaped.String()
}
Akron497cfe82025-07-03 13:26:54 +0200276
277// applyReplacementWithLayerPrecedence applies RestrictToObligatory with proper layer precedence
278func (m *Mapper) applyReplacementWithLayerPrecedence(
279 replacement ast.Node, foundry, layerOverride string,
280 mappingID string, ruleIndex int, direction bool) ast.Node {
281
282 // First, apply RestrictToObligatory without layer override to preserve explicit layers
283 restricted := ast.RestrictToObligatory(replacement, foundry, "")
284 if restricted == nil {
285 return nil
286 }
287
288 // If no layer override is specified, we're done
289 if layerOverride == "" {
290 return restricted
291 }
292
293 // Apply layer override only to terms that didn't have explicit layers in the original rule
294 mappingList := m.mappingLists[mappingID]
295 if ruleIndex < len(mappingList.Mappings) {
296 originalRule := string(mappingList.Mappings[ruleIndex])
297 m.applySelectiveLayerOverrides(restricted, layerOverride, originalRule, direction)
298 }
299
300 return restricted
301}
302
303// applySelectiveLayerOverrides applies layer overrides only to terms without explicit layers
304func (m *Mapper) applySelectiveLayerOverrides(node ast.Node, layerOverride, originalRule string, direction bool) {
305 if node == nil {
306 return
307 }
308
309 // Parse the original rule without defaults to detect explicit layers
310 explicitTerms := m.getExplicitTerms(originalRule, direction)
311
312 // Apply overrides only to terms that weren't explicit in the original rule
313 termIndex := 0
314 m.applyLayerOverrideToImplicitTerms(node, layerOverride, explicitTerms, &termIndex)
315}
316
317// getExplicitTerms parses the original rule without defaults to identify terms with explicit layers
318func (m *Mapper) getExplicitTerms(originalRule string, direction bool) map[int]bool {
319 explicitTerms := make(map[int]bool)
320
321 // Parse without defaults to see what was explicitly specified
322 parser, err := parser.NewGrammarParser("", "")
323 if err != nil {
324 return explicitTerms
325 }
326
327 result, err := parser.ParseMapping(originalRule)
328 if err != nil {
329 return explicitTerms
330 }
331
332 // Get the replacement side based on direction
333 var replacement ast.Node
334 if direction { // AtoB
335 replacement = result.Lower.Wrap
336 } else { // BtoA
337 replacement = result.Upper.Wrap
338 }
339
340 // Extract terms and check which ones have explicit layers
341 termIndex := 0
342 m.markExplicitTerms(replacement, explicitTerms, &termIndex)
343 return explicitTerms
344}
345
346// markExplicitTerms recursively marks terms that have explicit layers
347func (m *Mapper) markExplicitTerms(node ast.Node, explicitTerms map[int]bool, termIndex *int) {
348 if node == nil {
349 return
350 }
351
352 switch n := node.(type) {
353 case *ast.Term:
354 // A term has an explicit layer if it was specified in the original rule
355 if n.Layer != "" {
356 explicitTerms[*termIndex] = true
357 }
358 *termIndex++
359
360 case *ast.TermGroup:
361 for _, operand := range n.Operands {
362 m.markExplicitTerms(operand, explicitTerms, termIndex)
363 }
364
365 case *ast.Token:
366 if n.Wrap != nil {
367 m.markExplicitTerms(n.Wrap, explicitTerms, termIndex)
368 }
369 }
370}
371
372// applyLayerOverrideToImplicitTerms applies layer override only to terms not marked as explicit
373func (m *Mapper) applyLayerOverrideToImplicitTerms(node ast.Node, layerOverride string, explicitTerms map[int]bool, termIndex *int) {
374 if node == nil {
375 return
376 }
377
378 switch n := node.(type) {
379 case *ast.Term:
380 // Apply override only if this term wasn't explicit in the original rule
381 if !explicitTerms[*termIndex] && n.Layer != "" {
382 n.Layer = layerOverride
383 }
384 *termIndex++
385
386 case *ast.TermGroup:
387 for _, operand := range n.Operands {
388 m.applyLayerOverrideToImplicitTerms(operand, layerOverride, explicitTerms, termIndex)
389 }
390
391 case *ast.Token:
392 if n.Wrap != nil {
393 m.applyLayerOverrideToImplicitTerms(n.Wrap, layerOverride, explicitTerms, termIndex)
394 }
395 }
396}