blob: 61ff94b303d02e253b2b69003b4ffda2fb9421f1 [file] [log] [blame]
Akrona3675e92025-06-26 17:46:59 +02001package mapper
2
3import (
4 "fmt"
Akronbeee5052026-05-20 09:39:45 +02005 "html"
Akrona8b9fbc2026-03-05 16:43:05 +01006 "maps"
Akrona3675e92025-06-26 17:46:59 +02007 "strings"
8
Akron2ef703c2025-07-03 15:57:42 +02009 "github.com/KorAP/Koral-Mapper/ast"
10 "github.com/KorAP/Koral-Mapper/matcher"
11 "github.com/KorAP/Koral-Mapper/parser"
Akron9663af92026-02-20 13:45:08 +010012 "github.com/orisano/gosax"
Akrona1337ef2025-07-01 12:28:03 +020013 "github.com/rs/zerolog/log"
Akrona3675e92025-06-26 17:46:59 +020014)
15
16// ApplyResponseMappings applies the specified mapping rules to a JSON object
17func (m *Mapper) ApplyResponseMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
18 // Validate mapping ID
19 if _, exists := m.mappingLists[mappingID]; !exists {
20 return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
21 }
22
Akron422cd252026-05-19 16:31:19 +020023 if err := m.validateEffectiveOptions(mappingID, opts); err != nil {
24 return nil, err
25 }
26
Akron2f93c582026-02-19 16:49:13 +010027 if m.mappingLists[mappingID].IsCorpus() {
28 return m.applyCorpusResponseMappings(mappingID, opts, jsonData)
29 }
30
Akrona3675e92025-06-26 17:46:59 +020031 // Get the parsed rules
Akron2f93c582026-02-19 16:49:13 +010032 rules := m.parsedQueryRules[mappingID]
Akrona3675e92025-06-26 17:46:59 +020033
34 // Check if we have a snippet to process
35 jsonMap, ok := jsonData.(map[string]any)
36 if !ok {
37 return jsonData, nil
38 }
39
40 snippetValue, exists := jsonMap["snippet"]
41 if !exists {
42 return jsonData, nil
43 }
44
45 snippet, ok := snippetValue.(string)
46 if !ok {
47 return jsonData, nil
48 }
49
50 // Process the snippet with each rule
51 processedSnippet := snippet
Akron497cfe82025-07-03 13:26:54 +020052 for ruleIndex, rule := range rules {
Akrona3675e92025-06-26 17:46:59 +020053 // Create pattern and replacement based on direction
54 var pattern, replacement ast.Node
55 if opts.Direction { // true means AtoB
56 pattern = rule.Upper
57 replacement = rule.Lower
58 } else {
59 pattern = rule.Lower
60 replacement = rule.Upper
61 }
62
63 // Extract the inner nodes from the pattern and replacement tokens
64 if token, ok := pattern.(*ast.Token); ok {
65 pattern = token.Wrap
66 }
67 if token, ok := replacement.(*ast.Token); ok {
68 replacement = token.Wrap
69 }
70
Akron497cfe82025-07-03 13:26:54 +020071 // Apply foundry and layer overrides with proper precedence
72 mappingList := m.mappingLists[mappingID]
73
74 // Determine foundry and layer values based on direction
Akrona3675e92025-06-26 17:46:59 +020075 var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
Akron497cfe82025-07-03 13:26:54 +020076 if opts.Direction { // AtoB
Akrona3675e92025-06-26 17:46:59 +020077 patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
78 replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
Akron497cfe82025-07-03 13:26:54 +020079 // Apply mapping list defaults if not specified
80 if replacementFoundry == "" {
81 replacementFoundry = mappingList.FoundryB
82 }
83 if replacementLayer == "" {
84 replacementLayer = mappingList.LayerB
85 }
86 } else { // BtoA
Akrona3675e92025-06-26 17:46:59 +020087 patternFoundry, patternLayer = opts.FoundryB, opts.LayerB
88 replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
Akron497cfe82025-07-03 13:26:54 +020089 // Apply mapping list defaults if not specified
90 if replacementFoundry == "" {
Akrona3675e92025-06-26 17:46:59 +020091 replacementFoundry = mappingList.FoundryA
Akron4de47a92025-06-27 11:58:11 +020092 }
Akron497cfe82025-07-03 13:26:54 +020093 if replacementLayer == "" {
Akrona3675e92025-06-26 17:46:59 +020094 replacementLayer = mappingList.LayerA
95 }
96 }
97
Akron497cfe82025-07-03 13:26:54 +020098 // Clone pattern and apply foundry and layer overrides
Akrona3675e92025-06-26 17:46:59 +020099 processedPattern := pattern.Clone()
100 if patternFoundry != "" || patternLayer != "" {
101 ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
102 }
103
Akrona3675e92025-06-26 17:46:59 +0200104 // Create snippet matcher for this rule
105 snippetMatcher, err := matcher.NewSnippetMatcher(
106 ast.Pattern{Root: processedPattern},
107 ast.Replacement{Root: replacement},
108 )
109 if err != nil {
110 continue // Skip this rule if we can't create a matcher
111 }
112
113 // Find matching tokens in the snippet
114 matchingTokens, err := snippetMatcher.FindMatchingTokens(processedSnippet)
115 if err != nil {
116 continue // Skip this rule if parsing fails
117 }
118
119 if len(matchingTokens) == 0 {
120 continue // No matches, try next rule
121 }
122
Akron497cfe82025-07-03 13:26:54 +0200123 // Apply RestrictToObligatory with layer precedence logic
124 restrictedReplacement := m.applyReplacementWithLayerPrecedence(
125 replacement, replacementFoundry, replacementLayer,
126 mappingID, ruleIndex, bool(opts.Direction))
Akrona3675e92025-06-26 17:46:59 +0200127 if restrictedReplacement == nil {
128 continue // Nothing obligatory to add
129 }
130
131 // Generate annotation strings from the restricted replacement
132 annotationStrings, err := m.generateAnnotationStrings(restrictedReplacement)
133 if err != nil {
134 continue // Skip if we can't generate annotations
135 }
136
137 if len(annotationStrings) == 0 {
138 continue // Nothing to add
139 }
140
141 // Apply annotations to matching tokens in the snippet
142 processedSnippet, err = m.addAnnotationsToSnippet(processedSnippet, matchingTokens, annotationStrings)
143 if err != nil {
144 continue // Skip if we can't apply annotations
145 }
146 }
147
Akrona1337ef2025-07-01 12:28:03 +0200148 log.Debug().Str("snippet", processedSnippet).Msg("Processed snippet")
149
Akrona3675e92025-06-26 17:46:59 +0200150 // Create a copy of the input data and update the snippet
151 result := make(map[string]any)
Akrona8b9fbc2026-03-05 16:43:05 +0100152 maps.Copy(result, jsonMap)
Akrona3675e92025-06-26 17:46:59 +0200153 result["snippet"] = processedSnippet
154
155 return result, nil
156}
157
158// generateAnnotationStrings converts a replacement AST node into annotation strings
159func (m *Mapper) generateAnnotationStrings(node ast.Node) ([]string, error) {
160 if node == nil {
161 return nil, nil
162 }
163
164 switch n := node.(type) {
165 case *ast.Term:
166 // Create annotation string in format "foundry/layer:key" or "foundry/layer:key:value"
167 annotation := n.Foundry + "/" + n.Layer + ":" + n.Key
168 if n.Value != "" {
169 annotation += ":" + n.Value
170 }
171 return []string{annotation}, nil
172
173 case *ast.TermGroup:
174 if n.Relation == ast.AndRelation {
175 // For AND groups, collect all annotations
176 var allAnnotations []string
177 for _, operand := range n.Operands {
178 annotations, err := m.generateAnnotationStrings(operand)
179 if err != nil {
180 return nil, err
181 }
182 allAnnotations = append(allAnnotations, annotations...)
183 }
184 return allAnnotations, nil
185 } else {
186 // For OR groups (should not happen with RestrictToObligatory, but handle gracefully)
187 return nil, nil
188 }
189
190 case *ast.Token:
191 // Handle wrapped tokens
192 if n.Wrap != nil {
193 return m.generateAnnotationStrings(n.Wrap)
194 }
195 return nil, nil
196
197 default:
198 return nil, nil
199 }
200}
201
202// addAnnotationsToSnippet adds new annotations to matching tokens in the snippet
Akron9663af92026-02-20 13:45:08 +0100203// using SAX-based parsing for structural identification of text nodes.
Akrona3675e92025-06-26 17:46:59 +0200204func (m *Mapper) addAnnotationsToSnippet(snippet string, matchingTokens []matcher.TokenSpan, annotationStrings []string) (string, error) {
205 if len(matchingTokens) == 0 || len(annotationStrings) == 0 {
206 return snippet, nil
207 }
208
Akron9663af92026-02-20 13:45:08 +0100209 tokenByStartPos := make(map[int]matcher.TokenSpan)
210 for _, tok := range matchingTokens {
211 tokenByStartPos[tok.StartPos] = tok
212 }
Akrona3675e92025-06-26 17:46:59 +0200213
Akron9663af92026-02-20 13:45:08 +0100214 reader := strings.NewReader(snippet)
215 r := gosax.NewReader(reader)
Akrona3675e92025-06-26 17:46:59 +0200216
Akron9663af92026-02-20 13:45:08 +0100217 var result strings.Builder
218 result.Grow(len(snippet) + len(matchingTokens)*100)
219
220 var textPos int
221
222 for {
223 e, err := r.Event()
224 if err != nil {
225 return "", fmt.Errorf("failed to parse snippet for annotation: %w", err)
226 }
227 if e.Type() == gosax.EventEOF {
228 break
229 }
230
231 switch e.Type() {
232 case gosax.EventStart:
233 result.Write(e.Bytes)
234
235 case gosax.EventEnd:
236 result.Write(e.Bytes)
237
238 case gosax.EventText:
239 charData, err := gosax.CharData(e.Bytes)
240 if err != nil {
241 result.Write(e.Bytes)
242 break
Akrona3675e92025-06-26 17:46:59 +0200243 }
Akrona3675e92025-06-26 17:46:59 +0200244
Akron9663af92026-02-20 13:45:08 +0100245 text := string(charData)
246 trimmed := strings.TrimSpace(text)
Akrona3675e92025-06-26 17:46:59 +0200247
Akron9663af92026-02-20 13:45:08 +0100248 if token, ok := tokenByStartPos[textPos]; ok && trimmed != "" && trimmed == token.Text {
Akrona8b9fbc2026-03-05 16:43:05 +0100249 before, after, _ := strings.Cut(text, trimmed)
250 leadingWS := before
251 trailingWS := after
Akron9663af92026-02-20 13:45:08 +0100252
253 result.WriteString(leadingWS)
254
255 annotated := escapeXMLText(trimmed)
Akrona3675e92025-06-26 17:46:59 +0200256 for i := len(annotationStrings) - 1; i >= 0; i-- {
Akronbeee5052026-05-20 09:39:45 +0200257 annotated = fmt.Sprintf(`<span title="%s" class="notinindex">%s</span>`, html.EscapeString(annotationStrings[i]), annotated)
Akrona3675e92025-06-26 17:46:59 +0200258 }
Akron9663af92026-02-20 13:45:08 +0100259 result.WriteString(annotated)
260 result.WriteString(trailingWS)
261 } else {
262 result.Write(e.Bytes)
Akrona3675e92025-06-26 17:46:59 +0200263 }
264
Akron9663af92026-02-20 13:45:08 +0100265 textPos += len(text)
266
267 default:
268 result.Write(e.Bytes)
Akrona3675e92025-06-26 17:46:59 +0200269 }
270 }
271
Akron9663af92026-02-20 13:45:08 +0100272 return result.String(), nil
273}
274
// escapeXMLText escapes the characters '&', '<' and '>' in s so that it can
// be written as XML character data. Quotes are left untouched, so the result
// is not suitable for attribute values.
//
// The input is scanned once; a string without any of the three characters is
// returned as is, without allocating.
func escapeXMLText(s string) string {
	if !strings.ContainsAny(s, "&<>") {
		return s
	}

	var b strings.Builder
	b.Grow(len(s) + 8)
	// Byte-wise iteration is safe: the escaped characters are ASCII and all
	// other bytes, including multi-byte UTF-8 sequences, are copied verbatim.
	for i := 0; i < len(s); i++ {
		switch c := s[i]; c {
		case '&':
			b.WriteString("&amp;")
		case '<':
			b.WriteString("&lt;")
		case '>':
			b.WriteString("&gt;")
		default:
			b.WriteByte(c)
		}
	}
	return b.String()
}
Akron497cfe82025-07-03 13:26:54 +0200281
282// applyReplacementWithLayerPrecedence applies RestrictToObligatory with proper layer precedence
283func (m *Mapper) applyReplacementWithLayerPrecedence(
284 replacement ast.Node, foundry, layerOverride string,
285 mappingID string, ruleIndex int, direction bool) ast.Node {
286
287 // First, apply RestrictToObligatory without layer override to preserve explicit layers
288 restricted := ast.RestrictToObligatory(replacement, foundry, "")
289 if restricted == nil {
290 return nil
291 }
292
293 // If no layer override is specified, we're done
294 if layerOverride == "" {
295 return restricted
296 }
297
298 // Apply layer override only to terms that didn't have explicit layers in the original rule
299 mappingList := m.mappingLists[mappingID]
300 if ruleIndex < len(mappingList.Mappings) {
301 originalRule := string(mappingList.Mappings[ruleIndex])
302 m.applySelectiveLayerOverrides(restricted, layerOverride, originalRule, direction)
303 }
304
305 return restricted
306}
307
308// applySelectiveLayerOverrides applies layer overrides only to terms without explicit layers
309func (m *Mapper) applySelectiveLayerOverrides(node ast.Node, layerOverride, originalRule string, direction bool) {
310 if node == nil {
311 return
312 }
313
314 // Parse the original rule without defaults to detect explicit layers
315 explicitTerms := m.getExplicitTerms(originalRule, direction)
316
317 // Apply overrides only to terms that weren't explicit in the original rule
318 termIndex := 0
319 m.applyLayerOverrideToImplicitTerms(node, layerOverride, explicitTerms, &termIndex)
320}
321
322// getExplicitTerms parses the original rule without defaults to identify terms with explicit layers
323func (m *Mapper) getExplicitTerms(originalRule string, direction bool) map[int]bool {
324 explicitTerms := make(map[int]bool)
325
326 // Parse without defaults to see what was explicitly specified
327 parser, err := parser.NewGrammarParser("", "")
328 if err != nil {
329 return explicitTerms
330 }
331
332 result, err := parser.ParseMapping(originalRule)
333 if err != nil {
334 return explicitTerms
335 }
336
337 // Get the replacement side based on direction
338 var replacement ast.Node
339 if direction { // AtoB
340 replacement = result.Lower.Wrap
341 } else { // BtoA
342 replacement = result.Upper.Wrap
343 }
344
345 // Extract terms and check which ones have explicit layers
346 termIndex := 0
347 m.markExplicitTerms(replacement, explicitTerms, &termIndex)
348 return explicitTerms
349}
350
351// markExplicitTerms recursively marks terms that have explicit layers
352func (m *Mapper) markExplicitTerms(node ast.Node, explicitTerms map[int]bool, termIndex *int) {
353 if node == nil {
354 return
355 }
356
357 switch n := node.(type) {
358 case *ast.Term:
359 // A term has an explicit layer if it was specified in the original rule
360 if n.Layer != "" {
361 explicitTerms[*termIndex] = true
362 }
363 *termIndex++
364
365 case *ast.TermGroup:
366 for _, operand := range n.Operands {
367 m.markExplicitTerms(operand, explicitTerms, termIndex)
368 }
369
370 case *ast.Token:
371 if n.Wrap != nil {
372 m.markExplicitTerms(n.Wrap, explicitTerms, termIndex)
373 }
374 }
375}
376
377// applyLayerOverrideToImplicitTerms applies layer override only to terms not marked as explicit
378func (m *Mapper) applyLayerOverrideToImplicitTerms(node ast.Node, layerOverride string, explicitTerms map[int]bool, termIndex *int) {
379 if node == nil {
380 return
381 }
382
383 switch n := node.(type) {
384 case *ast.Term:
385 // Apply override only if this term wasn't explicit in the original rule
386 if !explicitTerms[*termIndex] && n.Layer != "" {
387 n.Layer = layerOverride
388 }
389 *termIndex++
390
391 case *ast.TermGroup:
392 for _, operand := range n.Operands {
393 m.applyLayerOverrideToImplicitTerms(operand, layerOverride, explicitTerms, termIndex)
394 }
395
396 case *ast.Token:
397 if n.Wrap != nil {
398 m.applyLayerOverrideToImplicitTerms(n.Wrap, layerOverride, explicitTerms, termIndex)
399 }
400 }
401}