blob: e89d12abc894187d737297753c71e3670e609ac5 [file] [log] [blame]
Akrona3675e92025-06-26 17:46:59 +02001package mapper
2
3import (
4 "fmt"
5 "strings"
6
7 "github.com/KorAP/KoralPipe-TermMapper/ast"
8 "github.com/KorAP/KoralPipe-TermMapper/matcher"
Akron497cfe82025-07-03 13:26:54 +02009 "github.com/KorAP/KoralPipe-TermMapper/parser"
Akrona1337ef2025-07-01 12:28:03 +020010 "github.com/rs/zerolog/log"
Akrona3675e92025-06-26 17:46:59 +020011)
12
13// ApplyResponseMappings applies the specified mapping rules to a JSON object
14func (m *Mapper) ApplyResponseMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
15 // Validate mapping ID
16 if _, exists := m.mappingLists[mappingID]; !exists {
17 return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
18 }
19
20 // Get the parsed rules
21 rules := m.parsedRules[mappingID]
22
23 // Check if we have a snippet to process
24 jsonMap, ok := jsonData.(map[string]any)
25 if !ok {
26 return jsonData, nil
27 }
28
29 snippetValue, exists := jsonMap["snippet"]
30 if !exists {
31 return jsonData, nil
32 }
33
34 snippet, ok := snippetValue.(string)
35 if !ok {
36 return jsonData, nil
37 }
38
39 // Process the snippet with each rule
40 processedSnippet := snippet
Akron497cfe82025-07-03 13:26:54 +020041 for ruleIndex, rule := range rules {
Akrona3675e92025-06-26 17:46:59 +020042 // Create pattern and replacement based on direction
43 var pattern, replacement ast.Node
44 if opts.Direction { // true means AtoB
45 pattern = rule.Upper
46 replacement = rule.Lower
47 } else {
48 pattern = rule.Lower
49 replacement = rule.Upper
50 }
51
52 // Extract the inner nodes from the pattern and replacement tokens
53 if token, ok := pattern.(*ast.Token); ok {
54 pattern = token.Wrap
55 }
56 if token, ok := replacement.(*ast.Token); ok {
57 replacement = token.Wrap
58 }
59
Akron497cfe82025-07-03 13:26:54 +020060 // Apply foundry and layer overrides with proper precedence
61 mappingList := m.mappingLists[mappingID]
62
63 // Determine foundry and layer values based on direction
Akrona3675e92025-06-26 17:46:59 +020064 var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
Akron497cfe82025-07-03 13:26:54 +020065 if opts.Direction { // AtoB
Akrona3675e92025-06-26 17:46:59 +020066 patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
67 replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
Akron497cfe82025-07-03 13:26:54 +020068 // Apply mapping list defaults if not specified
69 if replacementFoundry == "" {
70 replacementFoundry = mappingList.FoundryB
71 }
72 if replacementLayer == "" {
73 replacementLayer = mappingList.LayerB
74 }
75 } else { // BtoA
Akrona3675e92025-06-26 17:46:59 +020076 patternFoundry, patternLayer = opts.FoundryB, opts.LayerB
77 replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
Akron497cfe82025-07-03 13:26:54 +020078 // Apply mapping list defaults if not specified
79 if replacementFoundry == "" {
Akrona3675e92025-06-26 17:46:59 +020080 replacementFoundry = mappingList.FoundryA
Akron4de47a92025-06-27 11:58:11 +020081 }
Akron497cfe82025-07-03 13:26:54 +020082 if replacementLayer == "" {
Akrona3675e92025-06-26 17:46:59 +020083 replacementLayer = mappingList.LayerA
84 }
85 }
86
Akron497cfe82025-07-03 13:26:54 +020087 // Clone pattern and apply foundry and layer overrides
Akrona3675e92025-06-26 17:46:59 +020088 processedPattern := pattern.Clone()
89 if patternFoundry != "" || patternLayer != "" {
90 ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
91 }
92
Akrona3675e92025-06-26 17:46:59 +020093 // Create snippet matcher for this rule
94 snippetMatcher, err := matcher.NewSnippetMatcher(
95 ast.Pattern{Root: processedPattern},
96 ast.Replacement{Root: replacement},
97 )
98 if err != nil {
99 continue // Skip this rule if we can't create a matcher
100 }
101
102 // Find matching tokens in the snippet
103 matchingTokens, err := snippetMatcher.FindMatchingTokens(processedSnippet)
104 if err != nil {
105 continue // Skip this rule if parsing fails
106 }
107
108 if len(matchingTokens) == 0 {
109 continue // No matches, try next rule
110 }
111
Akron497cfe82025-07-03 13:26:54 +0200112 // Apply RestrictToObligatory with layer precedence logic
113 restrictedReplacement := m.applyReplacementWithLayerPrecedence(
114 replacement, replacementFoundry, replacementLayer,
115 mappingID, ruleIndex, bool(opts.Direction))
Akrona3675e92025-06-26 17:46:59 +0200116 if restrictedReplacement == nil {
117 continue // Nothing obligatory to add
118 }
119
120 // Generate annotation strings from the restricted replacement
121 annotationStrings, err := m.generateAnnotationStrings(restrictedReplacement)
122 if err != nil {
123 continue // Skip if we can't generate annotations
124 }
125
126 if len(annotationStrings) == 0 {
127 continue // Nothing to add
128 }
129
130 // Apply annotations to matching tokens in the snippet
131 processedSnippet, err = m.addAnnotationsToSnippet(processedSnippet, matchingTokens, annotationStrings)
132 if err != nil {
133 continue // Skip if we can't apply annotations
134 }
135 }
136
Akrona1337ef2025-07-01 12:28:03 +0200137 log.Debug().Str("snippet", processedSnippet).Msg("Processed snippet")
138
Akrona3675e92025-06-26 17:46:59 +0200139 // Create a copy of the input data and update the snippet
140 result := make(map[string]any)
141 for k, v := range jsonMap {
142 result[k] = v
143 }
144 result["snippet"] = processedSnippet
145
146 return result, nil
147}
148
149// generateAnnotationStrings converts a replacement AST node into annotation strings
150func (m *Mapper) generateAnnotationStrings(node ast.Node) ([]string, error) {
151 if node == nil {
152 return nil, nil
153 }
154
155 switch n := node.(type) {
156 case *ast.Term:
157 // Create annotation string in format "foundry/layer:key" or "foundry/layer:key:value"
158 annotation := n.Foundry + "/" + n.Layer + ":" + n.Key
159 if n.Value != "" {
160 annotation += ":" + n.Value
161 }
162 return []string{annotation}, nil
163
164 case *ast.TermGroup:
165 if n.Relation == ast.AndRelation {
166 // For AND groups, collect all annotations
167 var allAnnotations []string
168 for _, operand := range n.Operands {
169 annotations, err := m.generateAnnotationStrings(operand)
170 if err != nil {
171 return nil, err
172 }
173 allAnnotations = append(allAnnotations, annotations...)
174 }
175 return allAnnotations, nil
176 } else {
177 // For OR groups (should not happen with RestrictToObligatory, but handle gracefully)
178 return nil, nil
179 }
180
181 case *ast.Token:
182 // Handle wrapped tokens
183 if n.Wrap != nil {
184 return m.generateAnnotationStrings(n.Wrap)
185 }
186 return nil, nil
187
188 default:
189 return nil, nil
190 }
191}
192
193// addAnnotationsToSnippet adds new annotations to matching tokens in the snippet
194func (m *Mapper) addAnnotationsToSnippet(snippet string, matchingTokens []matcher.TokenSpan, annotationStrings []string) (string, error) {
195 if len(matchingTokens) == 0 || len(annotationStrings) == 0 {
196 return snippet, nil
197 }
198
199 result := snippet
200
201 // Process each matching token
202 for _, token := range matchingTokens {
203 // For nested span structure, we need to find the innermost text and wrap it
204 // Look for the actual token text within span tags
205 tokenText := token.Text
206
207 // Find all occurrences of the token text in the current snippet
208 // We need to be careful about which occurrence to replace
209 startPos := 0
210 for {
211 tokenStart := strings.Index(result[startPos:], tokenText)
212 if tokenStart == -1 {
213 break // No more occurrences
214 }
215 tokenStart += startPos
216 tokenEnd := tokenStart + len(tokenText)
217
218 // Check if this token text is within the expected context
219 // Look backwards and forwards to see if we're in the right span context
220 beforeContext := result[:tokenStart]
221 afterContext := result[tokenEnd:]
222
223 // Simple heuristic: if we're immediately preceded by a > and followed by a <
224 // then we're likely at the innermost text node
225 if strings.HasSuffix(beforeContext, ">") && (strings.HasPrefix(afterContext, "<") || len(afterContext) == 0 || afterContext[0] == ' ') {
226 // Build the replacement with nested spans for each annotation
227 replacement := tokenText
228 for i := len(annotationStrings) - 1; i >= 0; i-- {
229 replacement = fmt.Sprintf(`<span title="%s" class="notinindex">%s</span>`, annotationStrings[i], replacement)
230 }
231
232 // Replace this occurrence
233 result = result[:tokenStart] + replacement + result[tokenEnd:]
234 break // Only replace the first appropriate occurrence for this token
235 }
236
237 // Move past this occurrence
238 startPos = tokenEnd
239 }
240 }
241
242 return result, nil
243}
Akron497cfe82025-07-03 13:26:54 +0200244
245// applyReplacementWithLayerPrecedence applies RestrictToObligatory with proper layer precedence
246func (m *Mapper) applyReplacementWithLayerPrecedence(
247 replacement ast.Node, foundry, layerOverride string,
248 mappingID string, ruleIndex int, direction bool) ast.Node {
249
250 // First, apply RestrictToObligatory without layer override to preserve explicit layers
251 restricted := ast.RestrictToObligatory(replacement, foundry, "")
252 if restricted == nil {
253 return nil
254 }
255
256 // If no layer override is specified, we're done
257 if layerOverride == "" {
258 return restricted
259 }
260
261 // Apply layer override only to terms that didn't have explicit layers in the original rule
262 mappingList := m.mappingLists[mappingID]
263 if ruleIndex < len(mappingList.Mappings) {
264 originalRule := string(mappingList.Mappings[ruleIndex])
265 m.applySelectiveLayerOverrides(restricted, layerOverride, originalRule, direction)
266 }
267
268 return restricted
269}
270
271// applySelectiveLayerOverrides applies layer overrides only to terms without explicit layers
272func (m *Mapper) applySelectiveLayerOverrides(node ast.Node, layerOverride, originalRule string, direction bool) {
273 if node == nil {
274 return
275 }
276
277 // Parse the original rule without defaults to detect explicit layers
278 explicitTerms := m.getExplicitTerms(originalRule, direction)
279
280 // Apply overrides only to terms that weren't explicit in the original rule
281 termIndex := 0
282 m.applyLayerOverrideToImplicitTerms(node, layerOverride, explicitTerms, &termIndex)
283}
284
285// getExplicitTerms parses the original rule without defaults to identify terms with explicit layers
286func (m *Mapper) getExplicitTerms(originalRule string, direction bool) map[int]bool {
287 explicitTerms := make(map[int]bool)
288
289 // Parse without defaults to see what was explicitly specified
290 parser, err := parser.NewGrammarParser("", "")
291 if err != nil {
292 return explicitTerms
293 }
294
295 result, err := parser.ParseMapping(originalRule)
296 if err != nil {
297 return explicitTerms
298 }
299
300 // Get the replacement side based on direction
301 var replacement ast.Node
302 if direction { // AtoB
303 replacement = result.Lower.Wrap
304 } else { // BtoA
305 replacement = result.Upper.Wrap
306 }
307
308 // Extract terms and check which ones have explicit layers
309 termIndex := 0
310 m.markExplicitTerms(replacement, explicitTerms, &termIndex)
311 return explicitTerms
312}
313
314// markExplicitTerms recursively marks terms that have explicit layers
315func (m *Mapper) markExplicitTerms(node ast.Node, explicitTerms map[int]bool, termIndex *int) {
316 if node == nil {
317 return
318 }
319
320 switch n := node.(type) {
321 case *ast.Term:
322 // A term has an explicit layer if it was specified in the original rule
323 if n.Layer != "" {
324 explicitTerms[*termIndex] = true
325 }
326 *termIndex++
327
328 case *ast.TermGroup:
329 for _, operand := range n.Operands {
330 m.markExplicitTerms(operand, explicitTerms, termIndex)
331 }
332
333 case *ast.Token:
334 if n.Wrap != nil {
335 m.markExplicitTerms(n.Wrap, explicitTerms, termIndex)
336 }
337 }
338}
339
340// applyLayerOverrideToImplicitTerms applies layer override only to terms not marked as explicit
341func (m *Mapper) applyLayerOverrideToImplicitTerms(node ast.Node, layerOverride string, explicitTerms map[int]bool, termIndex *int) {
342 if node == nil {
343 return
344 }
345
346 switch n := node.(type) {
347 case *ast.Term:
348 // Apply override only if this term wasn't explicit in the original rule
349 if !explicitTerms[*termIndex] && n.Layer != "" {
350 n.Layer = layerOverride
351 }
352 *termIndex++
353
354 case *ast.TermGroup:
355 for _, operand := range n.Operands {
356 m.applyLayerOverrideToImplicitTerms(operand, layerOverride, explicitTerms, termIndex)
357 }
358
359 case *ast.Token:
360 if n.Wrap != nil {
361 m.applyLayerOverrideToImplicitTerms(n.Wrap, layerOverride, explicitTerms, termIndex)
362 }
363 }
364}