Akron | a3675e9 | 2025-06-26 17:46:59 +0200 | [diff] [blame] | 1 | package mapper |
| 2 | |
| 3 | import ( |
| 4 | "fmt" |
| 5 | "strings" |
| 6 | |
| 7 | "github.com/KorAP/KoralPipe-TermMapper/ast" |
| 8 | "github.com/KorAP/KoralPipe-TermMapper/matcher" |
| 9 | ) |
| 10 | |
| 11 | // ApplyResponseMappings applies the specified mapping rules to a JSON object |
| 12 | func (m *Mapper) ApplyResponseMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) { |
| 13 | // Validate mapping ID |
| 14 | if _, exists := m.mappingLists[mappingID]; !exists { |
| 15 | return nil, fmt.Errorf("mapping list with ID %s not found", mappingID) |
| 16 | } |
| 17 | |
| 18 | // Get the parsed rules |
| 19 | rules := m.parsedRules[mappingID] |
| 20 | |
| 21 | // Check if we have a snippet to process |
| 22 | jsonMap, ok := jsonData.(map[string]any) |
| 23 | if !ok { |
| 24 | return jsonData, nil |
| 25 | } |
| 26 | |
| 27 | snippetValue, exists := jsonMap["snippet"] |
| 28 | if !exists { |
| 29 | return jsonData, nil |
| 30 | } |
| 31 | |
| 32 | snippet, ok := snippetValue.(string) |
| 33 | if !ok { |
| 34 | return jsonData, nil |
| 35 | } |
| 36 | |
| 37 | // Process the snippet with each rule |
| 38 | processedSnippet := snippet |
| 39 | for _, rule := range rules { |
| 40 | // Create pattern and replacement based on direction |
| 41 | var pattern, replacement ast.Node |
| 42 | if opts.Direction { // true means AtoB |
| 43 | pattern = rule.Upper |
| 44 | replacement = rule.Lower |
| 45 | } else { |
| 46 | pattern = rule.Lower |
| 47 | replacement = rule.Upper |
| 48 | } |
| 49 | |
| 50 | // Extract the inner nodes from the pattern and replacement tokens |
| 51 | if token, ok := pattern.(*ast.Token); ok { |
| 52 | pattern = token.Wrap |
| 53 | } |
| 54 | if token, ok := replacement.(*ast.Token); ok { |
| 55 | replacement = token.Wrap |
| 56 | } |
| 57 | |
| 58 | // Apply foundry and layer overrides to pattern and replacement |
| 59 | var patternFoundry, patternLayer, replacementFoundry, replacementLayer string |
| 60 | if opts.Direction { // true means AtoB |
| 61 | patternFoundry, patternLayer = opts.FoundryA, opts.LayerA |
| 62 | replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB |
| 63 | } else { |
| 64 | patternFoundry, patternLayer = opts.FoundryB, opts.LayerB |
| 65 | replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA |
| 66 | } |
| 67 | |
| 68 | // If foundry/layer are empty in options, get them from the mapping list |
Akron | 4de47a9 | 2025-06-27 11:58:11 +0200 | [diff] [blame] | 69 | mappingList := m.mappingLists[mappingID] |
| 70 | if replacementFoundry == "" { |
Akron | a3675e9 | 2025-06-26 17:46:59 +0200 | [diff] [blame] | 71 | if opts.Direction { // AtoB |
| 72 | replacementFoundry = mappingList.FoundryB |
Akron | a3675e9 | 2025-06-26 17:46:59 +0200 | [diff] [blame] | 73 | } else { |
| 74 | replacementFoundry = mappingList.FoundryA |
Akron | 4de47a9 | 2025-06-27 11:58:11 +0200 | [diff] [blame] | 75 | } |
| 76 | } |
| 77 | if replacementLayer == "" { |
| 78 | if opts.Direction { // AtoB |
| 79 | replacementLayer = mappingList.LayerB |
| 80 | } else { |
Akron | a3675e9 | 2025-06-26 17:46:59 +0200 | [diff] [blame] | 81 | replacementLayer = mappingList.LayerA |
| 82 | } |
| 83 | } |
| 84 | |
| 85 | // Clone pattern and apply overrides |
| 86 | processedPattern := pattern.Clone() |
| 87 | if patternFoundry != "" || patternLayer != "" { |
| 88 | ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer) |
| 89 | } |
| 90 | |
Akron | a3675e9 | 2025-06-26 17:46:59 +0200 | [diff] [blame] | 91 | // Create snippet matcher for this rule |
| 92 | snippetMatcher, err := matcher.NewSnippetMatcher( |
| 93 | ast.Pattern{Root: processedPattern}, |
| 94 | ast.Replacement{Root: replacement}, |
| 95 | ) |
| 96 | if err != nil { |
| 97 | continue // Skip this rule if we can't create a matcher |
| 98 | } |
| 99 | |
| 100 | // Find matching tokens in the snippet |
| 101 | matchingTokens, err := snippetMatcher.FindMatchingTokens(processedSnippet) |
| 102 | if err != nil { |
| 103 | continue // Skip this rule if parsing fails |
| 104 | } |
| 105 | |
| 106 | if len(matchingTokens) == 0 { |
| 107 | continue // No matches, try next rule |
| 108 | } |
| 109 | |
| 110 | // Apply RestrictToObligatory to the replacement to get the annotations to add |
| 111 | // Note: Only pass foundry override, not layer, since replacement terms have correct layers |
| 112 | restrictedReplacement := ast.RestrictToObligatory(replacement, replacementFoundry, "") |
| 113 | if restrictedReplacement == nil { |
| 114 | continue // Nothing obligatory to add |
| 115 | } |
| 116 | |
| 117 | // Generate annotation strings from the restricted replacement |
| 118 | annotationStrings, err := m.generateAnnotationStrings(restrictedReplacement) |
| 119 | if err != nil { |
| 120 | continue // Skip if we can't generate annotations |
| 121 | } |
| 122 | |
| 123 | if len(annotationStrings) == 0 { |
| 124 | continue // Nothing to add |
| 125 | } |
| 126 | |
| 127 | // Apply annotations to matching tokens in the snippet |
| 128 | processedSnippet, err = m.addAnnotationsToSnippet(processedSnippet, matchingTokens, annotationStrings) |
| 129 | if err != nil { |
| 130 | continue // Skip if we can't apply annotations |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | // Create a copy of the input data and update the snippet |
| 135 | result := make(map[string]any) |
| 136 | for k, v := range jsonMap { |
| 137 | result[k] = v |
| 138 | } |
| 139 | result["snippet"] = processedSnippet |
| 140 | |
| 141 | return result, nil |
| 142 | } |
| 143 | |
| 144 | // generateAnnotationStrings converts a replacement AST node into annotation strings |
| 145 | func (m *Mapper) generateAnnotationStrings(node ast.Node) ([]string, error) { |
| 146 | if node == nil { |
| 147 | return nil, nil |
| 148 | } |
| 149 | |
| 150 | switch n := node.(type) { |
| 151 | case *ast.Term: |
| 152 | // Create annotation string in format "foundry/layer:key" or "foundry/layer:key:value" |
| 153 | annotation := n.Foundry + "/" + n.Layer + ":" + n.Key |
| 154 | if n.Value != "" { |
| 155 | annotation += ":" + n.Value |
| 156 | } |
| 157 | return []string{annotation}, nil |
| 158 | |
| 159 | case *ast.TermGroup: |
| 160 | if n.Relation == ast.AndRelation { |
| 161 | // For AND groups, collect all annotations |
| 162 | var allAnnotations []string |
| 163 | for _, operand := range n.Operands { |
| 164 | annotations, err := m.generateAnnotationStrings(operand) |
| 165 | if err != nil { |
| 166 | return nil, err |
| 167 | } |
| 168 | allAnnotations = append(allAnnotations, annotations...) |
| 169 | } |
| 170 | return allAnnotations, nil |
| 171 | } else { |
| 172 | // For OR groups (should not happen with RestrictToObligatory, but handle gracefully) |
| 173 | return nil, nil |
| 174 | } |
| 175 | |
| 176 | case *ast.Token: |
| 177 | // Handle wrapped tokens |
| 178 | if n.Wrap != nil { |
| 179 | return m.generateAnnotationStrings(n.Wrap) |
| 180 | } |
| 181 | return nil, nil |
| 182 | |
| 183 | default: |
| 184 | return nil, nil |
| 185 | } |
| 186 | } |
| 187 | |
| 188 | // addAnnotationsToSnippet adds new annotations to matching tokens in the snippet |
| 189 | func (m *Mapper) addAnnotationsToSnippet(snippet string, matchingTokens []matcher.TokenSpan, annotationStrings []string) (string, error) { |
| 190 | if len(matchingTokens) == 0 || len(annotationStrings) == 0 { |
| 191 | return snippet, nil |
| 192 | } |
| 193 | |
| 194 | result := snippet |
| 195 | |
| 196 | // Process each matching token |
| 197 | for _, token := range matchingTokens { |
| 198 | // For nested span structure, we need to find the innermost text and wrap it |
| 199 | // Look for the actual token text within span tags |
| 200 | tokenText := token.Text |
| 201 | |
| 202 | // Find all occurrences of the token text in the current snippet |
| 203 | // We need to be careful about which occurrence to replace |
| 204 | startPos := 0 |
| 205 | for { |
| 206 | tokenStart := strings.Index(result[startPos:], tokenText) |
| 207 | if tokenStart == -1 { |
| 208 | break // No more occurrences |
| 209 | } |
| 210 | tokenStart += startPos |
| 211 | tokenEnd := tokenStart + len(tokenText) |
| 212 | |
| 213 | // Check if this token text is within the expected context |
| 214 | // Look backwards and forwards to see if we're in the right span context |
| 215 | beforeContext := result[:tokenStart] |
| 216 | afterContext := result[tokenEnd:] |
| 217 | |
| 218 | // Simple heuristic: if we're immediately preceded by a > and followed by a < |
| 219 | // then we're likely at the innermost text node |
| 220 | if strings.HasSuffix(beforeContext, ">") && (strings.HasPrefix(afterContext, "<") || len(afterContext) == 0 || afterContext[0] == ' ') { |
| 221 | // Build the replacement with nested spans for each annotation |
| 222 | replacement := tokenText |
| 223 | for i := len(annotationStrings) - 1; i >= 0; i-- { |
| 224 | replacement = fmt.Sprintf(`<span title="%s" class="notinindex">%s</span>`, annotationStrings[i], replacement) |
| 225 | } |
| 226 | |
| 227 | // Replace this occurrence |
| 228 | result = result[:tokenStart] + replacement + result[tokenEnd:] |
| 229 | break // Only replace the first appropriate occurrence for this token |
| 230 | } |
| 231 | |
| 232 | // Move past this occurrence |
| 233 | startPos = tokenEnd |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | return result, nil |
| 238 | } |