Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 1 | package ast |
| 2 | |
Akron | 753f49a | 2025-05-26 16:53:18 +0200 | [diff] [blame] | 3 | // ast is the abstract syntax tree for the query term mapper. |
Akron | bf5149c | 2025-05-20 15:53:41 +0200 | [diff] [blame] | 4 | |
Akron | 3295842 | 2025-05-16 16:33:05 +0200 | [diff] [blame] | 5 | import ( |
| 6 | "encoding/json" |
| 7 | ) |
| 8 | |
// String-based kinds used to label AST nodes and their properties.
type (
	// NodeType identifies the kind of a node in the AST.
	NodeType string

	// RelationType identifies the relation that joins nodes.
	RelationType string

	// MatchType identifies the match operation of a term.
	MatchType string
)
| 17 | |
| 18 | const ( |
Akron | 753f49a | 2025-05-26 16:53:18 +0200 | [diff] [blame] | 19 | TokenNode NodeType = "token" |
| 20 | TermGroupNode NodeType = "termGroup" |
| 21 | TermNode NodeType = "term" |
Akron | 1a5fccd | 2025-05-27 09:54:09 +0200 | [diff] [blame] | 22 | RewriteNode NodeType = "rewrite" |
Akron | 753f49a | 2025-05-26 16:53:18 +0200 | [diff] [blame] | 23 | AndRelation RelationType = "and" |
| 24 | OrRelation RelationType = "or" |
| 25 | MatchEqual MatchType = "eq" |
| 26 | MatchNotEqual MatchType = "ne" |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 27 | ) |
| 28 | |
| 29 | // Node represents a node in the AST |
| 30 | type Node interface { |
| 31 | Type() NodeType |
Akron | 441bd12 | 2025-05-30 14:19:50 +0200 | [diff] [blame] | 32 | Clone() Node |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 33 | } |
| 34 | |
// Rewrite represents a koral:rewrite.
//
// The string fields map one-to-one onto the JSON keys named in their tags.
// Original is untyped and can hold any JSON value.
type Rewrite struct {
	Editor    string `json:"editor,omitempty"`
	Operation string `json:"operation,omitempty"`
	Scope     string `json:"scope,omitempty"`
	Src       string `json:"src,omitempty"`
	Comment   string `json:"_comment,omitempty"`
	Original  any    `json:"original,omitempty"`
}
| 44 | |
| 45 | // UnmarshalJSON implements custom JSON unmarshaling for backward compatibility |
| 46 | func (r *Rewrite) UnmarshalJSON(data []byte) error { |
| 47 | // Create a temporary struct to hold all possible fields |
| 48 | var temp struct { |
| 49 | Type string `json:"@type,omitempty"` |
| 50 | Editor string `json:"editor,omitempty"` |
| 51 | Source string `json:"source,omitempty"` // legacy field |
| 52 | Operation string `json:"operation,omitempty"` // legacy field |
| 53 | Scope string `json:"scope,omitempty"` |
| 54 | Src string `json:"src,omitempty"` |
| 55 | Origin string `json:"origin,omitempty"` // legacy field |
| 56 | Original any `json:"original,omitempty"` |
| 57 | Comment string `json:"_comment,omitempty"` |
| 58 | } |
| 59 | |
| 60 | if err := json.Unmarshal(data, &temp); err != nil { |
| 61 | return err |
| 62 | } |
| 63 | |
| 64 | // Apply precedence for editor field: editor >> source |
| 65 | if temp.Editor != "" { |
| 66 | r.Editor = temp.Editor |
| 67 | } else if temp.Source != "" { |
| 68 | r.Editor = temp.Source |
| 69 | } |
| 70 | |
| 71 | // Apply precedence for original/src/origin: original >> src >> origin |
| 72 | if temp.Original != nil { |
| 73 | r.Original = temp.Original |
| 74 | } else if temp.Src != "" { |
| 75 | r.Src = temp.Src |
| 76 | } else if temp.Origin != "" { |
| 77 | r.Src = temp.Origin |
| 78 | } |
| 79 | |
| 80 | // Copy other fields |
| 81 | r.Operation = temp.Operation |
| 82 | r.Scope = temp.Scope |
| 83 | r.Comment = temp.Comment |
| 84 | |
| 85 | return nil |
Akron | 1a5fccd | 2025-05-27 09:54:09 +0200 | [diff] [blame] | 86 | } |
| 87 | |
| 88 | func (r *Rewrite) Type() NodeType { |
| 89 | return RewriteNode |
| 90 | } |
| 91 | |
Akron | 441bd12 | 2025-05-30 14:19:50 +0200 | [diff] [blame] | 92 | // Clone creates a deep copy of the Rewrite node |
| 93 | func (r *Rewrite) Clone() Node { |
| 94 | return &Rewrite{ |
| 95 | Editor: r.Editor, |
| 96 | Operation: r.Operation, |
| 97 | Scope: r.Scope, |
| 98 | Src: r.Src, |
| 99 | Comment: r.Comment, |
| 100 | Original: r.Original, // Note: this is a shallow copy of the Original field |
| 101 | } |
| 102 | } |
| 103 | |
Akron | 8f1970f | 2025-05-30 12:52:03 +0200 | [diff] [blame] | 104 | // MarshalJSON implements custom JSON marshaling to ensure clean output |
| 105 | func (r *Rewrite) MarshalJSON() ([]byte, error) { |
| 106 | // Create a map with only the modern field names |
| 107 | result := make(map[string]any) |
| 108 | |
| 109 | // Always include @type if this is a rewrite |
| 110 | result["@type"] = "koral:rewrite" |
| 111 | |
| 112 | if r.Editor != "" { |
| 113 | result["editor"] = r.Editor |
| 114 | } |
| 115 | if r.Operation != "" { |
| 116 | result["operation"] = r.Operation |
| 117 | } |
| 118 | if r.Scope != "" { |
| 119 | result["scope"] = r.Scope |
| 120 | } |
| 121 | if r.Src != "" { |
| 122 | result["src"] = r.Src |
| 123 | } |
| 124 | if r.Comment != "" { |
| 125 | result["_comment"] = r.Comment |
| 126 | } |
| 127 | if r.Original != nil { |
| 128 | result["original"] = r.Original |
| 129 | } |
| 130 | |
| 131 | return json.Marshal(result) |
| 132 | } |
| 133 | |
Akron | 753f49a | 2025-05-26 16:53:18 +0200 | [diff] [blame] | 134 | // Token represents a koral:token |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 135 | type Token struct { |
Akron | 1a5fccd | 2025-05-27 09:54:09 +0200 | [diff] [blame] | 136 | Wrap Node `json:"wrap"` |
| 137 | Rewrites []Rewrite `json:"rewrites,omitempty"` |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 138 | } |
| 139 | |
| 140 | func (t *Token) Type() NodeType { |
| 141 | return TokenNode |
| 142 | } |
| 143 | |
Akron | 441bd12 | 2025-05-30 14:19:50 +0200 | [diff] [blame] | 144 | // Clone creates a deep copy of the Token node |
| 145 | func (t *Token) Clone() Node { |
| 146 | var clonedWrap Node |
| 147 | if t.Wrap != nil { |
| 148 | clonedWrap = t.Wrap.Clone() |
| 149 | } |
| 150 | tc := &Token{ |
| 151 | Wrap: clonedWrap, |
| 152 | } |
| 153 | |
| 154 | if t.Rewrites != nil { |
| 155 | clonedRewrites := make([]Rewrite, len(t.Rewrites)) |
| 156 | for i, rewrite := range t.Rewrites { |
| 157 | clonedRewrites[i] = *rewrite.Clone().(*Rewrite) |
| 158 | } |
| 159 | tc.Rewrites = clonedRewrites |
| 160 | } |
| 161 | |
| 162 | return tc |
| 163 | } |
| 164 | |
Akron | 753f49a | 2025-05-26 16:53:18 +0200 | [diff] [blame] | 165 | // TermGroup represents a koral:termGroup |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 166 | type TermGroup struct { |
| 167 | Operands []Node `json:"operands"` |
| 168 | Relation RelationType `json:"relation"` |
Akron | 1a5fccd | 2025-05-27 09:54:09 +0200 | [diff] [blame] | 169 | Rewrites []Rewrite `json:"rewrites,omitempty"` |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 170 | } |
| 171 | |
| 172 | func (tg *TermGroup) Type() NodeType { |
| 173 | return TermGroupNode |
| 174 | } |
| 175 | |
Akron | 441bd12 | 2025-05-30 14:19:50 +0200 | [diff] [blame] | 176 | // Clone creates a deep copy of the TermGroup node |
| 177 | func (tg *TermGroup) Clone() Node { |
| 178 | clonedOperands := make([]Node, len(tg.Operands)) |
| 179 | for i, operand := range tg.Operands { |
| 180 | clonedOperands[i] = operand.Clone() |
| 181 | } |
| 182 | tgc := &TermGroup{ |
| 183 | Operands: clonedOperands, |
| 184 | Relation: tg.Relation, |
| 185 | } |
| 186 | if tg.Rewrites != nil { |
| 187 | clonedRewrites := make([]Rewrite, len(tg.Rewrites)) |
| 188 | for i, rewrite := range tg.Rewrites { |
| 189 | clonedRewrites[i] = *rewrite.Clone().(*Rewrite) |
| 190 | } |
| 191 | tgc.Rewrites = clonedRewrites |
| 192 | } |
| 193 | |
| 194 | return tgc |
| 195 | } |
| 196 | |
Akron | 753f49a | 2025-05-26 16:53:18 +0200 | [diff] [blame] | 197 | // Term represents a koral:term |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 198 | type Term struct { |
Akron | 1a5fccd | 2025-05-27 09:54:09 +0200 | [diff] [blame] | 199 | Foundry string `json:"foundry"` |
| 200 | Key string `json:"key"` |
| 201 | Layer string `json:"layer"` |
| 202 | Match MatchType `json:"match"` |
| 203 | Value string `json:"value,omitempty"` |
| 204 | Rewrites []Rewrite `json:"rewrites,omitempty"` |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 205 | } |
| 206 | |
| 207 | func (t *Term) Type() NodeType { |
| 208 | return TermNode |
| 209 | } |
| 210 | |
Akron | 441bd12 | 2025-05-30 14:19:50 +0200 | [diff] [blame] | 211 | // Clone creates a deep copy of the Term node |
| 212 | func (t *Term) Clone() Node { |
| 213 | |
| 214 | tc := &Term{ |
| 215 | Foundry: t.Foundry, |
| 216 | Key: t.Key, |
| 217 | Layer: t.Layer, |
| 218 | Match: t.Match, |
| 219 | Value: t.Value, |
| 220 | } |
| 221 | |
| 222 | if t.Rewrites != nil { |
| 223 | clonedRewrites := make([]Rewrite, len(t.Rewrites)) |
| 224 | for i, rewrite := range t.Rewrites { |
| 225 | clonedRewrites[i] = *rewrite.Clone().(*Rewrite) |
| 226 | } |
| 227 | tc.Rewrites = clonedRewrites |
| 228 | } |
| 229 | return tc |
| 230 | } |
| 231 | |
Akron | b7e1f35 | 2025-05-16 15:45:23 +0200 | [diff] [blame] | 232 | // Pattern represents a pattern to match in the AST |
| 233 | type Pattern struct { |
| 234 | Root Node |
| 235 | } |
| 236 | |
| 237 | // Replacement represents a replacement pattern |
| 238 | type Replacement struct { |
| 239 | Root Node |
| 240 | } |
Akron | 3295842 | 2025-05-16 16:33:05 +0200 | [diff] [blame] | 241 | |
| 242 | // CatchallNode represents any node type not explicitly handled |
| 243 | type CatchallNode struct { |
| 244 | NodeType string // The original @type value |
| 245 | RawContent json.RawMessage // The original JSON content |
| 246 | Wrap Node // Optional wrapped node |
| 247 | Operands []Node // Optional operands |
| 248 | } |
| 249 | |
| 250 | func (c *CatchallNode) Type() NodeType { |
| 251 | return NodeType(c.NodeType) |
| 252 | } |
Akron | 441bd12 | 2025-05-30 14:19:50 +0200 | [diff] [blame] | 253 | |
| 254 | // Clone creates a deep copy of the CatchallNode |
| 255 | func (c *CatchallNode) Clone() Node { |
| 256 | newNode := &CatchallNode{ |
| 257 | NodeType: c.NodeType, |
| 258 | } |
| 259 | |
| 260 | // Handle RawContent properly - preserve nil if it's nil |
| 261 | if c.RawContent != nil { |
| 262 | newNode.RawContent = make(json.RawMessage, len(c.RawContent)) |
| 263 | copy(newNode.RawContent, c.RawContent) |
| 264 | } |
| 265 | |
| 266 | if c.Wrap != nil { |
| 267 | newNode.Wrap = c.Wrap.Clone() |
| 268 | } |
| 269 | |
| 270 | if len(c.Operands) > 0 { |
| 271 | newNode.Operands = make([]Node, len(c.Operands)) |
| 272 | for i, operand := range c.Operands { |
| 273 | newNode.Operands[i] = operand.Clone() |
| 274 | } |
| 275 | } |
| 276 | |
| 277 | return newNode |
| 278 | } |
| 279 | |
| 280 | // ApplyFoundryAndLayerOverrides recursively applies foundry and layer overrides to terms |
| 281 | func ApplyFoundryAndLayerOverrides(node Node, foundry, layer string) { |
| 282 | if node == nil { |
| 283 | return |
| 284 | } |
| 285 | |
| 286 | switch n := node.(type) { |
| 287 | case *Term: |
| 288 | if foundry != "" { |
| 289 | n.Foundry = foundry |
| 290 | } |
| 291 | if layer != "" { |
| 292 | n.Layer = layer |
| 293 | } |
| 294 | case *TermGroup: |
| 295 | for _, op := range n.Operands { |
| 296 | ApplyFoundryAndLayerOverrides(op, foundry, layer) |
| 297 | } |
| 298 | case *Token: |
| 299 | if n.Wrap != nil { |
| 300 | ApplyFoundryAndLayerOverrides(n.Wrap, foundry, layer) |
| 301 | } |
| 302 | case *CatchallNode: |
| 303 | if n.Wrap != nil { |
| 304 | ApplyFoundryAndLayerOverrides(n.Wrap, foundry, layer) |
| 305 | } |
| 306 | for _, op := range n.Operands { |
| 307 | ApplyFoundryAndLayerOverrides(op, foundry, layer) |
| 308 | } |
| 309 | } |
| 310 | } |
Akron | 7c91cde | 2025-06-24 17:11:22 +0200 | [diff] [blame] | 311 | |
Akron | 497cfe8 | 2025-07-03 13:26:54 +0200 | [diff] [blame] | 312 | // ApplyFoundryAndLayerOverridesWithPrecedence applies foundry and layer overrides while respecting precedence: |
| 313 | // 1. Mapping rule foundry/layer (highest priority - don't override if already set) |
| 314 | // 2. Passed overwrite foundry/layer (from MappingOptions) |
| 315 | // 3. Mapping list foundry/layer (lowest priority - defaults) |
| 316 | func ApplyFoundryAndLayerOverridesWithPrecedence(node Node, foundry, layer string) { |
| 317 | if node == nil { |
| 318 | return |
| 319 | } |
| 320 | |
| 321 | switch n := node.(type) { |
| 322 | case *Term: |
| 323 | // Only override if the term doesn't already have explicit values (respecting precedence) |
| 324 | if foundry != "" && n.Foundry == "" { |
| 325 | n.Foundry = foundry |
| 326 | } |
| 327 | if layer != "" && n.Layer == "" { |
| 328 | n.Layer = layer |
| 329 | } |
| 330 | case *TermGroup: |
| 331 | for _, op := range n.Operands { |
| 332 | ApplyFoundryAndLayerOverridesWithPrecedence(op, foundry, layer) |
| 333 | } |
| 334 | case *Token: |
| 335 | if n.Wrap != nil { |
| 336 | ApplyFoundryAndLayerOverridesWithPrecedence(n.Wrap, foundry, layer) |
| 337 | } |
| 338 | case *CatchallNode: |
| 339 | if n.Wrap != nil { |
| 340 | ApplyFoundryAndLayerOverridesWithPrecedence(n.Wrap, foundry, layer) |
| 341 | } |
| 342 | for _, op := range n.Operands { |
| 343 | ApplyFoundryAndLayerOverridesWithPrecedence(op, foundry, layer) |
| 344 | } |
| 345 | } |
| 346 | } |
| 347 | |
Akron | 7c91cde | 2025-06-24 17:11:22 +0200 | [diff] [blame] | 348 | // RestrictToObligatory takes a replacement node from a mapping rule and reduces the boolean structure |
| 349 | // to only obligatory operations by removing optional OR-relations and keeping required AND-relations. |
| 350 | // It also applies foundry and layer overrides like ApplyFoundryAndLayerOverrides(). |
| 351 | // Note: This function is designed for mapping rule replacement nodes and does not handle CatchallNodes. |
| 352 | // For efficiency, restriction is performed first, then foundry/layer overrides are applied to the smaller result. |
| 353 | // |
| 354 | // Examples: |
| 355 | // - (a & b & c) -> (a & b & c) (kept as is) |
| 356 | // - (a & b & (c | d) & e) -> (a & b & e) (OR-relation removed) |
| 357 | // - (a | b) -> nil (completely optional) |
| 358 | func RestrictToObligatory(node Node, foundry, layer string) Node { |
Akron | 13e8646 | 2025-07-03 13:44:16 +0200 | [diff] [blame^] | 359 | return restrictToObligatoryWithOverrides(node, foundry, layer, false) |
| 360 | } |
| 361 | |
| 362 | // RestrictToObligatoryWithPrecedence is like RestrictToObligatory but respects precedence rules |
| 363 | // when applying foundry and layer overrides |
| 364 | func RestrictToObligatoryWithPrecedence(node Node, foundry, layer string) Node { |
| 365 | return restrictToObligatoryWithOverrides(node, foundry, layer, true) |
| 366 | } |
| 367 | |
| 368 | // restrictToObligatoryWithOverrides performs the restriction and applies overrides with optional precedence |
| 369 | func restrictToObligatoryWithOverrides(node Node, foundry, layer string, withPrecedence bool) Node { |
Akron | 7c91cde | 2025-06-24 17:11:22 +0200 | [diff] [blame] | 370 | if node == nil { |
| 371 | return nil |
| 372 | } |
| 373 | |
| 374 | // First, clone and restrict to obligatory operations |
| 375 | cloned := node.Clone() |
| 376 | restricted := restrictToObligatoryRecursive(cloned) |
| 377 | |
| 378 | // Then apply foundry and layer overrides to the smaller, restricted tree |
| 379 | if restricted != nil { |
Akron | 13e8646 | 2025-07-03 13:44:16 +0200 | [diff] [blame^] | 380 | if withPrecedence { |
| 381 | ApplyFoundryAndLayerOverridesWithPrecedence(restricted, foundry, layer) |
| 382 | } else { |
| 383 | ApplyFoundryAndLayerOverrides(restricted, foundry, layer) |
| 384 | } |
Akron | 497cfe8 | 2025-07-03 13:26:54 +0200 | [diff] [blame] | 385 | } |
| 386 | |
| 387 | return restricted |
| 388 | } |
| 389 | |
Akron | 7c91cde | 2025-06-24 17:11:22 +0200 | [diff] [blame] | 390 | // restrictToObligatoryRecursive performs the actual restriction logic |
| 391 | func restrictToObligatoryRecursive(node Node) Node { |
| 392 | if node == nil { |
| 393 | return nil |
| 394 | } |
| 395 | |
| 396 | switch n := node.(type) { |
| 397 | case *Term: |
| 398 | // Terms are always obligatory |
| 399 | return n |
| 400 | |
| 401 | case *Token: |
| 402 | // Process the wrapped node |
| 403 | if n.Wrap != nil { |
| 404 | restricted := restrictToObligatoryRecursive(n.Wrap) |
| 405 | if restricted == nil { |
| 406 | return nil |
| 407 | } |
| 408 | return &Token{ |
| 409 | Wrap: restricted, |
| 410 | Rewrites: n.Rewrites, |
| 411 | } |
| 412 | } |
| 413 | return n |
| 414 | |
| 415 | case *TermGroup: |
| 416 | if n.Relation == OrRelation { |
| 417 | // OR-relations are optional, so remove them |
| 418 | return nil |
Akron | 13e8646 | 2025-07-03 13:44:16 +0200 | [diff] [blame^] | 419 | } |
| 420 | |
| 421 | if n.Relation == AndRelation { |
Akron | 7c91cde | 2025-06-24 17:11:22 +0200 | [diff] [blame] | 422 | // AND-relations are obligatory, but we need to process operands |
| 423 | var obligatoryOperands []Node |
| 424 | for _, operand := range n.Operands { |
| 425 | restricted := restrictToObligatoryRecursive(operand) |
| 426 | if restricted != nil { |
| 427 | obligatoryOperands = append(obligatoryOperands, restricted) |
| 428 | } |
| 429 | } |
| 430 | |
| 431 | // If no operands remain, return nil |
| 432 | if len(obligatoryOperands) == 0 { |
| 433 | return nil |
| 434 | } |
| 435 | |
| 436 | // If only one operand remains, return it directly |
| 437 | if len(obligatoryOperands) == 1 { |
| 438 | return obligatoryOperands[0] |
| 439 | } |
| 440 | |
| 441 | // Return the group with obligatory operands |
| 442 | return &TermGroup{ |
| 443 | Operands: obligatoryOperands, |
| 444 | Relation: AndRelation, |
| 445 | Rewrites: n.Rewrites, |
| 446 | } |
| 447 | } |
| 448 | } |
| 449 | |
| 450 | // For unknown node types, return as is |
| 451 | return node |
| 452 | } |