blob: df21ba97a6b7d269b9d86a03f6b19ecd8cdcddad [file] [log] [blame]
Akronb7e1f352025-05-16 15:45:23 +02001package ast
2
// Package ast defines the abstract syntax tree for the query term mapper.
Akronbf5149c2025-05-20 15:53:41 +02004
Akron32958422025-05-16 16:33:05 +02005import (
6 "encoding/json"
7)
8
// String-backed enumerations used throughout the AST.
type (
	// NodeType represents the type of a node in the AST.
	NodeType string

	// RelationType represents the type of relation between nodes.
	RelationType string

	// MatchType represents the type of match operation.
	MatchType string
)

// Node types.
const (
	TokenNode     NodeType = "token"
	TermGroupNode NodeType = "termGroup"
	TermNode      NodeType = "term"
	RewriteNode   NodeType = "rewrite"
)

// Relation types.
const (
	AndRelation RelationType = "and"
	OrRelation  RelationType = "or"
)

// Match types.
const (
	MatchEqual    MatchType = "eq"
	MatchNotEqual MatchType = "ne"
)
28
29// Node represents a node in the AST
30type Node interface {
31 Type() NodeType
Akron441bd122025-05-30 14:19:50 +020032 Clone() Node
Akronb7e1f352025-05-16 15:45:23 +020033}
34
// Rewrite represents a koral:rewrite.
type Rewrite struct {
	Editor    string `json:"editor,omitempty"`
	Operation string `json:"operation,omitempty"`
	Scope     string `json:"scope,omitempty"`
	Src       string `json:"src,omitempty"`
	Comment   string `json:"_comment,omitempty"`
	Original  any    `json:"original,omitempty"`
}

// UnmarshalJSON decodes a koral:rewrite object, accepting both the modern
// and the legacy field names for backward compatibility:
//
//   - Editor is taken from "editor", falling back to the legacy "source".
//   - Of "original", "src" and the legacy "origin" only the first one that
//     is present is kept: "original" fills Original, the other two fill Src.
//
// The receiver is replaced as a whole, so values left over from an earlier
// decode never leak into the result. If data cannot be decoded the error is
// returned and the receiver is left untouched.
func (r *Rewrite) UnmarshalJSON(data []byte) error {
	// Superset of every field name that may appear in the input.
	var raw struct {
		Type      string `json:"@type,omitempty"`
		Editor    string `json:"editor,omitempty"`
		Source    string `json:"source,omitempty"` // legacy name for editor
		Operation string `json:"operation,omitempty"`
		Scope     string `json:"scope,omitempty"`
		Src       string `json:"src,omitempty"`
		Origin    string `json:"origin,omitempty"` // legacy name for src
		Original  any    `json:"original,omitempty"`
		Comment   string `json:"_comment,omitempty"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	decoded := Rewrite{
		Operation: raw.Operation,
		Scope:     raw.Scope,
		Comment:   raw.Comment,
	}

	// Precedence: editor >> source.
	switch {
	case raw.Editor != "":
		decoded.Editor = raw.Editor
	case raw.Source != "":
		decoded.Editor = raw.Source
	}

	// Precedence: original >> src >> origin.
	switch {
	case raw.Original != nil:
		decoded.Original = raw.Original
	case raw.Src != "":
		decoded.Src = raw.Src
	case raw.Origin != "":
		decoded.Src = raw.Origin
	}

	// Assign everything at once so that no stale field survives on a
	// receiver that already held data.
	*r = decoded
	return nil
}
87
88func (r *Rewrite) Type() NodeType {
89 return RewriteNode
90}
91
Akron441bd122025-05-30 14:19:50 +020092// Clone creates a deep copy of the Rewrite node
93func (r *Rewrite) Clone() Node {
94 return &Rewrite{
95 Editor: r.Editor,
96 Operation: r.Operation,
97 Scope: r.Scope,
98 Src: r.Src,
99 Comment: r.Comment,
100 Original: r.Original, // Note: this is a shallow copy of the Original field
101 }
102}
103
Akron2ecb4d22026-05-19 13:44:09 +0200104// MarshalJSON implements custom JSON marshaling to ensure clean output.
105// Uses a value receiver so both json.Marshal(rw) and json.Marshal(&rw)
106// produce identical output including the @type field.
107func (r Rewrite) MarshalJSON() ([]byte, error) {
Akron8f1970f2025-05-30 12:52:03 +0200108 // Create a map with only the modern field names
109 result := make(map[string]any)
110
111 // Always include @type if this is a rewrite
112 result["@type"] = "koral:rewrite"
113
114 if r.Editor != "" {
115 result["editor"] = r.Editor
116 }
117 if r.Operation != "" {
118 result["operation"] = r.Operation
119 }
120 if r.Scope != "" {
121 result["scope"] = r.Scope
122 }
123 if r.Src != "" {
124 result["src"] = r.Src
125 }
126 if r.Comment != "" {
127 result["_comment"] = r.Comment
128 }
129 if r.Original != nil {
130 result["original"] = r.Original
131 }
132
133 return json.Marshal(result)
134}
135
Akron2ecb4d22026-05-19 13:44:09 +0200136// ToMap converts the Rewrite to a map[string]any suitable for direct
137// injection into map-based JSON structures. The output is identical to
138// what MarshalJSON produces.
139func (r *Rewrite) ToMap() map[string]any {
140 result := map[string]any{
141 "@type": "koral:rewrite",
142 }
143 if r.Editor != "" {
144 result["editor"] = r.Editor
145 }
146 if r.Operation != "" {
147 result["operation"] = r.Operation
148 }
149 if r.Scope != "" {
150 result["scope"] = r.Scope
151 }
152 if r.Src != "" {
153 result["src"] = r.Src
154 }
155 if r.Comment != "" {
156 result["_comment"] = r.Comment
157 }
158 if r.Original != nil {
159 result["original"] = r.Original
160 }
161 return result
162}
163
Akron753f49a2025-05-26 16:53:18 +0200164// Token represents a koral:token
Akronb7e1f352025-05-16 15:45:23 +0200165type Token struct {
Akron1a5fccd2025-05-27 09:54:09 +0200166 Wrap Node `json:"wrap"`
167 Rewrites []Rewrite `json:"rewrites,omitempty"`
Akronb7e1f352025-05-16 15:45:23 +0200168}
169
170func (t *Token) Type() NodeType {
171 return TokenNode
172}
173
Akron441bd122025-05-30 14:19:50 +0200174// Clone creates a deep copy of the Token node
175func (t *Token) Clone() Node {
176 var clonedWrap Node
177 if t.Wrap != nil {
178 clonedWrap = t.Wrap.Clone()
179 }
180 tc := &Token{
181 Wrap: clonedWrap,
182 }
183
184 if t.Rewrites != nil {
185 clonedRewrites := make([]Rewrite, len(t.Rewrites))
186 for i, rewrite := range t.Rewrites {
187 clonedRewrites[i] = *rewrite.Clone().(*Rewrite)
188 }
189 tc.Rewrites = clonedRewrites
190 }
191
192 return tc
193}
194
Akron753f49a2025-05-26 16:53:18 +0200195// TermGroup represents a koral:termGroup
Akronb7e1f352025-05-16 15:45:23 +0200196type TermGroup struct {
197 Operands []Node `json:"operands"`
198 Relation RelationType `json:"relation"`
Akron1a5fccd2025-05-27 09:54:09 +0200199 Rewrites []Rewrite `json:"rewrites,omitempty"`
Akronb7e1f352025-05-16 15:45:23 +0200200}
201
202func (tg *TermGroup) Type() NodeType {
203 return TermGroupNode
204}
205
Akron441bd122025-05-30 14:19:50 +0200206// Clone creates a deep copy of the TermGroup node
207func (tg *TermGroup) Clone() Node {
208 clonedOperands := make([]Node, len(tg.Operands))
209 for i, operand := range tg.Operands {
210 clonedOperands[i] = operand.Clone()
211 }
212 tgc := &TermGroup{
213 Operands: clonedOperands,
214 Relation: tg.Relation,
215 }
216 if tg.Rewrites != nil {
217 clonedRewrites := make([]Rewrite, len(tg.Rewrites))
218 for i, rewrite := range tg.Rewrites {
219 clonedRewrites[i] = *rewrite.Clone().(*Rewrite)
220 }
221 tgc.Rewrites = clonedRewrites
222 }
223
224 return tgc
225}
226
Akron753f49a2025-05-26 16:53:18 +0200227// Term represents a koral:term
Akronb7e1f352025-05-16 15:45:23 +0200228type Term struct {
Akron1a5fccd2025-05-27 09:54:09 +0200229 Foundry string `json:"foundry"`
230 Key string `json:"key"`
231 Layer string `json:"layer"`
232 Match MatchType `json:"match"`
233 Value string `json:"value,omitempty"`
234 Rewrites []Rewrite `json:"rewrites,omitempty"`
Akronb7e1f352025-05-16 15:45:23 +0200235}
236
237func (t *Term) Type() NodeType {
238 return TermNode
239}
240
Akron441bd122025-05-30 14:19:50 +0200241// Clone creates a deep copy of the Term node
242func (t *Term) Clone() Node {
243
244 tc := &Term{
245 Foundry: t.Foundry,
246 Key: t.Key,
247 Layer: t.Layer,
248 Match: t.Match,
249 Value: t.Value,
250 }
251
252 if t.Rewrites != nil {
253 clonedRewrites := make([]Rewrite, len(t.Rewrites))
254 for i, rewrite := range t.Rewrites {
255 clonedRewrites[i] = *rewrite.Clone().(*Rewrite)
256 }
257 tc.Rewrites = clonedRewrites
258 }
259 return tc
260}
261
Akronb7e1f352025-05-16 15:45:23 +0200262// Pattern represents a pattern to match in the AST
263type Pattern struct {
264 Root Node
265}
266
267// Replacement represents a replacement pattern
268type Replacement struct {
269 Root Node
270}
Akron32958422025-05-16 16:33:05 +0200271
272// CatchallNode represents any node type not explicitly handled
273type CatchallNode struct {
274 NodeType string // The original @type value
275 RawContent json.RawMessage // The original JSON content
276 Wrap Node // Optional wrapped node
277 Operands []Node // Optional operands
278}
279
280func (c *CatchallNode) Type() NodeType {
281 return NodeType(c.NodeType)
282}
Akron441bd122025-05-30 14:19:50 +0200283
284// Clone creates a deep copy of the CatchallNode
285func (c *CatchallNode) Clone() Node {
286 newNode := &CatchallNode{
287 NodeType: c.NodeType,
288 }
289
290 // Handle RawContent properly - preserve nil if it's nil
291 if c.RawContent != nil {
292 newNode.RawContent = make(json.RawMessage, len(c.RawContent))
293 copy(newNode.RawContent, c.RawContent)
294 }
295
296 if c.Wrap != nil {
297 newNode.Wrap = c.Wrap.Clone()
298 }
299
300 if len(c.Operands) > 0 {
301 newNode.Operands = make([]Node, len(c.Operands))
302 for i, operand := range c.Operands {
303 newNode.Operands[i] = operand.Clone()
304 }
305 }
306
307 return newNode
308}
309
310// ApplyFoundryAndLayerOverrides recursively applies foundry and layer overrides to terms
311func ApplyFoundryAndLayerOverrides(node Node, foundry, layer string) {
312 if node == nil {
313 return
314 }
315
316 switch n := node.(type) {
317 case *Term:
318 if foundry != "" {
319 n.Foundry = foundry
320 }
321 if layer != "" {
322 n.Layer = layer
323 }
324 case *TermGroup:
325 for _, op := range n.Operands {
326 ApplyFoundryAndLayerOverrides(op, foundry, layer)
327 }
328 case *Token:
329 if n.Wrap != nil {
330 ApplyFoundryAndLayerOverrides(n.Wrap, foundry, layer)
331 }
332 case *CatchallNode:
333 if n.Wrap != nil {
334 ApplyFoundryAndLayerOverrides(n.Wrap, foundry, layer)
335 }
336 for _, op := range n.Operands {
337 ApplyFoundryAndLayerOverrides(op, foundry, layer)
338 }
339 }
340}
Akron7c91cde2025-06-24 17:11:22 +0200341
Akron497cfe82025-07-03 13:26:54 +0200342// ApplyFoundryAndLayerOverridesWithPrecedence applies foundry and layer overrides while respecting precedence:
343// 1. Mapping rule foundry/layer (highest priority - don't override if already set)
344// 2. Passed overwrite foundry/layer (from MappingOptions)
345// 3. Mapping list foundry/layer (lowest priority - defaults)
346func ApplyFoundryAndLayerOverridesWithPrecedence(node Node, foundry, layer string) {
347 if node == nil {
348 return
349 }
350
351 switch n := node.(type) {
352 case *Term:
353 // Only override if the term doesn't already have explicit values (respecting precedence)
354 if foundry != "" && n.Foundry == "" {
355 n.Foundry = foundry
356 }
357 if layer != "" && n.Layer == "" {
358 n.Layer = layer
359 }
360 case *TermGroup:
361 for _, op := range n.Operands {
362 ApplyFoundryAndLayerOverridesWithPrecedence(op, foundry, layer)
363 }
364 case *Token:
365 if n.Wrap != nil {
366 ApplyFoundryAndLayerOverridesWithPrecedence(n.Wrap, foundry, layer)
367 }
368 case *CatchallNode:
369 if n.Wrap != nil {
370 ApplyFoundryAndLayerOverridesWithPrecedence(n.Wrap, foundry, layer)
371 }
372 for _, op := range n.Operands {
373 ApplyFoundryAndLayerOverridesWithPrecedence(op, foundry, layer)
374 }
375 }
376}
377
Akron7c91cde2025-06-24 17:11:22 +0200378// RestrictToObligatory takes a replacement node from a mapping rule and reduces the boolean structure
379// to only obligatory operations by removing optional OR-relations and keeping required AND-relations.
380// It also applies foundry and layer overrides like ApplyFoundryAndLayerOverrides().
381// Note: This function is designed for mapping rule replacement nodes and does not handle CatchallNodes.
382// For efficiency, restriction is performed first, then foundry/layer overrides are applied to the smaller result.
383//
384// Examples:
385// - (a & b & c) -> (a & b & c) (kept as is)
386// - (a & b & (c | d) & e) -> (a & b & e) (OR-relation removed)
387// - (a | b) -> nil (completely optional)
388func RestrictToObligatory(node Node, foundry, layer string) Node {
Akron13e86462025-07-03 13:44:16 +0200389 return restrictToObligatoryWithOverrides(node, foundry, layer, false)
390}
391
392// RestrictToObligatoryWithPrecedence is like RestrictToObligatory but respects precedence rules
393// when applying foundry and layer overrides
394func RestrictToObligatoryWithPrecedence(node Node, foundry, layer string) Node {
395 return restrictToObligatoryWithOverrides(node, foundry, layer, true)
396}
397
398// restrictToObligatoryWithOverrides performs the restriction and applies overrides with optional precedence
399func restrictToObligatoryWithOverrides(node Node, foundry, layer string, withPrecedence bool) Node {
Akron7c91cde2025-06-24 17:11:22 +0200400 if node == nil {
401 return nil
402 }
403
404 // First, clone and restrict to obligatory operations
405 cloned := node.Clone()
406 restricted := restrictToObligatoryRecursive(cloned)
407
408 // Then apply foundry and layer overrides to the smaller, restricted tree
409 if restricted != nil {
Akron13e86462025-07-03 13:44:16 +0200410 if withPrecedence {
411 ApplyFoundryAndLayerOverridesWithPrecedence(restricted, foundry, layer)
412 } else {
413 ApplyFoundryAndLayerOverrides(restricted, foundry, layer)
414 }
Akron497cfe82025-07-03 13:26:54 +0200415 }
416
417 return restricted
418}
419
Akron7c91cde2025-06-24 17:11:22 +0200420// restrictToObligatoryRecursive performs the actual restriction logic
421func restrictToObligatoryRecursive(node Node) Node {
422 if node == nil {
423 return nil
424 }
425
426 switch n := node.(type) {
427 case *Term:
428 // Terms are always obligatory
429 return n
430
431 case *Token:
432 // Process the wrapped node
433 if n.Wrap != nil {
434 restricted := restrictToObligatoryRecursive(n.Wrap)
435 if restricted == nil {
436 return nil
437 }
438 return &Token{
439 Wrap: restricted,
440 Rewrites: n.Rewrites,
441 }
442 }
443 return n
444
445 case *TermGroup:
446 if n.Relation == OrRelation {
447 // OR-relations are optional, so remove them
448 return nil
Akron13e86462025-07-03 13:44:16 +0200449 }
450
451 if n.Relation == AndRelation {
Akron7c91cde2025-06-24 17:11:22 +0200452 // AND-relations are obligatory, but we need to process operands
453 var obligatoryOperands []Node
454 for _, operand := range n.Operands {
455 restricted := restrictToObligatoryRecursive(operand)
456 if restricted != nil {
457 obligatoryOperands = append(obligatoryOperands, restricted)
458 }
459 }
460
461 // If no operands remain, return nil
462 if len(obligatoryOperands) == 0 {
463 return nil
464 }
465
466 // If only one operand remains, return it directly
467 if len(obligatoryOperands) == 1 {
468 return obligatoryOperands[0]
469 }
470
471 // Return the group with obligatory operands
472 return &TermGroup{
473 Operands: obligatoryOperands,
474 Relation: AndRelation,
475 Rewrites: n.Rewrites,
476 }
477 }
478 }
479
480 // For unknown node types, return as is
481 return node
482}
Akron8414ae52026-05-19 13:31:14 +0200483
484// Rewriteable is implemented by AST nodes that carry a rewrites slice.
485type Rewriteable interface {
486 GetRewrites() []Rewrite
487 SetRewrites([]Rewrite)
488}
489
Akronb4e36f62026-05-21 11:44:25 +0200490func (t *Term) GetRewrites() []Rewrite { return t.Rewrites }
491func (t *Term) SetRewrites(r []Rewrite) { t.Rewrites = r }
492func (tg *TermGroup) GetRewrites() []Rewrite { return tg.Rewrites }
Akron8414ae52026-05-19 13:31:14 +0200493func (tg *TermGroup) SetRewrites(r []Rewrite) { tg.Rewrites = r }
Akronb4e36f62026-05-21 11:44:25 +0200494func (t *Token) GetRewrites() []Rewrite { return t.Rewrites }
Akron8414ae52026-05-19 13:31:14 +0200495func (t *Token) SetRewrites(r []Rewrite) { t.Rewrites = r }
496
497// AppendRewrite appends a rewrite to any Rewriteable node.
498// Non-Rewriteable nodes (e.g. CatchallNode) are silently ignored.
499func AppendRewrite(node Node, rw Rewrite) {
500 if r, ok := node.(Rewriteable); ok {
501 r.SetRewrites(append(r.GetRewrites(), rw))
502 }
503}
504
Akronb4e36f62026-05-21 11:44:25 +0200505// Specificity returns the specificity score of an AST node.
506// Specificity is the count of AND-connected leaf constraints:
507// - Term -> 1
508// - TermGroup(AND) -> sum of Specificity of all operands
509// - TermGroup(OR) -> 0 (alternatives, not additional constraints)
510// - Token -> Specificity(Wrap)
511// - CatchallNode / nil -> 0
512func Specificity(node Node) int {
513 if node == nil {
514 return 0
515 }
516 switch n := node.(type) {
517 case *Term:
518 return 1
519 case *TermGroup:
520 if n.Relation == AndRelation {
521 total := 0
522 for _, op := range n.Operands {
523 total += Specificity(op)
524 }
525 return total
526 }
527 return 0
528 case *Token:
529 return Specificity(n.Wrap)
530 default:
531 return 0
532 }
533}
534
Akron8414ae52026-05-19 13:31:14 +0200535// StripRewrites recursively removes all rewrites from an AST tree.
536func StripRewrites(node Node) {
537 if node == nil {
538 return
539 }
540 if r, ok := node.(Rewriteable); ok {
541 r.SetRewrites(nil)
542 }
543 switch n := node.(type) {
544 case *Token:
545 StripRewrites(n.Wrap)
546 case *TermGroup:
547 for _, op := range n.Operands {
548 StripRewrites(op)
549 }
550 case *CatchallNode:
551 StripRewrites(n.Wrap)
552 for _, op := range n.Operands {
553 StripRewrites(op)
554 }
555 }
556}