blob: a26404b5296154937fa9e90131835f0d8bee42df [file] [log] [blame]
Akronb7e1f352025-05-16 15:45:23 +02001package matcher
2
3import (
4 "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
5)
6
7// Matcher handles pattern matching and replacement in the AST
8type Matcher struct {
9 pattern ast.Pattern
10 replacement ast.Replacement
11}
12
13// NewMatcher creates a new Matcher with the given pattern and replacement
14func NewMatcher(pattern ast.Pattern, replacement ast.Replacement) *Matcher {
15 return &Matcher{
16 pattern: pattern,
17 replacement: replacement,
18 }
19}
20
21// Match checks if the given node matches the pattern
22func (m *Matcher) Match(node ast.Node) bool {
23 return m.matchNode(node, m.pattern.Root)
24}
25
26// Replace replaces all occurrences of the pattern in the given node with the replacement
27func (m *Matcher) Replace(node ast.Node) ast.Node {
Akronbf5149c2025-05-20 15:53:41 +020028 // If this node matches the pattern, create replacement while preserving outer structure
Akronb7e1f352025-05-16 15:45:23 +020029 if m.Match(node) {
Akronbf5149c2025-05-20 15:53:41 +020030 switch node.(type) {
31 case *ast.Token:
32 // For Token nodes, preserve the Token wrapper but replace its wrap
33 newToken := &ast.Token{
34 Wrap: m.cloneNode(m.replacement.Root),
35 }
36 return newToken
37 default:
38 return m.cloneNode(m.replacement.Root)
39 }
Akronb7e1f352025-05-16 15:45:23 +020040 }
41
Akronbf5149c2025-05-20 15:53:41 +020042 // Otherwise recursively process children
Akronb7e1f352025-05-16 15:45:23 +020043 switch n := node.(type) {
44 case *ast.Token:
Akronbf5149c2025-05-20 15:53:41 +020045 newToken := &ast.Token{
46 Wrap: m.Replace(n.Wrap),
47 }
48 return newToken
Akronb7e1f352025-05-16 15:45:23 +020049
50 case *ast.TermGroup:
51 newOperands := make([]ast.Node, len(n.Operands))
52 for i, op := range n.Operands {
53 newOperands[i] = m.Replace(op)
54 }
Akronbf5149c2025-05-20 15:53:41 +020055 return &ast.TermGroup{
56 Operands: newOperands,
57 Relation: n.Relation,
58 }
Akronb7e1f352025-05-16 15:45:23 +020059
Akron32958422025-05-16 16:33:05 +020060 case *ast.CatchallNode:
61 newNode := &ast.CatchallNode{
62 NodeType: n.NodeType,
63 RawContent: n.RawContent,
64 }
65 if n.Wrap != nil {
66 newNode.Wrap = m.Replace(n.Wrap)
67 }
68 if len(n.Operands) > 0 {
69 newNode.Operands = make([]ast.Node, len(n.Operands))
70 for i, op := range n.Operands {
71 newNode.Operands[i] = m.Replace(op)
72 }
73 }
74 return newNode
75
Akronb7e1f352025-05-16 15:45:23 +020076 default:
77 return node
78 }
79}
80
81// matchNode recursively checks if two nodes match
82func (m *Matcher) matchNode(node, pattern ast.Node) bool {
83 if pattern == nil {
84 return true
85 }
86 if node == nil {
87 return false
88 }
89
Akronbf5149c2025-05-20 15:53:41 +020090 // Handle pattern being a Token
91 if pToken, ok := pattern.(*ast.Token); ok {
92 if nToken, ok := node.(*ast.Token); ok {
93 return m.matchNode(nToken.Wrap, pToken.Wrap)
Akronb7e1f352025-05-16 15:45:23 +020094 }
Akron32958422025-05-16 16:33:05 +020095 return false
Akronbf5149c2025-05-20 15:53:41 +020096 }
Akronb7e1f352025-05-16 15:45:23 +020097
Akronbf5149c2025-05-20 15:53:41 +020098 // Handle pattern being a Term
99 if pTerm, ok := pattern.(*ast.Term); ok {
100 // Direct term to term matching
101 if t, ok := node.(*ast.Term); ok {
102 return t.Foundry == pTerm.Foundry &&
103 t.Key == pTerm.Key &&
104 t.Layer == pTerm.Layer &&
105 t.Match == pTerm.Match &&
106 (pTerm.Value == "" || t.Value == pTerm.Value)
107 }
108 // If node is a Token, check its wrap
109 if tkn, ok := node.(*ast.Token); ok {
110 if tkn.Wrap == nil {
111 return false
112 }
113 return m.matchNode(tkn.Wrap, pattern)
114 }
115 // If node is a TermGroup, check its operands
116 if tg, ok := node.(*ast.TermGroup); ok {
117 for _, op := range tg.Operands {
118 if m.matchNode(op, pattern) {
119 return true
120 }
121 }
122 return false
123 }
124 // If node is a CatchallNode, check its wrap and operands
125 if c, ok := node.(*ast.CatchallNode); ok {
126 if c.Wrap != nil && m.matchNode(c.Wrap, pattern) {
127 return true
128 }
129 for _, op := range c.Operands {
130 if m.matchNode(op, pattern) {
131 return true
132 }
133 }
134 return false
135 }
136 return false
137 }
138
139 // Handle pattern being a TermGroup
140 if pGroup, ok := pattern.(*ast.TermGroup); ok {
141 // For OR relations, check if any operand matches the node
142 if pGroup.Relation == ast.OrRelation {
143 for _, pOp := range pGroup.Operands {
144 if m.matchNode(node, pOp) {
Akronb7e1f352025-05-16 15:45:23 +0200145 return true
146 }
147 }
148 return false
149 }
150
Akronbf5149c2025-05-20 15:53:41 +0200151 // For AND relations, node must be a TermGroup with matching relation
152 if tg, ok := node.(*ast.TermGroup); ok {
153 if tg.Relation != pGroup.Relation {
Akronb7e1f352025-05-16 15:45:23 +0200154 return false
155 }
Akronbf5149c2025-05-20 15:53:41 +0200156 // Check that all pattern operands match in any order
157 if len(tg.Operands) < len(pGroup.Operands) {
Akronb7e1f352025-05-16 15:45:23 +0200158 return false
159 }
Akronbf5149c2025-05-20 15:53:41 +0200160 matched := make([]bool, len(tg.Operands))
161 for _, pOp := range pGroup.Operands {
Akronb7e1f352025-05-16 15:45:23 +0200162 found := false
Akronbf5149c2025-05-20 15:53:41 +0200163 for j, tOp := range tg.Operands {
Akronb7e1f352025-05-16 15:45:23 +0200164 if !matched[j] && m.matchNode(tOp, pOp) {
165 matched[j] = true
166 found = true
167 break
168 }
169 }
170 if !found {
171 return false
172 }
173 }
174 return true
175 }
Akron32958422025-05-16 16:33:05 +0200176
Akronbf5149c2025-05-20 15:53:41 +0200177 // If node is a Token, check its wrap
178 if tkn, ok := node.(*ast.Token); ok {
179 if tkn.Wrap == nil {
Akron32958422025-05-16 16:33:05 +0200180 return false
181 }
Akronbf5149c2025-05-20 15:53:41 +0200182 return m.matchNode(tkn.Wrap, pattern)
183 }
Akron32958422025-05-16 16:33:05 +0200184
Akronbf5149c2025-05-20 15:53:41 +0200185 // If node is a CatchallNode, check its wrap and operands
186 if c, ok := node.(*ast.CatchallNode); ok {
187 if c.Wrap != nil && m.matchNode(c.Wrap, pattern) {
Akron32958422025-05-16 16:33:05 +0200188 return true
189 }
Akronbf5149c2025-05-20 15:53:41 +0200190 for _, op := range c.Operands {
191 if m.matchNode(op, pattern) {
Akronb7e1f352025-05-16 15:45:23 +0200192 return true
193 }
194 }
195 return false
196 }
197
Akron32958422025-05-16 16:33:05 +0200198 return false
Akronb7e1f352025-05-16 15:45:23 +0200199 }
200
201 return false
202}
203
204// cloneNode creates a deep copy of a node
205func (m *Matcher) cloneNode(node ast.Node) ast.Node {
206 if node == nil {
207 return nil
208 }
209
210 switch n := node.(type) {
211 case *ast.Token:
212 return &ast.Token{
213 Wrap: m.cloneNode(n.Wrap),
214 }
215
216 case *ast.TermGroup:
217 operands := make([]ast.Node, len(n.Operands))
218 for i, op := range n.Operands {
219 operands[i] = m.cloneNode(op)
220 }
221 return &ast.TermGroup{
222 Operands: operands,
223 Relation: n.Relation,
224 }
225
226 case *ast.Term:
227 return &ast.Term{
228 Foundry: n.Foundry,
229 Key: n.Key,
230 Layer: n.Layer,
231 Match: n.Match,
232 Value: n.Value,
233 }
234
Akron32958422025-05-16 16:33:05 +0200235 case *ast.CatchallNode:
236 newNode := &ast.CatchallNode{
237 NodeType: n.NodeType,
238 RawContent: n.RawContent,
239 }
240 if n.Wrap != nil {
241 newNode.Wrap = m.cloneNode(n.Wrap)
242 }
243 if len(n.Operands) > 0 {
244 newNode.Operands = make([]ast.Node, len(n.Operands))
245 for i, op := range n.Operands {
246 newNode.Operands[i] = m.cloneNode(op)
247 }
248 }
249 return newNode
250
Akronb7e1f352025-05-16 15:45:23 +0200251 default:
252 return nil
253 }
254}