blob: 18cd5c70ff5ec2ec14229a5503495171e3873e29 [file] [log] [blame]
Akron22322ec2025-05-21 11:17:30 +02001package parser
2
3import (
4 "fmt"
5 "strings"
6
7 "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
8 "github.com/alecthomas/participle/v2"
9 "github.com/alecthomas/participle/v2/lexer"
10)
11
12// GrammarParser parses a simple grammar into AST nodes
13type GrammarParser struct {
14 defaultFoundry string
15 defaultLayer string
16 parser *participle.Parser[Grammar]
17}
18
19// Grammar represents the root of our grammar
20type Grammar struct {
21 Token *TokenExpr `@@`
22}
23
24// TokenExpr represents a token expression in square brackets
25type TokenExpr struct {
26 Expr *Expr `"[" @@ "]"`
27}
28
29// Expr represents a sequence of terms and operators
30type Expr struct {
31 First *Term `@@`
32 Rest []*Op `@@*`
33}
34
35type Op struct {
36 Operator string `@("&" | "|")`
37 Term *Term `@@`
38}
39
40// Term represents either a simple term or a parenthesized expression
41type Term struct {
42 Simple *SimpleTerm `@@`
43 Paren *ParenExpr `| @@`
44}
45
46type ParenExpr struct {
47 Expr *Expr `"(" @@ ")"`
48}
49
50// SimpleTerm represents any valid term form
51type SimpleTerm struct {
52 WithFoundryLayer *FoundryLayerTerm `@@`
53 WithFoundryKey *FoundryKeyTerm `| @@`
54 WithLayer *LayerTerm `| @@`
55 SimpleKey *KeyTerm `| @@`
56}
57
58// FoundryLayerTerm represents foundry/layer=key:value
59type FoundryLayerTerm struct {
60 Foundry string `@Ident "/"`
61 Layer string `@Ident "="`
62 Key string `@Ident`
63 Value string `(":" @Ident)?`
64}
65
66// FoundryKeyTerm represents foundry/key
67type FoundryKeyTerm struct {
68 Foundry string `@Ident "/"`
69 Key string `@Ident`
70}
71
72// LayerTerm represents layer=key:value
73type LayerTerm struct {
74 Layer string `@Ident "="`
75 Key string `@Ident`
76 Value string `(":" @Ident)?`
77}
78
79// KeyTerm represents key:value
80type KeyTerm struct {
81 Key string `@Ident`
82 Value string `(":" @Ident)?`
83}
84
85// NewGrammarParser creates a new grammar parser with optional default foundry and layer
86func NewGrammarParser(defaultFoundry, defaultLayer string) (*GrammarParser, error) {
87 lex := lexer.MustSimple([]lexer.SimpleRule{
88 {Name: "Ident", Pattern: `[a-zA-Z][a-zA-Z0-9_]*`},
89 {Name: "Punct", Pattern: `[\[\]()&\|=:/]`},
90 {Name: "Whitespace", Pattern: `\s+`},
91 })
92
93 parser, err := participle.Build[Grammar](
94 participle.Lexer(lex),
95 participle.UseLookahead(2),
96 participle.Elide("Whitespace"),
97 )
98 if err != nil {
99 return nil, fmt.Errorf("failed to build parser: %w", err)
100 }
101
102 return &GrammarParser{
103 defaultFoundry: defaultFoundry,
104 defaultLayer: defaultLayer,
105 parser: parser,
106 }, nil
107}
108
109// Parse parses a grammar string into an AST node
110func (p *GrammarParser) Parse(input string) (ast.Node, error) {
111 // Remove extra spaces around operators to help the parser
112 input = strings.ReplaceAll(input, " & ", "&")
113 input = strings.ReplaceAll(input, " | ", "|")
114
115 // Add spaces around parentheses to help the parser
116 input = strings.ReplaceAll(input, "(", " ( ")
117 input = strings.ReplaceAll(input, ")", " ) ")
118
119 // Remove any extra spaces
120 input = strings.TrimSpace(input)
121
122 grammar, err := p.parser.ParseString("", input)
123 if err != nil {
124 return nil, fmt.Errorf("failed to parse grammar: %w", err)
125 }
126
127 wrap, err := p.parseExpr(grammar.Token.Expr)
128 if err != nil {
129 return nil, err
130 }
131 return &ast.Token{Wrap: wrap}, nil
132}
133
134// parseExpr builds the AST from the parsed Expr
135func (p *GrammarParser) parseExpr(expr *Expr) (ast.Node, error) {
136 var operands []ast.Node
137 var operators []string
138
139 // Parse the first term
140 first, err := p.parseTerm(expr.First)
141 if err != nil {
142 return nil, err
143 }
144 operands = append(operands, first)
145
146 // Parse the rest
147 for _, op := range expr.Rest {
148 node, err := p.parseTerm(op.Term)
149 if err != nil {
150 return nil, err
151 }
152 operands = append(operands, node)
153 operators = append(operators, op.Operator)
154 }
155
156 // If only one operand, return it
157 if len(operands) == 1 {
158 return operands[0], nil
159 }
160
161 // Group operands by operator precedence (left-to-right, no precedence between & and |)
162 // We'll group by runs of the same operator
163 var groupOperands []ast.Node
164 var currentOp string
165 var currentGroup []ast.Node
166 for i, op := range operators {
167 if i == 0 {
168 currentOp = op
169 currentGroup = append(currentGroup, operands[i])
170 }
171 if op == currentOp {
172 currentGroup = append(currentGroup, operands[i+1])
173 } else {
174 groupOperands = append(groupOperands, &ast.TermGroup{
175 Operands: append([]ast.Node{}, currentGroup...),
176 Relation: toRelation(currentOp),
177 })
178 currentOp = op
179 currentGroup = []ast.Node{operands[i+1]}
180 }
181 }
182 if len(currentGroup) > 0 {
183 groupOperands = append(groupOperands, &ast.TermGroup{
184 Operands: append([]ast.Node{}, currentGroup...),
185 Relation: toRelation(currentOp),
186 })
187 }
188 if len(groupOperands) == 1 {
189 return groupOperands[0], nil
190 }
191 // If mixed operators, nest them left-to-right
192 result := groupOperands[0]
193 for i := 1; i < len(groupOperands); i++ {
194 result = &ast.TermGroup{
195 Operands: []ast.Node{result, groupOperands[i]},
196 Relation: toRelation(operators[0]),
197 }
198 }
199 return result, nil
200}
201
202// parseTerm converts a Term into an AST node
203func (p *GrammarParser) parseTerm(term *Term) (ast.Node, error) {
204 if term.Simple != nil {
205 return p.parseSimpleTerm(term.Simple)
206 }
207 if term.Paren != nil {
208 return p.parseExpr(term.Paren.Expr)
209 }
210 return nil, fmt.Errorf("invalid term: neither simple nor parenthesized")
211}
212
213func toRelation(op string) ast.RelationType {
214 if op == "|" {
215 return ast.OrRelation
216 }
217 return ast.AndRelation
218}
219
220// parseSimpleTerm converts a SimpleTerm into an AST Term node
221func (p *GrammarParser) parseSimpleTerm(term *SimpleTerm) (ast.Node, error) {
222 var foundry, layer, key, value string
223
224 switch {
225 case term.WithFoundryLayer != nil:
226 foundry = term.WithFoundryLayer.Foundry
227 layer = term.WithFoundryLayer.Layer
228 key = term.WithFoundryLayer.Key
229 value = term.WithFoundryLayer.Value
230 case term.WithFoundryKey != nil:
231 foundry = term.WithFoundryKey.Foundry
232 key = term.WithFoundryKey.Key
233 case term.WithLayer != nil:
234 layer = term.WithLayer.Layer
235 key = term.WithLayer.Key
236 value = term.WithLayer.Value
237 case term.SimpleKey != nil:
238 key = term.SimpleKey.Key
239 value = term.SimpleKey.Value
240 default:
241 return nil, fmt.Errorf("invalid term: no valid form found")
242 }
243
244 if foundry == "" {
245 foundry = p.defaultFoundry
246 }
247 if layer == "" {
248 layer = p.defaultLayer
249 }
250
251 return &ast.Term{
252 Foundry: foundry,
253 Key: key,
254 Layer: layer,
255 Match: ast.MatchEqual,
256 Value: value,
257 }, nil
258}
259
260// indexRune returns the index of the first instance of any of the runes in chars in s, or -1 if no rune is present.
261func indexRune(s string, chars ...rune) int {
262 for i, c := range s {
263 for _, ch := range chars {
264 if c == ch {
265 return i
266 }
267 }
268 }
269 return -1
270}