blob: 6e677b6f3072bbda4135ce1069b9a3e5a0027e0d [file] [log] [blame]
Akron22322ec2025-05-21 11:17:30 +02001package parser
2
3import (
4 "fmt"
5 "strings"
6
7 "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
8 "github.com/alecthomas/participle/v2"
9 "github.com/alecthomas/participle/v2/lexer"
10)
11
12// GrammarParser parses a simple grammar into AST nodes
13type GrammarParser struct {
14 defaultFoundry string
15 defaultLayer string
Akronbb5065f2025-05-21 12:44:05 +020016 tokenParser *participle.Parser[TokenGrammar]
17 mappingParser *participle.Parser[MappingGrammar]
Akron22322ec2025-05-21 11:17:30 +020018}
19
Akronbb5065f2025-05-21 12:44:05 +020020// TokenGrammar represents a single token expression
21type TokenGrammar struct {
22 Token *TokenExpr `parser:"@@"`
23}
24
25// MappingGrammar represents a mapping rule
26type MappingGrammar struct {
27 Mapping *MappingRule `parser:"@@"`
28}
29
/*
// Grammar represents the root of our grammar
type Grammar struct {
	Token   *TokenExpr   `parser:" @@"`
	Mapping *MappingRule `parser:"| @@"`
}*/
36
37// MappingRule represents a mapping between two token expressions
38type MappingRule struct {
39 Upper *TokenExpr `parser:"@@"`
40 Lower *TokenExpr `parser:"'<>' @@"`
Akron22322ec2025-05-21 11:17:30 +020041}
42
43// TokenExpr represents a token expression in square brackets
44type TokenExpr struct {
Akronb40f5ac2025-05-21 11:22:33 +020045 Expr *Expr `parser:"'[' @@ ']'"`
Akron22322ec2025-05-21 11:17:30 +020046}
47
48// Expr represents a sequence of terms and operators
49type Expr struct {
Akronb40f5ac2025-05-21 11:22:33 +020050 First *Term `parser:"@@"`
51 Rest []*Op `parser:"@@*"`
Akron22322ec2025-05-21 11:17:30 +020052}
53
54type Op struct {
Akronb40f5ac2025-05-21 11:22:33 +020055 Operator string `parser:"@('&' | '|')"`
56 Term *Term `parser:"@@"`
Akron22322ec2025-05-21 11:17:30 +020057}
58
59// Term represents either a simple term or a parenthesized expression
60type Term struct {
Akronb40f5ac2025-05-21 11:22:33 +020061 Simple *SimpleTerm `parser:"@@"`
62 Paren *ParenExpr `parser:"| @@"`
Akron22322ec2025-05-21 11:17:30 +020063}
64
65type ParenExpr struct {
Akronb40f5ac2025-05-21 11:22:33 +020066 Expr *Expr `parser:"'(' @@ ')'"`
Akron22322ec2025-05-21 11:17:30 +020067}
68
69// SimpleTerm represents any valid term form
70type SimpleTerm struct {
Akronb40f5ac2025-05-21 11:22:33 +020071 WithFoundryLayer *FoundryLayerTerm `parser:"@@"`
72 WithFoundryKey *FoundryKeyTerm `parser:"| @@"`
73 WithLayer *LayerTerm `parser:"| @@"`
74 SimpleKey *KeyTerm `parser:"| @@"`
Akron22322ec2025-05-21 11:17:30 +020075}
76
// FoundryLayerTerm represents foundry/layer=key:value, where the ":value"
// suffix is optional.
type FoundryLayerTerm struct {
	// Foundry is the identifier before '/'.
	Foundry string `parser:"@Ident '/'"`
	// Layer is the identifier between '/' and '='.
	Layer string `parser:"@Ident '='"`
	// Key is the identifier after '='.
	Key string `parser:"@Ident"`
	// Value is the identifier after ':'; empty when no value was given.
	Value string `parser:"(':' @Ident)?"`
}
84
// FoundryKeyTerm represents foundry/key. This form carries no layer and no
// value.
type FoundryKeyTerm struct {
	// Foundry is the identifier before '/'.
	Foundry string `parser:"@Ident '/'"`
	// Key is the identifier after '/'.
	Key string `parser:"@Ident"`
}
90
// LayerTerm represents layer=key:value, where the ":value" suffix is
// optional.
type LayerTerm struct {
	// Layer is the identifier before '='.
	Layer string `parser:"@Ident '='"`
	// Key is the identifier after '='.
	Key string `parser:"@Ident"`
	// Value is the identifier after ':'; empty when no value was given.
	Value string `parser:"(':' @Ident)?"`
}
97
// KeyTerm represents key:value, where the ":value" suffix is optional.
type KeyTerm struct {
	// Key is the leading identifier.
	Key string `parser:"@Ident"`
	// Value is the identifier after ':'; empty when no value was given.
	Value string `parser:"(':' @Ident)?"`
}
103
104// NewGrammarParser creates a new grammar parser with optional default foundry and layer
105func NewGrammarParser(defaultFoundry, defaultLayer string) (*GrammarParser, error) {
106 lex := lexer.MustSimple([]lexer.SimpleRule{
107 {Name: "Ident", Pattern: `[a-zA-Z][a-zA-Z0-9_]*`},
Akronbb5065f2025-05-21 12:44:05 +0200108 {Name: "Punct", Pattern: `[\[\]()&\|=:/]|<>`},
Akron22322ec2025-05-21 11:17:30 +0200109 {Name: "Whitespace", Pattern: `\s+`},
110 })
111
Akronbb5065f2025-05-21 12:44:05 +0200112 tokenParser, err := participle.Build[TokenGrammar](
Akron22322ec2025-05-21 11:17:30 +0200113 participle.Lexer(lex),
114 participle.UseLookahead(2),
115 participle.Elide("Whitespace"),
116 )
117 if err != nil {
Akronbb5065f2025-05-21 12:44:05 +0200118 return nil, fmt.Errorf("failed to build token parser: %w", err)
119 }
120
121 mappingParser, err := participle.Build[MappingGrammar](
122 participle.Lexer(lex),
123 participle.UseLookahead(2),
124 participle.Elide("Whitespace"),
125 )
126 if err != nil {
127 return nil, fmt.Errorf("failed to build mapping parser: %w", err)
Akron22322ec2025-05-21 11:17:30 +0200128 }
129
130 return &GrammarParser{
131 defaultFoundry: defaultFoundry,
132 defaultLayer: defaultLayer,
Akronbb5065f2025-05-21 12:44:05 +0200133 tokenParser: tokenParser,
134 mappingParser: mappingParser,
Akron22322ec2025-05-21 11:17:30 +0200135 }, nil
136}
137
Akronbb5065f2025-05-21 12:44:05 +0200138// Parse parses a grammar string into an AST node (for backward compatibility)
Akron22322ec2025-05-21 11:17:30 +0200139func (p *GrammarParser) Parse(input string) (ast.Node, error) {
140 // Remove extra spaces around operators to help the parser
141 input = strings.ReplaceAll(input, " & ", "&")
142 input = strings.ReplaceAll(input, " | ", "|")
143
144 // Add spaces around parentheses to help the parser
145 input = strings.ReplaceAll(input, "(", " ( ")
146 input = strings.ReplaceAll(input, ")", " ) ")
147
148 // Remove any extra spaces
149 input = strings.TrimSpace(input)
150
Akronbb5065f2025-05-21 12:44:05 +0200151 grammar, err := p.tokenParser.ParseString("", input)
Akron22322ec2025-05-21 11:17:30 +0200152 if err != nil {
153 return nil, fmt.Errorf("failed to parse grammar: %w", err)
154 }
155
Akronbb5065f2025-05-21 12:44:05 +0200156 if grammar.Token == nil {
157 return nil, fmt.Errorf("expected token expression, got mapping rule")
158 }
159
Akron22322ec2025-05-21 11:17:30 +0200160 wrap, err := p.parseExpr(grammar.Token.Expr)
161 if err != nil {
162 return nil, err
163 }
164 return &ast.Token{Wrap: wrap}, nil
165}
166
Akronbb5065f2025-05-21 12:44:05 +0200167// ParseMapping parses a mapping rule string into a MappingResult
168func (p *GrammarParser) ParseMapping(input string) (*MappingResult, error) {
169 // Remove extra spaces around operators to help the parser
170 input = strings.ReplaceAll(input, " & ", "&")
171 input = strings.ReplaceAll(input, " | ", "|")
172 input = strings.ReplaceAll(input, " <> ", "<>")
173
174 // Add spaces around parentheses to help the parser
175 input = strings.ReplaceAll(input, "(", " ( ")
176 input = strings.ReplaceAll(input, ")", " ) ")
177
178 // Remove any extra spaces
179 input = strings.TrimSpace(input)
180
181 grammar, err := p.mappingParser.ParseString("", input)
182 if err != nil {
183 return nil, fmt.Errorf("failed to parse grammar: %w", err)
184 }
185
186 if grammar.Mapping == nil {
187 return nil, fmt.Errorf("expected mapping rule, got token expression")
188 }
189
190 upper, err := p.parseExpr(grammar.Mapping.Upper.Expr)
191 if err != nil {
192 return nil, err
193 }
194
195 lower, err := p.parseExpr(grammar.Mapping.Lower.Expr)
196 if err != nil {
197 return nil, err
198 }
199
200 return &MappingResult{
201 Upper: &ast.Token{Wrap: upper},
202 Lower: &ast.Token{Wrap: lower},
203 }, nil
204}
205
206// MappingResult represents the parsed mapping rule
207type MappingResult struct {
208 Upper *ast.Token
209 Lower *ast.Token
210}
211
Akron22322ec2025-05-21 11:17:30 +0200212// parseExpr builds the AST from the parsed Expr
213func (p *GrammarParser) parseExpr(expr *Expr) (ast.Node, error) {
214 var operands []ast.Node
215 var operators []string
216
217 // Parse the first term
218 first, err := p.parseTerm(expr.First)
219 if err != nil {
220 return nil, err
221 }
222 operands = append(operands, first)
223
224 // Parse the rest
225 for _, op := range expr.Rest {
226 node, err := p.parseTerm(op.Term)
227 if err != nil {
228 return nil, err
229 }
230 operands = append(operands, node)
231 operators = append(operators, op.Operator)
232 }
233
234 // If only one operand, return it
235 if len(operands) == 1 {
236 return operands[0], nil
237 }
238
239 // Group operands by operator precedence (left-to-right, no precedence between & and |)
240 // We'll group by runs of the same operator
241 var groupOperands []ast.Node
242 var currentOp string
243 var currentGroup []ast.Node
244 for i, op := range operators {
245 if i == 0 {
246 currentOp = op
247 currentGroup = append(currentGroup, operands[i])
248 }
249 if op == currentOp {
250 currentGroup = append(currentGroup, operands[i+1])
251 } else {
252 groupOperands = append(groupOperands, &ast.TermGroup{
253 Operands: append([]ast.Node{}, currentGroup...),
254 Relation: toRelation(currentOp),
255 })
256 currentOp = op
257 currentGroup = []ast.Node{operands[i+1]}
258 }
259 }
260 if len(currentGroup) > 0 {
261 groupOperands = append(groupOperands, &ast.TermGroup{
262 Operands: append([]ast.Node{}, currentGroup...),
263 Relation: toRelation(currentOp),
264 })
265 }
266 if len(groupOperands) == 1 {
267 return groupOperands[0], nil
268 }
269 // If mixed operators, nest them left-to-right
270 result := groupOperands[0]
271 for i := 1; i < len(groupOperands); i++ {
272 result = &ast.TermGroup{
273 Operands: []ast.Node{result, groupOperands[i]},
274 Relation: toRelation(operators[0]),
275 }
276 }
277 return result, nil
278}
279
280// parseTerm converts a Term into an AST node
281func (p *GrammarParser) parseTerm(term *Term) (ast.Node, error) {
282 if term.Simple != nil {
283 return p.parseSimpleTerm(term.Simple)
284 }
285 if term.Paren != nil {
286 return p.parseExpr(term.Paren.Expr)
287 }
288 return nil, fmt.Errorf("invalid term: neither simple nor parenthesized")
289}
290
291func toRelation(op string) ast.RelationType {
292 if op == "|" {
293 return ast.OrRelation
294 }
295 return ast.AndRelation
296}
297
298// parseSimpleTerm converts a SimpleTerm into an AST Term node
299func (p *GrammarParser) parseSimpleTerm(term *SimpleTerm) (ast.Node, error) {
300 var foundry, layer, key, value string
301
302 switch {
303 case term.WithFoundryLayer != nil:
304 foundry = term.WithFoundryLayer.Foundry
305 layer = term.WithFoundryLayer.Layer
306 key = term.WithFoundryLayer.Key
307 value = term.WithFoundryLayer.Value
308 case term.WithFoundryKey != nil:
309 foundry = term.WithFoundryKey.Foundry
310 key = term.WithFoundryKey.Key
311 case term.WithLayer != nil:
312 layer = term.WithLayer.Layer
313 key = term.WithLayer.Key
314 value = term.WithLayer.Value
315 case term.SimpleKey != nil:
316 key = term.SimpleKey.Key
317 value = term.SimpleKey.Value
318 default:
319 return nil, fmt.Errorf("invalid term: no valid form found")
320 }
321
322 if foundry == "" {
323 foundry = p.defaultFoundry
324 }
325 if layer == "" {
326 layer = p.defaultLayer
327 }
328
329 return &ast.Term{
330 Foundry: foundry,
331 Key: key,
332 Layer: layer,
333 Match: ast.MatchEqual,
334 Value: value,
335 }, nil
336}