blob: 8e466118d4def3244528b28504c12e47ec547a48 [file] [log] [blame]
Akron22322ec2025-05-21 11:17:30 +02001package parser
2
3import (
4 "fmt"
5 "strings"
6
Akronfa55bb22025-05-26 15:10:42 +02007 "github.com/KorAP/KoralPipe-TermMapper/ast"
Akron22322ec2025-05-21 11:17:30 +02008 "github.com/alecthomas/participle/v2"
9 "github.com/alecthomas/participle/v2/lexer"
10)
11
12// GrammarParser parses a simple grammar into AST nodes
13type GrammarParser struct {
14 defaultFoundry string
15 defaultLayer string
Akronbb5065f2025-05-21 12:44:05 +020016 tokenParser *participle.Parser[TokenGrammar]
17 mappingParser *participle.Parser[MappingGrammar]
Akron22322ec2025-05-21 11:17:30 +020018}
19
Akronbb5065f2025-05-21 12:44:05 +020020// TokenGrammar represents a single token expression
21type TokenGrammar struct {
22 Token *TokenExpr `parser:"@@"`
23}
24
25// MappingGrammar represents a mapping rule
26type MappingGrammar struct {
27 Mapping *MappingRule `parser:"@@"`
28}
29
Akronbb5065f2025-05-21 12:44:05 +020030// MappingRule represents a mapping between two token expressions
31type MappingRule struct {
32 Upper *TokenExpr `parser:"@@"`
33 Lower *TokenExpr `parser:"'<>' @@"`
Akron22322ec2025-05-21 11:17:30 +020034}
35
36// TokenExpr represents a token expression in square brackets
37type TokenExpr struct {
Akronb40f5ac2025-05-21 11:22:33 +020038 Expr *Expr `parser:"'[' @@ ']'"`
Akron22322ec2025-05-21 11:17:30 +020039}
40
41// Expr represents a sequence of terms and operators
42type Expr struct {
Akronb40f5ac2025-05-21 11:22:33 +020043 First *Term `parser:"@@"`
44 Rest []*Op `parser:"@@*"`
Akron22322ec2025-05-21 11:17:30 +020045}
46
47type Op struct {
Akronb40f5ac2025-05-21 11:22:33 +020048 Operator string `parser:"@('&' | '|')"`
49 Term *Term `parser:"@@"`
Akron22322ec2025-05-21 11:17:30 +020050}
51
52// Term represents either a simple term or a parenthesized expression
53type Term struct {
Akronb40f5ac2025-05-21 11:22:33 +020054 Simple *SimpleTerm `parser:"@@"`
55 Paren *ParenExpr `parser:"| @@"`
Akron22322ec2025-05-21 11:17:30 +020056}
57
58type ParenExpr struct {
Akronb40f5ac2025-05-21 11:22:33 +020059 Expr *Expr `parser:"'(' @@ ')'"`
Akron22322ec2025-05-21 11:17:30 +020060}
61
62// SimpleTerm represents any valid term form
63type SimpleTerm struct {
Akronb40f5ac2025-05-21 11:22:33 +020064 WithFoundryLayer *FoundryLayerTerm `parser:"@@"`
65 WithFoundryKey *FoundryKeyTerm `parser:"| @@"`
66 WithLayer *LayerTerm `parser:"| @@"`
67 SimpleKey *KeyTerm `parser:"| @@"`
Akron22322ec2025-05-21 11:17:30 +020068}
69
// FoundryLayerTerm is the fully qualified term form foundry/layer=key, with
// an optional trailing :value.
type FoundryLayerTerm struct {
	// Foundry is the identifier before '/'.
	Foundry string `parser:"@Ident '/'"`
	// Layer is the identifier between '/' and '='.
	Layer string `parser:"@Ident '='"`
	// Key is the identifier after '='.
	Key string `parser:"@Ident"`
	// Value is the optional identifier after ':'; empty when absent.
	Value string `parser:"(':' @Ident)?"`
}
77
// FoundryKeyTerm is the term form foundry/key.
type FoundryKeyTerm struct {
	// Foundry is the identifier before '/'.
	Foundry string `parser:"@Ident '/'"`
	// Key is the identifier after '/'.
	Key string `parser:"@Ident"`
}
83
// LayerTerm is the term form layer=key, with an optional trailing :value.
type LayerTerm struct {
	// Layer is the identifier before '='.
	Layer string `parser:"@Ident '='"`
	// Key is the identifier after '='.
	Key string `parser:"@Ident"`
	// Value is the optional identifier after ':'; empty when absent.
	Value string `parser:"(':' @Ident)?"`
}
90
// KeyTerm is the shortest term form: a bare key with an optional trailing
// :value.
type KeyTerm struct {
	// Key is the leading identifier.
	Key string `parser:"@Ident"`
	// Value is the optional identifier after ':'; empty when absent.
	Value string `parser:"(':' @Ident)?"`
}
96
97// NewGrammarParser creates a new grammar parser with optional default foundry and layer
98func NewGrammarParser(defaultFoundry, defaultLayer string) (*GrammarParser, error) {
99 lex := lexer.MustSimple([]lexer.SimpleRule{
100 {Name: "Ident", Pattern: `[a-zA-Z][a-zA-Z0-9_]*`},
Akronbb5065f2025-05-21 12:44:05 +0200101 {Name: "Punct", Pattern: `[\[\]()&\|=:/]|<>`},
Akron22322ec2025-05-21 11:17:30 +0200102 {Name: "Whitespace", Pattern: `\s+`},
103 })
104
Akronbb5065f2025-05-21 12:44:05 +0200105 tokenParser, err := participle.Build[TokenGrammar](
Akron22322ec2025-05-21 11:17:30 +0200106 participle.Lexer(lex),
107 participle.UseLookahead(2),
108 participle.Elide("Whitespace"),
109 )
110 if err != nil {
Akronbb5065f2025-05-21 12:44:05 +0200111 return nil, fmt.Errorf("failed to build token parser: %w", err)
112 }
113
114 mappingParser, err := participle.Build[MappingGrammar](
115 participle.Lexer(lex),
116 participle.UseLookahead(2),
117 participle.Elide("Whitespace"),
118 )
119 if err != nil {
120 return nil, fmt.Errorf("failed to build mapping parser: %w", err)
Akron22322ec2025-05-21 11:17:30 +0200121 }
122
123 return &GrammarParser{
124 defaultFoundry: defaultFoundry,
125 defaultLayer: defaultLayer,
Akronbb5065f2025-05-21 12:44:05 +0200126 tokenParser: tokenParser,
127 mappingParser: mappingParser,
Akron22322ec2025-05-21 11:17:30 +0200128 }, nil
129}
130
Akronbb5065f2025-05-21 12:44:05 +0200131// Parse parses a grammar string into an AST node (for backward compatibility)
Akron22322ec2025-05-21 11:17:30 +0200132func (p *GrammarParser) Parse(input string) (ast.Node, error) {
133 // Remove extra spaces around operators to help the parser
134 input = strings.ReplaceAll(input, " & ", "&")
135 input = strings.ReplaceAll(input, " | ", "|")
136
137 // Add spaces around parentheses to help the parser
138 input = strings.ReplaceAll(input, "(", " ( ")
139 input = strings.ReplaceAll(input, ")", " ) ")
140
141 // Remove any extra spaces
142 input = strings.TrimSpace(input)
143
Akronbb5065f2025-05-21 12:44:05 +0200144 grammar, err := p.tokenParser.ParseString("", input)
Akron22322ec2025-05-21 11:17:30 +0200145 if err != nil {
146 return nil, fmt.Errorf("failed to parse grammar: %w", err)
147 }
148
Akronbb5065f2025-05-21 12:44:05 +0200149 if grammar.Token == nil {
150 return nil, fmt.Errorf("expected token expression, got mapping rule")
151 }
152
Akron22322ec2025-05-21 11:17:30 +0200153 wrap, err := p.parseExpr(grammar.Token.Expr)
154 if err != nil {
155 return nil, err
156 }
157 return &ast.Token{Wrap: wrap}, nil
158}
159
Akronbb5065f2025-05-21 12:44:05 +0200160// ParseMapping parses a mapping rule string into a MappingResult
161func (p *GrammarParser) ParseMapping(input string) (*MappingResult, error) {
162 // Remove extra spaces around operators to help the parser
163 input = strings.ReplaceAll(input, " & ", "&")
164 input = strings.ReplaceAll(input, " | ", "|")
165 input = strings.ReplaceAll(input, " <> ", "<>")
166
167 // Add spaces around parentheses to help the parser
168 input = strings.ReplaceAll(input, "(", " ( ")
169 input = strings.ReplaceAll(input, ")", " ) ")
170
171 // Remove any extra spaces
172 input = strings.TrimSpace(input)
173
174 grammar, err := p.mappingParser.ParseString("", input)
175 if err != nil {
176 return nil, fmt.Errorf("failed to parse grammar: %w", err)
177 }
178
179 if grammar.Mapping == nil {
180 return nil, fmt.Errorf("expected mapping rule, got token expression")
181 }
182
183 upper, err := p.parseExpr(grammar.Mapping.Upper.Expr)
184 if err != nil {
185 return nil, err
186 }
187
188 lower, err := p.parseExpr(grammar.Mapping.Lower.Expr)
189 if err != nil {
190 return nil, err
191 }
192
193 return &MappingResult{
194 Upper: &ast.Token{Wrap: upper},
195 Lower: &ast.Token{Wrap: lower},
196 }, nil
197}
198
199// MappingResult represents the parsed mapping rule
200type MappingResult struct {
201 Upper *ast.Token
202 Lower *ast.Token
203}
204
Akron22322ec2025-05-21 11:17:30 +0200205// parseExpr builds the AST from the parsed Expr
206func (p *GrammarParser) parseExpr(expr *Expr) (ast.Node, error) {
207 var operands []ast.Node
208 var operators []string
209
210 // Parse the first term
211 first, err := p.parseTerm(expr.First)
212 if err != nil {
213 return nil, err
214 }
215 operands = append(operands, first)
216
217 // Parse the rest
218 for _, op := range expr.Rest {
219 node, err := p.parseTerm(op.Term)
220 if err != nil {
221 return nil, err
222 }
223 operands = append(operands, node)
224 operators = append(operators, op.Operator)
225 }
226
227 // If only one operand, return it
228 if len(operands) == 1 {
229 return operands[0], nil
230 }
231
232 // Group operands by operator precedence (left-to-right, no precedence between & and |)
233 // We'll group by runs of the same operator
234 var groupOperands []ast.Node
235 var currentOp string
236 var currentGroup []ast.Node
237 for i, op := range operators {
238 if i == 0 {
239 currentOp = op
240 currentGroup = append(currentGroup, operands[i])
241 }
242 if op == currentOp {
243 currentGroup = append(currentGroup, operands[i+1])
244 } else {
245 groupOperands = append(groupOperands, &ast.TermGroup{
246 Operands: append([]ast.Node{}, currentGroup...),
247 Relation: toRelation(currentOp),
248 })
249 currentOp = op
250 currentGroup = []ast.Node{operands[i+1]}
251 }
252 }
253 if len(currentGroup) > 0 {
254 groupOperands = append(groupOperands, &ast.TermGroup{
255 Operands: append([]ast.Node{}, currentGroup...),
256 Relation: toRelation(currentOp),
257 })
258 }
259 if len(groupOperands) == 1 {
260 return groupOperands[0], nil
261 }
262 // If mixed operators, nest them left-to-right
263 result := groupOperands[0]
264 for i := 1; i < len(groupOperands); i++ {
265 result = &ast.TermGroup{
266 Operands: []ast.Node{result, groupOperands[i]},
267 Relation: toRelation(operators[0]),
268 }
269 }
270 return result, nil
271}
272
273// parseTerm converts a Term into an AST node
274func (p *GrammarParser) parseTerm(term *Term) (ast.Node, error) {
275 if term.Simple != nil {
276 return p.parseSimpleTerm(term.Simple)
277 }
278 if term.Paren != nil {
279 return p.parseExpr(term.Paren.Expr)
280 }
281 return nil, fmt.Errorf("invalid term: neither simple nor parenthesized")
282}
283
284func toRelation(op string) ast.RelationType {
285 if op == "|" {
286 return ast.OrRelation
287 }
288 return ast.AndRelation
289}
290
291// parseSimpleTerm converts a SimpleTerm into an AST Term node
292func (p *GrammarParser) parseSimpleTerm(term *SimpleTerm) (ast.Node, error) {
293 var foundry, layer, key, value string
294
295 switch {
296 case term.WithFoundryLayer != nil:
297 foundry = term.WithFoundryLayer.Foundry
298 layer = term.WithFoundryLayer.Layer
299 key = term.WithFoundryLayer.Key
300 value = term.WithFoundryLayer.Value
301 case term.WithFoundryKey != nil:
302 foundry = term.WithFoundryKey.Foundry
303 key = term.WithFoundryKey.Key
304 case term.WithLayer != nil:
305 layer = term.WithLayer.Layer
306 key = term.WithLayer.Key
307 value = term.WithLayer.Value
308 case term.SimpleKey != nil:
309 key = term.SimpleKey.Key
310 value = term.SimpleKey.Value
311 default:
312 return nil, fmt.Errorf("invalid term: no valid form found")
313 }
314
315 if foundry == "" {
316 foundry = p.defaultFoundry
317 }
318 if layer == "" {
319 layer = p.defaultLayer
320 }
321
322 return &ast.Term{
323 Foundry: foundry,
324 Key: key,
325 Layer: layer,
326 Match: ast.MatchEqual,
327 Value: value,
328 }, nil
329}