blob: 3124b828c098e3638016cd0865a6307d67390dd4 [file] [log] [blame]
package parser
import (
"fmt"
"strings"
"github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
"github.com/alecthomas/participle/v2"
"github.com/alecthomas/participle/v2/lexer"
)
// GrammarParser parses a simple grammar into AST nodes
type GrammarParser struct {
defaultFoundry string
defaultLayer string
parser *participle.Parser[Grammar]
}
// Grammar represents the root of our grammar
type Grammar struct {
Token *TokenExpr `parser:"@@"`
}
// TokenExpr represents a token expression in square brackets
type TokenExpr struct {
Expr *Expr `parser:"'[' @@ ']'"`
}
// Expr represents a sequence of terms and operators
type Expr struct {
First *Term `parser:"@@"`
Rest []*Op `parser:"@@*"`
}
type Op struct {
Operator string `parser:"@('&' | '|')"`
Term *Term `parser:"@@"`
}
// Term represents either a simple term or a parenthesized expression
type Term struct {
Simple *SimpleTerm `parser:"@@"`
Paren *ParenExpr `parser:"| @@"`
}
type ParenExpr struct {
Expr *Expr `parser:"'(' @@ ')'"`
}
// SimpleTerm represents any valid term form
type SimpleTerm struct {
WithFoundryLayer *FoundryLayerTerm `parser:"@@"`
WithFoundryKey *FoundryKeyTerm `parser:"| @@"`
WithLayer *LayerTerm `parser:"| @@"`
SimpleKey *KeyTerm `parser:"| @@"`
}
// FoundryLayerTerm represents foundry/layer=key:value
type FoundryLayerTerm struct {
Foundry string `parser:"@Ident '/'"`
Layer string `parser:"@Ident '='"`
Key string `parser:"@Ident"`
Value string `parser:"(':' @Ident)?"`
}
// FoundryKeyTerm represents foundry/key
type FoundryKeyTerm struct {
Foundry string `parser:"@Ident '/'"`
Key string `parser:"@Ident"`
}
// LayerTerm represents layer=key:value
type LayerTerm struct {
Layer string `parser:"@Ident '='"`
Key string `parser:"@Ident"`
Value string `parser:"(':' @Ident)?"`
}
// KeyTerm represents key:value
type KeyTerm struct {
Key string `parser:"@Ident"`
Value string `parser:"(':' @Ident)?"`
}
// NewGrammarParser creates a new grammar parser with optional default foundry and layer
func NewGrammarParser(defaultFoundry, defaultLayer string) (*GrammarParser, error) {
lex := lexer.MustSimple([]lexer.SimpleRule{
{Name: "Ident", Pattern: `[a-zA-Z][a-zA-Z0-9_]*`},
{Name: "Punct", Pattern: `[\[\]()&\|=:/]`},
{Name: "Whitespace", Pattern: `\s+`},
})
parser, err := participle.Build[Grammar](
participle.Lexer(lex),
participle.UseLookahead(2),
participle.Elide("Whitespace"),
)
if err != nil {
return nil, fmt.Errorf("failed to build parser: %w", err)
}
return &GrammarParser{
defaultFoundry: defaultFoundry,
defaultLayer: defaultLayer,
parser: parser,
}, nil
}
// Parse parses a grammar string into an AST node
func (p *GrammarParser) Parse(input string) (ast.Node, error) {
// Remove extra spaces around operators to help the parser
input = strings.ReplaceAll(input, " & ", "&")
input = strings.ReplaceAll(input, " | ", "|")
// Add spaces around parentheses to help the parser
input = strings.ReplaceAll(input, "(", " ( ")
input = strings.ReplaceAll(input, ")", " ) ")
// Remove any extra spaces
input = strings.TrimSpace(input)
grammar, err := p.parser.ParseString("", input)
if err != nil {
return nil, fmt.Errorf("failed to parse grammar: %w", err)
}
wrap, err := p.parseExpr(grammar.Token.Expr)
if err != nil {
return nil, err
}
return &ast.Token{Wrap: wrap}, nil
}
// parseExpr builds the AST from the parsed Expr
func (p *GrammarParser) parseExpr(expr *Expr) (ast.Node, error) {
var operands []ast.Node
var operators []string
// Parse the first term
first, err := p.parseTerm(expr.First)
if err != nil {
return nil, err
}
operands = append(operands, first)
// Parse the rest
for _, op := range expr.Rest {
node, err := p.parseTerm(op.Term)
if err != nil {
return nil, err
}
operands = append(operands, node)
operators = append(operators, op.Operator)
}
// If only one operand, return it
if len(operands) == 1 {
return operands[0], nil
}
// Group operands by operator precedence (left-to-right, no precedence between & and |)
// We'll group by runs of the same operator
var groupOperands []ast.Node
var currentOp string
var currentGroup []ast.Node
for i, op := range operators {
if i == 0 {
currentOp = op
currentGroup = append(currentGroup, operands[i])
}
if op == currentOp {
currentGroup = append(currentGroup, operands[i+1])
} else {
groupOperands = append(groupOperands, &ast.TermGroup{
Operands: append([]ast.Node{}, currentGroup...),
Relation: toRelation(currentOp),
})
currentOp = op
currentGroup = []ast.Node{operands[i+1]}
}
}
if len(currentGroup) > 0 {
groupOperands = append(groupOperands, &ast.TermGroup{
Operands: append([]ast.Node{}, currentGroup...),
Relation: toRelation(currentOp),
})
}
if len(groupOperands) == 1 {
return groupOperands[0], nil
}
// If mixed operators, nest them left-to-right
result := groupOperands[0]
for i := 1; i < len(groupOperands); i++ {
result = &ast.TermGroup{
Operands: []ast.Node{result, groupOperands[i]},
Relation: toRelation(operators[0]),
}
}
return result, nil
}
// parseTerm converts a Term into an AST node
func (p *GrammarParser) parseTerm(term *Term) (ast.Node, error) {
if term.Simple != nil {
return p.parseSimpleTerm(term.Simple)
}
if term.Paren != nil {
return p.parseExpr(term.Paren.Expr)
}
return nil, fmt.Errorf("invalid term: neither simple nor parenthesized")
}
func toRelation(op string) ast.RelationType {
if op == "|" {
return ast.OrRelation
}
return ast.AndRelation
}
// parseSimpleTerm converts a SimpleTerm into an AST Term node
func (p *GrammarParser) parseSimpleTerm(term *SimpleTerm) (ast.Node, error) {
var foundry, layer, key, value string
switch {
case term.WithFoundryLayer != nil:
foundry = term.WithFoundryLayer.Foundry
layer = term.WithFoundryLayer.Layer
key = term.WithFoundryLayer.Key
value = term.WithFoundryLayer.Value
case term.WithFoundryKey != nil:
foundry = term.WithFoundryKey.Foundry
key = term.WithFoundryKey.Key
case term.WithLayer != nil:
layer = term.WithLayer.Layer
key = term.WithLayer.Key
value = term.WithLayer.Value
case term.SimpleKey != nil:
key = term.SimpleKey.Key
value = term.SimpleKey.Value
default:
return nil, fmt.Errorf("invalid term: no valid form found")
}
if foundry == "" {
foundry = p.defaultFoundry
}
if layer == "" {
layer = p.defaultLayer
}
return &ast.Term{
Foundry: foundry,
Key: key,
Layer: layer,
Match: ast.MatchEqual,
Value: value,
}, nil
}