blob: 18cd5c70ff5ec2ec14229a5503495171e3873e29 [file] [log] [blame]
package parser
import (
"fmt"
"strings"
"github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
"github.com/alecthomas/participle/v2"
"github.com/alecthomas/participle/v2/lexer"
)
// GrammarParser parses a simple grammar into AST nodes
type GrammarParser struct {
defaultFoundry string
defaultLayer string
parser *participle.Parser[Grammar]
}
// Grammar represents the root of our grammar
type Grammar struct {
Token *TokenExpr `@@`
}
// TokenExpr represents a token expression in square brackets
type TokenExpr struct {
Expr *Expr `"[" @@ "]"`
}
// Expr represents a sequence of terms and operators
type Expr struct {
First *Term `@@`
Rest []*Op `@@*`
}
type Op struct {
Operator string `@("&" | "|")`
Term *Term `@@`
}
// Term represents either a simple term or a parenthesized expression
type Term struct {
Simple *SimpleTerm `@@`
Paren *ParenExpr `| @@`
}
type ParenExpr struct {
Expr *Expr `"(" @@ ")"`
}
// SimpleTerm represents any valid term form
type SimpleTerm struct {
WithFoundryLayer *FoundryLayerTerm `@@`
WithFoundryKey *FoundryKeyTerm `| @@`
WithLayer *LayerTerm `| @@`
SimpleKey *KeyTerm `| @@`
}
// FoundryLayerTerm represents foundry/layer=key:value
type FoundryLayerTerm struct {
Foundry string `@Ident "/"`
Layer string `@Ident "="`
Key string `@Ident`
Value string `(":" @Ident)?`
}
// FoundryKeyTerm represents foundry/key
type FoundryKeyTerm struct {
Foundry string `@Ident "/"`
Key string `@Ident`
}
// LayerTerm represents layer=key:value
type LayerTerm struct {
Layer string `@Ident "="`
Key string `@Ident`
Value string `(":" @Ident)?`
}
// KeyTerm represents key:value
type KeyTerm struct {
Key string `@Ident`
Value string `(":" @Ident)?`
}
// NewGrammarParser creates a new grammar parser with optional default foundry and layer
func NewGrammarParser(defaultFoundry, defaultLayer string) (*GrammarParser, error) {
lex := lexer.MustSimple([]lexer.SimpleRule{
{Name: "Ident", Pattern: `[a-zA-Z][a-zA-Z0-9_]*`},
{Name: "Punct", Pattern: `[\[\]()&\|=:/]`},
{Name: "Whitespace", Pattern: `\s+`},
})
parser, err := participle.Build[Grammar](
participle.Lexer(lex),
participle.UseLookahead(2),
participle.Elide("Whitespace"),
)
if err != nil {
return nil, fmt.Errorf("failed to build parser: %w", err)
}
return &GrammarParser{
defaultFoundry: defaultFoundry,
defaultLayer: defaultLayer,
parser: parser,
}, nil
}
// Parse parses a grammar string into an AST node
func (p *GrammarParser) Parse(input string) (ast.Node, error) {
// Remove extra spaces around operators to help the parser
input = strings.ReplaceAll(input, " & ", "&")
input = strings.ReplaceAll(input, " | ", "|")
// Add spaces around parentheses to help the parser
input = strings.ReplaceAll(input, "(", " ( ")
input = strings.ReplaceAll(input, ")", " ) ")
// Remove any extra spaces
input = strings.TrimSpace(input)
grammar, err := p.parser.ParseString("", input)
if err != nil {
return nil, fmt.Errorf("failed to parse grammar: %w", err)
}
wrap, err := p.parseExpr(grammar.Token.Expr)
if err != nil {
return nil, err
}
return &ast.Token{Wrap: wrap}, nil
}
// parseExpr builds the AST from the parsed Expr
func (p *GrammarParser) parseExpr(expr *Expr) (ast.Node, error) {
var operands []ast.Node
var operators []string
// Parse the first term
first, err := p.parseTerm(expr.First)
if err != nil {
return nil, err
}
operands = append(operands, first)
// Parse the rest
for _, op := range expr.Rest {
node, err := p.parseTerm(op.Term)
if err != nil {
return nil, err
}
operands = append(operands, node)
operators = append(operators, op.Operator)
}
// If only one operand, return it
if len(operands) == 1 {
return operands[0], nil
}
// Group operands by operator precedence (left-to-right, no precedence between & and |)
// We'll group by runs of the same operator
var groupOperands []ast.Node
var currentOp string
var currentGroup []ast.Node
for i, op := range operators {
if i == 0 {
currentOp = op
currentGroup = append(currentGroup, operands[i])
}
if op == currentOp {
currentGroup = append(currentGroup, operands[i+1])
} else {
groupOperands = append(groupOperands, &ast.TermGroup{
Operands: append([]ast.Node{}, currentGroup...),
Relation: toRelation(currentOp),
})
currentOp = op
currentGroup = []ast.Node{operands[i+1]}
}
}
if len(currentGroup) > 0 {
groupOperands = append(groupOperands, &ast.TermGroup{
Operands: append([]ast.Node{}, currentGroup...),
Relation: toRelation(currentOp),
})
}
if len(groupOperands) == 1 {
return groupOperands[0], nil
}
// If mixed operators, nest them left-to-right
result := groupOperands[0]
for i := 1; i < len(groupOperands); i++ {
result = &ast.TermGroup{
Operands: []ast.Node{result, groupOperands[i]},
Relation: toRelation(operators[0]),
}
}
return result, nil
}
// parseTerm converts a Term into an AST node
func (p *GrammarParser) parseTerm(term *Term) (ast.Node, error) {
if term.Simple != nil {
return p.parseSimpleTerm(term.Simple)
}
if term.Paren != nil {
return p.parseExpr(term.Paren.Expr)
}
return nil, fmt.Errorf("invalid term: neither simple nor parenthesized")
}
func toRelation(op string) ast.RelationType {
if op == "|" {
return ast.OrRelation
}
return ast.AndRelation
}
// parseSimpleTerm converts a SimpleTerm into an AST Term node
func (p *GrammarParser) parseSimpleTerm(term *SimpleTerm) (ast.Node, error) {
var foundry, layer, key, value string
switch {
case term.WithFoundryLayer != nil:
foundry = term.WithFoundryLayer.Foundry
layer = term.WithFoundryLayer.Layer
key = term.WithFoundryLayer.Key
value = term.WithFoundryLayer.Value
case term.WithFoundryKey != nil:
foundry = term.WithFoundryKey.Foundry
key = term.WithFoundryKey.Key
case term.WithLayer != nil:
layer = term.WithLayer.Layer
key = term.WithLayer.Key
value = term.WithLayer.Value
case term.SimpleKey != nil:
key = term.SimpleKey.Key
value = term.SimpleKey.Value
default:
return nil, fmt.Errorf("invalid term: no valid form found")
}
if foundry == "" {
foundry = p.defaultFoundry
}
if layer == "" {
layer = p.defaultLayer
}
return &ast.Term{
Foundry: foundry,
Key: key,
Layer: layer,
Match: ast.MatchEqual,
Value: value,
}, nil
}
// indexRune returns the index of the first instance of any of the runes in chars in s, or -1 if no rune is present.
func indexRune(s string, chars ...rune) int {
for i, c := range s {
for _, ch := range chars {
if c == ch {
return i
}
}
}
return -1
}