Remove pkg subfolder
diff --git a/parser/grammar_parser.go b/parser/grammar_parser.go
new file mode 100644
index 0000000..3701d49
--- /dev/null
+++ b/parser/grammar_parser.go
@@ -0,0 +1,336 @@
+package parser
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/KorAP/KoralPipe-TermMapper2/ast"
+ "github.com/alecthomas/participle/v2"
+ "github.com/alecthomas/participle/v2/lexer"
+)
+
+// GrammarParser parses token expressions and mapping rules into AST nodes,
+// filling in a default foundry and layer where a term omits them
+type GrammarParser struct {
+ defaultFoundry string
+ defaultLayer string
+ tokenParser *participle.Parser[TokenGrammar]
+ mappingParser *participle.Parser[MappingGrammar]
+}
+
+// TokenGrammar represents a single token expression
+type TokenGrammar struct {
+ Token *TokenExpr `parser:"@@"`
+}
+
+// MappingGrammar represents a mapping rule
+type MappingGrammar struct {
+ Mapping *MappingRule `parser:"@@"`
+}
+
+/*
+// Grammar represents the root of our grammar
+type Grammar struct {
+ Token *TokenExpr `parser:" @@"`
+ Mapping *MappingRule `parser:"| @@"`
+}*/
+
+// MappingRule represents a mapping between two token expressions
+type MappingRule struct {
+ Upper *TokenExpr `parser:"@@"`
+ Lower *TokenExpr `parser:"'<>' @@"`
+}
+
+// TokenExpr represents a token expression in square brackets
+type TokenExpr struct {
+ Expr *Expr `parser:"'[' @@ ']'"`
+}
+
+// Expr represents a sequence of terms and operators
+type Expr struct {
+ First *Term `parser:"@@"`
+ Rest []*Op `parser:"@@*"`
+}
+
+// Op represents a binary operator ('&' or '|') and its right-hand term
+type Op struct {
+ Operator string `parser:"@('&' | '|')"`
+ Term *Term `parser:"@@"`
+}
+
+// Term represents either a simple term or a parenthesized expression
+type Term struct {
+ Simple *SimpleTerm `parser:"@@"`
+ Paren *ParenExpr `parser:"| @@"`
+}
+
+// ParenExpr represents a parenthesized sub-expression
+type ParenExpr struct {
+ Expr *Expr `parser:"'(' @@ ')'"`
+}
+
+// SimpleTerm represents any valid term form
+type SimpleTerm struct {
+ WithFoundryLayer *FoundryLayerTerm `parser:"@@"`
+ WithFoundryKey *FoundryKeyTerm `parser:"| @@"`
+ WithLayer *LayerTerm `parser:"| @@"`
+ SimpleKey *KeyTerm `parser:"| @@"`
+}
+
+// FoundryLayerTerm represents foundry/layer=key with an optional :value
+type FoundryLayerTerm struct {
+ Foundry string `parser:"@Ident '/'"`
+ Layer string `parser:"@Ident '='"`
+ Key string `parser:"@Ident"`
+ Value string `parser:"(':' @Ident)?"`
+}
+
+// FoundryKeyTerm represents foundry/key
+type FoundryKeyTerm struct {
+ Foundry string `parser:"@Ident '/'"`
+ Key string `parser:"@Ident"`
+}
+
+// LayerTerm represents layer=key with an optional :value
+type LayerTerm struct {
+ Layer string `parser:"@Ident '='"`
+ Key string `parser:"@Ident"`
+ Value string `parser:"(':' @Ident)?"`
+}
+
+// KeyTerm represents a bare key with an optional :value
+type KeyTerm struct {
+ Key string `parser:"@Ident"`
+ Value string `parser:"(':' @Ident)?"`
+}
+
+// NewGrammarParser creates a new grammar parser with optional default foundry and layer
+func NewGrammarParser(defaultFoundry, defaultLayer string) (*GrammarParser, error) {
+ lex := lexer.MustSimple([]lexer.SimpleRule{
+ {Name: "Ident", Pattern: `[a-zA-Z][a-zA-Z0-9_]*`},
+ {Name: "Punct", Pattern: `[\[\]()&\|=:/]|<>`},
+ {Name: "Whitespace", Pattern: `\s+`},
+ })
+
+ tokenParser, err := participle.Build[TokenGrammar](
+ participle.Lexer(lex),
+ participle.UseLookahead(2),
+ participle.Elide("Whitespace"),
+ )
+ if err != nil {
+ return nil, fmt.Errorf("failed to build token parser: %w", err)
+ }
+
+ mappingParser, err := participle.Build[MappingGrammar](
+ participle.Lexer(lex),
+ participle.UseLookahead(2),
+ participle.Elide("Whitespace"),
+ )
+ if err != nil {
+ return nil, fmt.Errorf("failed to build mapping parser: %w", err)
+ }
+
+ return &GrammarParser{
+ defaultFoundry: defaultFoundry,
+ defaultLayer: defaultLayer,
+ tokenParser: tokenParser,
+ mappingParser: mappingParser,
+ }, nil
+}
+
+// Parse parses a grammar string into an AST node (for backward compatibility)
+func (p *GrammarParser) Parse(input string) (ast.Node, error) {
+ // Remove extra spaces around operators to help the parser
+ input = strings.ReplaceAll(input, " & ", "&")
+ input = strings.ReplaceAll(input, " | ", "|")
+
+ // Add spaces around parentheses to help the parser
+ input = strings.ReplaceAll(input, "(", " ( ")
+ input = strings.ReplaceAll(input, ")", " ) ")
+
+ // Trim leading and trailing whitespace
+ input = strings.TrimSpace(input)
+
+ grammar, err := p.tokenParser.ParseString("", input)
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse grammar: %w", err)
+ }
+
+ if grammar.Token == nil {
+ return nil, fmt.Errorf("expected token expression, got mapping rule")
+ }
+
+ wrap, err := p.parseExpr(grammar.Token.Expr)
+ if err != nil {
+ return nil, err
+ }
+ return &ast.Token{Wrap: wrap}, nil
+}
+
+// ParseMapping parses a mapping rule string into a MappingResult
+func (p *GrammarParser) ParseMapping(input string) (*MappingResult, error) {
+ // Remove extra spaces around operators to help the parser
+ input = strings.ReplaceAll(input, " & ", "&")
+ input = strings.ReplaceAll(input, " | ", "|")
+ input = strings.ReplaceAll(input, " <> ", "<>")
+
+ // Add spaces around parentheses to help the parser
+ input = strings.ReplaceAll(input, "(", " ( ")
+ input = strings.ReplaceAll(input, ")", " ) ")
+
+ // Trim leading and trailing whitespace
+ input = strings.TrimSpace(input)
+
+ grammar, err := p.mappingParser.ParseString("", input)
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse grammar: %w", err)
+ }
+
+ if grammar.Mapping == nil {
+ return nil, fmt.Errorf("expected mapping rule, got token expression")
+ }
+
+ upper, err := p.parseExpr(grammar.Mapping.Upper.Expr)
+ if err != nil {
+ return nil, err
+ }
+
+ lower, err := p.parseExpr(grammar.Mapping.Lower.Expr)
+ if err != nil {
+ return nil, err
+ }
+
+ return &MappingResult{
+ Upper: &ast.Token{Wrap: upper},
+ Lower: &ast.Token{Wrap: lower},
+ }, nil
+}
+
+// MappingResult represents the parsed mapping rule
+type MappingResult struct {
+ Upper *ast.Token
+ Lower *ast.Token
+}
+
+// parseExpr builds the AST from the parsed Expr
+func (p *GrammarParser) parseExpr(expr *Expr) (ast.Node, error) {
+ var operands []ast.Node
+ var operators []string
+
+ // Parse the first term
+ first, err := p.parseTerm(expr.First)
+ if err != nil {
+ return nil, err
+ }
+ operands = append(operands, first)
+
+ // Parse the rest
+ for _, op := range expr.Rest {
+ node, err := p.parseTerm(op.Term)
+ if err != nil {
+ return nil, err
+ }
+ operands = append(operands, node)
+ operators = append(operators, op.Operator)
+ }
+
+ // If only one operand, return it
+ if len(operands) == 1 {
+ return operands[0], nil
+ }
+
+ // No precedence between '&' and '|': operators associate left-to-right,
+ // and consecutive identical operators are collapsed into one flat group.
+ result := operands[0]
+ builtHere := false // true once result is a TermGroup built in this loop
+ for i, op := range operators {
+ rel := toRelation(op)
+ if group, ok := result.(*ast.TermGroup); ok && builtHere && group.Relation == rel {
+ // Same operator as the current group: extend it.
+ group.Operands = append(group.Operands, operands[i+1])
+ continue
+ }
+ // First or changed operator: nest the accumulated result to the left.
+ result = &ast.TermGroup{
+ Operands: []ast.Node{result, operands[i+1]},
+ Relation: rel,
+ }
+ builtHere = true
+ }
+ return result, nil
+}
+
+// parseTerm converts a Term into an AST node
+func (p *GrammarParser) parseTerm(term *Term) (ast.Node, error) {
+ if term.Simple != nil {
+ return p.parseSimpleTerm(term.Simple)
+ }
+ if term.Paren != nil {
+ return p.parseExpr(term.Paren.Expr)
+ }
+ return nil, fmt.Errorf("invalid term: neither simple nor parenthesized")
+}
+
+// toRelation maps a grammar operator ("&" or "|") to the corresponding AST relation type
+func toRelation(op string) ast.RelationType {
+ if op == "|" {
+ return ast.OrRelation
+ }
+ return ast.AndRelation
+}
+
+// parseSimpleTerm converts a SimpleTerm into an AST Term node
+func (p *GrammarParser) parseSimpleTerm(term *SimpleTerm) (ast.Node, error) {
+ var foundry, layer, key, value string
+
+ switch {
+ case term.WithFoundryLayer != nil:
+ foundry = term.WithFoundryLayer.Foundry
+ layer = term.WithFoundryLayer.Layer
+ key = term.WithFoundryLayer.Key
+ value = term.WithFoundryLayer.Value
+ case term.WithFoundryKey != nil:
+ foundry = term.WithFoundryKey.Foundry
+ key = term.WithFoundryKey.Key
+ case term.WithLayer != nil:
+ layer = term.WithLayer.Layer
+ key = term.WithLayer.Key
+ value = term.WithLayer.Value
+ case term.SimpleKey != nil:
+ key = term.SimpleKey.Key
+ value = term.SimpleKey.Value
+ default:
+ return nil, fmt.Errorf("invalid term: no valid form found")
+ }
+
+ if foundry == "" {
+ foundry = p.defaultFoundry
+ }
+ if layer == "" {
+ layer = p.defaultLayer
+ }
+
+ return &ast.Term{
+ Foundry: foundry,
+ Key: key,
+ Layer: layer,
+ Match: ast.MatchEqual,
+ Value: value,
+ }, nil
+}
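
For orientation, a minimal usage sketch of the grammar parser added above (not part of the diff; the main package, sample inputs, and error handling are illustrative, and the import path assumes the module path seen in the file's own imports):

package main

import (
    "fmt"
    "log"

    "github.com/KorAP/KoralPipe-TermMapper2/parser"
)

func main() {
    // Defaults are applied to terms that omit foundry or layer.
    gp, err := parser.NewGrammarParser("opennlp", "p")
    if err != nil {
        log.Fatal(err)
    }

    // Parse a single token expression into an AST token.
    tok, err := gp.Parse("[opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]")
    if err != nil {
        log.Fatal(err)
    }
    fmt.Printf("%+v\n", tok)

    // Parse a mapping rule into its upper and lower token expressions.
    m, err := gp.ParseMapping("[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]")
    if err != nil {
        log.Fatal(err)
    }
    fmt.Printf("upper: %+v\nlower: %+v\n", m.Upper, m.Lower)
}
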
diff --git a/parser/grammar_parser_test.go b/parser/grammar_parser_test.go
new file mode 100644
index 0000000..d9e5534
--- /dev/null
+++ b/parser/grammar_parser_test.go
@@ -0,0 +1,314 @@
+package parser
+
+import (
+ "testing"
+
+ "github.com/KorAP/KoralPipe-TermMapper2/ast"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestGrammarParserSimpleTerm(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ defaultFoundry string
+ defaultLayer string
+ expected *SimpleTerm
+ expectError bool
+ }{
+ {
+ name: "Foundry layer key value",
+ input: "[opennlp/p=PIDAT:new]",
+ defaultFoundry: "opennlp",
+ defaultLayer: "p",
+ expected: &SimpleTerm{
+ WithFoundryLayer: &FoundryLayerTerm{
+ Foundry: "opennlp",
+ Layer: "p",
+ Key: "PIDAT",
+ Value: "new",
+ },
+ },
+ },
+ {
+ name: "Foundry layer key",
+ input: "[opennlp/p=PIDAT]",
+ defaultFoundry: "opennlp",
+ defaultLayer: "p",
+ expected: &SimpleTerm{
+ WithFoundryLayer: &FoundryLayerTerm{
+ Foundry: "opennlp",
+ Layer: "p",
+ Key: "PIDAT",
+ },
+ },
+ },
+ {
+ name: "Layer key",
+ input: "[p=PIDAT]",
+ defaultFoundry: "opennlp",
+ defaultLayer: "p",
+ expected: &SimpleTerm{
+ WithLayer: &LayerTerm{
+ Layer: "p",
+ Key: "PIDAT",
+ },
+ },
+ },
+ {
+ name: "Simple key",
+ input: "[PIDAT]",
+ defaultFoundry: "opennlp",
+ defaultLayer: "p",
+ expected: &SimpleTerm{
+ SimpleKey: &KeyTerm{
+ Key: "PIDAT",
+ },
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ parser, err := NewGrammarParser(tt.defaultFoundry, tt.defaultLayer)
+ require.NoError(t, err)
+
+ grammar, err := parser.tokenParser.ParseString("", tt.input)
+ if tt.expectError {
+ assert.Error(t, err)
+ return
+ }
+ require.NoError(t, err)
+ require.NotNil(t, grammar.Token, "Expected token expression")
+ assert.Equal(t, tt.expected, grammar.Token.Expr.First.Simple)
+ })
+ }
+}
+
+func TestGrammarParser(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ defaultFoundry string
+ defaultLayer string
+ expected ast.Node
+ expectError bool
+ }{
+ {
+ name: "Simple term with foundry and layer",
+ input: "[opennlp/p=PIDAT]",
+ defaultFoundry: "opennlp",
+ defaultLayer: "p",
+ expected: &ast.Token{
+ Wrap: &ast.Term{
+ Foundry: "opennlp",
+ Key: "PIDAT",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ },
+ {
+ name: "Term group with and relation",
+ input: "[opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
+ defaultFoundry: "opennlp",
+ defaultLayer: "p",
+ expected: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PIDAT",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ },
+ {
+ name: "Term group with or relation",
+ input: "[opennlp/p=PronType:Ind | opennlp/p=PronType:Neg]",
+ defaultFoundry: "opennlp",
+ defaultLayer: "p",
+ expected: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PronType",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ Value: "Ind",
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PronType",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ Value: "Neg",
+ },
+ },
+ Relation: ast.OrRelation,
+ },
+ },
+ },
+ {
+ name: "Complex term group",
+ input: "[opennlp/p=PIDAT & (opennlp/p=PronType:Ind | opennlp/p=PronType:Neg)]",
+ defaultFoundry: "opennlp",
+ defaultLayer: "p",
+ expected: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PIDAT",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PronType",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ Value: "Ind",
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PronType",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ Value: "Neg",
+ },
+ },
+ Relation: ast.OrRelation,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ parser, err := NewGrammarParser(tt.defaultFoundry, tt.defaultLayer)
+ require.NoError(t, err)
+
+ result, err := parser.Parse(tt.input)
+ if tt.expectError {
+ assert.Error(t, err)
+ return
+ }
+ require.NoError(t, err)
+ assert.Equal(t, tt.expected, result)
+ })
+ }
+}
+
+func TestMappingRules(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ expected *MappingResult
+ wantErr bool
+ }{
+ {
+ name: "Simple PIDAT mapping",
+ input: "[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
+ expected: &MappingResult{
+ Upper: &ast.Token{
+ Wrap: &ast.Term{
+ Key: "PIDAT",
+ Match: ast.MatchEqual,
+ },
+ },
+ Lower: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Relation: ast.AndRelation,
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Layer: "p",
+ Key: "PIDAT",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Layer: "p",
+ Key: "AdjType",
+ Value: "Pdt",
+ Match: ast.MatchEqual,
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ name: "PAV mapping",
+ input: "[PAV] <> [ADV & PronType:Dem]",
+ expected: &MappingResult{
+ Upper: &ast.Token{
+ Wrap: &ast.Term{
+ Key: "PAV",
+ Match: ast.MatchEqual,
+ },
+ },
+ Lower: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Relation: ast.AndRelation,
+ Operands: []ast.Node{
+ &ast.Term{
+ Key: "ADV",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Key: "PronType",
+ Value: "Dem",
+ Match: ast.MatchEqual,
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ name: "Invalid mapping syntax",
+ input: "[PAV] -> [ADV]",
+ wantErr: true,
+ },
+ {
+ name: "Missing closing bracket",
+ input: "[PAV <> [ADV]",
+ wantErr: true,
+ },
+ }
+
+ parser, err := NewGrammarParser("", "")
+ require.NoError(t, err)
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result, err := parser.ParseMapping(tt.input)
+ if tt.wantErr {
+ assert.Error(t, err)
+ return
+ }
+ assert.NoError(t, err)
+ assert.Equal(t, tt.expected, result)
+ })
+ }
+}
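
The tests above cover explicit foundry/layer forms and the raw grammar structs; the following hypothetical additional case (not part of the diff, assumed to live in the same test file so the existing imports apply) illustrates how a bare key picks up the parser's default foundry and layer:

func TestDefaultFoundryAndLayer(t *testing.T) {
    p, err := NewGrammarParser("opennlp", "p")
    require.NoError(t, err)

    node, err := p.Parse("[PIDAT]")
    require.NoError(t, err)

    // The bare key is completed with the default foundry and layer.
    assert.Equal(t, &ast.Token{
        Wrap: &ast.Term{
            Foundry: "opennlp",
            Layer:   "p",
            Key:     "PIDAT",
            Match:   ast.MatchEqual,
        },
    }, node)
}
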
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..d81c3b0
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1,346 @@
+package parser
+
+// This file parses JSON-serialized koral objects into AST nodes and
+// serializes AST nodes back to JSON.
+
+import (
+ "encoding/json"
+ "fmt"
+ "strings"
+
+ "github.com/KorAP/KoralPipe-TermMapper2/ast"
+)
+
+// rawNode represents the raw JSON structure
+type rawNode struct {
+ Type string `json:"@type"`
+ Wrap json.RawMessage `json:"wrap,omitempty"`
+ Operands []rawNode `json:"operands,omitempty"`
+ Relation string `json:"relation,omitempty"`
+ Foundry string `json:"foundry,omitempty"`
+ Key string `json:"key,omitempty"`
+ Layer string `json:"layer,omitempty"`
+ Match string `json:"match,omitempty"`
+ Value string `json:"value,omitempty"`
+ // Store any additional fields
+ Extra map[string]interface{} `json:"-"`
+}
+
+// UnmarshalJSON implements the json.Unmarshaler interface
+func (r *rawNode) UnmarshalJSON(data []byte) error {
+ // First unmarshal into a map to capture all fields
+ var raw map[string]interface{}
+ if err := json.Unmarshal(data, &raw); err != nil {
+ return err
+ }
+
+ // Create a temporary struct to unmarshal known fields
+ type tempNode rawNode
+ var temp tempNode
+ if err := json.Unmarshal(data, &temp); err != nil {
+ return err
+ }
+ *r = rawNode(temp)
+
+ // Store any fields not in the struct in Extra
+ r.Extra = make(map[string]interface{})
+ for k, v := range raw {
+ switch k {
+ case "@type", "wrap", "operands", "relation", "foundry", "key", "layer", "match", "value":
+ continue
+ default:
+ r.Extra[k] = v
+ }
+ }
+
+ return nil
+}
+
+// MarshalJSON implements the json.Marshaler interface
+func (r rawNode) MarshalJSON() ([]byte, error) {
+ // Create a map with all fields
+ raw := make(map[string]interface{})
+
+ // Add the known fields ("@type" is always emitted, the rest only when non-empty)
+ raw["@type"] = r.Type
+ if r.Wrap != nil {
+ raw["wrap"] = r.Wrap
+ }
+ if len(r.Operands) > 0 {
+ raw["operands"] = r.Operands
+ }
+ if r.Relation != "" {
+ raw["relation"] = r.Relation
+ }
+ if r.Foundry != "" {
+ raw["foundry"] = r.Foundry
+ }
+ if r.Key != "" {
+ raw["key"] = r.Key
+ }
+ if r.Layer != "" {
+ raw["layer"] = r.Layer
+ }
+ if r.Match != "" {
+ raw["match"] = r.Match
+ }
+ if r.Value != "" {
+ raw["value"] = r.Value
+ }
+
+ // Add any extra fields
+ for k, v := range r.Extra {
+ raw[k] = v
+ }
+
+ return json.Marshal(raw)
+}
+
+// ParseJSON parses a JSON string into our AST representation
+func ParseJSON(data []byte) (ast.Node, error) {
+ var raw rawNode
+ if err := json.Unmarshal(data, &raw); err != nil {
+ return nil, fmt.Errorf("failed to parse JSON: %w", err)
+ }
+ if raw.Type == "" {
+ return nil, fmt.Errorf("missing required field '@type' in JSON")
+ }
+ return parseNode(raw)
+}
+
+// parseNode converts a raw node into an AST node
+func parseNode(raw rawNode) (ast.Node, error) {
+ switch raw.Type {
+ case "koral:token":
+ if raw.Wrap == nil {
+ return nil, fmt.Errorf("token node of type '%s' missing required 'wrap' field", raw.Type)
+ }
+ var wrapRaw rawNode
+ if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
+ return nil, fmt.Errorf("failed to parse 'wrap' field in token node: %w", err)
+ }
+ wrap, err := parseNode(wrapRaw)
+ if err != nil {
+ return nil, fmt.Errorf("error parsing wrapped node: %w", err)
+ }
+ return &ast.Token{Wrap: wrap}, nil
+
+ case "koral:termGroup":
+ if len(raw.Operands) == 0 {
+ return nil, fmt.Errorf("term group must have at least one operand")
+ }
+
+ operands := make([]ast.Node, len(raw.Operands))
+ for i, op := range raw.Operands {
+ node, err := parseNode(op)
+ if err != nil {
+ return nil, fmt.Errorf("error parsing operand %d: %w", i+1, err)
+ }
+ operands[i] = node
+ }
+
+ if raw.Relation == "" {
+ return nil, fmt.Errorf("term group must have a 'relation' field")
+ }
+
+ relation := ast.AndRelation
+ if strings.HasSuffix(raw.Relation, "or") {
+ relation = ast.OrRelation
+ } else if !strings.HasSuffix(raw.Relation, "and") {
+ return nil, fmt.Errorf("invalid relation type '%s', must be one of: 'relation:and', 'relation:or'", raw.Relation)
+ }
+
+ return &ast.TermGroup{
+ Operands: operands,
+ Relation: relation,
+ }, nil
+
+ case "koral:term":
+ if raw.Key == "" {
+ return nil, fmt.Errorf("term must have a 'key' field")
+ }
+
+ match := ast.MatchEqual
+ if raw.Match != "" {
+ if strings.HasSuffix(raw.Match, "ne") {
+ match = ast.MatchNotEqual
+ } else if !strings.HasSuffix(raw.Match, "eq") {
+ return nil, fmt.Errorf("invalid match type '%s', must be one of: 'match:eq', 'match:ne'", raw.Match)
+ }
+ }
+
+ return &ast.Term{
+ Foundry: raw.Foundry,
+ Key: raw.Key,
+ Layer: raw.Layer,
+ Match: match,
+ Value: raw.Value,
+ }, nil
+
+ default:
+ // Store the original JSON content
+ rawContent, err := json.Marshal(raw)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal unknown node type '%s': %w", raw.Type, err)
+ }
+
+ // Create a catchall node
+ catchall := &ast.CatchallNode{
+ NodeType: raw.Type,
+ RawContent: rawContent,
+ }
+
+ // Parse wrap if present
+ if raw.Wrap != nil {
+ var wrapRaw rawNode
+ if err := json.Unmarshal(raw.Wrap, &wrapRaw); err != nil {
+ return nil, fmt.Errorf("failed to parse 'wrap' field in unknown node type '%s': %w", raw.Type, err)
+ }
+
+ // parseNode handles both known and unknown wrapped node types
+ wrap, err := parseNode(wrapRaw)
+ if err != nil {
+ return nil, fmt.Errorf("error parsing wrapped node in unknown node type '%s': %w", raw.Type, err)
+ }
+ catchall.Wrap = wrap
+ }
+
+ // Parse operands if present
+ if len(raw.Operands) > 0 {
+ operands := make([]ast.Node, len(raw.Operands))
+ for i, op := range raw.Operands {
+ // parseNode handles both known and unknown operand types
+ node, err := parseNode(op)
+ if err != nil {
+ return nil, fmt.Errorf("error parsing operand %d in unknown node type '%s': %w", i+1, raw.Type, err)
+ }
+ operands[i] = node
+ }
+ catchall.Operands = operands
+ }
+
+ return catchall, nil
+ }
+}
+
+// SerializeToJSON converts an AST node back to JSON
+func SerializeToJSON(node ast.Node) ([]byte, error) {
+ raw := nodeToRaw(node)
+ return json.MarshalIndent(raw, "", " ")
+}
+
+// nodeToRaw converts an AST node to a raw node for JSON serialization
+func nodeToRaw(node ast.Node) rawNode {
+ switch n := node.(type) {
+ case *ast.Token:
+ if n.Wrap == nil {
+ return rawNode{
+ Type: "koral:token",
+ }
+ }
+ return rawNode{
+ Type: "koral:token",
+ Wrap: json.RawMessage(nodeToRaw(n.Wrap).toJSON()),
+ }
+
+ case *ast.TermGroup:
+ operands := make([]rawNode, len(n.Operands))
+ for i, op := range n.Operands {
+ operands[i] = nodeToRaw(op)
+ }
+ return rawNode{
+ Type: "koral:termGroup",
+ Operands: operands,
+ Relation: "relation:" + string(n.Relation),
+ }
+
+ case *ast.Term:
+ raw := rawNode{
+ Type: "koral:term",
+ Key: n.Key,
+ Match: "match:" + string(n.Match),
+ }
+ if n.Foundry != "" {
+ raw.Foundry = n.Foundry
+ }
+ if n.Layer != "" {
+ raw.Layer = n.Layer
+ }
+ if n.Value != "" {
+ raw.Value = n.Value
+ }
+ return raw
+
+ case *ast.CatchallNode:
+ // For catchall nodes, use the stored raw content if available
+ if n.RawContent != nil {
+ var raw rawNode
+ if err := json.Unmarshal(n.RawContent, &raw); err == nil {
+ // Ensure we preserve the node type
+ raw.Type = n.NodeType
+
+ // Handle wrap and operands if present
+ if n.Wrap != nil {
+ raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
+ }
+ if len(n.Operands) > 0 {
+ operands := make([]rawNode, len(n.Operands))
+ for i, op := range n.Operands {
+ operands[i] = nodeToRaw(op)
+ }
+ raw.Operands = operands
+ }
+ return raw
+ }
+ }
+
+ // If RawContent is nil or invalid, create a minimal raw node
+ raw := rawNode{
+ Type: n.NodeType,
+ }
+ if n.Wrap != nil {
+ raw.Wrap = json.RawMessage(nodeToRaw(n.Wrap).toJSON())
+ }
+ if len(n.Operands) > 0 {
+ operands := make([]rawNode, len(n.Operands))
+ for i, op := range n.Operands {
+ operands[i] = nodeToRaw(op)
+ }
+ raw.Operands = operands
+ }
+ return raw
+ }
+
+ // Fallback for AST node types not handled above
+ return rawNode{
+ Type: "koral:unknown",
+ }
+}
+
+// toJSON converts a raw node to JSON bytes
+func (r rawNode) toJSON() []byte {
+ data, err := json.Marshal(r)
+ if err != nil {
+ // Return a minimal valid JSON object if marshaling fails
+ return []byte(`{"@type":"koral:unknown"}`)
+ }
+ return data
+}
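
A minimal round-trip sketch for the JSON parser above (not part of the diff; the main package and the sample document are illustrative, and the import path assumes the module path seen in the file's own imports):

package main

import (
    "fmt"
    "log"

    "github.com/KorAP/KoralPipe-TermMapper2/parser"
)

func main() {
    input := []byte(`{
        "@type": "koral:token",
        "wrap": {
            "@type": "koral:term",
            "foundry": "opennlp",
            "key": "DET",
            "layer": "p",
            "match": "match:eq"
        }
    }`)

    // JSON -> AST
    node, err := parser.ParseJSON(input)
    if err != nil {
        log.Fatal(err)
    }

    // AST -> indented JSON
    out, err := parser.SerializeToJSON(node)
    if err != nil {
        log.Fatal(err)
    }
    fmt.Println(string(out))
}
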
diff --git a/parser/parser_test.go b/parser/parser_test.go
new file mode 100644
index 0000000..93bb2e5
--- /dev/null
+++ b/parser/parser_test.go
@@ -0,0 +1,803 @@
+package parser
+
+import (
+ "encoding/json"
+ "testing"
+
+ "github.com/KorAP/KoralPipe-TermMapper2/ast"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// normalizeJSON normalizes JSON by parsing and re-marshaling it
+func normalizeJSON(t *testing.T, data json.RawMessage) json.RawMessage {
+ var v interface{}
+ err := json.Unmarshal(data, &v)
+ require.NoError(t, err)
+
+ // Convert to canonical form (sorted keys, no whitespace)
+ normalized, err := json.Marshal(v)
+ require.NoError(t, err)
+ return normalized
+}
+
+// compareJSON compares two JSON strings for equality, ignoring whitespace and field order
+func compareJSON(t *testing.T, expected, actual string) bool {
+ // Parse both JSON strings
+ var expectedObj, actualObj interface{}
+ err := json.Unmarshal([]byte(expected), &expectedObj)
+ require.NoError(t, err, "Failed to parse expected JSON")
+ err = json.Unmarshal([]byte(actual), &actualObj)
+ require.NoError(t, err, "Failed to parse actual JSON")
+
+ // Convert both to canonical form
+ expectedBytes, err := json.Marshal(expectedObj)
+ require.NoError(t, err)
+ actualBytes, err := json.Marshal(actualObj)
+ require.NoError(t, err)
+
+ // Compare the canonical forms
+ return string(expectedBytes) == string(actualBytes)
+}
+
+// compareNodes compares two AST nodes, normalizing JSON content in CatchallNodes
+func compareNodes(t *testing.T, expected, actual ast.Node) bool {
+ // If both nodes are CatchallNodes, normalize their JSON content before comparison
+ if expectedCatchall, ok := expected.(*ast.CatchallNode); ok {
+ if actualCatchall, ok := actual.(*ast.CatchallNode); ok {
+ // Compare NodeType
+ if !assert.Equal(t, expectedCatchall.NodeType, actualCatchall.NodeType) {
+ t.Logf("NodeType mismatch: expected '%s', got '%s'", expectedCatchall.NodeType, actualCatchall.NodeType)
+ return false
+ }
+
+ // Normalize and compare RawContent
+ if expectedCatchall.RawContent != nil && actualCatchall.RawContent != nil {
+ expectedNorm := normalizeJSON(t, expectedCatchall.RawContent)
+ actualNorm := normalizeJSON(t, actualCatchall.RawContent)
+ if !assert.Equal(t, string(expectedNorm), string(actualNorm)) {
+ t.Logf("RawContent mismatch:\nExpected: %s\nActual: %s", expectedNorm, actualNorm)
+ return false
+ }
+ } else if !assert.Equal(t, expectedCatchall.RawContent == nil, actualCatchall.RawContent == nil) {
+ t.Log("One node has RawContent while the other doesn't")
+ return false
+ }
+
+ // Compare Operands
+ if !assert.Equal(t, len(expectedCatchall.Operands), len(actualCatchall.Operands)) {
+ t.Logf("Operands length mismatch: expected %d, got %d", len(expectedCatchall.Operands), len(actualCatchall.Operands))
+ return false
+ }
+ for i := range expectedCatchall.Operands {
+ if !compareNodes(t, expectedCatchall.Operands[i], actualCatchall.Operands[i]) {
+ t.Logf("Operand %d mismatch", i)
+ return false
+ }
+ }
+
+ // Compare Wrap
+ if expectedCatchall.Wrap != nil || actualCatchall.Wrap != nil {
+ if !assert.Equal(t, expectedCatchall.Wrap != nil, actualCatchall.Wrap != nil) {
+ t.Log("One node has Wrap while the other doesn't")
+ return false
+ }
+ if expectedCatchall.Wrap != nil {
+ if !compareNodes(t, expectedCatchall.Wrap, actualCatchall.Wrap) {
+ t.Log("Wrap node mismatch")
+ return false
+ }
+ }
+ }
+
+ return true
+ }
+ }
+
+ // For Token nodes, compare their Wrap fields using compareNodes
+ if expectedToken, ok := expected.(*ast.Token); ok {
+ if actualToken, ok := actual.(*ast.Token); ok {
+ if expectedToken.Wrap == nil || actualToken.Wrap == nil {
+ return assert.Equal(t, expectedToken.Wrap == nil, actualToken.Wrap == nil)
+ }
+ return compareNodes(t, expectedToken.Wrap, actualToken.Wrap)
+ }
+ }
+
+ // For TermGroup nodes, compare relation and operands
+ if expectedGroup, ok := expected.(*ast.TermGroup); ok {
+ if actualGroup, ok := actual.(*ast.TermGroup); ok {
+ if !assert.Equal(t, expectedGroup.Relation, actualGroup.Relation) {
+ t.Logf("Relation mismatch: expected '%s', got '%s'", expectedGroup.Relation, actualGroup.Relation)
+ return false
+ }
+ if !assert.Equal(t, len(expectedGroup.Operands), len(actualGroup.Operands)) {
+ t.Logf("Operands length mismatch: expected %d, got %d", len(expectedGroup.Operands), len(actualGroup.Operands))
+ return false
+ }
+ for i := range expectedGroup.Operands {
+ if !compareNodes(t, expectedGroup.Operands[i], actualGroup.Operands[i]) {
+ t.Logf("Operand %d mismatch", i)
+ return false
+ }
+ }
+ return true
+ }
+ }
+
+ // For Term nodes, compare all fields
+ if expectedTerm, ok := expected.(*ast.Term); ok {
+ if actualTerm, ok := actual.(*ast.Term); ok {
+ equal := assert.Equal(t, expectedTerm.Foundry, actualTerm.Foundry) &&
+ assert.Equal(t, expectedTerm.Key, actualTerm.Key) &&
+ assert.Equal(t, expectedTerm.Layer, actualTerm.Layer) &&
+ assert.Equal(t, expectedTerm.Match, actualTerm.Match) &&
+ assert.Equal(t, expectedTerm.Value, actualTerm.Value)
+ if !equal {
+ t.Logf("Term mismatch:\nExpected: %+v\nActual: %+v", expectedTerm, actualTerm)
+ }
+ return equal
+ }
+ }
+
+ // For other node types or mismatched types, use regular equality comparison
+ equal := assert.Equal(t, expected, actual)
+ if !equal {
+ t.Logf("Node type mismatch:\nExpected type: %T\nActual type: %T", expected, actual)
+ }
+ return equal
+}
+
+func TestParseJSON(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ expected ast.Node
+ wantErr bool
+ }{
+ {
+ name: "Parse simple term",
+ input: `{
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ }`,
+ expected: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ wantErr: false,
+ },
+ {
+ name: "Parse term group with AND relation",
+ input: `{
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "m",
+ "match": "match:eq",
+ "value": "Pdt"
+ }
+ ],
+ "relation": "relation:and"
+ }`,
+ expected: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ wantErr: false,
+ },
+ {
+ name: "Parse token with wrapped term",
+ input: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`,
+ expected: &ast.Token{
+ Wrap: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ wantErr: false,
+ },
+ {
+ name: "Parse complex nested structure",
+ input: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "m",
+ "match": "match:eq",
+ "value": "Pdt"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "PronType",
+ "layer": "m",
+ "match": "match:ne",
+ "value": "Neg"
+ }
+ ],
+ "relation": "relation:or"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`,
+ expected: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PronType",
+ Layer: "m",
+ Match: ast.MatchNotEqual,
+ Value: "Neg",
+ },
+ },
+ Relation: ast.OrRelation,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ wantErr: false,
+ },
+ {
+ name: "Invalid JSON",
+ input: `{"invalid": json`,
+ wantErr: true,
+ },
+ {
+ name: "Empty JSON",
+ input: `{}`,
+ wantErr: true,
+ },
+ {
+ name: "Unknown node type",
+ input: `{
+ "@type": "koral:unknown",
+ "key": "value"
+ }`,
+ expected: &ast.CatchallNode{
+ NodeType: "koral:unknown",
+ RawContent: json.RawMessage(`{"@type":"koral:unknown","key":"value"}`),
+ },
+ wantErr: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result, err := ParseJSON([]byte(tt.input))
+ if tt.wantErr {
+ assert.Error(t, err)
+ return
+ }
+
+ require.NoError(t, err)
+ assert.Equal(t, tt.expected, result)
+ })
+ }
+}
+
+func TestSerializeToJSON(t *testing.T) {
+ tests := []struct {
+ name string
+ input ast.Node
+ expected string
+ wantErr bool
+ }{
+ {
+ name: "Serialize simple term",
+ input: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ expected: `{
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+}`,
+ wantErr: false,
+ },
+ {
+ name: "Serialize term group",
+ input: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ expected: `{
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "m",
+ "match": "match:eq",
+ "value": "Pdt"
+ }
+ ],
+ "relation": "relation:and"
+}`,
+ wantErr: false,
+ },
+ {
+ name: "Serialize unknown node type",
+ input: &ast.CatchallNode{
+ NodeType: "koral:unknown",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:unknown",
+ "key": "value"
+}`),
+ },
+ expected: `{
+ "@type": "koral:unknown",
+ "key": "value"
+}`,
+ wantErr: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result, err := SerializeToJSON(tt.input)
+ if tt.wantErr {
+ assert.Error(t, err)
+ return
+ }
+
+ require.NoError(t, err)
+ // Compare JSON objects instead of raw strings to avoid whitespace issues
+ var expected, actual any
+ err = json.Unmarshal([]byte(tt.expected), &expected)
+ require.NoError(t, err)
+ err = json.Unmarshal(result, &actual)
+ require.NoError(t, err)
+ assert.Equal(t, expected, actual)
+ })
+ }
+}
+
+func TestRoundTrip(t *testing.T) {
+ // Test that parsing and then serializing produces equivalent JSON
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "m",
+ "match": "match:eq",
+ "value": "Pdt"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`
+
+ // Parse JSON to AST
+ node, err := ParseJSON([]byte(input))
+ require.NoError(t, err)
+
+ // Serialize AST back to JSON
+ output, err := SerializeToJSON(node)
+ require.NoError(t, err)
+
+ // Compare JSON objects
+ var expected, actual interface{}
+ err = json.Unmarshal([]byte(input), &expected)
+ require.NoError(t, err)
+ err = json.Unmarshal(output, &actual)
+ require.NoError(t, err)
+ assert.Equal(t, expected, actual)
+}
+
+func TestRoundTripUnknownType(t *testing.T) {
+ // Test that parsing and then serializing an unknown node type preserves the structure
+ input := `{
+ "@type": "koral:unknown",
+ "key": "value",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "m",
+ "match": "match:eq",
+ "value": "Pdt"
+ }
+ ]
+ }`
+
+ // Parse JSON to AST
+ node, err := ParseJSON([]byte(input))
+ require.NoError(t, err)
+
+ // Check that it's a CatchallNode
+ catchall, ok := node.(*ast.CatchallNode)
+ require.True(t, ok)
+ assert.Equal(t, "koral:unknown", catchall.NodeType)
+
+ // Check that wrap and operands were parsed
+ require.NotNil(t, catchall.Wrap)
+ require.Len(t, catchall.Operands, 1)
+
+ // Serialize AST back to JSON
+ output, err := SerializeToJSON(node)
+ require.NoError(t, err)
+
+ // Compare JSON objects
+ var expected, actual interface{}
+ err = json.Unmarshal([]byte(input), &expected)
+ require.NoError(t, err)
+ err = json.Unmarshal(output, &actual)
+ require.NoError(t, err)
+ assert.Equal(t, expected, actual)
+}
+
+func TestParseJSONEdgeCases(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ expected ast.Node
+ wantErr bool
+ }{
+ {
+ name: "Unknown node type",
+ input: `{
+ "@type": "koral:unknown",
+ "customField": "value",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }`,
+ expected: &ast.CatchallNode{
+ NodeType: "koral:unknown",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:unknown",
+ "customField": "value",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }`),
+ Wrap: &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ },
+ wantErr: false,
+ },
+ {
+ name: "Unknown node with operands",
+ input: `{
+ "@type": "koral:unknown",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ },
+ {
+ "@type": "koral:term",
+ "key": "NOUN"
+ }
+ ]
+ }`,
+ expected: &ast.CatchallNode{
+ NodeType: "koral:unknown",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:unknown",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ },
+ {
+ "@type": "koral:term",
+ "key": "NOUN"
+ }
+ ]
+ }`),
+ Operands: []ast.Node{
+ &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Key: "NOUN",
+ Match: ast.MatchEqual,
+ },
+ },
+ },
+ wantErr: false,
+ },
+ {
+ name: "Deeply nested unknown nodes",
+ input: `{
+ "@type": "koral:outer",
+ "wrap": {
+ "@type": "koral:middle",
+ "wrap": {
+ "@type": "koral:inner",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }
+ }
+ }`,
+ expected: &ast.CatchallNode{
+ NodeType: "koral:outer",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:outer",
+ "wrap": {
+ "@type": "koral:middle",
+ "wrap": {
+ "@type": "koral:inner",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }
+ }
+ }`),
+ Wrap: &ast.CatchallNode{
+ NodeType: "koral:middle",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:middle",
+ "wrap": {
+ "@type": "koral:inner",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }
+ }`),
+ Wrap: &ast.CatchallNode{
+ NodeType: "koral:inner",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:inner",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ }`),
+ Wrap: &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ },
+ },
+ },
+ wantErr: false,
+ },
+ {
+ name: "Mixed known and unknown nodes",
+ input: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:custom",
+ "customField": "value",
+ "operands": [
+ {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ ]
+ }
+ }`,
+ expected: &ast.Token{
+ Wrap: &ast.CatchallNode{
+ NodeType: "koral:custom",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:custom",
+ "customField": "value",
+ "operands": [
+ {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ ]
+ }`),
+ Operands: []ast.Node{
+ &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ },
+ },
+ wantErr: false,
+ },
+ {
+ name: "Invalid match type",
+ input: `{
+ "@type": "koral:term",
+ "key": "DET",
+ "match": "match:invalid"
+ }`,
+ wantErr: true,
+ },
+ {
+ name: "Invalid relation type",
+ input: `{
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "key": "DET"
+ }
+ ],
+ "relation": "relation:invalid"
+ }`,
+ wantErr: true,
+ },
+ {
+ name: "Empty operands in term group",
+ input: `{
+ "@type": "koral:termGroup",
+ "operands": [],
+ "relation": "relation:and"
+ }`,
+ wantErr: true,
+ },
+ {
+ name: "Null values in term",
+ input: `{
+ "@type": "koral:term",
+ "foundry": null,
+ "key": "DET",
+ "layer": null,
+ "match": null,
+ "value": null
+ }`,
+ expected: &ast.Term{
+ Key: "DET",
+ Match: ast.MatchEqual,
+ },
+ wantErr: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result, err := ParseJSON([]byte(tt.input))
+ if tt.wantErr {
+ assert.Error(t, err)
+ return
+ }
+ require.NoError(t, err)
+ compareNodes(t, tt.expected, result)
+ })
+ }
+}
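
Assuming the standard Go module layout implied by the import paths, the new package and its tests can be run with:

    go test ./parser/...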