Test new approach with AST (AI assisted)
diff --git a/pkg/ast/ast.go b/pkg/ast/ast.go
new file mode 100644
index 0000000..aac7016
--- /dev/null
+++ b/pkg/ast/ast.go
@@ -0,0 +1,73 @@
+package ast
+
+// NodeType names the concrete kind of an AST node, as reported by Node.Type.
+type NodeType string
+
+const (
+ TokenNode NodeType = "token" // returned by (*Token).Type
+ TermGroupNode NodeType = "termGroup" // returned by (*TermGroup).Type
+ TermNode NodeType = "term" // returned by (*Term).Type
+)
+
+// RelationType names how the operands of a TermGroup are combined.
+type RelationType string
+
+const (
+ AndRelation RelationType = "and" // the operands are combined with AND
+ OrRelation RelationType = "or" // the operands are combined with OR
+)
+
+// MatchType names the comparison requested by a Term.
+type MatchType string
+
+const (
+ MatchEqual MatchType = "eq" // equality match
+ MatchNotEqual MatchType = "ne" // negated (not-equal) match
+)
+
+// Node is the interface shared by all AST nodes; *Token, *TermGroup and *Term implement it.
+type Node interface {
+ Type() NodeType // reports which concrete kind of node this is
+}
+
+// Token is an AST node that wraps a single child node.
+type Token struct {
+ Wrap Node `json:"wrap"` // the wrapped child node
+}
+
+// Type implements Node. A Token always reports TokenNode, regardless of what it
+// wraps.
+func (t *Token) Type() NodeType { return TokenNode }
+
+// TermGroup is an AST node that combines several operand nodes under one relation.
+type TermGroup struct {
+ Operands []Node `json:"operands"` // the child nodes being combined
+ Relation RelationType `json:"relation"` // AndRelation or OrRelation
+}
+
+// Type implements Node. A TermGroup always reports TermGroupNode, whichever
+// relation it carries.
+func (tg *TermGroup) Type() NodeType { return TermGroupNode }
+
+// Term is a leaf node: it has no child nodes, only string-valued matching criteria.
+type Term struct {
+ Foundry string `json:"foundry"`
+ Key string `json:"key"`
+ Layer string `json:"layer"`
+ Match MatchType `json:"match"` // MatchEqual or MatchNotEqual
+ Value string `json:"value,omitempty"` // optional; left out of the JSON when empty
+}
+
+// Type implements Node. A Term always reports TermNode, whatever its field
+// values are.
+func (t *Term) Type() NodeType { return TermNode }
+
+// Pattern holds the node tree that is searched for in an AST.
+type Pattern struct {
+ Root Node // root of the pattern tree
+}
+
+// Replacement holds the node tree that is substituted where a Pattern matches.
+type Replacement struct {
+ Root Node // root of the replacement tree
+}
diff --git a/pkg/ast/ast_test.go b/pkg/ast/ast_test.go
new file mode 100644
index 0000000..1a01d0b
--- /dev/null
+++ b/pkg/ast/ast_test.go
@@ -0,0 +1,190 @@
+package ast
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestNodeTypes(t *testing.T) {
+ tests := []struct { // one case per concrete Node implementation
+ name string
+ node Node
+ expected NodeType
+ }{
+ {
+ name: "Token node returns correct type",
+ node: &Token{Wrap: &Term{}},
+ expected: TokenNode,
+ },
+ {
+ name: "TermGroup node returns correct type",
+ node: &TermGroup{
+ Operands: []Node{&Term{}},
+ Relation: AndRelation,
+ },
+ expected: TermGroupNode,
+ },
+ {
+ name: "Term node returns correct type",
+ node: &Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: MatchEqual,
+ },
+ expected: TermNode,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.Equal(t, tt.expected, tt.node.Type()) // Type is called through the Node interface
+ })
+ }
+}
+
+func TestTermGroupConstruction(t *testing.T) {
+ term1 := &Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: MatchEqual,
+ }
+
+ term2 := &Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: MatchEqual,
+ Value: "Pdt",
+ }
+
+ group := &TermGroup{
+ Operands: []Node{term1, term2},
+ Relation: AndRelation,
+ }
+
+ assert.Len(t, group.Operands, 2)
+ assert.Equal(t, AndRelation, group.Relation)
+ assert.Equal(t, TermGroupNode, group.Type())
+
+ // The operands must be stored in the order in which they were supplied
+ assert.Equal(t, term1, group.Operands[0])
+ assert.Equal(t, term2, group.Operands[1])
+}
+
+func TestTokenConstruction(t *testing.T) {
+ term := &Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: MatchEqual,
+ }
+
+ token := &Token{Wrap: term} // the term becomes the token's only child
+
+ assert.Equal(t, TokenNode, token.Type()) // the wrapper reports its own kind, not the child's
+ assert.Equal(t, term, token.Wrap)
+}
+
+func TestTermConstruction(t *testing.T) {
+ tests := []struct {
+ name string
+ term *Term
+ foundry string
+ key string
+ layer string
+ match MatchType
+ hasValue bool // selects which Value assertion runs in the loop below
+ value string // only compared when hasValue is true
+ }{
+ {
+ name: "Term without value",
+ term: &Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: MatchEqual,
+ },
+ foundry: "opennlp",
+ key: "DET",
+ layer: "p",
+ match: MatchEqual,
+ hasValue: false,
+ },
+ {
+ name: "Term with value",
+ term: &Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: MatchEqual,
+ Value: "Pdt",
+ },
+ foundry: "opennlp",
+ key: "AdjType",
+ layer: "m",
+ match: MatchEqual,
+ hasValue: true,
+ value: "Pdt",
+ },
+ {
+ name: "Term with not equal match",
+ term: &Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: MatchNotEqual,
+ },
+ foundry: "opennlp",
+ key: "DET",
+ layer: "p",
+ match: MatchNotEqual,
+ hasValue: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.Equal(t, TermNode, tt.term.Type())
+ assert.Equal(t, tt.foundry, tt.term.Foundry)
+ assert.Equal(t, tt.key, tt.term.Key)
+ assert.Equal(t, tt.layer, tt.term.Layer)
+ assert.Equal(t, tt.match, tt.term.Match)
+ if tt.hasValue {
+ assert.Equal(t, tt.value, tt.term.Value)
+ } else {
+ assert.Empty(t, tt.term.Value) // a term built without Value must keep the zero value
+ }
+ })
+ }
+}
+
+func TestPatternAndReplacement(t *testing.T) {
+ // Build a pattern whose root is a single term
+ patternTerm := &Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: MatchEqual,
+ }
+ pattern := Pattern{Root: patternTerm}
+
+ // Build a replacement whose root is a single term with a different key
+ replacementTerm := &Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: MatchEqual,
+ }
+ replacement := Replacement{Root: replacementTerm}
+
+ // The pattern must expose the term it was built with
+ assert.NotNil(t, pattern.Root)
+ assert.Equal(t, patternTerm, pattern.Root)
+
+ // The replacement must expose the term it was built with
+ assert.NotNil(t, replacement.Root)
+ assert.Equal(t, replacementTerm, replacement.Root)
+}
diff --git a/pkg/matcher/matcher.go b/pkg/matcher/matcher.go
new file mode 100644
index 0000000..46b4cc4
--- /dev/null
+++ b/pkg/matcher/matcher.go
@@ -0,0 +1,176 @@
+package matcher
+
+import (
+ "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
+)
+
+// Matcher pairs a pattern with the replacement that Replace substitutes for it.
+type Matcher struct {
+ pattern ast.Pattern // what Match and Replace look for
+ replacement ast.Replacement // cloned into the tree wherever the pattern matches
+}
+
+// NewMatcher returns a Matcher that looks for pattern and substitutes replacement.
+func NewMatcher(pattern ast.Pattern, replacement ast.Replacement) *Matcher {
+	m := new(Matcher)
+	m.pattern = pattern
+	m.replacement = replacement
+	return m
+}
+
+// Match reports whether node satisfies the pattern root; a nil pattern root matches every node.
+func (m *Matcher) Match(node ast.Node) bool {
+ return m.matchNode(node, m.pattern.Root)
+}
+
+// Replace returns node with each outermost subtree that matches the pattern swapped
+// for a fresh clone of the replacement. Non-matching Token and TermGroup nodes are
+// updated in place, so the caller's tree is modified as well as returned.
+func (m *Matcher) Replace(node ast.Node) ast.Node {
+	if m.Match(node) {
+		return m.cloneNode(m.replacement.Root)
+	}
+
+	switch parent := node.(type) {
+	case *ast.Token:
+		parent.Wrap = m.Replace(parent.Wrap)
+
+	case *ast.TermGroup:
+		// Collect into a new slice so the old backing array is left untouched.
+		rewritten := make([]ast.Node, 0, len(parent.Operands))
+		for _, operand := range parent.Operands {
+			rewritten = append(rewritten, m.Replace(operand))
+		}
+		parent.Operands = rewritten
+	}
+
+	return node
+}
+
+// matchNode reports whether node satisfies pattern. A nil pattern matches any node; a
+// nil node matches only a nil pattern. An empty pattern Value accepts any term Value.
+func (m *Matcher) matchNode(node, pattern ast.Node) bool {
+	if pattern == nil {
+		return true
+	}
+	if node == nil {
+		return false
+	}
+
+	switch p := pattern.(type) {
+	case *ast.Token:
+		if t, ok := node.(*ast.Token); ok {
+			return m.matchNode(t.Wrap, p.Wrap)
+		}
+
+	case *ast.TermGroup:
+		// A single term satisfies an OR pattern when it matches any alternative.
+		if t, ok := node.(*ast.Term); ok && p.Relation == ast.OrRelation {
+			for _, op := range p.Operands {
+				if m.matchNode(t, op) {
+					return true
+				}
+			}
+			return false
+		}
+
+		// Group against group: the relations have to agree.
+		if t, ok := node.(*ast.TermGroup); ok {
+			if t.Relation != p.Relation {
+				return false
+			}
+			if p.Relation == ast.OrRelation {
+				// For OR, one matching pattern/node operand pair is enough.
+				for _, pOp := range p.Operands {
+					for _, tOp := range t.Operands {
+						if m.matchNode(tOp, pOp) {
+							return true
+						}
+					}
+				}
+				return false
+			}
+
+			// For AND, each pattern operand needs a node operand of its own; extras are fine.
+			if len(t.Operands) < len(p.Operands) {
+				return false
+			}
+			return m.matchOperands(t.Operands, p.Operands, make([]bool, len(t.Operands)))
+		}
+
+	case *ast.Term:
+		// An OR group satisfies a term pattern when any of its operands does.
+		if t, ok := node.(*ast.TermGroup); ok && t.Relation == ast.OrRelation {
+			for _, op := range t.Operands {
+				if m.matchNode(op, p) {
+					return true
+				}
+			}
+			return false
+		}
+
+		if t, ok := node.(*ast.Term); ok {
+			return t.Foundry == p.Foundry &&
+				t.Key == p.Key &&
+				t.Layer == p.Layer &&
+				t.Match == p.Match &&
+				(p.Value == "" || t.Value == p.Value)
+		}
+	}
+	return false
+}
+
+// matchOperands reports whether every entry of patterns can be paired with a distinct,
+// still unused entry of nodes, backtracking when a pairing turns out to be a dead end.
+func (m *Matcher) matchOperands(nodes, patterns []ast.Node, used []bool) bool {
+	if len(patterns) == 0 {
+		return true
+	}
+	for i, n := range nodes {
+		if used[i] || !m.matchNode(n, patterns[0]) {
+			continue
+		}
+		used[i] = true
+		if m.matchOperands(nodes, patterns[1:], used) {
+			return true
+		}
+		used[i] = false
+	}
+	return false
+}
+
+// cloneNode returns a deep copy of node. Replace uses it so that every match receives
+// its own copy of the replacement tree instead of sharing one.
+//
+// A nil node, and any node that is not a *ast.Token, *ast.TermGroup or *ast.Term,
+// yields nil.
+func (m *Matcher) cloneNode(node ast.Node) ast.Node {
+	switch src := node.(type) {
+	case *ast.Token:
+		// Copy the wrapper and, recursively, whatever it wraps.
+		dup := new(ast.Token)
+		dup.Wrap = m.cloneNode(src.Wrap)
+		return dup
+
+	case *ast.TermGroup:
+		// Copy the group and clone each operand into a slice of its own.
+		dup := &ast.TermGroup{
+			Operands: make([]ast.Node, 0, len(src.Operands)),
+			Relation: src.Relation,
+		}
+		for _, operand := range src.Operands {
+			dup.Operands = append(dup.Operands, m.cloneNode(operand))
+		}
+		return dup
+
+	case *ast.Term:
+		// Every Term field is a plain string value, so copying the struct
+		// copies the whole term.
+		dup := *src
+		return &dup
+	}
+
+	// A nil interface value matches none of the cases above and ends up here,
+	// as does any node type this function does not know how to copy.
+	return nil
+}
diff --git a/pkg/matcher/matcher_test.go b/pkg/matcher/matcher_test.go
new file mode 100644
index 0000000..d058a7c
--- /dev/null
+++ b/pkg/matcher/matcher_test.go
@@ -0,0 +1,504 @@
+package matcher
+
+import (
+ "testing"
+
+ "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestMatchSimplePattern(t *testing.T) {
+ // The pattern is a single term with key DET and no value
+ pattern := ast.Pattern{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ // NewMatcher needs a replacement, but this test only calls Match
+ replacement := ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ m := NewMatcher(pattern, replacement)
+
+ tests := []struct {
+ name string
+ input ast.Node
+ expected bool
+ }{
+ {
+ name: "Exact match",
+ input: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ expected: true,
+ },
+ {
+ name: "Different key",
+ input: &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ expected: false,
+ },
+ {
+ name: "Different foundry",
+ input: &ast.Term{
+ Foundry: "different",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ expected: false,
+ },
+ {
+ name: "Different match type",
+ input: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchNotEqual,
+ },
+ expected: false,
+ },
+ {
+ name: "Wrong node type", // a term pattern is not looked up inside a Token
+ input: &ast.Token{
+ Wrap: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ expected: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result := m.Match(tt.input)
+ assert.Equal(t, tt.expected, result)
+ })
+ }
+}
+
+func TestMatchComplexPattern(t *testing.T) {
+ // The pattern is a token wrapping: DET AND (AdjType=Pdt OR PronType=Ind)
+ pattern := ast.Pattern{
+ Root: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PronType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Ind",
+ },
+ },
+ Relation: ast.OrRelation,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ }
+
+ replacement := ast.Replacement{
+ Root: &ast.Token{
+ Wrap: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ }
+
+ m := NewMatcher(pattern, replacement)
+
+ tests := []struct {
+ name string
+ input ast.Node
+ expected bool
+ }{
+ {
+ name: "Match with AdjType=Pdt", // a single term satisfies the OR sub-pattern
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: true,
+ },
+ {
+ name: "Match with PronType=Ind", // the other OR alternative
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PronType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Ind",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: true,
+ },
+ {
+ name: "No match - missing DET",
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: false,
+ },
+ {
+ name: "No match - wrong value",
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Wrong",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result := m.Match(tt.input)
+ assert.Equal(t, tt.expected, result)
+ })
+ }
+}
+
+func TestReplace(t *testing.T) {
+ // Pattern: DET AND AdjType=Pdt; replacement: a single COMBINED_DET term
+ pattern := ast.Pattern{
+ Root: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ }
+
+ replacement := ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ m := NewMatcher(pattern, replacement)
+
+ tests := []struct {
+ name string
+ input ast.Node
+ expected ast.Node
+ }{
+ {
+ name: "Replace matching pattern", // the root itself matches and is swapped out
+ input: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ expected: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ {
+ name: "No replacement for non-matching pattern",
+ input: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ expected: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ {
+ name: "Replace in nested structure", // only the inner group matches; its parents are kept
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result := m.Replace(tt.input)
+ assert.Equal(t, tt.expected, result)
+ })
+ }
+}
+
+func TestMatchNodeOrder(t *testing.T) {
+ // An AND pattern must match regardless of the order of the node's operands
+ pattern := ast.Pattern{
+ Root: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ }
+
+ replacement := ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ m := NewMatcher(pattern, replacement)
+
+ // input1 lists the operands in pattern order, input2 lists them reversed
+ input1 := &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ }
+
+ input2 := &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ Relation: ast.AndRelation,
+ }
+
+ assert.True(t, m.Match(input1), "Should match with original order")
+ assert.True(t, m.Match(input2), "Should match with reversed order")
+}
diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go
new file mode 100644
index 0000000..be11da9
--- /dev/null
+++ b/pkg/parser/parser.go
@@ -0,0 +1,134 @@
+package parser
+
+import (
+ "encoding/json"
+ "fmt"
+ "strings"
+
+ "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
+)
+
+// rawNode mirrors the JSON shape of a node; one struct carries the fields of all three node kinds.
+type rawNode struct {
+ Type string `json:"@type"` // "koral:token", "koral:termGroup" or "koral:term"
+ Wrap json.RawMessage `json:"wrap,omitempty"` // token only; kept as raw bytes until parseNode decodes it
+ Operands []rawNode `json:"operands,omitempty"` // termGroup only
+ Relation string `json:"relation,omitempty"` // termGroup only, e.g. "relation:and"
+ Foundry string `json:"foundry,omitempty"` // term only
+ Key string `json:"key,omitempty"` // term only
+ Layer string `json:"layer,omitempty"` // term only
+ Match string `json:"match,omitempty"` // term only, e.g. "match:eq"
+ Value string `json:"value,omitempty"` // term only
+}
+
+// ParseJSON parses a JSON string into our AST representation
+func ParseJSON(data []byte) (ast.Node, error) {
+ var raw rawNode
+ if err := json.Unmarshal(data, &raw); err != nil {
+ return nil, fmt.Errorf("failed to parse JSON: %w", err)
+ }
+ return parseNode(raw)
+}
+
+// parseNode converts a decoded rawNode into the matching AST node, recursing into
+// wrap and operands. An "@type" other than koral:token, koral:termGroup or
+// koral:term is an error.
+func parseNode(raw rawNode) (ast.Node, error) {
+	switch raw.Type {
+	case "koral:term":
+		term := &ast.Term{
+			Foundry: raw.Foundry,
+			Key:     raw.Key,
+			Layer:   raw.Layer,
+			Match:   ast.MatchEqual,
+			Value:   raw.Value,
+		}
+		// Only a match string ending in "ne" selects not-equal; anything else,
+		// including a missing field, keeps the equality default.
+		if strings.HasSuffix(raw.Match, "ne") {
+			term.Match = ast.MatchNotEqual
+		}
+		return term, nil
+
+	case "koral:termGroup":
+		group := &ast.TermGroup{
+			Operands: make([]ast.Node, len(raw.Operands)),
+			Relation: ast.AndRelation,
+		}
+		for i := range raw.Operands {
+			child, err := parseNode(raw.Operands[i])
+			if err != nil {
+				return nil, err
+			}
+			group.Operands[i] = child
+		}
+		// Likewise, only a relation ending in "or" overrides the AND default.
+		if strings.HasSuffix(raw.Relation, "or") {
+			group.Relation = ast.OrRelation
+		}
+		return group, nil
+
+	case "koral:token":
+		if raw.Wrap == nil {
+			return nil, fmt.Errorf("token node missing wrap field")
+		}
+		// wrap was kept as raw bytes by the first decoding pass; decode it now.
+		var inner rawNode
+		if err := json.Unmarshal(raw.Wrap, &inner); err != nil {
+			return nil, fmt.Errorf("failed to parse wrap: %w", err)
+		}
+		wrapped, err := parseNode(inner)
+		if err != nil {
+			return nil, err
+		}
+		return &ast.Token{Wrap: wrapped}, nil
+	}
+	return nil, fmt.Errorf("unknown node type: %s", raw.Type)
+}
+
+// SerializeToJSON renders node as indented JSON. The conversion done by nodeToRaw has
+// no error path, so any error returned here originates from json.MarshalIndent.
+func SerializeToJSON(node ast.Node) ([]byte, error) {
+	return json.MarshalIndent(nodeToRaw(node), "", " ")
+}
+
+// nodeToRaw builds the JSON-facing rawNode for an AST node, recursing into its
+// children. A nil node or an unrecognized node type yields the zero rawNode, which
+// serializes with an empty "@type".
+func nodeToRaw(node ast.Node) rawNode {
+	var out rawNode
+
+	switch n := node.(type) {
+	case *ast.Term:
+		out.Type = "koral:term"
+		out.Foundry = n.Foundry
+		out.Key = n.Key
+		out.Layer = n.Layer
+		// The JSON form carries a "match:" prefix that the AST constant omits.
+		out.Match = "match:" + string(n.Match)
+		out.Value = n.Value
+
+	case *ast.TermGroup:
+		out.Type = "koral:termGroup"
+		// Same for the relation, which is written with a "relation:" prefix.
+		out.Relation = "relation:" + string(n.Relation)
+		out.Operands = make([]rawNode, 0, len(n.Operands))
+		for _, operand := range n.Operands {
+			out.Operands = append(out.Operands, nodeToRaw(operand))
+		}
+
+	case *ast.Token:
+		out.Type = "koral:token"
+		// Wrap is a json.RawMessage, so the child is encoded to bytes right here.
+		child := nodeToRaw(n.Wrap)
+		out.Wrap = json.RawMessage(child.toJSON())
+	}
+
+	return out
+}
+
+// toJSON encodes r as compact JSON; nodeToRaw uses the result as a parent node's Wrap.
+func (r rawNode) toJSON() []byte {
+	encoded, _ := json.Marshal(r) // error dropped: this signature has no way to report it
+	return encoded
+}
diff --git a/pkg/parser/parser_test.go b/pkg/parser/parser_test.go
new file mode 100644
index 0000000..464c497
--- /dev/null
+++ b/pkg/parser/parser_test.go
@@ -0,0 +1,340 @@
+package parser
+
+import (
+ "encoding/json"
+ "testing"
+
+ "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestParseJSON(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ expected ast.Node // ignored when wantErr is true
+ wantErr bool // when true, only the presence of an error is checked
+ }{
+ {
+ name: "Parse simple term",
+ input: `{
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ }`,
+ expected: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ wantErr: false,
+ },
+ {
+ name: "Parse term group with AND relation",
+ input: `{
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "m",
+ "match": "match:eq",
+ "value": "Pdt"
+ }
+ ],
+ "relation": "relation:and"
+ }`,
+ expected: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ wantErr: false,
+ },
+ {
+ name: "Parse token with wrapped term",
+ input: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`,
+ expected: &ast.Token{
+ Wrap: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ wantErr: false,
+ },
+ {
+ name: "Parse complex nested structure",
+ input: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "m",
+ "match": "match:eq",
+ "value": "Pdt"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "PronType",
+ "layer": "m",
+ "match": "match:ne",
+ "value": "Neg"
+ }
+ ],
+ "relation": "relation:or"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`,
+ expected: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PronType",
+ Layer: "m",
+ Match: ast.MatchNotEqual,
+ Value: "Neg",
+ },
+ },
+ Relation: ast.OrRelation,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ wantErr: false,
+ },
+ {
+ name: "Invalid JSON",
+ input: `{"invalid": json`,
+ wantErr: true,
+ },
+ {
+ name: "Empty JSON",
+ input: `{}`, // no "@type", so parseNode reports an unknown node type
+ wantErr: true,
+ },
+ {
+ name: "Invalid node type",
+ input: `{
+ "@type": "koral:unknown",
+ "key": "value"
+ }`,
+ wantErr: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result, err := ParseJSON([]byte(tt.input))
+ if tt.wantErr {
+ assert.Error(t, err)
+ return
+ }
+
+ require.NoError(t, err)
+ assert.Equal(t, tt.expected, result)
+ })
+ }
+}
+
+func TestSerializeToJSON(t *testing.T) {
+ tests := []struct {
+ name string
+ input ast.Node
+ expected string // JSON text; compared after decoding, not byte for byte
+ wantErr bool
+ }{
+ {
+ name: "Serialize simple term",
+ input: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ expected: `{
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+}`,
+ wantErr: false,
+ },
+ {
+ name: "Serialize term group",
+ input: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ expected: `{
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "m",
+ "match": "match:eq",
+ "value": "Pdt"
+ }
+ ],
+ "relation": "relation:and"
+}`,
+ wantErr: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result, err := SerializeToJSON(tt.input)
+ if tt.wantErr {
+ assert.Error(t, err)
+ return
+ }
+
+ require.NoError(t, err)
+ // Decode both sides and compare the values, so whitespace and indentation do not matter
+ var expected, actual interface{}
+ err = json.Unmarshal([]byte(tt.expected), &expected)
+ require.NoError(t, err)
+ err = json.Unmarshal(result, &actual)
+ require.NoError(t, err)
+ assert.Equal(t, expected, actual)
+ })
+ }
+}
+
+func TestRoundTrip(t *testing.T) {
+ // Parsing a document and serializing the result must give back equivalent JSON
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "m",
+ "match": "match:eq",
+ "value": "Pdt"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`
+
+ // Step 1: JSON text to AST
+ node, err := ParseJSON([]byte(input))
+ require.NoError(t, err)
+
+ // Step 2: AST back to JSON text
+ output, err := SerializeToJSON(node)
+ require.NoError(t, err)
+
+ // Step 3: decode both texts and compare the values, ignoring formatting
+ var expected, actual interface{}
+ err = json.Unmarshal([]byte(input), &expected)
+ require.NoError(t, err)
+ err = json.Unmarshal(output, &actual)
+ require.NoError(t, err)
+ assert.Equal(t, expected, actual)
+}