Test new approach with AST (AI assisted)
diff --git a/pkg/matcher/matcher.go b/pkg/matcher/matcher.go
new file mode 100644
index 0000000..46b4cc4
--- /dev/null
+++ b/pkg/matcher/matcher.go
@@ -0,0 +1,176 @@
+package matcher
+
+import (
+ "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
+)
+
+// Matcher handles pattern matching and replacement in the AST
+type Matcher struct {
+ pattern ast.Pattern
+ replacement ast.Replacement
+}
+
+// NewMatcher creates a new Matcher with the given pattern and replacement
+func NewMatcher(pattern ast.Pattern, replacement ast.Replacement) *Matcher {
+ return &Matcher{
+ pattern: pattern,
+ replacement: replacement,
+ }
+}
+
+// Match checks if the given node matches the pattern
+func (m *Matcher) Match(node ast.Node) bool {
+ return m.matchNode(node, m.pattern.Root)
+}
+
+// Replace replaces all occurrences of the pattern in the given node with the replacement
+func (m *Matcher) Replace(node ast.Node) ast.Node {
+ if m.Match(node) {
+ return m.cloneNode(m.replacement.Root)
+ }
+
+ switch n := node.(type) {
+ case *ast.Token:
+ n.Wrap = m.Replace(n.Wrap)
+ return n
+
+ case *ast.TermGroup:
+ newOperands := make([]ast.Node, len(n.Operands))
+ for i, op := range n.Operands {
+ newOperands[i] = m.Replace(op)
+ }
+ n.Operands = newOperands
+ return n
+
+ default:
+ return node
+ }
+}
+
+// matchNode recursively checks if two nodes match
+func (m *Matcher) matchNode(node, pattern ast.Node) bool {
+ if pattern == nil {
+ return true
+ }
+ if node == nil {
+ return false
+ }
+
+ switch p := pattern.(type) {
+ case *ast.Token:
+ if t, ok := node.(*ast.Token); ok {
+ return m.matchNode(t.Wrap, p.Wrap)
+ }
+
+ case *ast.TermGroup:
+ // If we're matching against a term, try to match it against any operand
+ if t, ok := node.(*ast.Term); ok && p.Relation == ast.OrRelation {
+ for _, op := range p.Operands {
+ if m.matchNode(t, op) {
+ return true
+ }
+ }
+ return false
+ }
+
+ // If we're matching against a term group
+ if t, ok := node.(*ast.TermGroup); ok {
+ if t.Relation != p.Relation {
+ return false
+ }
+
+ if p.Relation == ast.OrRelation {
+ // For OR relation, at least one operand must match
+ for _, pOp := range p.Operands {
+ for _, tOp := range t.Operands {
+ if m.matchNode(tOp, pOp) {
+ return true
+ }
+ }
+ }
+ return false
+ }
+
+ // For AND relation, all pattern operands must match
+ if len(t.Operands) < len(p.Operands) {
+ return false
+ }
+
+ // Try to match pattern operands against node operands in any order
+ matched := make([]bool, len(t.Operands))
+ for _, pOp := range p.Operands {
+ found := false
+ for j, tOp := range t.Operands {
+ if !matched[j] && m.matchNode(tOp, pOp) {
+ matched[j] = true
+ found = true
+ break
+ }
+ }
+ if !found {
+ return false
+ }
+ }
+ return true
+ }
+
+ case *ast.Term:
+ // If we're matching against a term group with OR relation,
+ // try to match against any of its operands
+ if t, ok := node.(*ast.TermGroup); ok && t.Relation == ast.OrRelation {
+ for _, op := range t.Operands {
+ if m.matchNode(op, p) {
+ return true
+ }
+ }
+ return false
+ }
+
+ // Direct term to term matching
+ if t, ok := node.(*ast.Term); ok {
+ return t.Foundry == p.Foundry &&
+ t.Key == p.Key &&
+ t.Layer == p.Layer &&
+ t.Match == p.Match &&
+ (p.Value == "" || t.Value == p.Value)
+ }
+ }
+
+ return false
+}
+
+// cloneNode creates a deep copy of a node
+func (m *Matcher) cloneNode(node ast.Node) ast.Node {
+ if node == nil {
+ return nil
+ }
+
+ switch n := node.(type) {
+ case *ast.Token:
+ return &ast.Token{
+ Wrap: m.cloneNode(n.Wrap),
+ }
+
+ case *ast.TermGroup:
+ operands := make([]ast.Node, len(n.Operands))
+ for i, op := range n.Operands {
+ operands[i] = m.cloneNode(op)
+ }
+ return &ast.TermGroup{
+ Operands: operands,
+ Relation: n.Relation,
+ }
+
+ case *ast.Term:
+ return &ast.Term{
+ Foundry: n.Foundry,
+ Key: n.Key,
+ Layer: n.Layer,
+ Match: n.Match,
+ Value: n.Value,
+ }
+
+ default:
+ return nil
+ }
+}
diff --git a/pkg/matcher/matcher_test.go b/pkg/matcher/matcher_test.go
new file mode 100644
index 0000000..d058a7c
--- /dev/null
+++ b/pkg/matcher/matcher_test.go
@@ -0,0 +1,504 @@
+package matcher
+
+import (
+ "testing"
+
+ "github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestMatchSimplePattern(t *testing.T) {
+ // Create a simple pattern: match a term with DET
+ pattern := ast.Pattern{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ // Create a simple replacement
+ replacement := ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ m := NewMatcher(pattern, replacement)
+
+ tests := []struct {
+ name string
+ input ast.Node
+ expected bool
+ }{
+ {
+ name: "Exact match",
+ input: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ expected: true,
+ },
+ {
+ name: "Different key",
+ input: &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ expected: false,
+ },
+ {
+ name: "Different foundry",
+ input: &ast.Term{
+ Foundry: "different",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ expected: false,
+ },
+ {
+ name: "Different match type",
+ input: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchNotEqual,
+ },
+ expected: false,
+ },
+ {
+ name: "Wrong node type",
+ input: &ast.Token{
+ Wrap: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ expected: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result := m.Match(tt.input)
+ assert.Equal(t, tt.expected, result)
+ })
+ }
+}
+
+func TestMatchComplexPattern(t *testing.T) {
+ // Create a complex pattern: DET AND (AdjType=Pdt OR PronType=Ind)
+ pattern := ast.Pattern{
+ Root: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PronType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Ind",
+ },
+ },
+ Relation: ast.OrRelation,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ }
+
+ replacement := ast.Replacement{
+ Root: &ast.Token{
+ Wrap: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ }
+
+ m := NewMatcher(pattern, replacement)
+
+ tests := []struct {
+ name string
+ input ast.Node
+ expected bool
+ }{
+ {
+ name: "Match with AdjType=Pdt",
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: true,
+ },
+ {
+ name: "Match with PronType=Ind",
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PronType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Ind",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: true,
+ },
+ {
+ name: "No match - missing DET",
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: false,
+ },
+ {
+ name: "No match - wrong value",
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Wrong",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result := m.Match(tt.input)
+ assert.Equal(t, tt.expected, result)
+ })
+ }
+}
+
+func TestReplace(t *testing.T) {
+ // Create pattern and replacement
+ pattern := ast.Pattern{
+ Root: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ }
+
+ replacement := ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ m := NewMatcher(pattern, replacement)
+
+ tests := []struct {
+ name string
+ input ast.Node
+ expected ast.Node
+ }{
+ {
+ name: "Replace matching pattern",
+ input: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ expected: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ {
+ name: "No replacement for non-matching pattern",
+ input: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ expected: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ {
+ name: "Replace in nested structure",
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result := m.Replace(tt.input)
+ assert.Equal(t, tt.expected, result)
+ })
+ }
+}
+
+func TestMatchNodeOrder(t *testing.T) {
+ // Test that operands can match in any order
+ pattern := ast.Pattern{
+ Root: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ }
+
+ replacement := ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ m := NewMatcher(pattern, replacement)
+
+ // Test with operands in different orders
+ input1 := &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ }
+
+ input2 := &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ Relation: ast.AndRelation,
+ }
+
+ assert.True(t, m.Match(input1), "Should match with original order")
+ assert.True(t, m.Match(input2), "Should match with reversed order")
+}