Remove pkg subfolder
diff --git a/matcher/matcher.go b/matcher/matcher.go
new file mode 100644
index 0000000..7542bdf
--- /dev/null
+++ b/matcher/matcher.go
@@ -0,0 +1,443 @@
+package matcher
+
+import (
+ "fmt"
+
+ "github.com/KorAP/KoralPipe-TermMapper2/ast"
+)
+
+// Matcher pairs a pattern with its replacement: Match tests a node against the pattern, Replace substitutes the replacement for matches.
+type Matcher struct {
+ pattern ast.Pattern // tree that candidate nodes are compared against (its Root is passed to matchNode)
+ replacement ast.Replacement // tree whose Root is cloned into the result wherever the pattern matches
+}
+
+// validateNode returns an error unless node is built only from Token, Term and non-empty TermGroup nodes; nil, CatchallNode and unknown types are rejected.
+func validateNode(node ast.Node) error {
+	if node == nil {
+		return fmt.Errorf("nil node")
+	}
+
+	switch n := node.(type) {
+	case *ast.Token:
+		if n.Wrap != nil { // a Token with nothing wrapped is accepted as-is
+			return validateNode(n.Wrap)
+		}
+		return nil
+	case *ast.Term:
+		return nil
+	case *ast.TermGroup:
+		if len(n.Operands) == 0 {
+			return fmt.Errorf("empty term group")
+		}
+		for _, op := range n.Operands {
+			if err := validateNode(op); err != nil {
+				return fmt.Errorf("invalid operand: %w", err) // %w keeps the cause unwrappable; the message text is unchanged
+			}
+		}
+		return nil
+	case *ast.CatchallNode:
+		return fmt.Errorf("catchall nodes are not allowed in pattern/replacement ASTs")
+	default:
+		return fmt.Errorf("unknown node type: %T", node)
+	}
+}
+
+// NewMatcher validates both roots with validateNode and returns a Matcher for them, or a nil Matcher and an error naming the invalid side.
+func NewMatcher(pattern ast.Pattern, replacement ast.Replacement) (*Matcher, error) {
+	if err := validateNode(pattern.Root); err != nil {
+		return nil, fmt.Errorf("invalid pattern: %w", err) // wrap instead of flattening; the message text is unchanged
+	}
+	if err := validateNode(replacement.Root); err != nil {
+		return nil, fmt.Errorf("invalid replacement: %w", err)
+	}
+	return &Matcher{
+		pattern:     pattern,
+		replacement: replacement,
+	}, nil
+}
+
+// Match reports whether node matches the pattern root; for Term and TermGroup patterns matchNode also looks inside Tokens and CatchallNodes.
+func (m *Matcher) Match(node ast.Node) bool {
+ return m.matchNode(node, m.pattern.Root) // all comparison rules live in matchNode
+}
+
+// Replace rewrites node with replaceNode, tidies the outcome with simplifyNode, and returns it.
+// A Token input always yields a Token: any non-Token result is wrapped in a new Token.
+func (m *Matcher) Replace(node ast.Node) ast.Node {
+	// Substitute first, then collapse whatever structure the substitution left behind.
+	result := m.simplifyNode(m.replaceNode(node))
+
+	_, inputIsToken := node.(*ast.Token)
+	_, resultIsToken := result.(*ast.Token)
+	if !inputIsToken || resultIsToken {
+		return result
+	}
+	// e.g. simplifyNode returned nil for a Token with nothing left to wrap; hand back a Token regardless.
+	return &ast.Token{Wrap: result}
+}
+
+// replaceNode returns node rewritten so that pattern matches become clones of the replacement root; non-matching Terms and unknown node types are returned as-is.
+func (m *Matcher) replaceNode(node ast.Node) ast.Node {
+ if node == nil {
+ return nil
+ }
+
+ // Tokens keep their shell and only the wrapped node is rewritten. NOTE(review): a Token is therefore never compared against the pattern here — confirm intended for Token-rooted patterns
+ if token, ok := node.(*ast.Token); ok {
+ if token.Wrap == nil {
+ return token // empty Token: the same pointer is returned unchanged
+ }
+ // Rewrite the wrapped node and put the result into a new Token
+ wrap := m.replaceNode(token.Wrap)
+ return &ast.Token{Wrap: wrap}
+ }
+
+ // The pattern matches this node — which, per matchNode, can also mean it was found somewhere inside the node
+ if m.Match(node) {
+ // TermGroup: try to replace only the matching operands so that the other operands survive
+ if tg, ok := node.(*ast.TermGroup); ok {
+ // Does at least one direct operand match the pattern on its own?
+ hasExactMatch := false
+ for _, op := range tg.Operands {
+ if m.matchNode(op, m.pattern.Root) {
+ hasExactMatch = true
+ break
+ }
+ }
+
+ // Yes: rebuild the group operand by operand, keeping its relation
+ if hasExactMatch {
+ hasMatch := false
+ newOperands := make([]ast.Node, 0, len(tg.Operands))
+ for _, op := range tg.Operands {
+ if m.matchNode(op, m.pattern.Root) {
+ if !hasMatch {
+ newOperands = append(newOperands, m.cloneNode(m.replacement.Root)) // first matching operand is swapped wholesale
+ hasMatch = true
+ } else {
+ newOperands = append(newOperands, m.replaceNode(op)) // later matching operands are rewritten recursively instead
+ }
+ } else {
+ newOperands = append(newOperands, m.replaceNode(op))
+ }
+ }
+ return &ast.TermGroup{
+ Operands: newOperands,
+ Relation: tg.Relation,
+ }
+ }
+ // No single operand matches, so the group matched as a whole: replace the entire TermGroup
+ return m.cloneNode(m.replacement.Root)
+ }
+ // Any other matching node is replaced as a whole. NOTE(review): this includes a CatchallNode that merely contains the match — confirm intended
+ return m.cloneNode(m.replacement.Root)
+ }
+
+ // The node itself does not match: rebuild it and recurse into its children
+ switch n := node.(type) {
+ case *ast.TermGroup:
+ // Does at least one direct operand match the pattern on its own?
+ hasExactMatch := false
+ for _, op := range n.Operands {
+ if m.matchNode(op, m.pattern.Root) {
+ hasExactMatch = true
+ break
+ }
+ }
+
+ // Yes: same operand-by-operand rebuild as in the matching branch above
+ if hasExactMatch {
+ hasMatch := false
+ newOperands := make([]ast.Node, 0, len(n.Operands))
+ for _, op := range n.Operands {
+ if m.matchNode(op, m.pattern.Root) {
+ if !hasMatch {
+ newOperands = append(newOperands, m.cloneNode(m.replacement.Root)) // first matching operand is swapped wholesale
+ hasMatch = true
+ } else {
+ newOperands = append(newOperands, m.replaceNode(op)) // later matching operands are rewritten recursively instead
+ }
+ } else {
+ newOperands = append(newOperands, m.replaceNode(op))
+ }
+ }
+ return &ast.TermGroup{
+ Operands: newOperands,
+ Relation: n.Relation,
+ }
+ }
+ // No operand matches directly: rewrite every operand recursively, keeping order and relation
+ newOperands := make([]ast.Node, len(n.Operands))
+ for i, op := range n.Operands {
+ newOperands[i] = m.replaceNode(op)
+ }
+ return &ast.TermGroup{
+ Operands: newOperands,
+ Relation: n.Relation,
+ }
+
+ case *ast.CatchallNode:
+ newNode := &ast.CatchallNode{
+ NodeType: n.NodeType,
+ RawContent: n.RawContent, // carried over as-is, not copied
+ }
+ if n.Wrap != nil {
+ newNode.Wrap = m.replaceNode(n.Wrap)
+ }
+ if len(n.Operands) > 0 {
+ newNode.Operands = make([]ast.Node, len(n.Operands))
+ for i, op := range n.Operands {
+ newNode.Operands[i] = m.replaceNode(op)
+ }
+ }
+ return newNode
+
+ default:
+ return node // non-matching Terms and unknown node types are returned unchanged
+ }
+}
+
+// simplifyNode returns a tidied copy of node: Tokens with nothing left to wrap and TermGroups with
+// no surviving operands become nil, and a TermGroup with one surviving operand becomes that operand.
+func (m *Matcher) simplifyNode(node ast.Node) ast.Node {
+	if node == nil {
+		return nil
+	}
+
+	switch n := node.(type) {
+	case *ast.Token:
+		if n.Wrap == nil {
+			return nil
+		}
+		inner := m.simplifyNode(n.Wrap)
+		if inner == nil {
+			return nil
+		}
+		return &ast.Token{Wrap: inner}
+
+	case *ast.TermGroup:
+		// Simplify every operand first, dropping the ones that vanish.
+		kept := make([]ast.Node, 0, len(n.Operands))
+		for _, op := range n.Operands {
+			if s := m.simplifyNode(op); s != nil {
+				kept = append(kept, s)
+			}
+		}
+
+		switch len(kept) {
+		case 0:
+			// Nothing survived, so the group itself disappears.
+			return nil
+		case 1:
+			// A one-operand group adds nothing; hand back the operand itself. No Token check is
+			// needed: node is a *ast.TermGroup here, and the Token case above rebuilds the wrapper.
+			return kept[0]
+		}
+
+		return &ast.TermGroup{
+			Operands: kept,
+			Relation: n.Relation,
+		}
+
+	case *ast.CatchallNode:
+		// Catchall nodes are always kept; only their children are simplified.
+		newNode := &ast.CatchallNode{
+			NodeType:   n.NodeType,
+			RawContent: n.RawContent,
+		}
+		if n.Wrap != nil {
+			newNode.Wrap = m.simplifyNode(n.Wrap)
+		}
+		if len(n.Operands) > 0 {
+			kept := make([]ast.Node, 0, len(n.Operands))
+			for _, op := range n.Operands {
+				if s := m.simplifyNode(op); s != nil {
+					kept = append(kept, s)
+				}
+			}
+			if len(kept) > 0 {
+				newNode.Operands = kept
+			}
+		}
+		return newNode
+
+	default:
+		return node
+	}
+}
+
+// matchNode reports whether node satisfies pattern; a nil pattern matches anything, and a nil node matches only a nil pattern.
+func (m *Matcher) matchNode(node, pattern ast.Node) bool {
+ if pattern == nil {
+ return true
+ }
+ if node == nil {
+ return false
+ }
+
+ // Token pattern: the node must itself be a Token, and the two wrapped nodes are compared
+ if pToken, ok := pattern.(*ast.Token); ok {
+ if nToken, ok := node.(*ast.Token); ok {
+ return m.matchNode(nToken.Wrap, pToken.Wrap)
+ }
+ return false
+ }
+
+ // Term pattern: compared directly against a Term, otherwise searched for inside container nodes
+ if pTerm, ok := pattern.(*ast.Term); ok {
+ // Term vs Term: Foundry, Key, Layer and Match must be equal; Value is compared only when the pattern sets one
+ if t, ok := node.(*ast.Term); ok {
+ return t.Foundry == pTerm.Foundry &&
+ t.Key == pTerm.Key &&
+ t.Layer == pTerm.Layer &&
+ t.Match == pTerm.Match &&
+ (pTerm.Value == "" || t.Value == pTerm.Value)
+ }
+ // Token node: look at what it wraps (an empty Token never matches)
+ if tkn, ok := node.(*ast.Token); ok {
+ if tkn.Wrap == nil {
+ return false
+ }
+ return m.matchNode(tkn.Wrap, pattern)
+ }
+ // TermGroup node: one matching operand is enough, whatever the group's relation
+ if tg, ok := node.(*ast.TermGroup); ok {
+ for _, op := range tg.Operands {
+ if m.matchNode(op, pattern) {
+ return true
+ }
+ }
+ return false
+ }
+ // CatchallNode: a match in its wrap or in any operand is enough
+ if c, ok := node.(*ast.CatchallNode); ok {
+ if c.Wrap != nil && m.matchNode(c.Wrap, pattern) {
+ return true
+ }
+ for _, op := range c.Operands {
+ if m.matchNode(op, pattern) {
+ return true
+ }
+ }
+ return false
+ }
+ return false
+ }
+
+ // TermGroup pattern: handling depends on the pattern's relation
+ if pGroup, ok := pattern.(*ast.TermGroup); ok {
+ // OR pattern: the node only has to satisfy one of the pattern's operands
+ if pGroup.Relation == ast.OrRelation {
+ for _, pOp := range pGroup.Operands {
+ if m.matchNode(node, pOp) {
+ return true
+ }
+ }
+ return false
+ }
+
+ // Any other relation: a TermGroup node must carry the same relation as the pattern
+ if tg, ok := node.(*ast.TermGroup); ok {
+ if tg.Relation != pGroup.Relation {
+ return false
+ }
+ // Each pattern operand must claim a distinct node operand, in any order; extra node operands are allowed
+ if len(tg.Operands) < len(pGroup.Operands) {
+ return false
+ }
+ matched := make([]bool, len(tg.Operands))
+ for _, pOp := range pGroup.Operands {
+ found := false
+ for j, tOp := range tg.Operands {
+ if !matched[j] && m.matchNode(tOp, pOp) {
+ matched[j] = true
+ found = true
+ break // greedy first fit. NOTE(review): no backtracking if this claim blocks a later pattern operand — confirm acceptable
+ }
+ }
+ if !found {
+ return false
+ }
+ }
+ return true
+ }
+
+ // Token node: look at what it wraps (an empty Token never matches)
+ if tkn, ok := node.(*ast.Token); ok {
+ if tkn.Wrap == nil {
+ return false
+ }
+ return m.matchNode(tkn.Wrap, pattern)
+ }
+
+ // CatchallNode: a match in its wrap or in any operand is enough
+ if c, ok := node.(*ast.CatchallNode); ok {
+ if c.Wrap != nil && m.matchNode(c.Wrap, pattern) {
+ return true
+ }
+ for _, op := range c.Operands {
+ if m.matchNode(op, pattern) {
+ return true
+ }
+ }
+ return false
+ }
+
+ return false
+ }
+
+ return false // the pattern is of a type none of the branches above handles
+}
+
+// cloneNode returns a copy of a Token, TermGroup, Term or CatchallNode tree in which every node is newly allocated.
+// Children are cloned recursively; a nil node or any other node type yields nil.
+func (m *Matcher) cloneNode(node ast.Node) ast.Node {
+	switch src := node.(type) {
+	case nil:
+		return nil
+
+	case *ast.Term:
+		// Leaf node: copy it field by field.
+		dup := &ast.Term{}
+		dup.Foundry = src.Foundry
+		dup.Key = src.Key
+		dup.Layer = src.Layer
+		dup.Match = src.Match
+		dup.Value = src.Value
+		return dup
+
+	case *ast.Token:
+		inner := m.cloneNode(src.Wrap)
+		return &ast.Token{Wrap: inner}
+
+	case *ast.TermGroup:
+		// Same relation, operands cloned in their original order.
+		dup := &ast.TermGroup{Relation: src.Relation}
+		dup.Operands = make([]ast.Node, 0, len(src.Operands))
+		for _, op := range src.Operands {
+			dup.Operands = append(dup.Operands, m.cloneNode(op))
+		}
+		return dup
+
+	case *ast.CatchallNode:
+		// NodeType and RawContent are carried over as-is; Wrap and Operands are cloned only when present.
+		dup := &ast.CatchallNode{
+			NodeType:   src.NodeType,
+			RawContent: src.RawContent,
+		}
+		if src.Wrap != nil {
+			dup.Wrap = m.cloneNode(src.Wrap)
+		}
+		if count := len(src.Operands); count > 0 {
+			dup.Operands = make([]ast.Node, count)
+			for i := range src.Operands {
+				dup.Operands[i] = m.cloneNode(src.Operands[i])
+			}
+		}
+		return dup
+
+	default:
+		return nil
+	}
+}
diff --git a/matcher/matcher_test.go b/matcher/matcher_test.go
new file mode 100644
index 0000000..3f05edc
--- /dev/null
+++ b/matcher/matcher_test.go
@@ -0,0 +1,812 @@
+package matcher
+
+// These tests cover Matcher: NewMatcher's validation of pattern and replacement trees, Match on simple and
+// grouped patterns, and Replace, including inputs in which the matching term sits inside a CatchallNode.
+
+import (
+ "encoding/json"
+ "testing"
+
+ "github.com/KorAP/KoralPipe-TermMapper2/ast"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestNewMatcherValidation(t *testing.T) { // NewMatcher accepts plain Terms and rejects CatchallNodes and empty TermGroups
+ tests := []struct {
+ name string
+ pattern ast.Pattern
+ replacement ast.Replacement
+ expectedError string // exact error text expected from NewMatcher; "" means construction must succeed
+ }{
+ {
+ name: "Valid pattern and replacement",
+ pattern: ast.Pattern{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ replacement: ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ expectedError: "",
+ },
+ {
+ name: "Invalid pattern - CatchallNode",
+ pattern: ast.Pattern{
+ Root: &ast.CatchallNode{
+ NodeType: "custom",
+ },
+ },
+ replacement: ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ expectedError: "invalid pattern: catchall nodes are not allowed in pattern/replacement ASTs",
+ },
+ {
+ name: "Invalid replacement - CatchallNode",
+ pattern: ast.Pattern{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ replacement: ast.Replacement{
+ Root: &ast.CatchallNode{
+ NodeType: "custom",
+ },
+ },
+ expectedError: "invalid replacement: catchall nodes are not allowed in pattern/replacement ASTs",
+ },
+ {
+ name: "Invalid pattern - Empty TermGroup",
+ pattern: ast.Pattern{
+ Root: &ast.TermGroup{
+ Operands: []ast.Node{},
+ Relation: ast.AndRelation,
+ },
+ },
+ replacement: ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ expectedError: "invalid pattern: empty term group",
+ },
+ {
+ name: "Invalid pattern - Nested CatchallNode", // the CatchallNode is an operand, so the error gains an "invalid operand" layer
+ pattern: ast.Pattern{
+ Root: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.CatchallNode{
+ NodeType: "custom",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ replacement: ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ expectedError: "invalid pattern: invalid operand: catchall nodes are not allowed in pattern/replacement ASTs",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ matcher, err := NewMatcher(tt.pattern, tt.replacement)
+ if tt.expectedError != "" {
+ assert.Error(t, err)
+ assert.Equal(t, tt.expectedError, err.Error()) // the full message is compared, prefix included
+ assert.Nil(t, matcher)
+ } else {
+ assert.NoError(t, err)
+ assert.NotNil(t, matcher)
+ }
+ })
+ }
+}
+
+func TestMatchSimplePattern(t *testing.T) { // Match with a single-Term pattern against Terms and a Token-wrapped Term
+ // Pattern: one opennlp/p term with key DET and no Value set
+ pattern := ast.Pattern{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ // Replacement: required by NewMatcher but not exercised here, since only Match is called
+ replacement := ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ m, err := NewMatcher(pattern, replacement)
+ assert.NoError(t, err)
+ assert.NotNil(t, m)
+
+ tests := []struct {
+ name string
+ input ast.Node
+ expected bool // expected result of m.Match(input)
+ }{
+ {
+ name: "Exact match",
+ input: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ expected: true,
+ },
+ {
+ name: "Different key",
+ input: &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ expected: false,
+ },
+ {
+ name: "Different foundry",
+ input: &ast.Term{
+ Foundry: "different",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ expected: false,
+ },
+ {
+ name: "Different match type",
+ input: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchNotEqual,
+ },
+ expected: false,
+ },
+ {
+ name: "Nested node", // a Term pattern is also found inside a Token's Wrap
+ input: &ast.Token{
+ Wrap: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ expected: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result := m.Match(tt.input)
+ assert.Equal(t, tt.expected, result)
+ })
+ }
+}
+
+func TestMatchComplexPattern(t *testing.T) { // Match with a Token-wrapped AND group that contains an OR group
+ // Pattern: Token wrapping (DET AND (AdjType=Pdt OR PronType=Ind))
+ pattern := ast.Pattern{
+ Root: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PronType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Ind",
+ },
+ },
+ Relation: ast.OrRelation,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ }
+
+ replacement := ast.Replacement{ // required by NewMatcher; only Match is exercised below
+ Root: &ast.Token{
+ Wrap: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ }
+
+ m, err := NewMatcher(pattern, replacement)
+ assert.NoError(t, err)
+ assert.NotNil(t, m)
+
+ tests := []struct {
+ name string
+ input ast.Node
+ expected bool // expected result of m.Match(input)
+ }{
+ {
+ name: "Match with AdjType=Pdt", // satisfies the first alternative of the OR group
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: true,
+ },
+ {
+ name: "Match with PronType=Ind", // satisfies the second alternative of the OR group
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "PronType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Ind",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: true,
+ },
+ {
+ name: "No match - missing DET",
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: false,
+ },
+ {
+ name: "No match - wrong value", // the pattern sets Value, so the differing Value must not match
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Wrong",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result := m.Match(tt.input)
+ assert.Equal(t, tt.expected, result)
+ })
+ }
+}
+
+func TestReplace(t *testing.T) { // Replace with a DET -> COMBINED_DET term rule on flat, non-matching and nested inputs
+ pattern := ast.Pattern{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ replacement := ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ m, err := NewMatcher(pattern, replacement)
+ assert.NoError(t, err)
+ assert.NotNil(t, m)
+
+ tests := []struct {
+ name string
+ input ast.Node
+ expected ast.Node // expected result of m.Replace(input), compared with assert.Equal
+ }{
+ {
+ name: "Replace matching pattern", // only the DET operand changes; AdjType=Pdt is kept
+ input: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ expected: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ {
+ name: "No replacement for non-matching pattern", // the expected output equals the input
+ input: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ expected: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ {
+ name: "Replace in nested structure", // the inner group containing DET is expected to be replaced as a whole
+ input: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ expected: &ast.Token{
+ Wrap: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result := m.Replace(tt.input)
+ assert.Equal(t, tt.expected, result)
+ })
+ }
+}
+
+func TestMatchNodeOrder(t *testing.T) { // an AND-group pattern must match regardless of operand order in the input
+ pattern := ast.Pattern{
+ Root: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ }
+
+ replacement := ast.Replacement{ // required by NewMatcher; only Match is exercised below
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ m, err := NewMatcher(pattern, replacement)
+ assert.NoError(t, err)
+ assert.NotNil(t, m)
+
+ // input1 lists the operands in the pattern's order (DET, AdjType); input2 lists them reversed
+ input1 := &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
+ }
+
+ input2 := &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ Relation: ast.AndRelation,
+ }
+
+ assert.True(t, m.Match(input1), "Should match with original order")
+ assert.True(t, m.Match(input2), "Should match with reversed order")
+}
+
+func TestMatchWithUnknownNodes(t *testing.T) { // Match and Replace when the term sits inside CatchallNode wrappers
+ pattern := ast.Pattern{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ replacement := ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ }
+
+ m, err := NewMatcher(pattern, replacement)
+ assert.NoError(t, err)
+ assert.NotNil(t, m)
+
+ tests := []struct {
+ name string
+ input ast.Node
+ expected bool // expected result of m.Match(input); when true, Replace is checked as well
+ }{
+ {
+ name: "Match term inside unknown node with wrap",
+ input: &ast.CatchallNode{
+ NodeType: "koral:custom",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:custom",
+ "customField": "value"
+ }`),
+ Wrap: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ expected: true,
+ },
+ {
+ name: "Match term inside unknown node's operands",
+ input: &ast.CatchallNode{
+ NodeType: "koral:custom",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:custom",
+ "customField": "value"
+ }`),
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ },
+ expected: true,
+ },
+ {
+ name: "No match in unknown node with different term",
+ input: &ast.CatchallNode{
+ NodeType: "koral:custom",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:custom",
+ "customField": "value"
+ }`),
+ Wrap: &ast.Term{
+ Foundry: "opennlp",
+ Key: "NOUN",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ expected: false,
+ },
+ {
+ name: "Match in deeply nested unknown nodes", // CatchallNode wrapping a CatchallNode wrapping the term
+ input: &ast.CatchallNode{
+ NodeType: "koral:outer",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:outer",
+ "outerField": "value"
+ }`),
+ Wrap: &ast.CatchallNode{
+ NodeType: "koral:inner",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:inner",
+ "innerField": "value"
+ }`),
+ Wrap: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ },
+ expected: true,
+ },
+ {
+ name: "Match in mixed known and unknown nodes", // Token -> CatchallNode -> TermGroup -> Term
+ input: &ast.Token{
+ Wrap: &ast.CatchallNode{
+ NodeType: "koral:custom",
+ RawContent: json.RawMessage(`{
+ "@type": "koral:custom",
+ "customField": "value"
+ }`),
+ Operands: []ast.Node{
+ &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ },
+ },
+ expected: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result := m.Match(tt.input)
+ assert.Equal(t, tt.expected, result)
+
+ if tt.expected {
+ // A matching input must also come back from Replace with the replacement term in it
+ replaced := m.Replace(tt.input)
+ // Walk the result and look for a Term keyed COMBINED_DET; the surrounding structure is not asserted
+ containsReplacement := false
+ var checkNode func(ast.Node)
+ checkNode = func(node ast.Node) {
+ switch n := node.(type) {
+ case *ast.Term:
+ if n.Key == "COMBINED_DET" {
+ containsReplacement = true
+ }
+ case *ast.Token:
+ if n.Wrap != nil {
+ checkNode(n.Wrap)
+ }
+ case *ast.TermGroup:
+ for _, op := range n.Operands {
+ checkNode(op)
+ }
+ case *ast.CatchallNode:
+ if n.Wrap != nil {
+ checkNode(n.Wrap)
+ }
+ for _, op := range n.Operands {
+ checkNode(op)
+ }
+ }
+ }
+ checkNode(replaced)
+ assert.True(t, containsReplacement, "Replacement should be found in the result")
+ }
+ })
+ }
+}