Disallow unsupported nodes in pattern and replacement
diff --git a/pkg/matcher/matcher.go b/pkg/matcher/matcher.go
index a26404b..d0a2259 100644
--- a/pkg/matcher/matcher.go
+++ b/pkg/matcher/matcher.go
@@ -1,6 +1,8 @@
package matcher
import (
+ "fmt"
+
"github.com/KorAP/KoralPipe-TermMapper2/pkg/ast"
)
@@ -10,12 +12,49 @@
replacement ast.Replacement
}
+// validateNode rejects nil, CatchallNode and unknown node types, recursing through Token wraps and TermGroup operands (a TermGroup must be non-empty).
+func validateNode(node ast.Node) error {
+ if node == nil {
+ return fmt.Errorf("nil node")
+ }
+
+ switch n := node.(type) {
+ case *ast.Token:
+ if n.Wrap != nil {
+ return validateNode(n.Wrap)
+ }
+ return nil
+ case *ast.Term:
+ return nil
+ case *ast.TermGroup:
+ if len(n.Operands) == 0 {
+ return fmt.Errorf("empty term group")
+ }
+ for _, op := range n.Operands {
+ if err := validateNode(op); err != nil {
+ return fmt.Errorf("invalid operand: %w", err)
+ }
+ }
+ return nil
+ case *ast.CatchallNode:
+ return fmt.Errorf("catchall nodes are not allowed in pattern/replacement ASTs")
+ default:
+ return fmt.Errorf("unknown node type: %T", node)
+ }
+}
+
// NewMatcher creates a new Matcher with the given pattern and replacement
-func NewMatcher(pattern ast.Pattern, replacement ast.Replacement) *Matcher {
+func NewMatcher(pattern ast.Pattern, replacement ast.Replacement) (*Matcher, error) {
+ if err := validateNode(pattern.Root); err != nil {
+ return nil, fmt.Errorf("invalid pattern: %w", err) // %w keeps the cause reachable via errors.Unwrap/Is/As
+ }
+ if err := validateNode(replacement.Root); err != nil {
+ return nil, fmt.Errorf("invalid replacement: %w", err)
+ }
return &Matcher{
pattern: pattern,
replacement: replacement,
- }
+ }, nil
}
// Match checks if the given node matches the pattern
@@ -25,32 +64,113 @@
// Replace replaces all occurrences of the pattern in the given node with the replacement
func (m *Matcher) Replace(node ast.Node) ast.Node {
- // If this node matches the pattern, create replacement while preserving outer structure
+ // First step: Create complete structure with replacements
+ replaced := m.replaceNode(node)
+ // Second step: Simplify the structure
+ simplified := m.simplifyNode(replaced)
+ // If the input was a Token, ensure the output is also a Token
+ if _, isToken := node.(*ast.Token); isToken {
+ if _, isToken := simplified.(*ast.Token); !isToken {
+ return &ast.Token{Wrap: simplified}
+ }
+ }
+ return simplified
+}
+
+// replaceNode creates a complete structure with replacements
+func (m *Matcher) replaceNode(node ast.Node) ast.Node {
+ if node == nil {
+ return nil
+ }
+
+ // First handle Token nodes specially to preserve their structure
+ if token, ok := node.(*ast.Token); ok {
+ if token.Wrap == nil {
+ return token
+ }
+ // Process the wrapped node
+ wrap := m.replaceNode(token.Wrap)
+ return &ast.Token{Wrap: wrap}
+ }
+
+ // If this node matches the pattern
if m.Match(node) {
- switch node.(type) {
- case *ast.Token:
- // For Token nodes, preserve the Token wrapper but replace its wrap
- newToken := &ast.Token{
- Wrap: m.cloneNode(m.replacement.Root),
+ // For TermGroups that contain a matching Term, preserve unmatched operands
+ if tg, ok := node.(*ast.TermGroup); ok {
+ // Check if any operand matches the pattern exactly
+ hasExactMatch := false
+ for _, op := range tg.Operands {
+ if m.matchNode(op, m.pattern.Root) {
+ hasExactMatch = true
+ break
+ }
}
- return newToken
- default:
+
+ // If we have an exact match, replace matching operands
+ if hasExactMatch {
+ hasMatch := false
+ newOperands := make([]ast.Node, 0, len(tg.Operands))
+ for _, op := range tg.Operands {
+ if m.matchNode(op, m.pattern.Root) {
+ if !hasMatch {
+ newOperands = append(newOperands, m.cloneNode(m.replacement.Root))
+ hasMatch = true
+ } else {
+ newOperands = append(newOperands, m.replaceNode(op))
+ }
+ } else {
+ newOperands = append(newOperands, m.replaceNode(op))
+ }
+ }
+ return &ast.TermGroup{
+ Operands: newOperands,
+ Relation: tg.Relation,
+ }
+ }
+ // Otherwise, replace the entire TermGroup
return m.cloneNode(m.replacement.Root)
}
+ // For other nodes, return the replacement
+ return m.cloneNode(m.replacement.Root)
}
// Otherwise recursively process children
switch n := node.(type) {
- case *ast.Token:
- newToken := &ast.Token{
- Wrap: m.Replace(n.Wrap),
- }
- return newToken
-
case *ast.TermGroup:
+ // Check if any operand matches the pattern exactly
+ hasExactMatch := false
+ for _, op := range n.Operands {
+ if m.matchNode(op, m.pattern.Root) {
+ hasExactMatch = true
+ break
+ }
+ }
+
+ // If we have an exact match, replace matching operands
+ if hasExactMatch {
+ hasMatch := false
+ newOperands := make([]ast.Node, 0, len(n.Operands))
+ for _, op := range n.Operands {
+ if m.matchNode(op, m.pattern.Root) {
+ if !hasMatch {
+ newOperands = append(newOperands, m.cloneNode(m.replacement.Root))
+ hasMatch = true
+ } else {
+ newOperands = append(newOperands, m.replaceNode(op))
+ }
+ } else {
+ newOperands = append(newOperands, m.replaceNode(op))
+ }
+ }
+ return &ast.TermGroup{
+ Operands: newOperands,
+ Relation: n.Relation,
+ }
+ }
+ // Otherwise, recursively process operands
newOperands := make([]ast.Node, len(n.Operands))
for i, op := range n.Operands {
- newOperands[i] = m.Replace(op)
+ newOperands[i] = m.replaceNode(op)
}
return &ast.TermGroup{
Operands: newOperands,
@@ -63,12 +183,81 @@
RawContent: n.RawContent,
}
if n.Wrap != nil {
- newNode.Wrap = m.Replace(n.Wrap)
+ newNode.Wrap = m.replaceNode(n.Wrap)
}
if len(n.Operands) > 0 {
newNode.Operands = make([]ast.Node, len(n.Operands))
for i, op := range n.Operands {
- newNode.Operands[i] = m.Replace(op)
+ newNode.Operands[i] = m.replaceNode(op)
+ }
+ }
+ return newNode
+
+ default:
+ return node
+ }
+}
+
+// simplifyNode removes unnecessary wrappers and empty nodes
+func (m *Matcher) simplifyNode(node ast.Node) ast.Node {
+ if node == nil {
+ return nil
+ }
+
+ switch n := node.(type) {
+ case *ast.Token:
+ if n.Wrap == nil {
+ return nil
+ }
+ simplified := m.simplifyNode(n.Wrap)
+ if simplified == nil {
+ return nil
+ }
+ return &ast.Token{Wrap: simplified}
+
+ case *ast.TermGroup:
+ // First simplify all operands
+ simplified := make([]ast.Node, 0, len(n.Operands))
+ for _, op := range n.Operands {
+ if s := m.simplifyNode(op); s != nil {
+ simplified = append(simplified, s)
+ }
+ }
+
+ // Handle special cases
+ if len(simplified) == 0 {
+ return nil
+ }
+ if len(simplified) == 1 {
+ // If we have a single operand, return it directly
+ // But only if we're not inside a Token
+ if _, isToken := node.(*ast.Token); !isToken {
+ return simplified[0]
+ }
+ }
+
+ return &ast.TermGroup{
+ Operands: simplified,
+ Relation: n.Relation,
+ }
+
+ case *ast.CatchallNode:
+ newNode := &ast.CatchallNode{
+ NodeType: n.NodeType,
+ RawContent: n.RawContent,
+ }
+ if n.Wrap != nil {
+ newNode.Wrap = m.simplifyNode(n.Wrap)
+ }
+ if len(n.Operands) > 0 {
+ simplified := make([]ast.Node, 0, len(n.Operands))
+ for _, op := range n.Operands {
+ if s := m.simplifyNode(op); s != nil {
+ simplified = append(simplified, s)
+ }
+ }
+ if len(simplified) > 0 {
+ newNode.Operands = simplified
}
}
return newNode
diff --git a/pkg/matcher/matcher_test.go b/pkg/matcher/matcher_test.go
index adc8784..47af1f9 100644
--- a/pkg/matcher/matcher_test.go
+++ b/pkg/matcher/matcher_test.go
@@ -11,6 +11,130 @@
"github.com/stretchr/testify/assert"
)
+func TestNewMatcherValidation(t *testing.T) {
+ tests := []struct {
+ name string
+ pattern ast.Pattern
+ replacement ast.Replacement
+ expectedError string
+ }{
+ {
+ name: "Valid pattern and replacement",
+ pattern: ast.Pattern{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ replacement: ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ expectedError: "",
+ },
+ {
+ name: "Invalid pattern - CatchallNode",
+ pattern: ast.Pattern{
+ Root: &ast.CatchallNode{
+ NodeType: "custom",
+ },
+ },
+ replacement: ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ expectedError: "invalid pattern: catchall nodes are not allowed in pattern/replacement ASTs",
+ },
+ {
+ name: "Invalid replacement - CatchallNode",
+ pattern: ast.Pattern{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ replacement: ast.Replacement{
+ Root: &ast.CatchallNode{
+ NodeType: "custom",
+ },
+ },
+ expectedError: "invalid replacement: catchall nodes are not allowed in pattern/replacement ASTs",
+ },
+ {
+ name: "Invalid pattern - Empty TermGroup",
+ pattern: ast.Pattern{
+ Root: &ast.TermGroup{
+ Operands: []ast.Node{},
+ Relation: ast.AndRelation,
+ },
+ },
+ replacement: ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ expectedError: "invalid pattern: empty term group",
+ },
+ {
+ name: "Invalid pattern - Nested CatchallNode",
+ pattern: ast.Pattern{
+ Root: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.CatchallNode{
+ NodeType: "custom",
+ },
+ },
+ Relation: ast.AndRelation,
+ },
+ },
+ replacement: ast.Replacement{
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ },
+ expectedError: "invalid pattern: invalid operand: catchall nodes are not allowed in pattern/replacement ASTs",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ matcher, err := NewMatcher(tt.pattern, tt.replacement)
+ if tt.expectedError != "" {
+ assert.Error(t, err)
+ assert.Equal(t, tt.expectedError, err.Error())
+ assert.Nil(t, matcher)
+ } else {
+ assert.NoError(t, err)
+ assert.NotNil(t, matcher)
+ }
+ })
+ }
+}
+
func TestMatchSimplePattern(t *testing.T) {
// Create a simple pattern: match a term with DET
pattern := ast.Pattern{
@@ -32,7 +156,9 @@
},
}
- m := NewMatcher(pattern, replacement)
+ m, err := NewMatcher(pattern, replacement)
+ assert.NoError(t, err)
+ assert.NotNil(t, m)
tests := []struct {
name string
@@ -149,7 +275,9 @@
},
}
- m := NewMatcher(pattern, replacement)
+ m, err := NewMatcher(pattern, replacement)
+ assert.NoError(t, err)
+ assert.NotNil(t, m)
tests := []struct {
name string
@@ -263,25 +391,12 @@
}
func TestReplace(t *testing.T) {
- // Create pattern and replacement
pattern := ast.Pattern{
- Root: &ast.TermGroup{
- Operands: []ast.Node{
- &ast.Term{
- Foundry: "opennlp",
- Key: "DET",
- Layer: "p",
- Match: ast.MatchEqual,
- },
- &ast.Term{
- Foundry: "opennlp",
- Key: "AdjType",
- Layer: "m",
- Match: ast.MatchEqual,
- Value: "Pdt",
- },
- },
- Relation: ast.AndRelation,
+ Root: &ast.Term{
+ Foundry: "opennlp",
+ Key: "DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
},
}
@@ -294,7 +409,9 @@
},
}
- m := NewMatcher(pattern, replacement)
+ m, err := NewMatcher(pattern, replacement)
+ assert.NoError(t, err)
+ assert.NotNil(t, m)
tests := []struct {
name string
@@ -321,11 +438,23 @@
},
Relation: ast.AndRelation,
},
- expected: &ast.Term{
- Foundry: "opennlp",
- Key: "COMBINED_DET",
- Layer: "p",
- Match: ast.MatchEqual,
+ expected: &ast.TermGroup{
+ Operands: []ast.Node{
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "COMBINED_DET",
+ Layer: "p",
+ Match: ast.MatchEqual,
+ },
+ &ast.Term{
+ Foundry: "opennlp",
+ Key: "AdjType",
+ Layer: "m",
+ Match: ast.MatchEqual,
+ Value: "Pdt",
+ },
+ },
+ Relation: ast.AndRelation,
},
},
{
@@ -431,7 +560,6 @@
}
func TestMatchNodeOrder(t *testing.T) {
- // Test that operands can match in any order
pattern := ast.Pattern{
Root: &ast.TermGroup{
Operands: []ast.Node{
@@ -462,7 +590,9 @@
},
}
- m := NewMatcher(pattern, replacement)
+ m, err := NewMatcher(pattern, replacement)
+ assert.NoError(t, err)
+ assert.NotNil(t, m)
// Test with operands in different orders
input1 := &ast.TermGroup{
@@ -508,7 +638,6 @@
}
func TestMatchWithUnknownNodes(t *testing.T) {
- // Create a pattern that looks for a term with DET inside any structure
pattern := ast.Pattern{
Root: &ast.Term{
Foundry: "opennlp",
@@ -527,7 +656,9 @@
},
}
- m := NewMatcher(pattern, replacement)
+ m, err := NewMatcher(pattern, replacement)
+ assert.NoError(t, err)
+ assert.NotNil(t, m)
tests := []struct {
name string