blob: 53d776548bced1b31e5fab498f3102676fe7f341 [file] [log] [blame]
package matcher
// This file tests the matcher package: NewMatcher validation, pattern matching
// via Match, and node rewriting via Replace.
import (
"encoding/json"
"testing"
"github.com/KorAP/Koral-Mapper/ast"
"github.com/stretchr/testify/assert"
)
// TestNewMatcherValidation verifies the validation performed by NewMatcher:
// a well-formed pattern/replacement pair yields a non-nil matcher, while a
// catchall node (at the root or nested inside a term group) or an empty term
// group is rejected with the exact error message listed for the case and a
// nil matcher.
func TestNewMatcherValidation(t *testing.T) {
	// Fixture constructors return a fresh node on every call so that no two
	// table entries share a pointer.
	det := func() *ast.Term {
		return &ast.Term{Foundry: "opennlp", Key: "DET", Layer: "p", Match: ast.MatchEqual}
	}
	combinedDet := func() *ast.Term {
		return &ast.Term{Foundry: "opennlp", Key: "COMBINED_DET", Layer: "p", Match: ast.MatchEqual}
	}
	catchall := func() *ast.CatchallNode {
		return &ast.CatchallNode{NodeType: "custom"}
	}

	tests := []struct {
		name          string
		pattern       ast.Pattern
		replacement   ast.Replacement
		expectedError string // empty means NewMatcher must succeed
	}{
		{
			name:          "Valid pattern and replacement",
			pattern:       ast.Pattern{Root: det()},
			replacement:   ast.Replacement{Root: combinedDet()},
			expectedError: "",
		},
		{
			name:          "Invalid pattern - CatchallNode",
			pattern:       ast.Pattern{Root: catchall()},
			replacement:   ast.Replacement{Root: det()},
			expectedError: "invalid pattern: catchall nodes are not allowed in pattern/replacement ASTs",
		},
		{
			name:          "Invalid replacement - CatchallNode",
			pattern:       ast.Pattern{Root: det()},
			replacement:   ast.Replacement{Root: catchall()},
			expectedError: "invalid replacement: catchall nodes are not allowed in pattern/replacement ASTs",
		},
		{
			name:          "Invalid pattern - Empty TermGroup",
			pattern:       ast.Pattern{Root: &ast.TermGroup{Operands: []ast.Node{}, Relation: ast.AndRelation}},
			replacement:   ast.Replacement{Root: det()},
			expectedError: "invalid pattern: empty term group",
		},
		{
			name:          "Invalid pattern - Nested CatchallNode",
			pattern:       ast.Pattern{Root: &ast.TermGroup{Operands: []ast.Node{det(), catchall()}, Relation: ast.AndRelation}},
			replacement:   ast.Replacement{Root: det()},
			expectedError: "invalid pattern: invalid operand: catchall nodes are not allowed in pattern/replacement ASTs",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			matcher, err := NewMatcher(tt.pattern, tt.replacement)
			if tt.expectedError == "" {
				assert.NoError(t, err)
				assert.NotNil(t, matcher)
				return
			}
			// EqualError reports a nil error as a normal test failure; calling
			// err.Error() directly would panic if NewMatcher wrongly succeeded.
			assert.EqualError(t, err, tt.expectedError)
			assert.Nil(t, matcher)
		})
	}
}
// TestMatchSimplePattern builds a matcher whose pattern is a single opennlp
// "DET" term on layer "p" and checks Match against terms that differ in one
// field at a time, as well as against the same term wrapped in a token.
func TestMatchSimplePattern(t *testing.T) {
	// det returns a fresh copy of the term used as the pattern.
	det := func() *ast.Term {
		return &ast.Term{Foundry: "opennlp", Key: "DET", Layer: "p", Match: ast.MatchEqual}
	}

	m, err := NewMatcher(
		ast.Pattern{Root: det()},
		ast.Replacement{
			Root: &ast.Term{Foundry: "opennlp", Key: "COMBINED_DET", Layer: "p", Match: ast.MatchEqual},
		},
	)
	if !assert.NoError(t, err) || !assert.NotNil(t, m) {
		// The failure is already recorded; stop here so the subtests below do
		// not call Match on a nil matcher.
		return
	}

	tests := []struct {
		name     string
		input    ast.Node
		expected bool
	}{
		{name: "Exact match", input: det(), expected: true},
		{
			name:     "Different key",
			input:    &ast.Term{Foundry: "opennlp", Key: "NOUN", Layer: "p", Match: ast.MatchEqual},
			expected: false,
		},
		{
			name:     "Different foundry",
			input:    &ast.Term{Foundry: "different", Key: "DET", Layer: "p", Match: ast.MatchEqual},
			expected: false,
		},
		{
			name:     "Different match type",
			input:    &ast.Term{Foundry: "opennlp", Key: "DET", Layer: "p", Match: ast.MatchNotEqual},
			expected: false,
		},
		// The pattern term sits one level down, inside a token wrapper.
		{name: "Nested node", input: &ast.Token{Wrap: det()}, expected: true},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.expected, m.Match(tt.input))
		})
	}
}
// TestMatchComplexPattern checks Match with a nested boolean pattern:
// a token wrapping DET AND (AdjType=Pdt OR PronType=Ind). Each input is a
// token wrapping a flat AND group of two terms.
func TestMatchComplexPattern(t *testing.T) {
	// Fixture constructors; each call yields a fresh node.
	det := func() *ast.Term {
		return &ast.Term{Foundry: "opennlp", Key: "DET", Layer: "p", Match: ast.MatchEqual}
	}
	adjPdt := func() *ast.Term {
		return &ast.Term{Foundry: "opennlp", Key: "AdjType", Layer: "m", Match: ast.MatchEqual, Value: "Pdt"}
	}
	pronInd := func() *ast.Term {
		return &ast.Term{Foundry: "opennlp", Key: "PronType", Layer: "m", Match: ast.MatchEqual, Value: "Ind"}
	}
	// andToken wraps the operands in an AND term group inside a token.
	andToken := func(operands ...ast.Node) *ast.Token {
		return &ast.Token{Wrap: &ast.TermGroup{Operands: operands, Relation: ast.AndRelation}}
	}

	// Pattern: DET AND (AdjType=Pdt OR PronType=Ind)
	pattern := ast.Pattern{
		Root: andToken(
			det(),
			&ast.TermGroup{
				Operands: []ast.Node{adjPdt(), pronInd()},
				Relation: ast.OrRelation,
			},
		),
	}
	replacement := ast.Replacement{
		Root: &ast.Token{
			Wrap: &ast.Term{Foundry: "opennlp", Key: "COMBINED_DET", Layer: "p", Match: ast.MatchEqual},
		},
	}

	m, err := NewMatcher(pattern, replacement)
	if !assert.NoError(t, err) || !assert.NotNil(t, m) {
		// The failure is already recorded; stop here so the subtests below do
		// not call Match on a nil matcher.
		return
	}

	tests := []struct {
		name     string
		input    ast.Node
		expected bool
	}{
		{
			name:     "Match with AdjType=Pdt",
			input:    andToken(det(), adjPdt()),
			expected: true,
		},
		{
			name:     "Match with PronType=Ind",
			input:    andToken(det(), pronInd()),
			expected: true,
		},
		{
			name: "No match - missing DET",
			input: andToken(
				&ast.Term{Foundry: "opennlp", Key: "NOUN", Layer: "p", Match: ast.MatchEqual},
				adjPdt(),
			),
			expected: false,
		},
		{
			name: "No match - wrong value",
			input: andToken(
				det(),
				&ast.Term{Foundry: "opennlp", Key: "AdjType", Layer: "m", Match: ast.MatchEqual, Value: "Wrong"},
			),
			expected: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.expected, m.Match(tt.input))
		})
	}
}
// TestReplace builds a matcher that maps the opennlp "DET" term to
// "COMBINED_DET" and compares the tree returned by Replace with the expected
// tree for a matching group, a non-matching group, and a nested structure.
func TestReplace(t *testing.T) {
	// Fixture constructors return a fresh node on every call, so input and
	// expected trees never share pointers.
	det := func() *ast.Term {
		return &ast.Term{Foundry: "opennlp", Key: "DET", Layer: "p", Match: ast.MatchEqual}
	}
	noun := func() *ast.Term {
		return &ast.Term{Foundry: "opennlp", Key: "NOUN", Layer: "p", Match: ast.MatchEqual}
	}
	combinedDet := func() *ast.Term {
		return &ast.Term{Foundry: "opennlp", Key: "COMBINED_DET", Layer: "p", Match: ast.MatchEqual}
	}
	adjPdt := func() *ast.Term {
		return &ast.Term{Foundry: "opennlp", Key: "AdjType", Layer: "m", Match: ast.MatchEqual, Value: "Pdt"}
	}
	// andGroup combines the operands into an AND term group.
	andGroup := func(operands ...ast.Node) *ast.TermGroup {
		return &ast.TermGroup{Operands: operands, Relation: ast.AndRelation}
	}

	m, err := NewMatcher(
		ast.Pattern{Root: det()},
		ast.Replacement{Root: combinedDet()},
	)
	if !assert.NoError(t, err) || !assert.NotNil(t, m) {
		// The failure is already recorded; stop here so the subtests below do
		// not call Replace on a nil matcher.
		return
	}

	tests := []struct {
		name     string
		input    ast.Node
		expected ast.Node
	}{
		{
			name:     "Replace matching pattern",
			input:    andGroup(det(), adjPdt()),
			expected: andGroup(combinedDet(), adjPdt()),
		},
		{
			// Without a DET term the tree is expected to come back unchanged.
			name:     "No replacement for non-matching pattern",
			input:    andGroup(noun(), adjPdt()),
			expected: andGroup(noun(), adjPdt()),
		},
		{
			// The inner group containing DET is expected to be replaced as a
			// whole by the replacement term.
			name:     "Replace in nested structure",
			input:    &ast.Token{Wrap: andGroup(andGroup(det(), adjPdt()), noun())},
			expected: &ast.Token{Wrap: andGroup(combinedDet(), noun())},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.expected, m.Replace(tt.input))
		})
	}
}
// TestMatchNodeOrder checks that an AND group pattern (DET, AdjType=Pdt)
// matches an input group with the same operands both in the pattern's order
// and in reversed order.
func TestMatchNodeOrder(t *testing.T) {
	// Fixture constructors; each call yields a fresh node.
	det := func() *ast.Term {
		return &ast.Term{Foundry: "opennlp", Key: "DET", Layer: "p", Match: ast.MatchEqual}
	}
	adjPdt := func() *ast.Term {
		return &ast.Term{Foundry: "opennlp", Key: "AdjType", Layer: "m", Match: ast.MatchEqual, Value: "Pdt"}
	}
	// andGroup combines the operands, in the given order, into an AND term group.
	andGroup := func(operands ...ast.Node) *ast.TermGroup {
		return &ast.TermGroup{Operands: operands, Relation: ast.AndRelation}
	}

	m, err := NewMatcher(
		ast.Pattern{Root: andGroup(det(), adjPdt())},
		ast.Replacement{
			Root: &ast.Term{Foundry: "opennlp", Key: "COMBINED_DET", Layer: "p", Match: ast.MatchEqual},
		},
	)
	if !assert.NoError(t, err) || !assert.NotNil(t, m) {
		// The failure is already recorded; stop here so Match is not called
		// on a nil matcher.
		return
	}

	sameOrder := andGroup(det(), adjPdt())
	reversedOrder := andGroup(adjPdt(), det())

	assert.True(t, m.Match(sameOrder), "Should match with original order")
	assert.True(t, m.Match(reversedOrder), "Should match with reversed order")
}
// TestMatchWithUnknownNodes checks Match and Replace on inputs that contain
// catchall nodes: the DET pattern term may sit in a catchall's Wrap, in its
// Operands, inside nested catchalls, or inside a mix of token, catchall and
// term group nodes. For every input expected to match, the test also calls
// Replace and requires a COMBINED_DET term somewhere in the result.
func TestMatchWithUnknownNodes(t *testing.T) {
	// Fixture constructors; each call yields a fresh value.
	det := func() *ast.Term {
		return &ast.Term{Foundry: "opennlp", Key: "DET", Layer: "p", Match: ast.MatchEqual}
	}
	// customRaw returns the raw JSON shared by the "koral:custom" fixtures.
	// Whitespace inside the literal is part of the value, so it is kept as is.
	customRaw := func() json.RawMessage {
		return json.RawMessage(`{
"@type": "koral:custom",
"customField": "value"
}`)
	}

	m, err := NewMatcher(
		ast.Pattern{Root: det()},
		ast.Replacement{
			Root: &ast.Term{Foundry: "opennlp", Key: "COMBINED_DET", Layer: "p", Match: ast.MatchEqual},
		},
	)
	if !assert.NoError(t, err) || !assert.NotNil(t, m) {
		// The failure is already recorded; stop here so the subtests below do
		// not call Match or Replace on a nil matcher.
		return
	}

	// containsCombinedDet reports whether a COMBINED_DET term is reachable
	// from node through token wraps, term group operands, and catchall
	// wraps/operands. Other node types are not descended into.
	var containsCombinedDet func(ast.Node) bool
	containsCombinedDet = func(node ast.Node) bool {
		switch n := node.(type) {
		case *ast.Term:
			return n.Key == "COMBINED_DET"
		case *ast.Token:
			return n.Wrap != nil && containsCombinedDet(n.Wrap)
		case *ast.TermGroup:
			for _, op := range n.Operands {
				if containsCombinedDet(op) {
					return true
				}
			}
		case *ast.CatchallNode:
			if n.Wrap != nil && containsCombinedDet(n.Wrap) {
				return true
			}
			for _, op := range n.Operands {
				if containsCombinedDet(op) {
					return true
				}
			}
		}
		return false
	}

	tests := []struct {
		name     string
		input    ast.Node
		expected bool
	}{
		{
			name: "Match term inside unknown node with wrap",
			input: &ast.CatchallNode{
				NodeType:   "koral:custom",
				RawContent: customRaw(),
				Wrap:       det(),
			},
			expected: true,
		},
		{
			name: "Match term inside unknown node's operands",
			input: &ast.CatchallNode{
				NodeType:   "koral:custom",
				RawContent: customRaw(),
				Operands:   []ast.Node{det()},
			},
			expected: true,
		},
		{
			name: "No match in unknown node with different term",
			input: &ast.CatchallNode{
				NodeType:   "koral:custom",
				RawContent: customRaw(),
				Wrap:       &ast.Term{Foundry: "opennlp", Key: "NOUN", Layer: "p", Match: ast.MatchEqual},
			},
			expected: false,
		},
		{
			name: "Match in deeply nested unknown nodes",
			input: &ast.CatchallNode{
				NodeType: "koral:outer",
				RawContent: json.RawMessage(`{
"@type": "koral:outer",
"outerField": "value"
}`),
				Wrap: &ast.CatchallNode{
					NodeType: "koral:inner",
					RawContent: json.RawMessage(`{
"@type": "koral:inner",
"innerField": "value"
}`),
					Wrap: det(),
				},
			},
			expected: true,
		},
		{
			name: "Match in mixed known and unknown nodes",
			input: &ast.Token{
				Wrap: &ast.CatchallNode{
					NodeType:   "koral:custom",
					RawContent: customRaw(),
					Operands: []ast.Node{
						&ast.TermGroup{
							Operands: []ast.Node{det()},
							Relation: ast.AndRelation,
						},
					},
				},
			},
			expected: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.expected, m.Match(tt.input))
			if !tt.expected {
				return
			}
			// When a match is expected, the replacement term must show up
			// somewhere in the tree returned by Replace.
			replaced := m.Replace(tt.input)
			assert.True(t, containsCombinedDet(replaced), "Replacement should be found in the result")
		})
	}
}