Simplify matcher: drop token-sequence helpers and GetReplacement
Change-Id: I004f0bb72ca206484d65c94fe8e8b31807f8ad1d
diff --git a/matcher/matcher.go b/matcher/matcher.go
index 1630b1a..bf5aaa5 100644
--- a/matcher/matcher.go
+++ b/matcher/matcher.go
@@ -181,11 +181,7 @@
return nil
}
if len(simplified) == 1 {
- // If we have a single operand, return it directly
- // But only if we're not inside a Token
- if _, isToken := node.(*ast.Token); !isToken {
- return simplified[0]
- }
+ return simplified[0]
}
return &ast.TermGroup{
@@ -263,9 +259,7 @@
func (m *Matcher) tryMatchWrapped(node, pattern ast.Node) bool {
switch n := node.(type) {
case *ast.Token:
- if n.Wrap != nil {
- return m.matchNode(n.Wrap, pattern)
- }
+ return n.Wrap != nil && m.matchNode(n.Wrap, pattern)
case *ast.CatchallNode:
if n.Wrap != nil && m.matchNode(n.Wrap, pattern) {
return true
diff --git a/matcher/snippet_matcher.go b/matcher/snippet_matcher.go
index 35e9461..0e40c78 100644
--- a/matcher/snippet_matcher.go
+++ b/matcher/snippet_matcher.go
@@ -162,58 +162,6 @@
return sm.matcher.Match(nodeToMatch), nil
}
-// CheckTokenSequence checks if a sequence of tokens matches the pattern
-func (sm *SnippetMatcher) CheckTokenSequence(tokens []TokenSpan) (bool, error) {
- if len(tokens) == 0 {
- return false, nil
- }
-
- // For token sequences, we need to check different strategies:
- // 1. Check if any individual token matches
- // 2. Check if the combined annotations of all tokens match
-
- // Strategy 1: Check individual tokens
- for _, token := range tokens {
- matches, err := sm.CheckToken(token)
- if err != nil {
- return false, err
- }
- if matches {
- return true, nil
- }
- }
-
- // Strategy 2: Check combined annotations
- allAnnotations := make([]string, 0)
- for _, token := range tokens {
- allAnnotations = append(allAnnotations, token.Annotations...)
- }
-
- // Remove duplicates from combined annotations
- annotationMap := make(map[string]bool)
- uniqueAnnotations := make([]string, 0)
- for _, annotation := range allAnnotations {
- if !annotationMap[annotation] {
- annotationMap[annotation] = true
- uniqueAnnotations = append(uniqueAnnotations, annotation)
- }
- }
-
- if len(uniqueAnnotations) == 0 {
- return false, nil
- }
-
- // Create a combined token for checking
- combinedToken := TokenSpan{
- Text: strings.Join(getTokenTexts(tokens), " "),
- StartPos: tokens[0].StartPos,
- EndPos: tokens[len(tokens)-1].EndPos,
- Annotations: uniqueAnnotations,
- }
-
- return sm.CheckToken(combinedToken)
-}
-
// FindMatchingTokens finds all tokens in the snippet that match the pattern
func (sm *SnippetMatcher) FindMatchingTokens(snippet string) ([]TokenSpan, error) {
tokens, err := sm.ParseSnippet(snippet)
@@ -222,61 +170,13 @@
}
matchingTokens := make([]TokenSpan, 0)
-
for _, token := range tokens {
- matches, err := sm.CheckToken(token)
- if err != nil {
+ if matches, err := sm.CheckToken(token); err != nil {
return nil, fmt.Errorf("failed to check token '%s': %w", token.Text, err)
- }
- if matches {
+ } else if matches {
matchingTokens = append(matchingTokens, token)
}
}
return matchingTokens, nil
}
-
-// FindMatchingTokenSequences finds all token sequences that match the pattern
-func (sm *SnippetMatcher) FindMatchingTokenSequences(snippet string, maxSequenceLength int) ([][]TokenSpan, error) {
- tokens, err := sm.ParseSnippet(snippet)
- if err != nil {
- return nil, err
- }
-
- if maxSequenceLength <= 0 {
- maxSequenceLength = len(tokens)
- }
-
- matchingSequences := make([][]TokenSpan, 0)
-
- // Check all possible token sequences up to maxSequenceLength
- for start := 0; start < len(tokens); start++ {
- for length := 1; length <= maxSequenceLength && start+length <= len(tokens); length++ {
- sequence := tokens[start : start+length]
-
- matches, err := sm.CheckTokenSequence(sequence)
- if err != nil {
- return nil, fmt.Errorf("failed to check token sequence: %w", err)
- }
- if matches {
- matchingSequences = append(matchingSequences, sequence)
- }
- }
- }
-
- return matchingSequences, nil
-}
-
-// GetReplacement returns the replacement node from the matcher
-func (sm *SnippetMatcher) GetReplacement() ast.Node {
- return sm.matcher.replacement.Root
-}
-
-// Helper function to extract token texts
-func getTokenTexts(tokens []TokenSpan) []string {
- texts := make([]string, len(tokens))
- for i, token := range tokens {
- texts[i] = token.Text
- }
- return texts
-}
diff --git a/matcher/snippet_matcher_test.go b/matcher/snippet_matcher_test.go
index f740d65..5c9b852 100644
--- a/matcher/snippet_matcher_test.go
+++ b/matcher/snippet_matcher_test.go
@@ -280,124 +280,6 @@
}
}
-func TestSnippetMatcher_CheckTokenSequence(t *testing.T) {
- // Create a pattern for testing
- pattern := ast.Pattern{
- Root: &ast.Term{
- Foundry: "marmot",
- Layer: "m",
- Key: "gender",
- Value: "masc",
- Match: ast.MatchEqual,
- },
- }
-
- replacement := ast.Replacement{
- Root: &ast.Term{
- Foundry: "opennlp",
- Layer: "m",
- Key: "M",
- Value: "",
- Match: ast.MatchEqual,
- },
- }
-
- sm, err := NewSnippetMatcher(pattern, replacement)
- require.NoError(t, err)
-
- tests := []struct {
- name string
- tokens []TokenSpan
- shouldMatch bool
- }{
- {
- name: "Sequence with matching token",
- tokens: []TokenSpan{
- {
- Text: "Der",
- Annotations: []string{
- "marmot/m:gender:masc",
- "marmot/m:case:nom",
- },
- },
- {
- Text: "alte",
- Annotations: []string{
- "marmot/m:gender:fem",
- "marmot/m:case:nom",
- },
- },
- },
- shouldMatch: true, // First token matches
- },
- {
- name: "Sequence with no matching tokens",
- tokens: []TokenSpan{
- {
- Text: "und",
- Annotations: []string{
- "marmot/p:KON",
- },
- },
- {
- Text: "oder",
- Annotations: []string{
- "marmot/p:KON",
- },
- },
- },
- shouldMatch: false,
- },
- {
- name: "Empty sequence",
- tokens: []TokenSpan{},
- shouldMatch: false,
- },
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- matches, err := sm.CheckTokenSequence(tt.tokens)
- require.NoError(t, err)
- assert.Equal(t, tt.shouldMatch, matches)
- })
- }
-}
-
-func TestSnippetMatcher_GetReplacement(t *testing.T) {
- pattern := ast.Pattern{
- Root: &ast.Term{
- Foundry: "marmot",
- Layer: "m",
- Key: "gender",
- Value: "masc",
- Match: ast.MatchEqual,
- },
- }
-
- replacement := ast.Replacement{
- Root: &ast.Term{
- Foundry: "opennlp",
- Layer: "m",
- Key: "M",
- Value: "",
- Match: ast.MatchEqual,
- },
- }
-
- sm, err := NewSnippetMatcher(pattern, replacement)
- require.NoError(t, err)
-
- replacementNode := sm.GetReplacement()
- require.NotNil(t, replacementNode)
-
- term, ok := replacementNode.(*ast.Term)
- require.True(t, ok)
- assert.Equal(t, "opennlp", term.Foundry)
- assert.Equal(t, "m", term.Layer)
- assert.Equal(t, "M", term.Key)
-}
-
func TestSnippetMatcher_RealWorldExample(t *testing.T) {
// Test with the real-world example from the response test
pattern := ast.Pattern{