Update specificity rule matching for annotations
Change-Id: Ifa7ec5eef3583cb196f4aa1ca0cfcd65790de226
diff --git a/mapper/corpus_test.go b/mapper/corpus_test.go
index cc2381b..0b88988 100644
--- a/mapper/corpus_test.go
+++ b/mapper/corpus_test.go
@@ -1106,11 +1106,11 @@
// OR matches OR groups. The forward rule's OR B-side does NOT match
// individual AND groups in BtoA, so reverse rules handle those.
m := newCorpusMapper(t,
- // Forward: Entertainment → OR-of-ANDs (complex B-side, for AtoB)
+ // Forward: Entertainment -> OR-of-ANDs (complex B-side, for AtoB)
"genre=Entertainment <> ((textClass=kultur & textClass=musik) | (textClass=kultur & textClass=film))",
- // Reverse aggregated: (Entertainment | Culture) → AND (for BtoA with (k&f))
+ // Reverse aggregated: (Entertainment | Culture) -> AND (for BtoA with (k&f))
"(genre=Entertainment | genre=Culture) <> (textClass=kultur & textClass=film)",
- // Reverse individual: Entertainment → AND (for BtoA with (k&m))
+ // Reverse individual: Entertainment -> AND (for BtoA with (k&m))
"genre=Entertainment <> (textClass=kultur & textClass=musik)",
)
diff --git a/mapper/mapper_test.go b/mapper/mapper_test.go
index 13cb7a8..8d3e691 100644
--- a/mapper/mapper_test.go
+++ b/mapper/mapper_test.go
@@ -2,6 +2,7 @@
import (
"encoding/json"
+ "os"
"testing"
"github.com/KorAP/Koral-Mapper/ast"
@@ -9,6 +10,7 @@
"github.com/KorAP/Koral-Mapper/matcher"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
+ "gopkg.in/yaml.v3"
)
func TestMapper(t *testing.T) {
@@ -1309,3 +1311,664 @@
assert.Contains(t, err.Error(), "identical source and target field")
})
}
+
+// newSTTSUPoSMapper builds a Mapper from ../mappings/stts-upos.yaml; any read, parse or build error fails the test.
+func newSTTSUPoSMapper(t *testing.T) *Mapper {
+	t.Helper()
+	raw, err := os.ReadFile("../mappings/stts-upos.yaml")
+	require.NoError(t, err, "failed to read stts-upos.yaml from disk")
+
+	var list config.MappingList
+	require.NoError(t, yaml.Unmarshal(raw, &list), "failed to parse stts-upos.yaml")
+
+	built, err := NewMapper([]config.MappingList{list})
+	require.NoError(t, err)
+	return built
+}
+
+func TestFallbackRules(t *testing.T) {
+ m := newSTTSUPoSMapper(t)
+
+ t.Run("Bare ADJ (BtoA) maps to ADJA|ADJD disjunction", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "ADJ",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:termGroup", wrap["@type"])
+ assert.Equal(t, "relation:or", wrap["relation"])
+ operands := wrap["operands"].([]any)
+ assert.Len(t, operands, 2)
+ keys := []string{
+ operands[0].(map[string]any)["key"].(string),
+ operands[1].(map[string]any)["key"].(string),
+ }
+ assert.Contains(t, keys, "ADJA")
+ assert.Contains(t, keys, "ADJD")
+ })
+
+ t.Run("ADJ & Variant=Short (BtoA) maps to ADJD only", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "ADJ",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "Short",
+ "layer": "Variant",
+ "match": "match:eq"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:term", wrap["@type"])
+ assert.Equal(t, "ADJD", wrap["key"])
+ })
+
+ t.Run("Bare DET (BtoA) maps to DET subtypes disjunction", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:termGroup", wrap["@type"])
+ assert.Equal(t, "relation:or", wrap["relation"])
+ operands := wrap["operands"].([]any)
+ assert.Len(t, operands, 7)
+ var keys []string
+ for _, op := range operands {
+ keys = append(keys, op.(map[string]any)["key"].(string))
+ }
+ assert.Contains(t, keys, "ART")
+ assert.Contains(t, keys, "PDAT")
+ assert.Contains(t, keys, "PWAT")
+ })
+
+ t.Run("DET & PronType=Art (BtoA) maps to ART only", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "Art",
+ "layer": "PronType",
+ "match": "match:eq"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:term", wrap["@type"])
+ assert.Equal(t, "ART", wrap["key"])
+ })
+
+ t.Run("Bare SCONJ (BtoA) maps to KOUI|KOUS disjunction", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "SCONJ",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:termGroup", wrap["@type"])
+ assert.Equal(t, "relation:or", wrap["relation"])
+ operands := wrap["operands"].([]any)
+ assert.Len(t, operands, 2)
+ })
+
+ t.Run("Bare VERB (BtoA) maps to STTS verb subtypes disjunction", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "VERB",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:termGroup", wrap["@type"])
+ assert.Equal(t, "relation:or", wrap["relation"])
+ operands := wrap["operands"].([]any)
+ assert.Len(t, operands, 8)
+ })
+
+ t.Run("Bare AUX (BtoA) maps to AUX subtypes disjunction", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "AUX",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:termGroup", wrap["@type"])
+ assert.Equal(t, "relation:or", wrap["relation"])
+ operands := wrap["operands"].([]any)
+ assert.Len(t, operands, 4)
+ })
+
+ t.Run("Forward direction AtoB: ADJA maps to ADJ", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "ADJA",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: AtoB}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:term", wrap["@type"])
+ assert.Equal(t, "ADJ", wrap["key"])
+ })
+
+ t.Run("Forward direction AtoB: ART maps to DET & PronType=Art", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "ART",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: AtoB}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:termGroup", wrap["@type"])
+ assert.Equal(t, "relation:and", wrap["relation"])
+ })
+}
+
+func TestOriginalProblemMultiTokenQuery(t *testing.T) {
+ m := newSTTSUPoSMapper(t)
+
+ t.Run("Multi-token [DET][ADJ][NOUN] BtoA produces correct disjunctions", func(t *testing.T) {
+ // This reproduces the exact problem from the issue:
+ // [upos/p=DET][upos/p=ADJ][upos/p=NOUN] mapped B->A
+ input := `{
+ "@type": "koral:group",
+ "operation": "operation:sequence",
+ "operands": [
+ {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ },
+ {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "ADJ",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ },
+ {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "NOUN",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }
+ ]
+ }`
+
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ operands := resultMap["operands"].([]any)
+ require.Len(t, operands, 3)
+
+ // Token 1: DET -> ART | PDAT | PIAT | PIDAT | PPOSAT | PRELAT | PWAT
+ token1 := operands[0].(map[string]any)
+ wrap1 := token1["wrap"].(map[string]any)
+ assert.Equal(t, "koral:termGroup", wrap1["@type"], "DET should be mapped to OR group")
+ assert.Equal(t, "relation:or", wrap1["relation"])
+ ops1 := wrap1["operands"].([]any)
+ assert.Len(t, ops1, 7, "DET fallback should have 7 alternatives")
+
+ // Token 2: ADJ -> ADJA | ADJD
+ token2 := operands[1].(map[string]any)
+ wrap2 := token2["wrap"].(map[string]any)
+ assert.Equal(t, "koral:termGroup", wrap2["@type"], "ADJ should be mapped to OR group")
+ assert.Equal(t, "relation:or", wrap2["relation"])
+ ops2 := wrap2["operands"].([]any)
+ assert.Len(t, ops2, 2, "ADJ fallback should have 2 alternatives")
+
+ adjKeys := []string{
+ ops2[0].(map[string]any)["key"].(string),
+ ops2[1].(map[string]any)["key"].(string),
+ }
+ assert.Contains(t, adjKeys, "ADJA")
+ assert.Contains(t, adjKeys, "ADJD")
+
+ // Token 3: NOUN -> NN (specific rule, not fallback, because
+ // [NN] <> [NOUN] has specificity 1 and [NN | NE] <> [NOUN | PROPN]
+ // has pattern specificity 0 on B-side (OR group))
+ token3 := operands[2].(map[string]any)
+ wrap3 := token3["wrap"].(map[string]any)
+ assert.Equal(t, "koral:term", wrap3["@type"], "NOUN should map to single NN term")
+ assert.Equal(t, "NN", wrap3["key"])
+ })
+
+ t.Run("Specific input [ADJ & Variant=Short] maps to ADJD only", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "ADJ",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "Short",
+ "layer": "Variant",
+ "match": "match:eq"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`
+
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:term", wrap["@type"])
+ assert.Equal(t, "ADJD", wrap["key"])
+ })
+
+ t.Run("Specific input [DET & PronType=Art] maps to ART only", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "Art",
+ "layer": "PronType",
+ "match": "match:eq"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`
+
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:term", wrap["@type"])
+ assert.Equal(t, "ART", wrap["key"])
+ })
+}
+
+func TestSpecificityBasedRuleSelection(t *testing.T) {
+ t.Run("More specific rule wins over less specific", func(t *testing.T) {
+ mappingList := config.MappingList{
+ ID: "spec-test",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "upos",
+ LayerB: "p",
+ Mappings: []config.MappingRule{
+ "[ADJA] <> [ADJ]",
+ "[ADJD] <> [ADJ & Variant=Short]",
+ },
+ }
+
+ m, err := NewMapper([]config.MappingList{mappingList})
+ require.NoError(t, err)
+
+ // Input: ADJ & Variant=Short — matches the internal representation
+ // where "Variant=Short" is parsed as layer="Variant", key="Short"
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "ADJ",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "Short",
+ "layer": "Variant",
+ "match": "match:eq"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`
+
+ var inputData any
+ err = json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("spec-test", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:term", wrap["@type"])
+ assert.Equal(t, "ADJD", wrap["key"])
+ })
+
+ t.Run("Same specificity - first rule in file order wins", func(t *testing.T) {
+ mappingList := config.MappingList{
+ ID: "tie-test",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "upos",
+ LayerB: "p",
+ Mappings: []config.MappingRule{
+ "[KOUI] <> [SCONJ]",
+ "[KOUS] <> [SCONJ]",
+ },
+ }
+
+ m, err := NewMapper([]config.MappingList{mappingList})
+ require.NoError(t, err)
+
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "SCONJ",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+
+ var inputData any
+ err = json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("tie-test", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "KOUI", wrap["key"])
+ })
+
+ t.Run("Single matching rule - identical to first-match-wins", func(t *testing.T) {
+ mappingList := config.MappingList{
+ ID: "single-test",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "upos",
+ LayerB: "p",
+ Mappings: []config.MappingRule{
+ "[NN] <> [NOUN]",
+ },
+ }
+
+ m, err := NewMapper([]config.MappingList{mappingList})
+ require.NoError(t, err)
+
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "NOUN",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+
+ var inputData any
+ err = json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("single-test", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "NN", wrap["key"])
+ })
+
+ t.Run("No matching rule - node passes through unchanged", func(t *testing.T) {
+ mappingList := config.MappingList{
+ ID: "nomatch-test",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "upos",
+ LayerB: "p",
+ Mappings: []config.MappingRule{
+ "[NN] <> [NOUN]",
+ },
+ }
+
+ m, err := NewMapper([]config.MappingList{mappingList})
+ require.NoError(t, err)
+
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "VERB",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+
+ var inputData any
+ err = json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("nomatch-test", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "VERB", wrap["key"])
+ })
+
+ t.Run("Fallback OR-disjunction rule wins over specific rule on pattern-specificity tie", func(t *testing.T) {
+ mappingList := config.MappingList{
+ ID: "fallback-test",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "upos",
+ LayerB: "p",
+ Mappings: []config.MappingRule{
+ "[ADJA] <> [ADJ]",
+ "[ADJA | ADJD] <> [ADJ]",
+ },
+ }
+
+ m, err := NewMapper([]config.MappingList{mappingList})
+ require.NoError(t, err)
+
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "ADJ",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+
+ var inputData any
+ err = json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("fallback-test", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ // Both rules match the bare ADJ term with pattern specificity 1 on the B-side, so the tie-breaker decides.
+ // Rule 1 replacement specificity = 1 (Term), Rule 2 replacement specificity = 0 (OR group).
+ // Lower replacement specificity wins (broader/fallback output) => rule 2 wins despite coming second.
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:termGroup", wrap["@type"])
+ assert.Equal(t, "relation:or", wrap["relation"])
+ })
+}
diff --git a/mapper/query.go b/mapper/query.go
index 95b651c..ec52da3 100644
--- a/mapper/query.go
+++ b/mapper/query.go
@@ -96,7 +96,8 @@
}
patternCache := make(map[patternCacheKey]ast.Node)
- for i, rule := range rules {
+ // getProcessedPattern returns a cached, override-applied clone of a rule's pattern.
+ getProcessedPattern := func(i int, rule *parser.MappingResult) (ast.Node, ast.Node, ast.Node, error) {
var pattern, replacement ast.Node
if opts.Direction {
pattern = rule.Upper
@@ -105,7 +106,6 @@
pattern = rule.Lower
replacement = rule.Upper
}
-
if token, ok := pattern.(*ast.Token); ok {
pattern = token.Wrap
}
@@ -122,17 +122,40 @@
}
patternCache[patternKey] = processedPattern
}
+ return processedPattern, replacement, pattern, nil
+ }
- // Probe for a match before cloning the replacement (lazy evaluation)
- tempMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: &ast.Term{}})
- if err != nil {
- return nil, fmt.Errorf("failed to create temporary matcher: %w", err)
+ // applyBestRule applies the best-matching rule (by specificity) to a single node.
+ applyBestRule := func(target ast.Node) (ast.Node, error) {
+ var candidates []matchCandidate
+ for i, rule := range rules {
+ processedPattern, replacement, _, err := getProcessedPattern(i, rule)
+ if err != nil {
+ return nil, err
+ }
+ tempMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: &ast.Term{}})
+ if err != nil {
+ return nil, fmt.Errorf("failed to create temporary matcher: %w", err)
+ }
+ if !tempMatcher.Match(target) {
+ continue
+ }
+ candidates = append(candidates, matchCandidate{
+ ruleIndex: i,
+ patternSpecificity: ast.Specificity(processedPattern),
+ replacementSpecificity: ast.Specificity(replacement),
+ })
}
- if !tempMatcher.Match(node) {
- continue
+ if len(candidates) == 0 {
+ return target, nil
}
- replacementKey := patternCacheKey{ruleIndex: i, foundry: replacementFoundry, layer: replacementLayer, isReplacement: true}
+ best := selectBestCandidate(candidates)
+
+ rule := rules[best.ruleIndex]
+ processedPattern, replacement, _, _ := getProcessedPattern(best.ruleIndex, rule)
+
+ replacementKey := patternCacheKey{ruleIndex: best.ruleIndex, foundry: replacementFoundry, layer: replacementLayer, isReplacement: true}
processedReplacement, exists := patternCache[replacementKey]
if !exists {
processedReplacement = replacement.Clone()
@@ -144,7 +167,7 @@
var beforeNode ast.Node
if opts.AddRewrites {
- beforeNode = node.Clone()
+ beforeNode = target.Clone()
}
// Collect pre-existing rewrites before replacement so they
@@ -155,15 +178,41 @@
if err != nil {
return nil, fmt.Errorf("failed to create matcher: %w", err)
}
- node = actualMatcher.Replace(node)
+ result := actualMatcher.Replace(target)
- // Carry forward pre-existing rewrites from earlier cascade steps.
if len(existingRewrites) > 0 {
- prependRewrites(node, existingRewrites)
+ prependRewrites(result, existingRewrites)
}
if opts.AddRewrites {
- recordRewrites(node, beforeNode)
+ recordRewrites(result, beforeNode)
+ }
+ return result, nil
+ }
+
+ // For CatchallNodes (any complex KoralQuery operation like sequence,
+ // disjunction, or position), apply best-rule selection per operand
+ // so each token gets its own best-matching rule.
+ if catchall, ok := node.(*ast.CatchallNode); ok && len(catchall.Operands) > 0 {
+ newOperands := make([]ast.Node, len(catchall.Operands))
+ for i, op := range catchall.Operands {
+ replaced, err := applyBestRule(op)
+ if err != nil {
+ return nil, err
+ }
+ newOperands[i] = replaced
+ }
+ node = &ast.CatchallNode{
+ NodeType: catchall.NodeType,
+ RawContent: catchall.RawContent,
+ Wrap: catchall.Wrap,
+ Operands: newOperands,
+ }
+ } else {
+ var err error
+ node, err = applyBestRule(node)
+ if err != nil {
+ return nil, err
}
}
@@ -229,6 +278,31 @@
return resultData, nil
}
+// selectBestCandidate returns the winner among candidates (which must be non-empty), ranked by:
+// 1. Highest pattern specificity (most features matched)
+// 2. Lowest replacement specificity (broadest/fallback output)
+// 3. First in file order (lowest ruleIndex), compared explicitly so the input order does not matter
+func selectBestCandidate(candidates []matchCandidate) matchCandidate {
+	best := candidates[0]
+	for _, c := range candidates[1:] {
+		samePattern := c.patternSpecificity == best.patternSpecificity
+		sameOutput := samePattern && c.replacementSpecificity == best.replacementSpecificity
+		if c.patternSpecificity > best.patternSpecificity ||
+			(samePattern && c.replacementSpecificity < best.replacementSpecificity) ||
+			(sameOutput && c.ruleIndex < best.ruleIndex) {
+			best = c
+		}
+	}
+	return best
+}
+
+// matchCandidate describes one rule that matched a node, together with the scores selectBestCandidate ranks by.
+type matchCandidate struct {
+ ruleIndex int // position of the rule in the rule list (file order); lowest index wins a full tie
+ patternSpecificity int // ast.Specificity of the rule's processed pattern; higher is preferred
+ replacementSpecificity int // ast.Specificity of the rule's replacement side; lower is preferred
+}
+
// recordRewrites compares the new node against the before-snapshot and
// attaches rewrite entries to any changed nodes. It handles both simple
// nodes (Term, TermGroup) and container nodes (CatchallNode with operands).
@@ -237,8 +311,9 @@
return
}
- // For CatchallNodes with operands (e.g. token sequences), attach
- // per-operand rewrites so each changed token gets its own annotation.
+ // For CatchallNodes with operands (e.g. any complex KoralQuery
+ // operation), attach per-operand rewrites so each changed token
+ // gets its own annotation.
if newCatchall, ok := newNode.(*ast.CatchallNode); ok {
if oldCatchall, ok := beforeNode.(*ast.CatchallNode); ok && len(newCatchall.Operands) > 0 {
for i, newOp := range newCatchall.Operands {
diff --git a/mapper/response_test.go b/mapper/response_test.go
index 8452730..30aee07 100644
--- a/mapper/response_test.go
+++ b/mapper/response_test.go
@@ -1091,3 +1091,77 @@
assert.NotContains(t, snippet, `title="opennlp/p:DT" class="notinindex"`)
})
}
+
+func TestResponseMappingWithFallbackRules(t *testing.T) {
+ t.Run("Fallback OR-group replacement is discarded by RestrictToObligatory", func(t *testing.T) {
+ responseSnippet := `{
+ "snippet": "<span title=\"upos/p:ADJ\">schön</span>"
+ }`
+
+ mappingList := config.MappingList{
+ ID: "test-fallback-resp",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "upos",
+ LayerB: "p",
+ Mappings: []config.MappingRule{
+ "[ADJA] <> [ADJ]",
+ "[ADJD] <> [ADJ & Variant=Short]",
+ "[ADJA | ADJD] <> [ADJ]",
+ },
+ }
+
+ m, err := NewMapper([]config.MappingList{mappingList})
+ require.NoError(t, err)
+
+ var inputData any
+ err = json.Unmarshal([]byte(responseSnippet), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyResponseMappings("test-fallback-resp", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ snippet := resultMap["snippet"].(string)
+
+ // The specific rule [ADJA] <> [ADJ] should add opennlp/p:ADJA annotation
+ assert.Contains(t, snippet, `title="opennlp/p:ADJA" class="notinindex"`)
+ // The fallback rule [ADJA | ADJD] <> [ADJ] should NOT add an annotation
+ // because RestrictToObligatory discards OR groups
+ assert.NotContains(t, snippet, `title="opennlp/p:ADJD" class="notinindex"`)
+ })
+
+ t.Run("Specific DET rule produces annotation, fallback does not", func(t *testing.T) {
+ responseSnippet := `{
+ "snippet": "<span title=\"upos/p:DET\"><span title=\"upos/PronType:Art\">Der</span></span>"
+ }`
+
+ mappingList := config.MappingList{
+ ID: "test-det-resp",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "upos",
+ LayerB: "p",
+ Mappings: []config.MappingRule{
+ "[ART] <> [DET & PronType=Art]",
+ "[ART | PDAT | PIAT | PIDAT | PPOSAT | PRELAT | PWAT] <> [DET]",
+ },
+ }
+
+ m, err := NewMapper([]config.MappingList{mappingList})
+ require.NoError(t, err)
+
+ var inputData any
+ err = json.Unmarshal([]byte(responseSnippet), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyResponseMappings("test-det-resp", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ snippet := resultMap["snippet"].(string)
+
+ // The specific rule should add ART annotation
+ assert.Contains(t, snippet, `title="opennlp/p:ART" class="notinindex"`)
+ })
+}