Update specificity rule matching for annotations
Change-Id: Ifa7ec5eef3583cb196f4aa1ca0cfcd65790de226
diff --git a/mapper/mapper_test.go b/mapper/mapper_test.go
index 13cb7a8..8d3e691 100644
--- a/mapper/mapper_test.go
+++ b/mapper/mapper_test.go
@@ -2,6 +2,7 @@
import (
"encoding/json"
+ "os"
"testing"
"github.com/KorAP/Koral-Mapper/ast"
@@ -9,6 +10,7 @@
"github.com/KorAP/Koral-Mapper/matcher"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
+ "gopkg.in/yaml.v3"
)
func TestMapper(t *testing.T) {
@@ -1309,3 +1311,664 @@
assert.Contains(t, err.Error(), "identical source and target field")
})
}
+
+func newSTTSUPoSMapper(t *testing.T) *Mapper { // test helper: builds a Mapper from ../mappings/stts-upos.yaml; any failure aborts the calling test
+ t.Helper()
+ raw, readErr := os.ReadFile("../mappings/stts-upos.yaml") // path is relative to the directory the test runs in
+ require.NoError(t, readErr, "failed to read stts-upos.yaml from disk")
+
+ var list config.MappingList
+ parseErr := yaml.Unmarshal(raw, &list)
+ require.NoError(t, parseErr, "failed to parse stts-upos.yaml")
+
+ mapper, buildErr := NewMapper([]config.MappingList{list})
+ require.NoError(t, buildErr)
+ return mapper
+}
+
+func TestFallbackRules(t *testing.T) { // runs single-token queries through the "stts-upos" mapping list loaded from disk, BtoA and AtoB
+ m := newSTTSUPoSMapper(t) // one mapper shared by every subtest below
+
+ t.Run("Bare ADJ (BtoA) maps to ADJA|ADJD disjunction", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "ADJ",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ require.Equal(t, "koral:termGroup", wrap["@type"]) // fatal: the operands type assertion below panics if wrap is not a group
+ assert.Equal(t, "relation:or", wrap["relation"])
+ operands := wrap["operands"].([]any)
+ require.Len(t, operands, 2) // fatal: the indexing below panics on a shorter slice
+ keys := []string{
+ operands[0].(map[string]any)["key"].(string),
+ operands[1].(map[string]any)["key"].(string),
+ }
+ assert.Contains(t, keys, "ADJA") // order of the two alternatives is deliberately not pinned
+ assert.Contains(t, keys, "ADJD")
+ })
+
+ t.Run("ADJ & Variant=Short (BtoA) maps to ADJD only", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "ADJ",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "Short",
+ "layer": "Variant",
+ "match": "match:eq"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:term", wrap["@type"])
+ assert.Equal(t, "ADJD", wrap["key"])
+ })
+
+ t.Run("Bare DET (BtoA) maps to DET subtypes disjunction", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ require.Equal(t, "koral:termGroup", wrap["@type"]) // fatal: guards the operands type assertion below
+ assert.Equal(t, "relation:or", wrap["relation"])
+ operands := wrap["operands"].([]any)
+ assert.Len(t, operands, 7)
+ var keys []string
+ for _, op := range operands {
+ keys = append(keys, op.(map[string]any)["key"].(string))
+ }
+ assert.Contains(t, keys, "ART") // only three of the seven alternatives are spot-checked
+ assert.Contains(t, keys, "PDAT")
+ assert.Contains(t, keys, "PWAT")
+ })
+
+ t.Run("DET & PronType=Art (BtoA) maps to ART only", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "Art",
+ "layer": "PronType",
+ "match": "match:eq"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:term", wrap["@type"])
+ assert.Equal(t, "ART", wrap["key"])
+ })
+
+ t.Run("Bare SCONJ (BtoA) maps to KOUI|KOUS disjunction", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "SCONJ",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ require.Equal(t, "koral:termGroup", wrap["@type"]) // fatal: guards the operands type assertion below
+ assert.Equal(t, "relation:or", wrap["relation"])
+ operands := wrap["operands"].([]any)
+ assert.Len(t, operands, 2)
+ })
+
+ t.Run("Bare VERB (BtoA) maps to STTS verb subtypes disjunction", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "VERB",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ require.Equal(t, "koral:termGroup", wrap["@type"]) // fatal: guards the operands type assertion below
+ assert.Equal(t, "relation:or", wrap["relation"])
+ operands := wrap["operands"].([]any)
+ assert.Len(t, operands, 8)
+ })
+
+ t.Run("Bare AUX (BtoA) maps to AUX subtypes disjunction", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "AUX",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ require.Equal(t, "koral:termGroup", wrap["@type"]) // fatal: guards the operands type assertion below
+ assert.Equal(t, "relation:or", wrap["relation"])
+ operands := wrap["operands"].([]any)
+ assert.Len(t, operands, 4)
+ })
+
+ t.Run("Forward direction AtoB: ADJA maps to ADJ", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "ADJA",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: AtoB}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:term", wrap["@type"])
+ assert.Equal(t, "ADJ", wrap["key"])
+ })
+
+ t.Run("Forward direction AtoB: ART maps to DET & PronType=Art", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "ART",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: AtoB}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:termGroup", wrap["@type"]) // only the group shape is checked here, not its operands
+ assert.Equal(t, "relation:and", wrap["relation"])
+ })
+}
+
+func TestOriginalProblemMultiTokenQuery(t *testing.T) { // regression test: a three-token BtoA sequence plus two feature-qualified inputs, all against stts-upos.yaml
+ m := newSTTSUPoSMapper(t) // one mapper shared by every subtest below
+
+ t.Run("Multi-token [DET][ADJ][NOUN] BtoA produces correct disjunctions", func(t *testing.T) {
+ // This reproduces the exact problem from the issue:
+ // [upos/p=DET][upos/p=ADJ][upos/p=NOUN] mapped B->A
+ input := `{
+ "@type": "koral:group",
+ "operation": "operation:sequence",
+ "operands": [
+ {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ },
+ {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "ADJ",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ },
+ {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "NOUN",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }
+ ]
+ }`
+
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ operands := resultMap["operands"].([]any)
+ require.Len(t, operands, 3) // fatal: the three tokens are indexed individually below
+
+ // Token 1: DET -> ART | PDAT | PIAT | PIDAT | PPOSAT | PRELAT | PWAT
+ token1 := operands[0].(map[string]any)
+ wrap1 := token1["wrap"].(map[string]any)
+ require.Equal(t, "koral:termGroup", wrap1["@type"], "DET should be mapped to OR group") // fatal: guards the operands type assertion below
+ assert.Equal(t, "relation:or", wrap1["relation"])
+ ops1 := wrap1["operands"].([]any)
+ assert.Len(t, ops1, 7, "DET fallback should have 7 alternatives")
+
+ // Token 2: ADJ -> ADJA | ADJD
+ token2 := operands[1].(map[string]any)
+ wrap2 := token2["wrap"].(map[string]any)
+ require.Equal(t, "koral:termGroup", wrap2["@type"], "ADJ should be mapped to OR group") // fatal: guards the operands type assertion below
+ assert.Equal(t, "relation:or", wrap2["relation"])
+ ops2 := wrap2["operands"].([]any)
+ require.Len(t, ops2, 2, "ADJ fallback should have 2 alternatives") // fatal: the indexing below panics on a shorter slice
+
+ adjKeys := []string{
+ ops2[0].(map[string]any)["key"].(string),
+ ops2[1].(map[string]any)["key"].(string),
+ }
+ assert.Contains(t, adjKeys, "ADJA")
+ assert.Contains(t, adjKeys, "ADJD")
+
+ // Token 3: NOUN -> NN (specific rule, not fallback, because
+ // [NN] <> [NOUN] has specificity 1 and [NN | NE] <> [NOUN | PROPN]
+ // has pattern specificity 0 on B-side (OR group))
+ token3 := operands[2].(map[string]any)
+ wrap3 := token3["wrap"].(map[string]any)
+ assert.Equal(t, "koral:term", wrap3["@type"], "NOUN should map to single NN term")
+ assert.Equal(t, "NN", wrap3["key"])
+ })
+
+ t.Run("Specific input [ADJ & Variant=Short] maps to ADJD only", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "ADJ",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "Short",
+ "layer": "Variant",
+ "match": "match:eq"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`
+
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:term", wrap["@type"])
+ assert.Equal(t, "ADJD", wrap["key"])
+ })
+
+ t.Run("Specific input [DET & PronType=Art] maps to ART only", func(t *testing.T) {
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "Art",
+ "layer": "PronType",
+ "match": "match:eq"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`
+
+ var inputData any
+ err := json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:term", wrap["@type"])
+ assert.Equal(t, "ART", wrap["key"])
+ })
+}
+
+func TestSpecificityBasedRuleSelection(t *testing.T) { // each subtest builds its own small in-memory mapping list and pins which rule is applied BtoA
+ t.Run("More specific rule wins over less specific", func(t *testing.T) {
+ mappingList := config.MappingList{
+ ID: "spec-test",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "upos",
+ LayerB: "p",
+ Mappings: []config.MappingRule{
+ "[ADJA] <> [ADJ]",
+ "[ADJD] <> [ADJ & Variant=Short]",
+ },
+ }
+
+ m, err := NewMapper([]config.MappingList{mappingList})
+ require.NoError(t, err)
+
+ // Input: ADJ & Variant=Short — matches the internal representation
+ // where "Variant=Short" is parsed as layer="Variant", key="Short"
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "ADJ",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "Short",
+ "layer": "Variant",
+ "match": "match:eq"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`
+
+ var inputData any
+ err = json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("spec-test", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:term", wrap["@type"])
+ assert.Equal(t, "ADJD", wrap["key"]) // the second rule is expected to apply although it is listed after [ADJA] <> [ADJ]
+ })
+
+ t.Run("Same specificity - first rule in file order wins", func(t *testing.T) {
+ mappingList := config.MappingList{
+ ID: "tie-test",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "upos",
+ LayerB: "p",
+ Mappings: []config.MappingRule{
+ "[KOUI] <> [SCONJ]",
+ "[KOUS] <> [SCONJ]",
+ },
+ }
+
+ m, err := NewMapper([]config.MappingList{mappingList})
+ require.NoError(t, err)
+
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "SCONJ",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+
+ var inputData any
+ err = json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("tie-test", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "KOUI", wrap["key"]) // KOUI comes from the first of the two otherwise identical rules
+ })
+
+ t.Run("Single matching rule - identical to first-match-wins", func(t *testing.T) {
+ mappingList := config.MappingList{
+ ID: "single-test",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "upos",
+ LayerB: "p",
+ Mappings: []config.MappingRule{
+ "[NN] <> [NOUN]",
+ },
+ }
+
+ m, err := NewMapper([]config.MappingList{mappingList})
+ require.NoError(t, err)
+
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "NOUN",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+
+ var inputData any
+ err = json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("single-test", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "NN", wrap["key"])
+ })
+
+ t.Run("No matching rule - node passes through unchanged", func(t *testing.T) {
+ mappingList := config.MappingList{
+ ID: "nomatch-test",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "upos",
+ LayerB: "p",
+ Mappings: []config.MappingRule{
+ "[NN] <> [NOUN]",
+ },
+ }
+
+ m, err := NewMapper([]config.MappingList{mappingList})
+ require.NoError(t, err)
+
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "VERB",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+
+ var inputData any
+ err = json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("nomatch-test", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "VERB", wrap["key"]) // only the key is checked; the other fields of the term are not compared
+ })
+
+ t.Run("Fallback OR-disjunction rule wins over specific rule on pattern tie", func(t *testing.T) {
+ mappingList := config.MappingList{
+ ID: "fallback-test",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "upos",
+ LayerB: "p",
+ Mappings: []config.MappingRule{
+ "[ADJA] <> [ADJ]",
+ "[ADJA | ADJD] <> [ADJ]",
+ },
+ }
+
+ m, err := NewMapper([]config.MappingList{mappingList})
+ require.NoError(t, err)
+
+ input := `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "ADJ",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`
+
+ var inputData any
+ err = json.Unmarshal([]byte(input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("fallback-test", MappingOptions{Direction: BtoA}, inputData)
+ require.NoError(t, err)
+
+ // Both rules match with pattern specificity 1 on B-side.
+ // Rule 1 replacement specificity = 1 (Term), Rule 2 replacement specificity = 0 (OR group).
+ // Lower replacement specificity wins (broader/fallback output) => rule 2 wins.
+ resultMap := result.(map[string]any)
+ wrap := resultMap["wrap"].(map[string]any)
+ assert.Equal(t, "koral:termGroup", wrap["@type"])
+ assert.Equal(t, "relation:or", wrap["relation"])
+ })
+}