Reject identical source/target in annotation and corpus mappings
Change-Id: I09a410e5d42392680c1ac1c5c9928e3a37aca0cc
diff --git a/cmd/koralmapper/main_test.go b/cmd/koralmapper/main_test.go
index ee6e067..2d0b85b 100644
--- a/cmd/koralmapper/main_test.go
+++ b/cmd/koralmapper/main_test.go
@@ -1622,13 +1622,13 @@
- id: step1
foundryA: opennlp
layerA: p
- foundryB: opennlp
+ foundryB: stts
layerB: p
rewrites: true
mappings:
- "[PIDAT] <> [DET]"
- id: step2
- foundryA: opennlp
+ foundryA: stts
layerA: p
foundryB: upos
layerB: p
@@ -1675,6 +1675,12 @@
map[string]any{
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
+ "scope": "foundry",
+ "original": "opennlp",
+ },
+ map[string]any{
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
"scope": "key",
"original": "PIDAT",
},
@@ -1682,7 +1688,7 @@
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
"scope": "foundry",
- "original": "opennlp",
+ "original": "stts",
},
map[string]any{
"@type": "koral:rewrite",
diff --git a/mapper/cascade_test.go b/mapper/cascade_test.go
index 3dec81c..b46f3ac 100644
--- a/mapper/cascade_test.go
+++ b/mapper/cascade_test.go
@@ -21,11 +21,11 @@
m, err := NewMapper([]config.MappingList{
{
ID: "ann-step1", FoundryA: "opennlp", LayerA: "p",
- FoundryB: "opennlp", LayerB: "p",
+ FoundryB: "stts", LayerB: "p",
Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
},
{
- ID: "ann-step2", FoundryA: "opennlp", LayerA: "p",
+ ID: "ann-step2", FoundryA: "stts", LayerA: "p",
FoundryB: "upos", LayerB: "p",
Mappings: []config.MappingRule{`[DET] <> [PRON]`},
},
@@ -193,17 +193,17 @@
// --- Response cascade tests ---
func TestCascadeQueryRewritesPreservedAcrossSteps(t *testing.T) {
- // Step 1 changes key (PIDAT->DET) within same foundry/layer.
- // Step 2 changes foundry+key (DET->PRON, opennlp->upos).
+ // Step 1 changes foundry (opennlp->stts) and key (PIDAT->DET).
+ // Step 2 changes foundry (stts->upos) and key (DET->PRON).
// Rewrites from step 1 must survive step 2's replacement.
m, err := NewMapper([]config.MappingList{
{
ID: "step1", FoundryA: "opennlp", LayerA: "p",
- FoundryB: "opennlp", LayerB: "p",
+ FoundryB: "stts", LayerB: "p",
Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
},
{
- ID: "step2", FoundryA: "opennlp", LayerA: "p",
+ ID: "step2", FoundryA: "stts", LayerA: "p",
FoundryB: "upos", LayerB: "p",
Mappings: []config.MappingRule{`[DET] <> [PRON]`},
},
@@ -232,8 +232,8 @@
require.NoError(t, err)
// After both steps, the term should have rewrites from both steps:
- // step 1 recorded scope=key original=PIDAT,
- // step 2 recorded scope=foundry original=opennlp and scope=key original=DET.
+ // step 1 recorded scope=foundry original=opennlp and scope=key original=PIDAT,
+ // step 2 recorded scope=foundry original=stts and scope=key original=DET.
expected := parseJSON(t, `{
"@type": "koral:token",
"wrap": {
@@ -246,6 +246,12 @@
{
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
+ "scope": "foundry",
+ "original": "opennlp"
+ },
+ {
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
"scope": "key",
"original": "PIDAT"
},
@@ -253,7 +259,7 @@
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
"scope": "foundry",
- "original": "opennlp"
+ "original": "stts"
},
{
"@type": "koral:rewrite",
@@ -268,18 +274,18 @@
}
func TestCascadeQueryRewritesPreservedStructuralChange(t *testing.T) {
- // Step 1 changes key (PIDAT->DET) and records a scoped rewrite.
+ // Step 1 changes foundry (opennlp->stts) and key (PIDAT->DET).
// Step 2 replaces Term with TermGroup (structural change).
// Rewrites from step 1 must be carried into the new TermGroup.
m, err := NewMapper([]config.MappingList{
{
ID: "sc-step1", FoundryA: "opennlp", LayerA: "p",
- FoundryB: "opennlp", LayerB: "p",
+ FoundryB: "stts", LayerB: "p",
Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
},
{
- ID: "sc-step2", FoundryA: "opennlp", LayerA: "p",
- FoundryB: "opennlp", LayerB: "p",
+ ID: "sc-step2", FoundryA: "stts", LayerA: "p",
+ FoundryB: "tt", LayerB: "pos",
Mappings: []config.MappingRule{`[DET] <> [opennlp/p=DET & opennlp/p=PronType:Art]`},
},
})
@@ -306,17 +312,23 @@
)
require.NoError(t, err)
- // Step 1 rewrites (scope=key, original=PIDAT) must appear on the
- // TermGroup created by step 2, along with step 2's own structural rewrite.
+ // Step 1 rewrites (scope=foundry original=opennlp, scope=key original=PIDAT)
+ // must appear on the TermGroup created by step 2, along with step 2's
+ // own structural rewrite.
resultMap := result.(map[string]any)
wrap := resultMap["wrap"].(map[string]any)
require.Equal(t, "koral:termGroup", wrap["@type"])
rewrites := wrap["rewrites"].([]any)
- // First rewrite is from step 1 (carried forward)
+ // First rewrite is from step 1 (carried forward): foundry change
rw0 := rewrites[0].(map[string]any)
- assert.Equal(t, "key", rw0["scope"])
- assert.Equal(t, "PIDAT", rw0["original"])
+ assert.Equal(t, "foundry", rw0["scope"])
+ assert.Equal(t, "opennlp", rw0["original"])
+
+ // Second rewrite is from step 1 (carried forward): key change
+ rw1 := rewrites[1].(map[string]any)
+ assert.Equal(t, "key", rw1["scope"])
+ assert.Equal(t, "PIDAT", rw1["original"])
// Last rewrite is from step 2 (structural: original is the full term)
rwLast := rewrites[len(rewrites)-1].(map[string]any)
diff --git a/mapper/mapper.go b/mapper/mapper.go
index 2085093..f72c5c5 100644
--- a/mapper/mapper.go
+++ b/mapper/mapper.go
@@ -91,6 +91,57 @@
AddRewrites bool
}
+// validateEffectiveOptions checks that the resolved source and target
+// identifiers are not identical, which would cause an infinite mapping loop.
+// For annotation mappings it compares the effective foundry+layer pair;
+// for corpus mappings it compares the effective field names.
+// The effective value is: query-parameter override if non-empty, otherwise
+// the YAML list default.
+func (m *Mapper) validateEffectiveOptions(mappingID string, opts MappingOptions) error {
+ list, exists := m.mappingLists[mappingID]
+ if !exists {
+ return nil // will be caught later
+ }
+
+ if list.IsCorpus() {
+ effFieldA := opts.FieldA
+ if effFieldA == "" {
+ effFieldA = list.FieldA
+ }
+ effFieldB := opts.FieldB
+ if effFieldB == "" {
+ effFieldB = list.FieldB
+ }
+ if effFieldA != "" && effFieldA == effFieldB {
+ return fmt.Errorf("identical source and target field (fieldA == fieldB == %q) in mapping list '%s': this would cause an infinite mapping loop", effFieldA, mappingID)
+ }
+ return nil
+ }
+
+ effFoundryA := opts.FoundryA
+ if effFoundryA == "" {
+ effFoundryA = list.FoundryA
+ }
+ effLayerA := opts.LayerA
+ if effLayerA == "" {
+ effLayerA = list.LayerA
+ }
+ effFoundryB := opts.FoundryB
+ if effFoundryB == "" {
+ effFoundryB = list.FoundryB
+ }
+ effLayerB := opts.LayerB
+ if effLayerB == "" {
+ effLayerB = list.LayerB
+ }
+
+ if effFoundryA != "" && effFoundryA == effFoundryB && effLayerA == effLayerB {
+ return fmt.Errorf("identical source and target (foundryA/layerA == foundryB/layerB == %q/%q) in mapping list '%s': this would cause an infinite mapping loop", effFoundryA, effLayerA, mappingID)
+ }
+
+ return nil
+}
+
// CascadeQueryMappings applies multiple mapping lists sequentially,
// feeding the output of each into the next. orderedIDs and
// perMappingOpts must have the same length. An empty list returns
diff --git a/mapper/mapper_test.go b/mapper/mapper_test.go
index 4d3a411..4e08d14 100644
--- a/mapper/mapper_test.go
+++ b/mapper/mapper_test.go
@@ -440,8 +440,8 @@
ID: "test-token-to-termgroup",
FoundryA: "opennlp",
LayerA: "p",
- FoundryB: "opennlp", // Keep the same foundry for both sides
- LayerB: "p",
+ FoundryB: "tt",
+ LayerB: "pos",
Mappings: []config.MappingRule{
"[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
},
@@ -809,7 +809,7 @@
FoundryA: "opennlp",
LayerA: "p",
FoundryB: "opennlp",
- LayerB: "p",
+ LayerB: "pos",
Mappings: []config.MappingRule{
"[DET] <> [PRON]",
},
@@ -844,12 +844,18 @@
"@type": "koral:term",
"foundry": "opennlp",
"key": "PRON",
- "layer": "p",
+ "layer": "pos",
"match": "match:eq",
"rewrites": [
{
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
+ "scope": "layer",
+ "original": "p"
+ },
+ {
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
"scope": "key",
"original": "DET"
}
@@ -1120,3 +1126,211 @@
})
}
}
+
+func TestIdenticalEffectiveFoundryLayerRejected(t *testing.T) {
+ tests := []struct {
+ name string
+ list config.MappingList
+ opts MappingOptions
+ wantErr string
+ }{
+ {
+ name: "YAML defaults identical",
+ list: config.MappingList{
+ ID: "test", FoundryA: "opennlp", LayerA: "p",
+ FoundryB: "opennlp", LayerB: "p",
+ Mappings: []config.MappingRule{"[A] <> [B]"},
+ },
+ opts: MappingOptions{Direction: AtoB},
+ wantErr: "identical source and target",
+ },
+ {
+ name: "Query param override makes them identical",
+ list: config.MappingList{
+ ID: "test", FoundryA: "opennlp", LayerA: "p",
+ FoundryB: "upos", LayerB: "p",
+ Mappings: []config.MappingRule{"[A] <> [B]"},
+ },
+ opts: MappingOptions{Direction: AtoB, FoundryB: "opennlp"},
+ wantErr: "identical source and target",
+ },
+ {
+ name: "Query param override resolves the conflict",
+ list: config.MappingList{
+ ID: "test", FoundryA: "opennlp", LayerA: "p",
+ FoundryB: "opennlp", LayerB: "p",
+ Mappings: []config.MappingRule{"[A] <> [B]"},
+ },
+ opts: MappingOptions{Direction: AtoB, FoundryB: "upos"},
+ wantErr: "",
+ },
+ {
+ name: "Different foundry same layer is allowed",
+ list: config.MappingList{
+ ID: "test", FoundryA: "opennlp", LayerA: "p",
+ FoundryB: "upos", LayerB: "p",
+ Mappings: []config.MappingRule{"[A] <> [B]"},
+ },
+ opts: MappingOptions{Direction: AtoB},
+ wantErr: "",
+ },
+ {
+ name: "Both foundries empty is allowed",
+ list: config.MappingList{
+ ID: "test",
+ Mappings: []config.MappingRule{"[A] <> [B]"},
+ },
+ opts: MappingOptions{Direction: AtoB},
+ wantErr: "",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ m, err := NewMapper([]config.MappingList{tt.list})
+ require.NoError(t, err)
+
+ input := map[string]any{
+ "@type": "koral:token",
+ "wrap": map[string]any{
+ "@type": "koral:term",
+ "key": "A",
+ },
+ }
+
+ _, err = m.ApplyQueryMappings("test", tt.opts, input)
+ if tt.wantErr != "" {
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), tt.wantErr)
+ } else {
+ assert.NoError(t, err)
+ }
+ })
+ }
+}
+
+func TestIdenticalEffectiveFieldRejected(t *testing.T) {
+ tests := []struct {
+ name string
+ list config.MappingList
+ opts MappingOptions
+ wantErr string
+ }{
+ {
+ name: "YAML defaults identical",
+ list: config.MappingList{
+ ID: "test", Type: "corpus",
+ FieldA: "textClass", FieldB: "textClass",
+ Mappings: []config.MappingRule{"novel <> fiction"},
+ },
+ opts: MappingOptions{Direction: AtoB},
+ wantErr: "identical source and target field",
+ },
+ {
+ name: "Query param override makes them identical",
+ list: config.MappingList{
+ ID: "test", Type: "corpus",
+ FieldA: "textClass", FieldB: "genre",
+ Mappings: []config.MappingRule{"novel <> fiction"},
+ },
+ opts: MappingOptions{Direction: AtoB, FieldB: "textClass"},
+ wantErr: "identical source and target field",
+ },
+ {
+ name: "Query param override resolves the conflict",
+ list: config.MappingList{
+ ID: "test", Type: "corpus",
+ FieldA: "textClass", FieldB: "textClass",
+ Mappings: []config.MappingRule{"novel <> fiction"},
+ },
+ opts: MappingOptions{Direction: AtoB, FieldB: "genre"},
+ wantErr: "",
+ },
+ {
+ name: "Different fields is allowed",
+ list: config.MappingList{
+ ID: "test", Type: "corpus",
+ FieldA: "textClass", FieldB: "genre",
+ Mappings: []config.MappingRule{"novel <> fiction"},
+ },
+ opts: MappingOptions{Direction: AtoB},
+ wantErr: "",
+ },
+ {
+ name: "Both fields empty is allowed",
+ list: config.MappingList{
+ ID: "test", Type: "corpus",
+ Mappings: []config.MappingRule{"textClass=novel <> genre=fiction"},
+ },
+ opts: MappingOptions{Direction: AtoB},
+ wantErr: "",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ m, err := NewMapper([]config.MappingList{tt.list})
+ require.NoError(t, err)
+
+ input := map[string]any{
+ "collection": map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "novel",
+ "match": "match:eq",
+ },
+ }
+
+ _, err = m.ApplyQueryMappings("test", tt.opts, input)
+ if tt.wantErr != "" {
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), tt.wantErr)
+ } else {
+ assert.NoError(t, err)
+ }
+ })
+ }
+}
+
+func TestIdenticalEffectiveValuesResponseEndpoint(t *testing.T) {
+ t.Run("annotation response rejects identical effective foundry/layer", func(t *testing.T) {
+ m, err := NewMapper([]config.MappingList{{
+ ID: "test", FoundryA: "marmot", LayerA: "p",
+ FoundryB: "marmot", LayerB: "p",
+ Mappings: []config.MappingRule{"[DET] <> [PRON]"},
+ }})
+ require.NoError(t, err)
+
+ input := map[string]any{
+ "snippet": `<span title="marmot/p:DET">Der</span>`,
+ }
+
+ _, err = m.ApplyResponseMappings("test", MappingOptions{Direction: AtoB}, input)
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), "identical source and target")
+ })
+
+ t.Run("corpus response rejects identical effective field", func(t *testing.T) {
+ m, err := NewMapper([]config.MappingList{{
+ ID: "test", Type: "corpus",
+ FieldA: "textClass", FieldB: "textClass",
+ Mappings: []config.MappingRule{"novel <> fiction"},
+ }})
+ require.NoError(t, err)
+
+ input := map[string]any{
+ "fields": []any{
+ map[string]any{
+ "@type": "koral:field",
+ "key": "textClass",
+ "value": "novel",
+ "type": "type:string",
+ },
+ },
+ }
+
+ _, err = m.ApplyResponseMappings("test", MappingOptions{Direction: AtoB}, input)
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), "identical source and target field")
+ })
+}
diff --git a/mapper/query.go b/mapper/query.go
index 72d1398..62022e5 100644
--- a/mapper/query.go
+++ b/mapper/query.go
@@ -17,6 +17,10 @@
return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
}
+ if err := m.validateEffectiveOptions(mappingID, opts); err != nil {
+ return nil, err
+ }
+
if m.mappingLists[mappingID].IsCorpus() {
return m.applyCorpusQueryMappings(mappingID, opts, jsonData)
}
diff --git a/mapper/response.go b/mapper/response.go
index e870a29..3ac6b02 100644
--- a/mapper/response.go
+++ b/mapper/response.go
@@ -19,6 +19,10 @@
return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
}
+ if err := m.validateEffectiveOptions(mappingID, opts); err != nil {
+ return nil, err
+ }
+
if m.mappingLists[mappingID].IsCorpus() {
return m.applyCorpusResponseMappings(mappingID, opts, jsonData)
}