blob: b46f3acc2774de0de1b72889e92f0e6fc158a024 [file] [log] [blame]
package mapper
import (
"encoding/json"
"testing"
"github.com/KorAP/Koral-Mapper/config"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// parseJSON decodes the JSON document s into a generic Go value
// (maps, slices and scalars as produced by encoding/json).
// It is a test helper: when s is not valid JSON the calling test is
// failed through require.NoError.
func parseJSON(t *testing.T, s string) any {
	t.Helper()

	var decoded any
	err := json.Unmarshal([]byte(s), &decoded)
	require.NoError(t, err)
	return decoded
}
func TestCascadeQueryTwoAnnotationMappings(t *testing.T) {
m, err := NewMapper([]config.MappingList{
{
ID: "ann-step1", FoundryA: "opennlp", LayerA: "p",
FoundryB: "stts", LayerB: "p",
Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
},
{
ID: "ann-step2", FoundryA: "stts", LayerA: "p",
FoundryB: "upos", LayerB: "p",
Mappings: []config.MappingRule{`[DET] <> [PRON]`},
},
})
require.NoError(t, err)
input := parseJSON(t, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`)
result, err := m.CascadeQueryMappings(
[]string{"ann-step1", "ann-step2"},
[]MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
input,
)
require.NoError(t, err)
expected := parseJSON(t, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "upos",
"key": "PRON",
"layer": "p",
"match": "match:eq"
}
}`)
assert.Equal(t, expected, result)
}
// TestCascadeQueryMixAnnotationAndCorpus cascades an annotation mapping list
// and a corpus mapping list over a single request that carries both a
// "query" and a "collection" part. It expects the query term to be mapped
// to upos/DET and the collection document to be mapped to genre=fiction.
func TestCascadeQueryMixAnnotationAndCorpus(t *testing.T) {
	m, err := NewMapper([]config.MappingList{
		{
			ID: "ann-mapper", FoundryA: "opennlp", LayerA: "p",
			FoundryB: "upos", LayerB: "p",
			Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
		},
		{
			ID:       "corpus-mapper",
			Type:     "corpus",
			Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
		},
	})
	require.NoError(t, err)

	input := parseJSON(t, `{
"query": {
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
},
"collection": {
"@type": "koral:doc",
"key": "textClass",
"value": "novel",
"match": "match:eq"
}
}`)

	result, err := m.CascadeQueryMappings(
		[]string{"ann-mapper", "corpus-mapper"},
		[]MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
		input,
	)
	require.NoError(t, err)

	// object narrows v to a JSON object. A missing or mistyped node fails
	// this test with a readable message instead of panicking, which would
	// abort every other test in the package as well.
	object := func(v any, name string) map[string]any {
		t.Helper()
		obj, ok := v.(map[string]any)
		require.Truef(t, ok, "%s should be a JSON object, got %T", name, v)
		return obj
	}

	resultMap := object(result, "result")

	// Annotation step: the wrapped term must be rewritten.
	query := object(resultMap["query"], "query")
	wrap := object(query["wrap"], "query.wrap")
	assert.Equal(t, "DET", wrap["key"])
	assert.Equal(t, "upos", wrap["foundry"])

	// Corpus step: the collection document must be rewritten.
	collection := object(resultMap["collection"], "collection")
	assert.Equal(t, "genre", collection["key"])
	assert.Equal(t, "fiction", collection["value"])
}
func TestCascadeQuerySingleElement(t *testing.T) {
m, err := NewMapper([]config.MappingList{{
ID: "single", FoundryA: "opennlp", LayerA: "p",
FoundryB: "upos", LayerB: "p",
Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
}})
require.NoError(t, err)
makeInput := func() any {
return parseJSON(t, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`)
}
opts := MappingOptions{Direction: AtoB}
cascadeResult, err := m.CascadeQueryMappings(
[]string{"single"}, []MappingOptions{opts}, makeInput(),
)
require.NoError(t, err)
directResult, err := m.ApplyQueryMappings("single", opts, makeInput())
require.NoError(t, err)
assert.Equal(t, directResult, cascadeResult)
}
// TestCascadeQueryEmptyList checks that cascading with no mapping IDs and no
// options succeeds and leaves the query unchanged.
func TestCascadeQueryEmptyList(t *testing.T) {
	m, err := NewMapper([]config.MappingList{{
		ID: "dummy", FoundryA: "x", LayerA: "y",
		FoundryB: "a", LayerB: "b",
		Mappings: []config.MappingRule{`[X] <> [Y]`},
	}})
	require.NoError(t, err)

	const inputJSON = `{
"@type": "koral:token",
"wrap": {"@type": "koral:term", "key": "Z"}
}`
	// Decode the fixture twice. Comparing the result against the very value
	// handed to the mapper could never detect an in-place modification,
	// because both sides of the comparison would be the same object.
	input := parseJSON(t, inputJSON)
	want := parseJSON(t, inputJSON)

	result, err := m.CascadeQueryMappings(nil, nil, input)
	require.NoError(t, err)
	assert.Equal(t, want, result)
}
// TestCascadeQueryUnknownID checks that a cascade referencing a mapping ID
// that was never registered returns an error naming that ID, even when an
// earlier ID in the cascade is valid.
func TestCascadeQueryUnknownID(t *testing.T) {
	m, err := NewMapper([]config.MappingList{{
		ID: "known", FoundryA: "x", LayerA: "y",
		FoundryB: "a", LayerB: "b",
		Mappings: []config.MappingRule{`[X] <> [Y]`},
	}})
	require.NoError(t, err)

	input := parseJSON(t, `{
"@type": "koral:token",
"wrap": {"@type": "koral:term", "key": "X"}
}`)

	_, err = m.CascadeQueryMappings(
		[]string{"known", "nonexistent"},
		[]MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
		input,
	)
	// require (not assert): err.Error() below would dereference a nil error
	// and panic if the call unexpectedly succeeded.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "nonexistent")
}
// --- Query cascade rewrite tests ---
func TestCascadeQueryRewritesPreservedAcrossSteps(t *testing.T) {
// Step 1 changes foundry (opennlp->stts) and key (PIDAT->DET).
// Step 2 changes foundry (stts->upos) and key (DET->PRON).
// Rewrites from step 1 must survive step 2's replacement.
m, err := NewMapper([]config.MappingList{
{
ID: "step1", FoundryA: "opennlp", LayerA: "p",
FoundryB: "stts", LayerB: "p",
Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
},
{
ID: "step2", FoundryA: "stts", LayerA: "p",
FoundryB: "upos", LayerB: "p",
Mappings: []config.MappingRule{`[DET] <> [PRON]`},
},
})
require.NoError(t, err)
input := parseJSON(t, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`)
result, err := m.CascadeQueryMappings(
[]string{"step1", "step2"},
[]MappingOptions{
{Direction: AtoB, AddRewrites: true},
{Direction: AtoB, AddRewrites: true},
},
input,
)
require.NoError(t, err)
// After both steps, the term should have rewrites from both steps:
// step 1 recorded scope=foundry original=opennlp and scope=key original=PIDAT,
// step 2 recorded scope=foundry original=stts and scope=key original=DET.
expected := parseJSON(t, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "upos",
"key": "PRON",
"layer": "p",
"match": "match:eq",
"rewrites": [
{
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
"scope": "foundry",
"original": "opennlp"
},
{
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
"scope": "key",
"original": "PIDAT"
},
{
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
"scope": "foundry",
"original": "stts"
},
{
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
"scope": "key",
"original": "DET"
}
]
}
}`)
assert.Equal(t, expected, result)
}
// TestCascadeQueryRewritesPreservedStructuralChange runs a two-step cascade
// with AddRewrites enabled on both steps:
//
//   - step 1 changes foundry (opennlp->stts) and key (PIDAT->DET),
//   - step 2 replaces the Term with a TermGroup (structural change).
//
// The rewrites recorded by step 1 must be carried over onto the new
// TermGroup, followed by step 2's own structural rewrite.
func TestCascadeQueryRewritesPreservedStructuralChange(t *testing.T) {
	m, err := NewMapper([]config.MappingList{
		{
			ID: "sc-step1", FoundryA: "opennlp", LayerA: "p",
			FoundryB: "stts", LayerB: "p",
			Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
		},
		{
			ID: "sc-step2", FoundryA: "stts", LayerA: "p",
			FoundryB: "tt", LayerB: "pos",
			Mappings: []config.MappingRule{`[DET] <> [opennlp/p=DET & opennlp/p=PronType:Art]`},
		},
	})
	require.NoError(t, err)

	input := parseJSON(t, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`)

	result, err := m.CascadeQueryMappings(
		[]string{"sc-step1", "sc-step2"},
		[]MappingOptions{
			{Direction: AtoB, AddRewrites: true},
			{Direction: AtoB, AddRewrites: true},
		},
		input,
	)
	require.NoError(t, err)

	// object narrows v to a JSON object. A missing or mistyped node fails
	// this test with a readable message instead of panicking.
	object := func(v any, name string) map[string]any {
		t.Helper()
		obj, ok := v.(map[string]any)
		require.Truef(t, ok, "%s should be a JSON object, got %T", name, v)
		return obj
	}

	resultMap := object(result, "result")
	wrap := object(resultMap["wrap"], "wrap")
	require.Equal(t, "koral:termGroup", wrap["@type"])

	rewrites, ok := wrap["rewrites"].([]any)
	require.Truef(t, ok, "wrap.rewrites should be a JSON array, got %T", wrap["rewrites"])
	// Two carried-over rewrites from step 1 plus at least one from step 2.
	// Guarding the length keeps the index expressions below from panicking.
	require.GreaterOrEqual(t, len(rewrites), 3, "expected step 1 and step 2 rewrites on the term group")

	// First rewrite is from step 1 (carried forward): foundry change.
	rw0 := object(rewrites[0], "rewrites[0]")
	assert.Equal(t, "foundry", rw0["scope"])
	assert.Equal(t, "opennlp", rw0["original"])

	// Second rewrite is from step 1 (carried forward): key change.
	rw1 := object(rewrites[1], "rewrites[1]")
	assert.Equal(t, "key", rw1["scope"])
	assert.Equal(t, "PIDAT", rw1["original"])

	// Last rewrite is from step 2 (structural: original is the full term).
	rwLast := object(rewrites[len(rewrites)-1], "last rewrite")
	assert.Equal(t, "Koral-Mapper", rwLast["editor"])

	// Structural rewrite stores the full original node (no scope).
	original := object(rwLast["original"], "last rewrite original")
	assert.Equal(t, "koral:term", original["@type"])
	assert.Equal(t, "DET", original["key"])
}
func TestCascadeResponseTwoCorpusMappings(t *testing.T) {
m, err := NewMapper([]config.MappingList{
{
ID: "corpus-step1", Type: "corpus",
Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
},
{
ID: "corpus-step2", Type: "corpus",
Mappings: []config.MappingRule{`genre=fiction <> category=lit`},
},
})
require.NoError(t, err)
input := parseJSON(t, `{
"fields": [{
"@type": "koral:field",
"key": "textClass",
"value": "novel",
"type": "type:string"
}]
}`)
result, err := m.CascadeResponseMappings(
[]string{"corpus-step1", "corpus-step2"},
[]MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
input,
)
require.NoError(t, err)
fields := result.(map[string]any)["fields"].([]any)
require.GreaterOrEqual(t, len(fields), 3)
assert.Equal(t, "textClass", fields[0].(map[string]any)["key"])
assert.Equal(t, "genre", fields[1].(map[string]any)["key"])
assert.Equal(t, "fiction", fields[1].(map[string]any)["value"])
assert.Equal(t, "category", fields[2].(map[string]any)["key"])
assert.Equal(t, "lit", fields[2].(map[string]any)["value"])
}
func TestCascadeResponseMixAnnotationAndCorpus(t *testing.T) {
m, err := NewMapper([]config.MappingList{
{
ID: "ann-resp", FoundryA: "opennlp", LayerA: "p",
FoundryB: "upos", LayerB: "p",
Mappings: []config.MappingRule{`[DET] <> [PRON]`},
},
{
ID: "corpus-resp",
Type: "corpus",
Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
},
})
require.NoError(t, err)
input := parseJSON(t, `{
"snippet": "<span title=\"opennlp/p:DET\">Der</span>",
"fields": [{
"@type": "koral:field",
"key": "textClass",
"value": "novel",
"type": "type:string"
}]
}`)
result, err := m.CascadeResponseMappings(
[]string{"ann-resp", "corpus-resp"},
[]MappingOptions{{Direction: AtoB}, {Direction: AtoB}},
input,
)
require.NoError(t, err)
resultMap := result.(map[string]any)
snippet := resultMap["snippet"].(string)
assert.Contains(t, snippet, "opennlp/p:DET")
assert.Contains(t, snippet, "upos/p:PRON")
fields := resultMap["fields"].([]any)
require.GreaterOrEqual(t, len(fields), 2)
assert.Equal(t, "genre", fields[1].(map[string]any)["key"])
}
func TestCascadeResponseSingleElement(t *testing.T) {
m, err := NewMapper([]config.MappingList{{
ID: "corpus-single", Type: "corpus",
Mappings: []config.MappingRule{`textClass=novel <> genre=fiction`},
}})
require.NoError(t, err)
makeInput := func() any {
return parseJSON(t, `{
"fields": [{
"@type": "koral:field",
"key": "textClass",
"value": "novel",
"type": "type:string"
}]
}`)
}
opts := MappingOptions{Direction: AtoB}
cascadeResult, err := m.CascadeResponseMappings(
[]string{"corpus-single"}, []MappingOptions{opts}, makeInput(),
)
require.NoError(t, err)
directResult, err := m.ApplyResponseMappings("corpus-single", opts, makeInput())
require.NoError(t, err)
assert.Equal(t, directResult, cascadeResult)
}
// TestCascadeResponseEmptyList checks that a response cascade with no mapping
// IDs and no options succeeds and leaves the response unchanged.
func TestCascadeResponseEmptyList(t *testing.T) {
	m, err := NewMapper([]config.MappingList{{
		ID: "dummy", Type: "corpus",
		Mappings: []config.MappingRule{`x=y <> a=b`},
	}})
	require.NoError(t, err)

	const inputJSON = `{"fields": []}`
	// Decode the fixture twice. Comparing the result against the very value
	// handed to the mapper could never detect an in-place modification,
	// because both sides of the comparison would be the same object.
	input := parseJSON(t, inputJSON)
	want := parseJSON(t, inputJSON)

	result, err := m.CascadeResponseMappings(nil, nil, input)
	require.NoError(t, err)
	assert.Equal(t, want, result)
}
// TestCascadeResponseUnknownID checks that a response cascade referencing a
// mapping ID that was never registered returns an error naming that ID.
func TestCascadeResponseUnknownID(t *testing.T) {
	m, err := NewMapper([]config.MappingList{{
		ID: "known", Type: "corpus",
		Mappings: []config.MappingRule{`x=y <> a=b`},
	}})
	require.NoError(t, err)

	_, err = m.CascadeResponseMappings(
		[]string{"nonexistent"},
		[]MappingOptions{{Direction: AtoB}},
		parseJSON(t, `{"fields": []}`),
	)
	// require (not assert): err.Error() below would dereference a nil error
	// and panic if the call unexpectedly succeeded.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "nonexistent")
}