// blob: 8d3e691b70164e673674f5cf8c274d253cd371eb [file] [log] [blame]
package mapper
import (
"encoding/json"
"os"
"testing"
"github.com/KorAP/Koral-Mapper/ast"
"github.com/KorAP/Koral-Mapper/config"
"github.com/KorAP/Koral-Mapper/matcher"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v3"
)
func TestMapper(t *testing.T) {
// Create test mapping list
mappingList := config.MappingList{
ID: "test-mapper",
FoundryA: "opennlp",
LayerA: "p",
FoundryB: "upos",
LayerB: "p",
Mappings: []config.MappingRule{
"[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
"[DET] <> [opennlp/p=DET]",
},
}
// Create a new mapper
m, err := NewMapper([]config.MappingList{mappingList})
require.NoError(t, err)
tests := []struct {
name string
mappingID string
opts MappingOptions
input string
expected string
expectError bool
}{
{
name: "Simple A to B mapping",
mappingID: "test-mapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:termGroup",
"operands": [
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
},
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "AdjType",
"layer": "p",
"match": "match:eq",
"value": "Pdt"
}
],
"relation": "relation:and"
}
}`,
},
{
name: "Simple A to B mapping with rewrites",
mappingID: "test-mapper",
opts: MappingOptions{
Direction: AtoB,
AddRewrites: true,
},
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:termGroup",
"operands": [
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
},
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "AdjType",
"layer": "p",
"match": "match:eq",
"value": "Pdt"
}
],
"relation": "relation:and",
"rewrites": [
{
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
"original": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}
]
}
}`,
},
{
name: "Mapping with foundry override and rewrites",
mappingID: "test-mapper",
opts: MappingOptions{
Direction: AtoB,
FoundryB: "custom",
AddRewrites: true,
},
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:termGroup",
"operands": [
{
"@type": "koral:term",
"foundry": "custom",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
},
{
"@type": "koral:term",
"foundry": "custom",
"key": "AdjType",
"layer": "p",
"match": "match:eq",
"value": "Pdt"
}
],
"relation": "relation:and",
"rewrites": [
{
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
"original": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}
]
}
}`,
},
{
name: "B to A direction",
mappingID: "test-mapper",
opts: MappingOptions{
Direction: BtoA,
},
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expectError: false,
},
{
name: "Invalid mapping ID",
mappingID: "nonexistent",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expectError: true,
},
{
name: "Invalid direction",
mappingID: "test-mapper",
opts: MappingOptions{
Direction: Direction(false),
},
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expectError: false,
},
{
name: "Query with legacy rewrite field names",
mappingID: "test-mapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Legacy rewrite with source instead of editor",
"source": "LegacyEditor",
"operation": "operation:legacy",
"origin": "LegacySource"
}
],
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Legacy rewrite with source instead of editor",
"editor": "LegacyEditor",
"operation": "operation:legacy",
"src": "LegacySource"
}
],
"wrap": {
"@type": "koral:termGroup",
"operands": [
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
},
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "AdjType",
"layer": "p",
"match": "match:eq",
"value": "Pdt"
}
],
"relation": "relation:and"
}
}`,
},
{
name: "Query with mixed legacy and modern rewrite fields",
mappingID: "test-mapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Modern rewrite",
"editor": "ModernEditor",
"operation": "operation:modern",
"original": {
"@type": "koral:term",
"foundry": "original",
"key": "original-key"
}
},
{
"@type": "koral:rewrite",
"_comment": "Legacy rewrite with precedence test",
"editor": "PreferredEditor",
"source": "IgnoredSource",
"operation": "operation:precedence",
"original": "PreferredOriginal",
"src": "IgnoredSrc",
"origin": "IgnoredOrigin"
}
],
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Modern rewrite",
"editor": "ModernEditor",
"operation": "operation:modern",
"original": {
"@type": "koral:term",
"foundry": "original",
"key": "original-key"
}
},
{
"@type": "koral:rewrite",
"_comment": "Legacy rewrite with precedence test",
"editor": "PreferredEditor",
"operation": "operation:precedence",
"original": "PreferredOriginal"
}
],
"wrap": {
"@type": "koral:termGroup",
"operands": [
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
},
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "AdjType",
"layer": "p",
"match": "match:eq",
"value": "Pdt"
}
],
"relation": "relation:and"
}
}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Parse input JSON
var inputData any
err := json.Unmarshal([]byte(tt.input), &inputData)
require.NoError(t, err)
// Apply mappings
result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
if tt.expectError {
assert.Error(t, err)
return
}
require.NoError(t, err)
// Parse expected JSON
var expectedData any
err = json.Unmarshal([]byte(tt.expected), &expectedData)
require.NoError(t, err)
// Compare results
assert.Equal(t, expectedData, result)
})
}
}
func TestTokenToTermGroupWithRewrites(t *testing.T) {
// Create test mapping list specifically for token to termGroup test
mappingList := config.MappingList{
ID: "test-token-to-termgroup",
FoundryA: "opennlp",
LayerA: "p",
FoundryB: "tt",
LayerB: "pos",
Mappings: []config.MappingRule{
"[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
},
}
// Create a new mapper
m, err := NewMapper([]config.MappingList{mappingList})
require.NoError(t, err)
input := `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "This rewrite should be preserved",
"editor": "TestEditor",
"operation": "operation:test",
"src": "TestSource"
}
],
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`
expected := `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "This rewrite should be preserved",
"editor": "TestEditor",
"operation": "operation:test",
"src": "TestSource"
}
],
"wrap": {
"@type": "koral:termGroup",
"operands": [
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
},
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "AdjType",
"layer": "p",
"match": "match:eq",
"value": "Pdt"
}
],
"relation": "relation:and"
}
}`
// Parse input JSON
var inputData any
err = json.Unmarshal([]byte(input), &inputData)
require.NoError(t, err)
// Apply mappings
result, err := m.ApplyQueryMappings("test-token-to-termgroup", MappingOptions{Direction: AtoB}, inputData)
require.NoError(t, err)
// Parse expected JSON
var expectedData any
err = json.Unmarshal([]byte(expected), &expectedData)
require.NoError(t, err)
// Compare results
assert.Equal(t, expectedData, result)
}
// TestMatchComplexPatterns builds a matcher from a nested AND/OR pattern and
// checks what Replace returns for a given input tree.
func TestMatchComplexPatterns(t *testing.T) {
	// Node constructors. Each call returns a fresh node, so the pattern,
	// replacement, input and expectation never share pointers.
	term := func(key string) *ast.Term {
		return &ast.Term{Key: key, Match: ast.MatchEqual}
	}
	allOf := func(operands ...ast.Node) *ast.TermGroup {
		return &ast.TermGroup{Operands: operands, Relation: ast.AndRelation}
	}
	anyOf := func(operands ...ast.Node) *ast.TermGroup {
		return &ast.TermGroup{Operands: operands, Relation: ast.OrRelation}
	}

	cases := []struct {
		name        string
		pattern     ast.Pattern
		replacement ast.Replacement
		input       ast.Node
		expected    ast.Node
	}{
		{
			name: "Deep nested pattern with mixed operators",
			// Pattern: A & (B | (C & D))
			pattern: ast.Pattern{
				Root: allOf(
					term("A"),
					anyOf(
						term("B"),
						allOf(term("C"), term("D")),
					),
				),
			},
			replacement: ast.Replacement{Root: term("RESULT")},
			// Input: A & (C & D)
			input: allOf(
				term("A"),
				allOf(term("C"), term("D")),
			),
			expected: term("RESULT"),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			m, err := matcher.NewMatcher(tc.pattern, tc.replacement)
			require.NoError(t, err)

			assert.Equal(t, tc.expected, m.Replace(tc.input))
		})
	}
}
// TestInvalidPatternReplacement feeds structurally invalid queries to
// ApplyQueryMappings and checks that the call fails with the exact expected
// error message and a nil result.
func TestInvalidPatternReplacement(t *testing.T) {
	// Create test mapping list
	mappingList := config.MappingList{
		ID:       "test-mapper",
		FoundryA: "opennlp",
		LayerA:   "p",
		FoundryB: "upos",
		LayerB:   "p",
		Mappings: []config.MappingRule{
			"[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
		},
	}

	// Create a new mapper
	m, err := NewMapper([]config.MappingList{mappingList})
	require.NoError(t, err)

	tests := []struct {
		name        string
		input       string
		expectError bool
		errorMsg    string
	}{
		{
			name: "Invalid input - empty term group",
			input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:termGroup",
"operands": [],
"relation": "relation:and"
}
}`,
			expectError: true,
			errorMsg:    "failed to parse JSON into AST: error parsing wrapped node: term group must have at least one operand",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var inputData any
			err := json.Unmarshal([]byte(tt.input), &inputData)
			require.NoError(t, err)

			result, err := m.ApplyQueryMappings("test-mapper", MappingOptions{Direction: AtoB}, inputData)
			if !tt.expectError {
				assert.NoError(t, err)
				assert.NotNil(t, result)
				return
			}

			// require rather than assert: err.Error() below would panic
			// on a nil error instead of reporting a test failure.
			require.Error(t, err)
			assert.Equal(t, tt.errorMsg, err.Error())
			assert.Nil(t, result)
		})
	}
}
func TestMultiFieldRewritesAreReversible(t *testing.T) {
mappingList := config.MappingList{
ID: "multi-field",
FoundryA: "opennlp",
LayerA: "p",
FoundryB: "upos",
LayerB: "pos",
Mappings: []config.MappingRule{
"[DET] <> [PRON]",
},
}
m, err := NewMapper([]config.MappingList{mappingList})
require.NoError(t, err)
tests := []struct {
name string
opts MappingOptions
input string
expected string
}{
{
name: "Multi-field change: single rewrite with full original",
opts: MappingOptions{
Direction: AtoB,
AddRewrites: true,
},
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "DET",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "upos",
"key": "PRON",
"layer": "pos",
"match": "match:eq",
"rewrites": [
{
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
"original": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "DET",
"layer": "p",
"match": "match:eq"
}
}
]
}
}`,
},
{
name: "Reverse direction: single rewrite with full original",
opts: MappingOptions{
Direction: BtoA,
AddRewrites: true,
},
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "upos",
"key": "PRON",
"layer": "pos",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "DET",
"layer": "p",
"match": "match:eq",
"rewrites": [
{
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
"original": {
"@type": "koral:term",
"foundry": "upos",
"key": "PRON",
"layer": "pos",
"match": "match:eq"
}
}
]
}
}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var inputData any
err := json.Unmarshal([]byte(tt.input), &inputData)
require.NoError(t, err)
result, err := m.ApplyQueryMappings("multi-field", tt.opts, inputData)
require.NoError(t, err)
var expectedData any
err = json.Unmarshal([]byte(tt.expected), &expectedData)
require.NoError(t, err)
assert.Equal(t, expectedData, result)
})
}
}
func TestSingleFieldRewrite(t *testing.T) {
mappingList := config.MappingList{
ID: "same-fl",
FoundryA: "opennlp",
LayerA: "p",
FoundryB: "opennlp",
LayerB: "pos",
Mappings: []config.MappingRule{
"[DET] <> [PRON]",
},
}
m, err := NewMapper([]config.MappingList{mappingList})
require.NoError(t, err)
var inputData any
err = json.Unmarshal([]byte(`{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "DET",
"layer": "p",
"match": "match:eq"
}
}`), &inputData)
require.NoError(t, err)
result, err := m.ApplyQueryMappings("same-fl", MappingOptions{
Direction: AtoB,
AddRewrites: true,
}, inputData)
require.NoError(t, err)
var expectedData any
err = json.Unmarshal([]byte(`{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PRON",
"layer": "pos",
"match": "match:eq",
"rewrites": [
{
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
"original": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "DET",
"layer": "p",
"match": "match:eq"
}
}
]
}
}`), &expectedData)
require.NoError(t, err)
assert.Equal(t, expectedData, result)
}
// TestBuildRewritesSingleObjectRewrite calls buildRewrites with a before and
// an after term and checks that exactly one rewrite comes back: attributed
// to RewriteEditor, without a scope, and carrying the original as a
// "koral:term" map.
func TestBuildRewritesSingleObjectRewrite(t *testing.T) {
	// mkTerm builds an equality-match term; an empty value leaves Value at
	// its zero value.
	mkTerm := func(foundry, layer, key, value string) *ast.Term {
		return &ast.Term{Foundry: foundry, Layer: layer, Key: key, Value: value, Match: ast.MatchEqual}
	}

	cases := []struct {
		name   string
		before *ast.Term
		after  *ast.Term
	}{
		{
			name:   "All fields change",
			before: mkTerm("a", "l1", "k1", "v1"),
			after:  mkTerm("b", "l2", "k2", "v2"),
		},
		{
			name:   "Single field injection: empty value becomes non-empty",
			before: mkTerm("a", "l", "k", ""),
			after:  mkTerm("a", "l", "k", "v"),
		},
		{
			name:   "Single field deletion: non-empty value becomes empty",
			before: mkTerm("a", "l", "k", "v"),
			after:  mkTerm("a", "l", "k", ""),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := buildRewrites(tc.before, tc.after)
			require.Len(t, got, 1, "one rule application should produce exactly one rewrite")

			only := got[0]
			assert.Equal(t, RewriteEditor, only.Editor)
			assert.Empty(t, only.Scope, "object-level rewrite should have no scope")
			assert.NotNil(t, only.Original, "rewrite should contain the full original")

			asMap, isMap := only.Original.(map[string]any)
			require.True(t, isMap)
			assert.Equal(t, "koral:term", asMap["@type"])
		})
	}
}
func TestQueryWrapperMappings(t *testing.T) {
mappingList := config.MappingList{
ID: "test-wrapper",
FoundryA: "opennlp",
LayerA: "orth",
FoundryB: "upos",
LayerB: "orth",
Mappings: []config.MappingRule{
"[opennlp/orth=Baum] <> [opennlp/orth=X]",
},
}
// Create a new mapper
m, err := NewMapper([]config.MappingList{mappingList})
require.NoError(t, err)
tests := []struct {
name string
mappingID string
opts MappingOptions
input string
expected string
expectError bool
}{
{
name: "Query wrapper case with rewrites preservation",
mappingID: "test-wrapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"collection": {
"@type": "koral:doc",
"key": "availability",
"match": "match:eq",
"type": "type:regex",
"value": "CC.*"
},
"query": {
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Original rewrite that should be preserved",
"editor": "Original",
"operation": "operation:original",
"src": "Original"
}
],
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "Baum",
"layer": "orth",
"match": "match:eq"
}
}
}`,
expected: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"collection": {
"@type": "koral:doc",
"key": "availability",
"match": "match:eq",
"type": "type:regex",
"value": "CC.*"
},
"query": {
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Original rewrite that should be preserved",
"editor": "Original",
"operation": "operation:original",
"src": "Original"
}
],
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "X",
"layer": "orth",
"match": "match:eq"
}
}
}`,
},
{
name: "Empty query field",
mappingID: "test-wrapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"query": null
}`,
expected: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"query": null
}`,
},
{
name: "Missing query field",
mappingID: "test-wrapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"collection": {
"@type": "koral:doc"
}
}`,
expected: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"collection": {
"@type": "koral:doc"
}
}`,
},
{
name: "Query field with non-object value",
mappingID: "test-wrapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"query": "invalid"
}`,
expected: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"query": "invalid"
}`,
},
{
name: "Query with rewrites in nested token",
mappingID: "test-wrapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Nested rewrite that should be preserved",
"editor": "Nested",
"operation": "operation:nested",
"src": "Nested"
}
],
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "Baum",
"layer": "orth",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Nested rewrite that should be preserved",
"editor": "Nested",
"operation": "operation:nested",
"src": "Nested"
}
],
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "X",
"layer": "orth",
"match": "match:eq"
}
}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Parse input JSON
var inputData any
err := json.Unmarshal([]byte(tt.input), &inputData)
require.NoError(t, err)
// Apply mappings
result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
if tt.expectError {
assert.Error(t, err)
return
}
require.NoError(t, err)
// Parse expected JSON
var expectedData any
err = json.Unmarshal([]byte(tt.expected), &expectedData)
require.NoError(t, err)
// Compare results
assert.Equal(t, expectedData, result)
})
}
}
// TestIdenticalEffectiveFoundryLayerRejected checks when ApplyQueryMappings
// reports "identical source and target": the cases combine the foundry/layer
// pairs configured on the mapping list with a FoundryB override supplied via
// MappingOptions.
func TestIdenticalEffectiveFoundryLayerRejected(t *testing.T) {
	// listWith builds the "test" mapping list with the given foundry/layer
	// pairs; empty strings leave the corresponding field unset.
	listWith := func(foundryA, layerA, foundryB, layerB string) config.MappingList {
		return config.MappingList{
			ID:       "test",
			FoundryA: foundryA,
			LayerA:   layerA,
			FoundryB: foundryB,
			LayerB:   layerB,
			Mappings: []config.MappingRule{"[A] <> [B]"},
		}
	}

	cases := []struct {
		name    string
		list    config.MappingList
		opts    MappingOptions
		wantErr string
	}{
		{
			name:    "YAML defaults identical",
			list:    listWith("opennlp", "p", "opennlp", "p"),
			opts:    MappingOptions{Direction: AtoB},
			wantErr: "identical source and target",
		},
		{
			name:    "Query param override makes them identical",
			list:    listWith("opennlp", "p", "upos", "p"),
			opts:    MappingOptions{Direction: AtoB, FoundryB: "opennlp"},
			wantErr: "identical source and target",
		},
		{
			name: "Query param override resolves the conflict",
			list: listWith("opennlp", "p", "opennlp", "p"),
			opts: MappingOptions{Direction: AtoB, FoundryB: "upos"},
		},
		{
			name: "Different foundry same layer is allowed",
			list: listWith("opennlp", "p", "upos", "p"),
			opts: MappingOptions{Direction: AtoB},
		},
		{
			name: "Both foundries empty is allowed",
			list: listWith("", "", "", ""),
			opts: MappingOptions{Direction: AtoB},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			m, err := NewMapper([]config.MappingList{tc.list})
			require.NoError(t, err)

			query := map[string]any{
				"@type": "koral:token",
				"wrap": map[string]any{
					"@type": "koral:term",
					"key":   "A",
				},
			}

			_, err = m.ApplyQueryMappings("test", tc.opts, query)
			if tc.wantErr == "" {
				assert.NoError(t, err)
				return
			}
			require.Error(t, err)
			assert.Contains(t, err.Error(), tc.wantErr)
		})
	}
}
// TestIdenticalEffectiveFieldRejected is the corpus-type counterpart of the
// foundry/layer check: it verifies when ApplyQueryMappings reports
// "identical source and target field" for combinations of FieldA/FieldB on
// the mapping list and a FieldB override in MappingOptions.
func TestIdenticalEffectiveFieldRejected(t *testing.T) {
	// corpusList builds the "test" corpus mapping list; empty field names
	// leave FieldA/FieldB unset.
	corpusList := func(fieldA, fieldB string, rule config.MappingRule) config.MappingList {
		return config.MappingList{
			ID:       "test",
			Type:     "corpus",
			FieldA:   fieldA,
			FieldB:   fieldB,
			Mappings: []config.MappingRule{rule},
		}
	}

	cases := []struct {
		name    string
		list    config.MappingList
		opts    MappingOptions
		wantErr string
	}{
		{
			name:    "YAML defaults identical",
			list:    corpusList("textClass", "textClass", "novel <> fiction"),
			opts:    MappingOptions{Direction: AtoB},
			wantErr: "identical source and target field",
		},
		{
			name:    "Query param override makes them identical",
			list:    corpusList("textClass", "genre", "novel <> fiction"),
			opts:    MappingOptions{Direction: AtoB, FieldB: "textClass"},
			wantErr: "identical source and target field",
		},
		{
			name: "Query param override resolves the conflict",
			list: corpusList("textClass", "textClass", "novel <> fiction"),
			opts: MappingOptions{Direction: AtoB, FieldB: "genre"},
		},
		{
			name: "Different fields is allowed",
			list: corpusList("textClass", "genre", "novel <> fiction"),
			opts: MappingOptions{Direction: AtoB},
		},
		{
			name: "Both fields empty is allowed",
			list: corpusList("", "", "textClass=novel <> genre=fiction"),
			opts: MappingOptions{Direction: AtoB},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			m, err := NewMapper([]config.MappingList{tc.list})
			require.NoError(t, err)

			query := map[string]any{
				"collection": map[string]any{
					"@type": "koral:doc",
					"key":   "textClass",
					"value": "novel",
					"match": "match:eq",
				},
			}

			_, err = m.ApplyQueryMappings("test", tc.opts, query)
			if tc.wantErr == "" {
				assert.NoError(t, err)
				return
			}
			require.Error(t, err)
			assert.Contains(t, err.Error(), tc.wantErr)
		})
	}
}
func TestIdenticalEffectiveValuesResponseEndpoint(t *testing.T) {
t.Run("annotation response rejects identical effective foundry/layer", func(t *testing.T) {
m, err := NewMapper([]config.MappingList{{
ID: "test", FoundryA: "marmot", LayerA: "p",
FoundryB: "marmot", LayerB: "p",
Mappings: []config.MappingRule{"[DET] <> [PRON]"},
}})
require.NoError(t, err)
input := map[string]any{
"snippet": `<span title="marmot/p:DET">Der</span>`,
}
_, err = m.ApplyResponseMappings("test", MappingOptions{Direction: AtoB}, input)
require.Error(t, err)
assert.Contains(t, err.Error(), "identical source and target")
})
t.Run("corpus response rejects identical effective field", func(t *testing.T) {
m, err := NewMapper([]config.MappingList{{
ID: "test", Type: "corpus",
FieldA: "textClass", FieldB: "textClass",
Mappings: []config.MappingRule{"novel <> fiction"},
}})
require.NoError(t, err)
input := map[string]any{
"fields": []any{
map[string]any{
"@type": "koral:field",
"key": "textClass",
"value": "novel",
"type": "type:string",
},
},
}
_, err = m.ApplyResponseMappings("test", MappingOptions{Direction: AtoB}, input)
require.Error(t, err)
assert.Contains(t, err.Error(), "identical source and target field")
})
}
// newSTTSUPoSMapper reads ../mappings/stts-upos.yaml, decodes it into a
// single config.MappingList and returns a Mapper built from that list. Any
// failure along the way aborts the calling test.
func newSTTSUPoSMapper(t *testing.T) *Mapper {
	t.Helper()

	raw, readErr := os.ReadFile("../mappings/stts-upos.yaml")
	require.NoError(t, readErr, "failed to read stts-upos.yaml from disk")

	var list config.MappingList
	require.NoError(t, yaml.Unmarshal(raw, &list), "failed to parse stts-upos.yaml")

	mapper, buildErr := NewMapper([]config.MappingList{list})
	require.NoError(t, buildErr)

	return mapper
}
// TestFallbackRules runs single-token queries through the "stts-upos"
// mapping loaded by newSTTSUPoSMapper and inspects the resulting "wrap"
// object. B-to-A cases with a bare UPoS tag expect an OR term group with a
// fixed number of alternatives, B-to-A cases with an additional feature term
// expect a single term, and the A-to-B cases cover the forward direction.
//
// Every step that later code depends on (type assertions, operand counts)
// uses require, so an unexpected result fails the subtest instead of
// panicking.
func TestFallbackRules(t *testing.T) {
	m := newSTTSUPoSMapper(t)

	// mapWrap decodes input, applies the stts-upos mapping in direction dir
	// and returns the "wrap" object of the resulting token.
	mapWrap := func(t *testing.T, dir Direction, input string) map[string]any {
		t.Helper()

		var inputData any
		require.NoError(t, json.Unmarshal([]byte(input), &inputData))

		result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: dir}, inputData)
		require.NoError(t, err)

		resultMap, ok := result.(map[string]any)
		require.True(t, ok, "result should be a JSON object")
		wrap, ok := resultMap["wrap"].(map[string]any)
		require.True(t, ok, "result should contain a wrap object")
		return wrap
	}

	// orOperands asserts that wrap is an OR term group with exactly want
	// operands and returns them.
	orOperands := func(t *testing.T, wrap map[string]any, want int) []any {
		t.Helper()

		assert.Equal(t, "koral:termGroup", wrap["@type"])
		assert.Equal(t, "relation:or", wrap["relation"])

		operands, ok := wrap["operands"].([]any)
		require.True(t, ok, "wrap should contain an operands array")
		require.Len(t, operands, want)
		return operands
	}

	// keysOf collects the "key" of every operand.
	keysOf := func(t *testing.T, operands []any) []string {
		t.Helper()

		keys := make([]string, 0, len(operands))
		for _, op := range operands {
			opMap, ok := op.(map[string]any)
			require.True(t, ok, "operand should be a JSON object")
			key, ok := opMap["key"].(string)
			require.True(t, ok, "operand key should be a string")
			keys = append(keys, key)
		}
		return keys
	}

	t.Run("Bare ADJ (BtoA) maps to ADJA|ADJD disjunction", func(t *testing.T) {
		wrap := mapWrap(t, BtoA, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "upos",
"key": "ADJ",
"layer": "p",
"match": "match:eq"
}
}`)
		keys := keysOf(t, orOperands(t, wrap, 2))
		assert.Contains(t, keys, "ADJA")
		assert.Contains(t, keys, "ADJD")
	})

	t.Run("ADJ & Variant=Short (BtoA) maps to ADJD only", func(t *testing.T) {
		wrap := mapWrap(t, BtoA, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:termGroup",
"operands": [
{
"@type": "koral:term",
"foundry": "upos",
"key": "ADJ",
"layer": "p",
"match": "match:eq"
},
{
"@type": "koral:term",
"foundry": "upos",
"key": "Short",
"layer": "Variant",
"match": "match:eq"
}
],
"relation": "relation:and"
}
}`)
		assert.Equal(t, "koral:term", wrap["@type"])
		assert.Equal(t, "ADJD", wrap["key"])
	})

	t.Run("Bare DET (BtoA) maps to DET subtypes disjunction", func(t *testing.T) {
		wrap := mapWrap(t, BtoA, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "upos",
"key": "DET",
"layer": "p",
"match": "match:eq"
}
}`)
		keys := keysOf(t, orOperands(t, wrap, 7))
		assert.Contains(t, keys, "ART")
		assert.Contains(t, keys, "PDAT")
		assert.Contains(t, keys, "PWAT")
	})

	t.Run("DET & PronType=Art (BtoA) maps to ART only", func(t *testing.T) {
		wrap := mapWrap(t, BtoA, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:termGroup",
"operands": [
{
"@type": "koral:term",
"foundry": "upos",
"key": "DET",
"layer": "p",
"match": "match:eq"
},
{
"@type": "koral:term",
"foundry": "upos",
"key": "Art",
"layer": "PronType",
"match": "match:eq"
}
],
"relation": "relation:and"
}
}`)
		assert.Equal(t, "koral:term", wrap["@type"])
		assert.Equal(t, "ART", wrap["key"])
	})

	t.Run("Bare SCONJ (BtoA) maps to KOUI|KOUS disjunction", func(t *testing.T) {
		wrap := mapWrap(t, BtoA, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "upos",
"key": "SCONJ",
"layer": "p",
"match": "match:eq"
}
}`)
		orOperands(t, wrap, 2)
	})

	t.Run("Bare VERB (BtoA) maps to STTS verb subtypes disjunction", func(t *testing.T) {
		wrap := mapWrap(t, BtoA, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "upos",
"key": "VERB",
"layer": "p",
"match": "match:eq"
}
}`)
		orOperands(t, wrap, 8)
	})

	t.Run("Bare AUX (BtoA) maps to AUX subtypes disjunction", func(t *testing.T) {
		wrap := mapWrap(t, BtoA, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "upos",
"key": "AUX",
"layer": "p",
"match": "match:eq"
}
}`)
		orOperands(t, wrap, 4)
	})

	t.Run("Forward direction AtoB: ADJA maps to ADJ", func(t *testing.T) {
		wrap := mapWrap(t, AtoB, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "ADJA",
"layer": "p",
"match": "match:eq"
}
}`)
		assert.Equal(t, "koral:term", wrap["@type"])
		assert.Equal(t, "ADJ", wrap["key"])
	})

	t.Run("Forward direction AtoB: ART maps to DET & PronType=Art", func(t *testing.T) {
		wrap := mapWrap(t, AtoB, `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "ART",
"layer": "p",
"match": "match:eq"
}
}`)
		assert.Equal(t, "koral:termGroup", wrap["@type"])
		assert.Equal(t, "relation:and", wrap["relation"])
	})
}
// TestOriginalProblemMultiTokenQuery checks B->A mapping of UPoS queries with
// the "stts-upos" mapping list provided by newSTTSUPoSMapper. Bare tags are
// expected to expand into OR groups (or a single term), while tags qualified
// by a feature are expected to resolve to exactly one term.
func TestOriginalProblemMultiTokenQuery(t *testing.T) {
	m := newSTTSUPoSMapper(t)

	// asObject returns v as a decoded JSON object. It fails the test with a
	// readable message instead of panicking when v has any other type.
	asObject := func(t *testing.T, v any, what string) map[string]any {
		t.Helper()
		obj, ok := v.(map[string]any)
		require.Truef(t, ok, "%s should be a JSON object, got %T", what, v)
		return obj
	}

	// asArray is the JSON-array counterpart of asObject.
	asArray := func(t *testing.T, v any, what string) []any {
		t.Helper()
		arr, ok := v.([]any)
		require.Truef(t, ok, "%s should be a JSON array, got %T", what, v)
		return arr
	}

	// mapBtoA decodes input, applies the "stts-upos" mappings in B->A
	// direction and returns the resulting top-level JSON object.
	mapBtoA := func(t *testing.T, input string) map[string]any {
		t.Helper()
		var inputData any
		require.NoError(t, json.Unmarshal([]byte(input), &inputData))
		result, err := m.ApplyQueryMappings("stts-upos", MappingOptions{Direction: BtoA}, inputData)
		require.NoError(t, err)
		return asObject(t, result, "mapping result")
	}

	// wrapOf returns the "wrap" object of a koral:token node.
	wrapOf := func(t *testing.T, token any) map[string]any {
		t.Helper()
		return asObject(t, asObject(t, token, "token")["wrap"], "wrap")
	}

	t.Run("Multi-token [DET][ADJ][NOUN] BtoA produces correct disjunctions", func(t *testing.T) {
		// This reproduces the exact problem from the issue:
		// [upos/p=DET][upos/p=ADJ][upos/p=NOUN] mapped B->A
		result := mapBtoA(t, `{
			"@type": "koral:group",
			"operation": "operation:sequence",
			"operands": [
				{
					"@type": "koral:token",
					"wrap": {
						"@type": "koral:term",
						"foundry": "upos",
						"key": "DET",
						"layer": "p",
						"match": "match:eq"
					}
				},
				{
					"@type": "koral:token",
					"wrap": {
						"@type": "koral:term",
						"foundry": "upos",
						"key": "ADJ",
						"layer": "p",
						"match": "match:eq"
					}
				},
				{
					"@type": "koral:token",
					"wrap": {
						"@type": "koral:term",
						"foundry": "upos",
						"key": "NOUN",
						"layer": "p",
						"match": "match:eq"
					}
				}
			]
		}`)
		operands := asArray(t, result["operands"], "operands")
		require.Len(t, operands, 3)

		// Token 1: DET -> ART | PDAT | PIAT | PIDAT | PPOSAT | PRELAT | PWAT
		wrap1 := wrapOf(t, operands[0])
		assert.Equal(t, "koral:termGroup", wrap1["@type"], "DET should be mapped to OR group")
		assert.Equal(t, "relation:or", wrap1["relation"])
		ops1 := asArray(t, wrap1["operands"], "DET operands")
		assert.Len(t, ops1, 7, "DET fallback should have 7 alternatives")

		// Token 2: ADJ -> ADJA | ADJD
		wrap2 := wrapOf(t, operands[1])
		assert.Equal(t, "koral:termGroup", wrap2["@type"], "ADJ should be mapped to OR group")
		assert.Equal(t, "relation:or", wrap2["relation"])
		ops2 := asArray(t, wrap2["operands"], "ADJ operands")
		assert.Len(t, ops2, 2, "ADJ fallback should have 2 alternatives")
		// Collect the keys in a loop so that an unexpected operand count is
		// reported by the assertion above rather than by an index panic.
		adjKeys := make([]any, 0, len(ops2))
		for _, op := range ops2 {
			adjKeys = append(adjKeys, asObject(t, op, "ADJ alternative")["key"])
		}
		assert.Contains(t, adjKeys, "ADJA")
		assert.Contains(t, adjKeys, "ADJD")

		// Token 3: NOUN -> NN (specific rule, not fallback, because
		// [NN] <> [NOUN] has specificity 1 and [NN | NE] <> [NOUN | PROPN]
		// has pattern specificity 0 on B-side (OR group))
		wrap3 := wrapOf(t, operands[2])
		assert.Equal(t, "koral:term", wrap3["@type"], "NOUN should map to single NN term")
		assert.Equal(t, "NN", wrap3["key"])
	})

	t.Run("Specific input [ADJ & Variant=Short] maps to ADJD only", func(t *testing.T) {
		wrap := wrapOf(t, mapBtoA(t, `{
			"@type": "koral:token",
			"wrap": {
				"@type": "koral:termGroup",
				"operands": [
					{
						"@type": "koral:term",
						"foundry": "upos",
						"key": "ADJ",
						"layer": "p",
						"match": "match:eq"
					},
					{
						"@type": "koral:term",
						"foundry": "upos",
						"key": "Short",
						"layer": "Variant",
						"match": "match:eq"
					}
				],
				"relation": "relation:and"
			}
		}`))
		assert.Equal(t, "koral:term", wrap["@type"])
		assert.Equal(t, "ADJD", wrap["key"])
	})

	t.Run("Specific input [DET & PronType=Art] maps to ART only", func(t *testing.T) {
		wrap := wrapOf(t, mapBtoA(t, `{
			"@type": "koral:token",
			"wrap": {
				"@type": "koral:termGroup",
				"operands": [
					{
						"@type": "koral:term",
						"foundry": "upos",
						"key": "DET",
						"layer": "p",
						"match": "match:eq"
					},
					{
						"@type": "koral:term",
						"foundry": "upos",
						"key": "Art",
						"layer": "PronType",
						"match": "match:eq"
					}
				],
				"relation": "relation:and"
			}
		}`))
		assert.Equal(t, "koral:term", wrap["@type"])
		assert.Equal(t, "ART", wrap["key"])
	})
}
// TestSpecificityBasedRuleSelection exercises which rule is applied when a
// B-side input is mapped B->A and zero, one or several rules of a mapping list
// match it. Every subtest builds its own mapper from a small ad-hoc list that
// maps between opennlp/p (side A) and upos/p (side B).
func TestSpecificityBasedRuleSelection(t *testing.T) {
	// uposToken renders a koral:token wrapping a single upos/p term with the
	// given key.
	uposToken := func(key string) string {
		return `{
			"@type": "koral:token",
			"wrap": {
				"@type": "koral:term",
				"foundry": "upos",
				"key": "` + key + `",
				"layer": "p",
				"match": "match:eq"
			}
		}`
	}

	// mapWrapBtoA builds a mapper holding a single list with the given id and
	// rules, applies it B->A to the JSON input and returns the "wrap" object
	// of the result. Unexpected result shapes fail the test instead of
	// panicking on a type assertion.
	mapWrapBtoA := func(t *testing.T, id string, rules []config.MappingRule, input string) map[string]any {
		t.Helper()
		list := config.MappingList{
			ID:       id,
			FoundryA: "opennlp",
			LayerA:   "p",
			FoundryB: "upos",
			LayerB:   "p",
			Mappings: rules,
		}
		m, err := NewMapper([]config.MappingList{list})
		require.NoError(t, err)

		var inputData any
		require.NoError(t, json.Unmarshal([]byte(input), &inputData))

		result, err := m.ApplyQueryMappings(id, MappingOptions{Direction: BtoA}, inputData)
		require.NoError(t, err)

		resultMap, ok := result.(map[string]any)
		require.Truef(t, ok, "mapping result should be a JSON object, got %T", result)
		wrap, ok := resultMap["wrap"].(map[string]any)
		require.Truef(t, ok, "wrap should be a JSON object, got %T", resultMap["wrap"])
		return wrap
	}

	t.Run("More specific rule wins over less specific", func(t *testing.T) {
		rules := []config.MappingRule{
			"[ADJA] <> [ADJ]",
			"[ADJD] <> [ADJ & Variant=Short]",
		}
		// Input: ADJ & Variant=Short — matches the internal representation
		// where "Variant=Short" is parsed as layer="Variant", key="Short"
		wrap := mapWrapBtoA(t, "spec-test", rules, `{
			"@type": "koral:token",
			"wrap": {
				"@type": "koral:termGroup",
				"operands": [
					{
						"@type": "koral:term",
						"foundry": "upos",
						"key": "ADJ",
						"layer": "p",
						"match": "match:eq"
					},
					{
						"@type": "koral:term",
						"foundry": "upos",
						"key": "Short",
						"layer": "Variant",
						"match": "match:eq"
					}
				],
				"relation": "relation:and"
			}
		}`)
		assert.Equal(t, "koral:term", wrap["@type"])
		assert.Equal(t, "ADJD", wrap["key"])
	})

	t.Run("Same specificity - first rule in file order wins", func(t *testing.T) {
		rules := []config.MappingRule{
			"[KOUI] <> [SCONJ]",
			"[KOUS] <> [SCONJ]",
		}
		wrap := mapWrapBtoA(t, "tie-test", rules, uposToken("SCONJ"))
		assert.Equal(t, "KOUI", wrap["key"])
	})

	t.Run("Single matching rule - identical to first-match-wins", func(t *testing.T) {
		rules := []config.MappingRule{
			"[NN] <> [NOUN]",
		}
		wrap := mapWrapBtoA(t, "single-test", rules, uposToken("NOUN"))
		assert.Equal(t, "NN", wrap["key"])
	})

	t.Run("No matching rule - node passes through unchanged", func(t *testing.T) {
		rules := []config.MappingRule{
			"[NN] <> [NOUN]",
		}
		wrap := mapWrapBtoA(t, "nomatch-test", rules, uposToken("VERB"))
		assert.Equal(t, "VERB", wrap["key"])
	})

	// The subtest name states what the assertions verify: the OR-disjunction
	// fallback is the rule that gets applied here.
	t.Run("Fallback OR-disjunction rule wins over specific rule at equal pattern specificity", func(t *testing.T) {
		rules := []config.MappingRule{
			"[ADJA] <> [ADJ]",
			"[ADJA | ADJD] <> [ADJ]",
		}
		wrap := mapWrapBtoA(t, "fallback-test", rules, uposToken("ADJ"))
		// Both rules match with pattern specificity 1 on B-side.
		// Rule 1 replacement specificity = 1 (Term), Rule 2 replacement specificity = 0 (OR group).
		// Lower replacement specificity wins (broader/fallback output) => rule 2 wins.
		assert.Equal(t, "koral:termGroup", wrap["@type"])
		assert.Equal(t, "relation:or", wrap["relation"])
	})
}