blob: c561663d59b6e2cf879fbf86b58fc63febf001a8 [file] [log] [blame]
package mapper
import (
"encoding/json"
"testing"
"github.com/KorAP/Koral-Mapper/ast"
"github.com/KorAP/Koral-Mapper/config"
"github.com/KorAP/Koral-Mapper/matcher"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestMapper(t *testing.T) {
// Create test mapping list
mappingList := config.MappingList{
ID: "test-mapper",
FoundryA: "opennlp",
LayerA: "p",
FoundryB: "upos",
LayerB: "p",
Mappings: []config.MappingRule{
"[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
"[DET] <> [opennlp/p=DET]",
},
}
// Create a new mapper
m, err := NewMapper([]config.MappingList{mappingList})
require.NoError(t, err)
tests := []struct {
name string
mappingID string
opts MappingOptions
input string
expected string
expectError bool
}{
{
name: "Simple A to B mapping",
mappingID: "test-mapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:termGroup",
"operands": [
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
},
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "AdjType",
"layer": "p",
"match": "match:eq",
"value": "Pdt"
}
],
"relation": "relation:and"
}
}`,
},
{
name: "Simple A to B mapping with rewrites",
mappingID: "test-mapper",
opts: MappingOptions{
Direction: AtoB,
AddRewrites: true,
},
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:termGroup",
"operands": [
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
},
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "AdjType",
"layer": "p",
"match": "match:eq",
"value": "Pdt"
}
],
"relation": "relation:and",
"rewrites": [
{
"@type": "koral:rewrite",
"editor": "termMapper",
"original": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}
]
}
}`,
},
{
name: "Mapping with foundry override and rewrites",
mappingID: "test-mapper",
opts: MappingOptions{
Direction: AtoB,
FoundryB: "custom",
AddRewrites: true,
},
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:termGroup",
"operands": [
{
"@type": "koral:term",
"foundry": "custom",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
},
{
"@type": "koral:term",
"foundry": "custom",
"key": "AdjType",
"layer": "p",
"match": "match:eq",
"value": "Pdt"
}
],
"relation": "relation:and",
"rewrites": [
{
"@type": "koral:rewrite",
"editor": "termMapper",
"original": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}
]
}
}`,
},
{
name: "B to A direction",
mappingID: "test-mapper",
opts: MappingOptions{
Direction: BtoA,
},
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expectError: false,
},
{
name: "Invalid mapping ID",
mappingID: "nonexistent",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expectError: true,
},
{
name: "Invalid direction",
mappingID: "test-mapper",
opts: MappingOptions{
Direction: Direction(false),
},
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expectError: false,
},
{
name: "Query with legacy rewrite field names",
mappingID: "test-mapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Legacy rewrite with source instead of editor",
"source": "LegacyEditor",
"operation": "operation:legacy",
"origin": "LegacySource"
}
],
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Legacy rewrite with source instead of editor",
"editor": "LegacyEditor",
"operation": "operation:legacy",
"src": "LegacySource"
}
],
"wrap": {
"@type": "koral:termGroup",
"operands": [
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
},
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "AdjType",
"layer": "p",
"match": "match:eq",
"value": "Pdt"
}
],
"relation": "relation:and"
}
}`,
},
{
name: "Query with mixed legacy and modern rewrite fields",
mappingID: "test-mapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Modern rewrite",
"editor": "ModernEditor",
"operation": "operation:modern",
"original": {
"@type": "koral:term",
"foundry": "original",
"key": "original-key"
}
},
{
"@type": "koral:rewrite",
"_comment": "Legacy rewrite with precedence test",
"editor": "PreferredEditor",
"source": "IgnoredSource",
"operation": "operation:precedence",
"original": "PreferredOriginal",
"src": "IgnoredSrc",
"origin": "IgnoredOrigin"
}
],
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Modern rewrite",
"editor": "ModernEditor",
"operation": "operation:modern",
"original": {
"@type": "koral:term",
"foundry": "original",
"key": "original-key"
}
},
{
"@type": "koral:rewrite",
"_comment": "Legacy rewrite with precedence test",
"editor": "PreferredEditor",
"operation": "operation:precedence",
"original": "PreferredOriginal"
}
],
"wrap": {
"@type": "koral:termGroup",
"operands": [
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
},
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "AdjType",
"layer": "p",
"match": "match:eq",
"value": "Pdt"
}
],
"relation": "relation:and"
}
}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Parse input JSON
var inputData any
err := json.Unmarshal([]byte(tt.input), &inputData)
require.NoError(t, err)
// Apply mappings
result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
if tt.expectError {
assert.Error(t, err)
return
}
require.NoError(t, err)
// Parse expected JSON
var expectedData any
err = json.Unmarshal([]byte(tt.expected), &expectedData)
require.NoError(t, err)
// Compare results
assert.Equal(t, expectedData, result)
})
}
}
func TestTokenToTermGroupWithRewrites(t *testing.T) {
// Create test mapping list specifically for token to termGroup test
mappingList := config.MappingList{
ID: "test-token-to-termgroup",
FoundryA: "opennlp",
LayerA: "p",
FoundryB: "opennlp", // Keep the same foundry for both sides
LayerB: "p",
Mappings: []config.MappingRule{
"[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
},
}
// Create a new mapper
m, err := NewMapper([]config.MappingList{mappingList})
require.NoError(t, err)
input := `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "This rewrite should be preserved",
"editor": "TestEditor",
"operation": "operation:test",
"src": "TestSource"
}
],
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
}
}`
expected := `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "This rewrite should be preserved",
"editor": "TestEditor",
"operation": "operation:test",
"src": "TestSource"
}
],
"wrap": {
"@type": "koral:termGroup",
"operands": [
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "PIDAT",
"layer": "p",
"match": "match:eq"
},
{
"@type": "koral:term",
"foundry": "opennlp",
"key": "AdjType",
"layer": "p",
"match": "match:eq",
"value": "Pdt"
}
],
"relation": "relation:and"
}
}`
// Parse input JSON
var inputData any
err = json.Unmarshal([]byte(input), &inputData)
require.NoError(t, err)
// Apply mappings
result, err := m.ApplyQueryMappings("test-token-to-termgroup", MappingOptions{Direction: AtoB}, inputData)
require.NoError(t, err)
// Parse expected JSON
var expectedData any
err = json.Unmarshal([]byte(expected), &expectedData)
require.NoError(t, err)
// Compare results
assert.Equal(t, expectedData, result)
}
func TestMatchComplexPatterns(t *testing.T) {
tests := []struct {
name string
pattern ast.Pattern
replacement ast.Replacement
input ast.Node
expected ast.Node
}{
{
name: "Deep nested pattern with mixed operators",
pattern: ast.Pattern{
Root: &ast.TermGroup{
Operands: []ast.Node{
&ast.Term{
Key: "A",
Match: ast.MatchEqual,
},
&ast.TermGroup{
Operands: []ast.Node{
&ast.Term{
Key: "B",
Match: ast.MatchEqual,
},
&ast.TermGroup{
Operands: []ast.Node{
&ast.Term{
Key: "C",
Match: ast.MatchEqual,
},
&ast.Term{
Key: "D",
Match: ast.MatchEqual,
},
},
Relation: ast.AndRelation,
},
},
Relation: ast.OrRelation,
},
},
Relation: ast.AndRelation,
},
},
replacement: ast.Replacement{
Root: &ast.Term{
Key: "RESULT",
Match: ast.MatchEqual,
},
},
input: &ast.TermGroup{
Operands: []ast.Node{
&ast.Term{
Key: "A",
Match: ast.MatchEqual,
},
&ast.TermGroup{
Operands: []ast.Node{
&ast.Term{
Key: "C",
Match: ast.MatchEqual,
},
&ast.Term{
Key: "D",
Match: ast.MatchEqual,
},
},
Relation: ast.AndRelation,
},
},
Relation: ast.AndRelation,
},
expected: &ast.Term{
Key: "RESULT",
Match: ast.MatchEqual,
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
m, err := matcher.NewMatcher(tt.pattern, tt.replacement)
require.NoError(t, err)
result := m.Replace(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
func TestInvalidPatternReplacement(t *testing.T) {
// Create test mapping list
mappingList := config.MappingList{
ID: "test-mapper",
FoundryA: "opennlp",
LayerA: "p",
FoundryB: "upos",
LayerB: "p",
Mappings: []config.MappingRule{
"[PIDAT] <> [opennlp/p=PIDAT & opennlp/p=AdjType:Pdt]",
},
}
// Create a new mapper
m, err := NewMapper([]config.MappingList{mappingList})
require.NoError(t, err)
tests := []struct {
name string
input string
expectError bool
errorMsg string
}{
{
name: "Invalid input - empty term group",
input: `{
"@type": "koral:token",
"wrap": {
"@type": "koral:termGroup",
"operands": [],
"relation": "relation:and"
}
}`,
expectError: true,
errorMsg: "failed to parse JSON into AST: error parsing wrapped node: term group must have at least one operand",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var inputData any
err := json.Unmarshal([]byte(tt.input), &inputData)
require.NoError(t, err)
result, err := m.ApplyQueryMappings("test-mapper", MappingOptions{Direction: AtoB}, inputData)
if tt.expectError {
assert.Error(t, err)
assert.Equal(t, tt.errorMsg, err.Error())
assert.Nil(t, result)
} else {
assert.NoError(t, err)
assert.NotNil(t, result)
}
})
}
}
func TestQueryWrapperMappings(t *testing.T) {
mappingList := config.MappingList{
ID: "test-wrapper",
FoundryA: "opennlp",
LayerA: "orth",
FoundryB: "upos",
LayerB: "orth",
Mappings: []config.MappingRule{
"[opennlp/orth=Baum] <> [opennlp/orth=X]",
},
}
// Create a new mapper
m, err := NewMapper([]config.MappingList{mappingList})
require.NoError(t, err)
tests := []struct {
name string
mappingID string
opts MappingOptions
input string
expected string
expectError bool
}{
{
name: "Query wrapper case with rewrites preservation",
mappingID: "test-wrapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"collection": {
"@type": "koral:doc",
"key": "availability",
"match": "match:eq",
"type": "type:regex",
"value": "CC.*"
},
"query": {
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Original rewrite that should be preserved",
"editor": "Original",
"operation": "operation:original",
"src": "Original"
}
],
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "Baum",
"layer": "orth",
"match": "match:eq"
}
}
}`,
expected: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"collection": {
"@type": "koral:doc",
"key": "availability",
"match": "match:eq",
"type": "type:regex",
"value": "CC.*"
},
"query": {
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Original rewrite that should be preserved",
"editor": "Original",
"operation": "operation:original",
"src": "Original"
}
],
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "X",
"layer": "orth",
"match": "match:eq"
}
}
}`,
},
{
name: "Empty query field",
mappingID: "test-wrapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"query": null
}`,
expected: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"query": null
}`,
},
{
name: "Missing query field",
mappingID: "test-wrapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"collection": {
"@type": "koral:doc"
}
}`,
expected: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"collection": {
"@type": "koral:doc"
}
}`,
},
{
name: "Query field with non-object value",
mappingID: "test-wrapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"query": "invalid"
}`,
expected: `{
"@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
"query": "invalid"
}`,
},
{
name: "Query with rewrites in nested token",
mappingID: "test-wrapper",
opts: MappingOptions{
Direction: AtoB,
},
input: `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Nested rewrite that should be preserved",
"editor": "Nested",
"operation": "operation:nested",
"src": "Nested"
}
],
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "Baum",
"layer": "orth",
"match": "match:eq"
}
}`,
expected: `{
"@type": "koral:token",
"rewrites": [
{
"@type": "koral:rewrite",
"_comment": "Nested rewrite that should be preserved",
"editor": "Nested",
"operation": "operation:nested",
"src": "Nested"
}
],
"wrap": {
"@type": "koral:term",
"foundry": "opennlp",
"key": "X",
"layer": "orth",
"match": "match:eq"
}
}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Parse input JSON
var inputData any
err := json.Unmarshal([]byte(tt.input), &inputData)
require.NoError(t, err)
// Apply mappings
result, err := m.ApplyQueryMappings(tt.mappingID, tt.opts, inputData)
if tt.expectError {
assert.Error(t, err)
return
}
require.NoError(t, err)
// Parse expected JSON
var expectedData any
err = json.Unmarshal([]byte(tt.expected), &expectedData)
require.NoError(t, err)
// Compare results
assert.Equal(t, expectedData, result)
})
}
}