Unify corpus and annotation rewrite entries
Change-Id: Ie2e91e76bb46a2b8b7db7b15b00b09a0188f7a59
diff --git a/ast/ast.go b/ast/ast.go
index 918a8cd..b944824 100644
--- a/ast/ast.go
+++ b/ast/ast.go
@@ -101,8 +101,10 @@
}
}
-// MarshalJSON implements custom JSON marshaling to ensure clean output
-func (r *Rewrite) MarshalJSON() ([]byte, error) {
+// MarshalJSON implements custom JSON marshaling to ensure clean output.
+// Uses a value receiver so both json.Marshal(rw) and json.Marshal(&rw)
+// produce identical output including the @type field.
+func (r Rewrite) MarshalJSON() ([]byte, error) {
// Create a map with only the modern field names
result := make(map[string]any)
@@ -131,6 +133,34 @@
return json.Marshal(result)
}
+// ToMap converts the Rewrite to a map[string]any suitable for direct
+// injection into map-based JSON structures, mirroring MarshalJSON: "@type"
+// is always set and every other key appears only when its field is set.
+// The value receiver (as on MarshalJSON) lets it be called on
+// non-addressable Rewrite values as well as through pointers.
+func (r Rewrite) ToMap() map[string]any {
+	result := map[string]any{"@type": "koral:rewrite"}
+	if r.Editor != "" {
+		result["editor"] = r.Editor
+	}
+	if r.Operation != "" {
+		result["operation"] = r.Operation
+	}
+	if r.Scope != "" {
+		result["scope"] = r.Scope
+	}
+	if r.Src != "" {
+		result["src"] = r.Src
+	}
+	if r.Comment != "" {
+		result["_comment"] = r.Comment
+	}
+	if r.Original != nil {
+		result["original"] = r.Original // stored as-is, not copied
+	}
+	return result
+}
+
// Token represents a koral:token
type Token struct {
Wrap Node `json:"wrap"`
diff --git a/ast/rewrite_test.go b/ast/rewrite_test.go
index 4fe0ffb..6aa524d 100644
--- a/ast/rewrite_test.go
+++ b/ast/rewrite_test.go
@@ -386,6 +386,7 @@
err = json.Unmarshal(data, &result)
require.NoError(t, err)
+ assert.Equal(t, "koral:rewrite", result["@type"])
assert.Equal(t, "termMapper", result["editor"])
assert.Equal(t, "operation:mapping", result["operation"])
assert.Equal(t, "foundry", result["scope"])
@@ -397,3 +398,90 @@
assert.NotContains(t, result, "source")
assert.NotContains(t, result, "origin")
}
+
+func TestRewriteMarshalJSONValueAndPointerConsistent(t *testing.T) {
+ rw := Rewrite{
+ Editor: "Koral-Mapper",
+ Scope: "key",
+ Original: "textClass",
+ }
+ // Marshal the same Rewrite once by value and once by pointer.
+ valueBytes, err := json.Marshal(rw)
+ require.NoError(t, err)
+
+ pointerBytes, err := json.Marshal(&rw)
+ require.NoError(t, err)
+ // Both encodings must be semantically equal JSON.
+ assert.JSONEq(t, string(pointerBytes), string(valueBytes))
+}
+
+func TestRewriteToMap(t *testing.T) {
+ t.Run("All fields set", func(t *testing.T) {
+ rw := Rewrite{
+ Editor: "termMapper",
+ Operation: "operation:mapping",
+ Scope: "foundry",
+ Src: "source-value",
+ Comment: "Test comment",
+ Original: "original-value",
+ }
+ // Every populated field must appear under its JSON key, plus "@type".
+ m := rw.ToMap()
+ assert.Equal(t, "koral:rewrite", m["@type"])
+ assert.Equal(t, "termMapper", m["editor"])
+ assert.Equal(t, "operation:mapping", m["operation"])
+ assert.Equal(t, "foundry", m["scope"])
+ assert.Equal(t, "source-value", m["src"])
+ assert.Equal(t, "Test comment", m["_comment"])
+ assert.Equal(t, "original-value", m["original"])
+ })
+
+ t.Run("Only editor and scope", func(t *testing.T) {
+ rw := Rewrite{
+ Editor: "Koral-Mapper",
+ Scope: "key",
+ }
+ // Unset fields must be absent from the map, not present as empty values.
+ m := rw.ToMap()
+ assert.Equal(t, "koral:rewrite", m["@type"])
+ assert.Equal(t, "Koral-Mapper", m["editor"])
+ assert.Equal(t, "key", m["scope"])
+ assert.NotContains(t, m, "operation")
+ assert.NotContains(t, m, "src")
+ assert.NotContains(t, m, "_comment")
+ assert.NotContains(t, m, "original")
+ })
+
+ t.Run("With complex original", func(t *testing.T) {
+ original := map[string]any{
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ }
+ rw := Rewrite{
+ Editor: "Koral-Mapper",
+ Original: original,
+ }
+ // A map-valued Original must come back equal to the map that was set.
+ m := rw.ToMap()
+ assert.Equal(t, "koral:rewrite", m["@type"])
+ assert.Equal(t, "Koral-Mapper", m["editor"])
+ assert.Equal(t, original, m["original"])
+ })
+
+ t.Run("Matches MarshalJSON output", func(t *testing.T) {
+ rw := Rewrite{
+ Editor: "Koral-Mapper",
+ Scope: "key",
+ Original: "textClass",
+ }
+ // Compare ToMap against a decode of what json.Marshal emits for the same value.
+ toMapResult := rw.ToMap()
+
+ data, err := json.Marshal(&rw)
+ require.NoError(t, err)
+ var fromJSON map[string]any
+ require.NoError(t, json.Unmarshal(data, &fromJSON))
+
+ assert.Equal(t, fromJSON, toMapResult)
+ })
+}
diff --git a/mapper/corpus.go b/mapper/corpus.go
index 18759d6..1a5db5b 100644
--- a/mapper/corpus.go
+++ b/mapper/corpus.go
@@ -5,6 +5,7 @@
"regexp"
"slices"
+ "github.com/KorAP/Koral-Mapper/ast"
"github.com/KorAP/Koral-Mapper/parser"
)
@@ -362,25 +363,23 @@
origAtType, _ := original["@type"].(string)
- // If the original was a group, store the whole structure as the rewrite original
+ var rw ast.Rewrite
+
if origAtType == "koral:docGroup" || origAtType == "koral:fieldGroup" {
- rewrite := newRewriteEntry("", original)
- replacedMap["rewrites"] = []any{rewrite}
- return
- }
-
- origKey, _ := original["key"].(string)
- newKey, _ := replacedMap["key"].(string)
-
- var rewrite map[string]any
- if origKey != newKey && origKey != "" {
- rewrite = newRewriteEntry("key", origKey)
+ rw = ast.Rewrite{Editor: RewriteEditor, Original: original}
} else {
- origValue, _ := original["value"].(string)
- rewrite = newRewriteEntry("value", origValue)
+ origKey, _ := original["key"].(string)
+ newKey, _ := replacedMap["key"].(string)
+
+ if origKey != newKey && origKey != "" {
+ rw = ast.Rewrite{Editor: RewriteEditor, Scope: "key", Original: origKey}
+ } else {
+ origValue, _ := original["value"].(string)
+ rw = ast.Rewrite{Editor: RewriteEditor, Scope: "value", Original: origValue}
+ }
}
- replacedMap["rewrites"] = []any{rewrite}
+ replacedMap["rewrites"] = []any{rw.ToMap()}
}
// applyCorpusResponseMappings processes fields arrays with corpus rules.
diff --git a/mapper/corpus_test.go b/mapper/corpus_test.go
index a65c7ff..8ab9be2 100644
--- a/mapper/corpus_test.go
+++ b/mapper/corpus_test.go
@@ -5,6 +5,7 @@
"os"
"testing"
+ "github.com/KorAP/Koral-Mapper/ast"
"github.com/KorAP/Koral-Mapper/config"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -1583,3 +1584,198 @@
fields := result.(map[string]any)["fields"].([]any)
require.Len(t, fields, 1, "OR replacement should be skipped")
}
+
+func TestCorpusRewriteRoundTripsAsAstRewrite(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+ // The rule's two sides use different keys; the test expects a key-scoped rewrite.
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "novel",
+ "match": "match:eq",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB, AddRewrites: true}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+
+ rewrites, ok := corpus["rewrites"].([]any)
+ require.True(t, ok)
+ require.Len(t, rewrites, 1)
+ // Re-encode the map-based rewrite entry and decode it into ast.Rewrite.
+ rewriteBytes, err := json.Marshal(rewrites[0])
+ require.NoError(t, err)
+
+ var rw ast.Rewrite
+ require.NoError(t, json.Unmarshal(rewriteBytes, &rw))
+
+ assert.Equal(t, "Koral-Mapper", rw.Editor)
+ assert.Equal(t, "key", rw.Scope)
+ assert.Equal(t, "textClass", rw.Original)
+}
+
+func TestCorpusRewriteValueScopeRoundTripsAsAstRewrite(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> textClass=fiction")
+ // Both sides of the rule share the key; the test expects a value-scoped rewrite.
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "novel",
+ "match": "match:eq",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB, AddRewrites: true}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "textClass", corpus["key"])
+ assert.Equal(t, "fiction", corpus["value"])
+
+ rewrites, ok := corpus["rewrites"].([]any)
+ require.True(t, ok)
+ require.Len(t, rewrites, 1)
+ // Re-encode the map-based rewrite entry and decode it into ast.Rewrite.
+ rewriteBytes, err := json.Marshal(rewrites[0])
+ require.NoError(t, err)
+
+ var rw ast.Rewrite
+ require.NoError(t, json.Unmarshal(rewriteBytes, &rw))
+
+ assert.Equal(t, "Koral-Mapper", rw.Editor)
+ assert.Equal(t, "value", rw.Scope)
+ assert.Equal(t, "novel", rw.Original)
+}
+
+func TestAddCorpusRewriteKeyScope(t *testing.T) {
+ replaced := map[string]any{
+ "@type": "koral:doc",
+ "key": "genre",
+ "value": "fiction",
+ }
+ original := map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "novel",
+ }
+ // Keys differ between original and replacement: expect scope "key" carrying the old key.
+ addCorpusRewrite(replaced, original)
+
+ rewrites, ok := replaced["rewrites"].([]any)
+ require.True(t, ok)
+ require.Len(t, rewrites, 1)
+ // Re-encode the map-based rewrite entry and decode it into ast.Rewrite.
+ rewriteBytes, err := json.Marshal(rewrites[0])
+ require.NoError(t, err)
+
+ var rw ast.Rewrite
+ require.NoError(t, json.Unmarshal(rewriteBytes, &rw))
+
+ assert.Equal(t, RewriteEditor, rw.Editor)
+ assert.Equal(t, "key", rw.Scope)
+ assert.Equal(t, "textClass", rw.Original)
+}
+
+func TestAddCorpusRewriteValueScope(t *testing.T) {
+ replaced := map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "fiction",
+ }
+ original := map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "novel",
+ }
+ // Keys are equal, so the rewrite is expected to use scope "value" with the old value.
+ addCorpusRewrite(replaced, original)
+
+ rewrites, ok := replaced["rewrites"].([]any)
+ require.True(t, ok)
+ require.Len(t, rewrites, 1)
+ // Re-encode the map-based rewrite entry and decode it into ast.Rewrite.
+ rewriteBytes, err := json.Marshal(rewrites[0])
+ require.NoError(t, err)
+
+ var rw ast.Rewrite
+ require.NoError(t, json.Unmarshal(rewriteBytes, &rw))
+
+ assert.Equal(t, RewriteEditor, rw.Editor)
+ assert.Equal(t, "value", rw.Scope)
+ assert.Equal(t, "novel", rw.Original)
+}
+
+func TestAddCorpusRewriteGroupOriginal(t *testing.T) {
+ replaced := map[string]any{
+ "@type": "koral:doc",
+ "key": "genre",
+ "value": "fiction",
+ }
+ original := map[string]any{
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": []any{
+ map[string]any{"@type": "koral:doc", "key": "textClass", "value": "kultur"},
+ map[string]any{"@type": "koral:doc", "key": "textClass", "value": "musik"},
+ },
+ }
+ // A koral:docGroup original is expected to be stored whole, without a scope.
+ addCorpusRewrite(replaced, original)
+
+ rewrites, ok := replaced["rewrites"].([]any)
+ require.True(t, ok)
+ require.Len(t, rewrites, 1)
+ // Re-encode the map-based rewrite entry and decode it into ast.Rewrite.
+ rewriteBytes, err := json.Marshal(rewrites[0])
+ require.NoError(t, err)
+
+ var rw ast.Rewrite
+ require.NoError(t, json.Unmarshal(rewriteBytes, &rw))
+
+ assert.Equal(t, RewriteEditor, rw.Editor)
+ assert.Empty(t, rw.Scope)
+ require.NotNil(t, rw.Original)
+
+ originalMap, ok := rw.Original.(map[string]any)
+ require.True(t, ok)
+ assert.Equal(t, "koral:docGroup", originalMap["@type"])
+}
+
+func TestCorpusRewriteGroupOriginalRoundTripsAsAstRewrite(t *testing.T) {
+ m := newCorpusMapper(t, "genre=fiction <> (textClass=kultur & textClass=musik)")
+ // Applied BtoA to a docGroup input; the rewrite is expected to keep the whole group as Original.
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": []any{
+ map[string]any{"@type": "koral:doc", "key": "textClass", "value": "kultur"},
+ map[string]any{"@type": "koral:doc", "key": "textClass", "value": "musik"},
+ },
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: BtoA, AddRewrites: true}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+
+ rewrites, ok := corpus["rewrites"].([]any)
+ require.True(t, ok)
+ require.Len(t, rewrites, 1)
+ // Re-encode the map-based rewrite entry and decode it into ast.Rewrite.
+ rewriteBytes, err := json.Marshal(rewrites[0])
+ require.NoError(t, err)
+
+ var rw ast.Rewrite
+ require.NoError(t, json.Unmarshal(rewriteBytes, &rw))
+
+ assert.Equal(t, "Koral-Mapper", rw.Editor)
+ assert.Empty(t, rw.Scope)
+ require.NotNil(t, rw.Original)
+
+ originalMap, ok := rw.Original.(map[string]any)
+ require.True(t, ok)
+ assert.Equal(t, "koral:docGroup", originalMap["@type"])
+}
diff --git a/mapper/mapper.go b/mapper/mapper.go
index ce172de..2085093 100644
--- a/mapper/mapper.go
+++ b/mapper/mapper.go
@@ -17,21 +17,6 @@
RewriteEditor = "Koral-Mapper"
)
-// newRewriteEntry creates a koral:rewrite annotation entry.
-func newRewriteEntry(scope string, original any) map[string]any {
- r := map[string]any{
- "@type": "koral:rewrite",
- "editor": RewriteEditor,
- }
- if scope != "" {
- r["scope"] = scope
- }
- if original != nil {
- r["original"] = original
- }
- return r
-}
-
// String converts the Direction to its string representation
func (d Direction) String() string {
if d {