Emit per-field scoped rewrites for all changed term attributes to ensure reversibility
Change-Id: Ifd3dd025e217fb1ffee47b7aa571934a02870778
diff --git a/cmd/koralmapper/main_test.go b/cmd/koralmapper/main_test.go
index a9bb530..4b57422 100644
--- a/cmd/koralmapper/main_test.go
+++ b/cmd/koralmapper/main_test.go
@@ -1678,6 +1678,12 @@
"scope": "foundry",
"original": "opennlp",
},
+ map[string]any{
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
+ "scope": "key",
+ "original": "DET",
+ },
},
},
},
diff --git a/mapper/mapper_test.go b/mapper/mapper_test.go
index ed36cd3..4d3a411 100644
--- a/mapper/mapper_test.go
+++ b/mapper/mapper_test.go
@@ -668,6 +668,247 @@
}
}
+func TestMultiFieldRewritesAreReversible(t *testing.T) {
+ mappingList := config.MappingList{
+ ID: "multi-field",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "upos",
+ LayerB: "pos",
+ Mappings: []config.MappingRule{
+ "[DET] <> [PRON]",
+ },
+ }
+
+ m, err := NewMapper([]config.MappingList{mappingList})
+ require.NoError(t, err)
+
+ tests := []struct {
+ name string
+ opts MappingOptions
+ input string
+ expected string
+ }{
+ {
+ name: "Multi-field change: foundry + layer + key all change",
+ opts: MappingOptions{
+ Direction: AtoB,
+ AddRewrites: true,
+ },
+ input: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`,
+ expected: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "PRON",
+ "layer": "pos",
+ "match": "match:eq",
+ "rewrites": [
+ {
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
+ "scope": "foundry",
+ "original": "opennlp"
+ },
+ {
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
+ "scope": "layer",
+ "original": "p"
+ },
+ {
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
+ "scope": "key",
+ "original": "DET"
+ }
+ ]
+ }
+ }`,
+ },
+ {
+ name: "Reverse direction: foundry + layer + key all change back",
+ opts: MappingOptions{
+ Direction: BtoA,
+ AddRewrites: true,
+ },
+ input: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "PRON",
+ "layer": "pos",
+ "match": "match:eq"
+ }
+ }`,
+ expected: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq",
+ "rewrites": [
+ {
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
+ "scope": "foundry",
+ "original": "upos"
+ },
+ {
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
+ "scope": "layer",
+ "original": "pos"
+ },
+ {
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
+ "scope": "key",
+ "original": "PRON"
+ }
+ ]
+ }
+ }`,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ var inputData any
+ err := json.Unmarshal([]byte(tt.input), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("multi-field", tt.opts, inputData)
+ require.NoError(t, err)
+
+ var expectedData any
+ err = json.Unmarshal([]byte(tt.expected), &expectedData)
+ require.NoError(t, err)
+
+ assert.Equal(t, expectedData, result)
+ })
+ }
+}
+
+func TestSingleFieldRewrite(t *testing.T) {
+ mappingList := config.MappingList{
+ ID: "same-fl",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "opennlp",
+ LayerB: "p",
+ Mappings: []config.MappingRule{
+ "[DET] <> [PRON]",
+ },
+ }
+
+ m, err := NewMapper([]config.MappingList{mappingList})
+ require.NoError(t, err)
+
+ var inputData any
+ err = json.Unmarshal([]byte(`{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`), &inputData)
+ require.NoError(t, err)
+
+ result, err := m.ApplyQueryMappings("same-fl", MappingOptions{
+ Direction: AtoB,
+ AddRewrites: true,
+ }, inputData)
+ require.NoError(t, err)
+
+ var expectedData any
+ err = json.Unmarshal([]byte(`{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "PRON",
+ "layer": "p",
+ "match": "match:eq",
+ "rewrites": [
+ {
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
+ "scope": "key",
+ "original": "DET"
+ }
+ ]
+ }
+ }`), &expectedData)
+ require.NoError(t, err)
+
+ assert.Equal(t, expectedData, result)
+}
+
+func TestBuildRewritesFieldInjection(t *testing.T) {
+ tests := []struct {
+ name string
+ original *ast.Term
+ new_ *ast.Term
+ expectedScopes []string
+ hasOriginals []bool
+ }{
+ {
+ name: "All fields change with originals",
+ original: &ast.Term{Foundry: "a", Layer: "l1", Key: "k1", Value: "v1", Match: ast.MatchEqual},
+ new_: &ast.Term{Foundry: "b", Layer: "l2", Key: "k2", Value: "v2", Match: ast.MatchEqual},
+ expectedScopes: []string{"foundry", "layer", "key", "value"},
+ hasOriginals: []bool{true, true, true, true},
+ },
+ {
+ name: "Injection: empty value becomes non-empty",
+ original: &ast.Term{Foundry: "a", Layer: "l", Key: "k", Match: ast.MatchEqual},
+ new_: &ast.Term{Foundry: "a", Layer: "l", Key: "k", Value: "v", Match: ast.MatchEqual},
+ expectedScopes: []string{"value"},
+ hasOriginals: []bool{false},
+ },
+ {
+ name: "Deletion: non-empty value becomes empty",
+ original: &ast.Term{Foundry: "a", Layer: "l", Key: "k", Value: "v", Match: ast.MatchEqual},
+ new_: &ast.Term{Foundry: "a", Layer: "l", Key: "k", Match: ast.MatchEqual},
+ expectedScopes: []string{"value"},
+ hasOriginals: []bool{true},
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ rewrites := buildRewrites(tt.original, tt.new_)
+ require.Len(t, rewrites, len(tt.expectedScopes))
+ for i, rw := range rewrites {
+ assert.Equal(t, RewriteEditor, rw.Editor)
+ assert.Equal(t, tt.expectedScopes[i], rw.Scope)
+ if tt.hasOriginals[i] {
+ assert.NotNil(t, rw.Original, "expected original for scope %s", tt.expectedScopes[i])
+ } else {
+ assert.Nil(t, rw.Original, "expected no original for scope %s (injection)", tt.expectedScopes[i])
+ }
+ }
+ })
+ }
+}
+
func TestQueryWrapperMappings(t *testing.T) {
mappingList := config.MappingList{
diff --git a/mapper/query.go b/mapper/query.go
index 9aed2db..57de912 100644
--- a/mapper/query.go
+++ b/mapper/query.go
@@ -266,44 +266,67 @@
addRewriteToNode(newInner, oldInner)
}
-// addRewriteToNode creates and attaches a rewrite entry to a node,
+// addRewriteToNode creates and attaches rewrite entries to a node,
// recording what the node looked like before the change.
func addRewriteToNode(newNode, originalNode ast.Node) {
- rw := buildRewrite(originalNode, newNode)
- ast.AppendRewrite(newNode, rw)
+ for _, rw := range buildRewrites(originalNode, newNode) {
+ ast.AppendRewrite(newNode, rw)
+ }
}
-// buildRewrite creates a Rewrite describing what changed between
-// originalNode and newNode. For simple term-level changes (just foundry,
-// layer, key, or value), it uses a scoped rewrite. For structural changes,
-// it stores the full original as an object.
-func buildRewrite(originalNode, newNode ast.Node) ast.Rewrite {
+// buildRewrites creates the Rewrite entries describing what changed between
+// originalNode and newNode. For two terms of the same type it emits one scoped
+// rewrite per changed field (foundry, layer, key, value) so each change can be
+// reverted; Original stays unset if the old field was empty. Otherwise it stores the full original as an object.
+func buildRewrites(originalNode, newNode ast.Node) []ast.Rewrite {
if term, ok := originalNode.(*ast.Term); ok && ast.IsTermNode(newNode) && originalNode.Type() == newNode.Type() {
newTerm := newNode.(*ast.Term)
+ var rewrites []ast.Rewrite
+
if term.Foundry != newTerm.Foundry {
- return ast.Rewrite{Editor: RewriteEditor, Scope: "foundry", Original: term.Foundry}
+ rw := ast.Rewrite{Editor: RewriteEditor, Scope: "foundry"}
+ if term.Foundry != "" {
+ rw.Original = term.Foundry
+ }
+ rewrites = append(rewrites, rw)
}
if term.Layer != newTerm.Layer {
- return ast.Rewrite{Editor: RewriteEditor, Scope: "layer", Original: term.Layer}
+ rw := ast.Rewrite{Editor: RewriteEditor, Scope: "layer"}
+ if term.Layer != "" {
+ rw.Original = term.Layer
+ }
+ rewrites = append(rewrites, rw)
}
if term.Key != newTerm.Key {
- return ast.Rewrite{Editor: RewriteEditor, Scope: "key", Original: term.Key}
+ rw := ast.Rewrite{Editor: RewriteEditor, Scope: "key"}
+ if term.Key != "" {
+ rw.Original = term.Key
+ }
+ rewrites = append(rewrites, rw)
}
if term.Value != newTerm.Value {
- return ast.Rewrite{Editor: RewriteEditor, Scope: "value", Original: term.Value}
+ rw := ast.Rewrite{Editor: RewriteEditor, Scope: "value"}
+ if term.Value != "" {
+ rw.Original = term.Value
+ }
+ rewrites = append(rewrites, rw)
+ }
+
+ if len(rewrites) > 0 {
+ return rewrites
}
}
// Structural change: serialize the original as the rewrite value
originalBytes, err := parser.SerializeToJSON(originalNode)
if err != nil {
- return ast.Rewrite{Editor: RewriteEditor}
+ return []ast.Rewrite{{Editor: RewriteEditor}}
}
var originalJSON any
if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
- return ast.Rewrite{Editor: RewriteEditor}
+ return []ast.Rewrite{{Editor: RewriteEditor}}
}
- return ast.Rewrite{Editor: RewriteEditor, Original: originalJSON}
+ return []ast.Rewrite{{Editor: RewriteEditor, Original: originalJSON}}
}
// isValidQueryObject returns true if data is a JSON object with an @type field.