Keep cascaded rewrites
Change-Id: I8dd4b3f16ff01b03b5fc74148414b24d1b43c7c7
diff --git a/cmd/koralmapper/main_test.go b/cmd/koralmapper/main_test.go
index 4b57422..ee6e067 100644
--- a/cmd/koralmapper/main_test.go
+++ b/cmd/koralmapper/main_test.go
@@ -1675,6 +1675,12 @@
map[string]any{
"@type": "koral:rewrite",
"editor": "Koral-Mapper",
+ "scope": "key",
+ "original": "PIDAT",
+ },
+ map[string]any{
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
"scope": "foundry",
"original": "opennlp",
},
diff --git a/mapper/cascade_test.go b/mapper/cascade_test.go
index 998d437..3dec81c 100644
--- a/mapper/cascade_test.go
+++ b/mapper/cascade_test.go
@@ -192,6 +192,141 @@
// --- Response cascade tests ---
+// TestCascadeQueryRewritesPreservedAcrossSteps runs a two-step query cascade
+// with AddRewrites enabled on both steps and checks that the rewrite recorded
+// by the first step is still present after the second step replaced the term.
+func TestCascadeQueryRewritesPreservedAcrossSteps(t *testing.T) {
+	// First list: only the key changes (PIDAT->DET), foundry/layer stay opennlp/p.
+	keyOnly := config.MappingList{
+		ID: "step1", FoundryA: "opennlp", LayerA: "p",
+		FoundryB: "opennlp", LayerB: "p",
+		Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
+	}
+	// Second list: key changes again (DET->PRON) and the foundry moves
+	// from opennlp to upos.
+	foundryAndKey := config.MappingList{
+		ID: "step2", FoundryA: "opennlp", LayerA: "p",
+		FoundryB: "upos", LayerB: "p",
+		Mappings: []config.MappingRule{`[DET] <> [PRON]`},
+	}
+	cascade, err := NewMapper([]config.MappingList{keyOnly, foundryAndKey})
+	require.NoError(t, err)
+
+	query := parseJSON(t, `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "PIDAT",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`)
+
+	// Both steps run A->B and record what they changed.
+	forward := MappingOptions{Direction: AtoB, AddRewrites: true}
+	got, err := cascade.CascadeQueryMappings(
+		[]string{"step1", "step2"},
+		[]MappingOptions{forward, forward},
+		query,
+	)
+	require.NoError(t, err)
+
+	// The final term must list the rewrites of both steps, oldest first:
+	// step 1: scope=key original=PIDAT,
+	// step 2: scope=foundry original=opennlp, then scope=key original=DET.
+	want := parseJSON(t, `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "upos",
+ "key": "PRON",
+ "layer": "p",
+ "match": "match:eq",
+ "rewrites": [
+ {
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
+ "scope": "key",
+ "original": "PIDAT"
+ },
+ {
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
+ "scope": "foundry",
+ "original": "opennlp"
+ },
+ {
+ "@type": "koral:rewrite",
+ "editor": "Koral-Mapper",
+ "scope": "key",
+ "original": "DET"
+ }
+ ]
+ }
+ }`)
+	assert.Equal(t, want, got)
+}
+
+// TestCascadeQueryRewritesPreservedStructuralChange runs a two-step query
+// cascade in which the second step replaces the Term with a TermGroup, and
+// checks that the scoped rewrite recorded by the first step is carried over
+// to the new TermGroup ahead of the second step's own rewrite.
+func TestCascadeQueryRewritesPreservedStructuralChange(t *testing.T) {
+	// Step 1 changes key (PIDAT->DET) and records a scoped rewrite.
+	// Step 2 replaces Term with TermGroup (structural change).
+	// Rewrites from step 1 must be carried into the new TermGroup.
+	m, err := NewMapper([]config.MappingList{
+		{
+			ID: "sc-step1", FoundryA: "opennlp", LayerA: "p",
+			FoundryB: "opennlp", LayerB: "p",
+			Mappings: []config.MappingRule{`[PIDAT] <> [DET]`},
+		},
+		{
+			ID: "sc-step2", FoundryA: "opennlp", LayerA: "p",
+			FoundryB: "opennlp", LayerB: "p",
+			Mappings: []config.MappingRule{`[DET] <> [opennlp/p=DET & opennlp/p=PronType:Art]`},
+		},
+	})
+	require.NoError(t, err)
+
+	input := parseJSON(t, `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "PIDAT",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`)
+
+	result, err := m.CascadeQueryMappings(
+		[]string{"sc-step1", "sc-step2"},
+		[]MappingOptions{
+			{Direction: AtoB, AddRewrites: true},
+			{Direction: AtoB, AddRewrites: true},
+		},
+		input,
+	)
+	require.NoError(t, err)
+
+	// Every type assertion below uses the comma-ok form guarded by require:
+	// an unexpected result shape then fails this test with a message instead
+	// of panicking, which would abort the whole package test run.
+	resultMap, ok := result.(map[string]any)
+	require.True(t, ok, "result should be a JSON object")
+	wrap, ok := resultMap["wrap"].(map[string]any)
+	require.True(t, ok, "wrap should be a JSON object")
+	require.Equal(t, "koral:termGroup", wrap["@type"])
+
+	// Step 1 rewrites (scope=key, original=PIDAT) must appear on the
+	// TermGroup created by step 2, along with step 2's own structural rewrite.
+	rewrites, ok := wrap["rewrites"].([]any)
+	require.True(t, ok, "termGroup should carry a rewrites array")
+	require.True(t, len(rewrites) >= 2,
+		"expected the carried-forward rewrite and the structural rewrite, got %d", len(rewrites))
+
+	// First rewrite is from step 1 (carried forward)
+	rw0, ok := rewrites[0].(map[string]any)
+	require.True(t, ok, "first rewrite should be a JSON object")
+	assert.Equal(t, "key", rw0["scope"])
+	assert.Equal(t, "PIDAT", rw0["original"])
+
+	// Last rewrite is from step 2 (structural: original is the full term)
+	rwLast, ok := rewrites[len(rewrites)-1].(map[string]any)
+	require.True(t, ok, "last rewrite should be a JSON object")
+	assert.Equal(t, "Koral-Mapper", rwLast["editor"])
+	// Structural rewrite stores the full original node (no scope)
+	original, ok := rwLast["original"].(map[string]any)
+	require.True(t, ok, "structural rewrite should store the original node as an object")
+	assert.Equal(t, "koral:term", original["@type"])
+	assert.Equal(t, "DET", original["key"])
+}
+
func TestCascadeResponseTwoCorpusMappings(t *testing.T) {
m, err := NewMapper([]config.MappingList{
{
diff --git a/mapper/query.go b/mapper/query.go
index 57de912..72d1398 100644
--- a/mapper/query.go
+++ b/mapper/query.go
@@ -143,12 +143,21 @@
beforeNode = node.Clone()
}
+ // Collect pre-existing rewrites before replacement so they
+ // survive when the matcher creates a fresh replacement node.
+ existingRewrites := collectRewrites(node)
+
actualMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: processedReplacement})
if err != nil {
return nil, fmt.Errorf("failed to create matcher: %w", err)
}
node = actualMatcher.Replace(node)
+ // Carry forward pre-existing rewrites from earlier cascade steps.
+ if len(existingRewrites) > 0 {
+ prependRewrites(node, existingRewrites)
+ }
+
if opts.AddRewrites {
recordRewrites(node, beforeNode)
}
@@ -329,6 +338,44 @@
return []ast.Rewrite{{Editor: RewriteEditor, Original: originalJSON}}
}
+// collectRewrites looks through any chain of Tokens (following Wrap while it
+// is non-nil) and returns the rewrites held by the node it ends up on.
+// It returns nil for a nil node or when that node is not ast.Rewriteable.
+func collectRewrites(node ast.Node) []ast.Rewrite {
+	// Walk down through Token wrappers; stop at the first node that is not
+	// a Token, or at a Token with nothing wrapped.
+	for node != nil {
+		tok, isToken := node.(*ast.Token)
+		if !isToken || tok.Wrap == nil {
+			break
+		}
+		node = tok.Wrap
+	}
+	if node == nil {
+		return nil
+	}
+
+	holder, ok := node.(ast.Rewriteable)
+	if !ok {
+		return nil
+	}
+	return holder.GetRewrites()
+}
+
+// prependRewrites puts the given rewrites in front of the rewrites already
+// held by node, looking through Token wrappers (following Wrap while it is
+// non-nil) to find the node that holds them. It does nothing when node is
+// nil, rewrites is empty, or the node reached is not ast.Rewriteable.
+func prependRewrites(node ast.Node, rewrites []ast.Rewrite) {
+	if len(rewrites) == 0 {
+		return
+	}
+
+	// Walk down through Token wrappers; stop at the first node that is not
+	// a Token, or at a Token with nothing wrapped.
+	for node != nil {
+		tok, isToken := node.(*ast.Token)
+		if !isToken || tok.Wrap == nil {
+			break
+		}
+		node = tok.Wrap
+	}
+
+	// A nil node fails this assertion as well, so no separate nil check.
+	target, ok := node.(ast.Rewriteable)
+	if !ok {
+		return
+	}
+
+	// Build a fresh slice: given rewrites first, the node's own after them.
+	existing := target.GetRewrites()
+	merged := make([]ast.Rewrite, len(rewrites), len(rewrites)+len(existing))
+	copy(merged, rewrites)
+	target.SetRewrites(append(merged, existing...))
+}
+
// isValidQueryObject returns true if data is a JSON object with an @type field.
func isValidQueryObject(data any) bool {
queryMap, ok := data.(map[string]any)