Support legacy rewrite fields ("source", "origin") when decoding koral:rewrite
diff --git a/ast/ast.go b/ast/ast.go
index 07a6df1..dc7d7f3 100644
--- a/ast/ast.go
+++ b/ast/ast.go
@@ -38,12 +38,86 @@
Scope string `json:"scope,omitempty"`
Src string `json:"src,omitempty"`
Comment string `json:"_comment,omitempty"`
+ Original any `json:"original,omitempty"`
+}
+
+// UnmarshalJSON decodes a koral:rewrite object, accepting the modern keys as
+// well as the legacy aliases "source" (for "editor") and "origin" (for "src").
+// Precedence is editor >> source and original >> src >> origin; only the
+// winning value of each group is stored. Every field handled here is
+// overwritten on each call, so decoding into a reused Rewrite cannot leave a
+// stale Editor, Src or Original behind and break that precedence.
+func (r *Rewrite) UnmarshalJSON(data []byte) error {
+	var temp struct {
+		Type      string `json:"@type"`
+		Editor    string `json:"editor"`
+		Source    string `json:"source"` // legacy alias of editor
+		Operation string `json:"operation"`
+		Scope     string `json:"scope"`
+		Src       string `json:"src"`
+		Origin    string `json:"origin"` // legacy alias of src
+		Original  any    `json:"original"`
+		Comment   string `json:"_comment"`
+	}
+	if err := json.Unmarshal(data, &temp); err != nil {
+		return err // r is left untouched on malformed input
+	}
+
+	// editor >> source; assigned unconditionally so an old Editor is cleared.
+	r.Editor = temp.Editor
+	if r.Editor == "" {
+		r.Editor = temp.Source
+	}
+
+	// original >> src >> origin; clear both targets first so that at most one
+	// of Original and Src is populated afterwards.
+	r.Original, r.Src = nil, ""
+	switch {
+	case temp.Original != nil:
+		r.Original = temp.Original
+	case temp.Src != "":
+		r.Src = temp.Src
+	case temp.Origin != "":
+		r.Src = temp.Origin
+	}
+	r.Operation, r.Scope, r.Comment = temp.Operation, temp.Scope, temp.Comment
+	return nil
}
func (r *Rewrite) Type() NodeType {
return RewriteNode
}
+// MarshalJSON encodes the rewrite using only the modern key names: "@type"
+// is always written as "koral:rewrite", every other key only when its field
+// is non-empty (non-nil for Original). The legacy keys "source" and "origin"
+// are never produced. The receiver is a value on purpose: encoding/json only
+// finds a pointer-receiver MarshalJSON on addressable values, so with *Rewrite
+// a plain json.Marshal(rewrite) would bypass this method and fall back to the
+// default struct encoding.
+func (r Rewrite) MarshalJSON() ([]byte, error) {
+	result := map[string]any{"@type": "koral:rewrite"}
+	if r.Editor != "" {
+		result["editor"] = r.Editor
+	}
+	if r.Operation != "" {
+		result["operation"] = r.Operation
+	}
+	if r.Scope != "" {
+		result["scope"] = r.Scope
+	}
+	if r.Src != "" {
+		result["src"] = r.Src
+	}
+	if r.Comment != "" {
+		result["_comment"] = r.Comment
+	}
+	if r.Original != nil {
+		result["original"] = r.Original
+	}
+	return json.Marshal(result)
+}
+
// Token represents a koral:token
type Token struct {
Wrap Node `json:"wrap"`
diff --git a/ast/rewrite_test.go b/ast/rewrite_test.go
new file mode 100644
index 0000000..63f5e53
--- /dev/null
+++ b/ast/rewrite_test.go
@@ -0,0 +1,229 @@
+package ast
+
+import (
+ "encoding/json"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// TestRewriteUnmarshalJSON checks decoding of modern and legacy keys and their precedence.
+func TestRewriteUnmarshalJSON(t *testing.T) {
+	cases := []struct {
+		desc    string
+		payload string
+		want    Rewrite
+	}{
+		{
+			desc: "Standard rewrite with editor and original",
+			payload: `{
+ "@type": "koral:rewrite",
+ "editor": "termMapper",
+ "operation": "operation:mapping",
+ "scope": "foundry",
+ "original": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "PIDAT",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`,
+			want: Rewrite{
+				Editor:    "termMapper",
+				Operation: "operation:mapping",
+				Scope:     "foundry",
+				Original: map[string]any{
+					"@type":   "koral:term",
+					"foundry": "opennlp",
+					"key":     "PIDAT",
+					"layer":   "p",
+					"match":   "match:eq",
+				},
+			},
+		},
+		{
+			desc: "Legacy rewrite with source instead of editor",
+			payload: `{
+ "@type": "koral:rewrite",
+ "source": "legacy-mapper",
+ "operation": "operation:mapping",
+ "scope": "foundry",
+ "src": "legacy-source"
+ }`,
+			want: Rewrite{
+				Editor:    "legacy-mapper",
+				Operation: "operation:mapping",
+				Scope:     "foundry",
+				Src:       "legacy-source",
+			},
+		},
+		{
+			desc: "Legacy rewrite with origin instead of original/src",
+			payload: `{
+ "@type": "koral:rewrite",
+ "editor": "termMapper",
+ "operation": "operation:mapping",
+ "scope": "foundry",
+ "origin": "legacy-origin"
+ }`,
+			want: Rewrite{
+				Editor:    "termMapper",
+				Operation: "operation:mapping",
+				Scope:     "foundry",
+				Src:       "legacy-origin",
+			},
+		},
+		{
+			desc: "Precedence test: editor over source",
+			payload: `{
+ "@type": "koral:rewrite",
+ "editor": "preferred-editor",
+ "source": "legacy-source",
+ "operation": "operation:mapping"
+ }`,
+			want: Rewrite{
+				Editor:    "preferred-editor",
+				Operation: "operation:mapping",
+			},
+		},
+		{
+			desc: "Precedence test: original over src over origin",
+			payload: `{
+ "@type": "koral:rewrite",
+ "editor": "termMapper",
+ "operation": "operation:mapping",
+ "original": "preferred-original",
+ "src": "middle-src",
+ "origin": "lowest-origin"
+ }`,
+			want: Rewrite{
+				Editor:    "termMapper",
+				Operation: "operation:mapping",
+				Original:  "preferred-original",
+			},
+		},
+		{
+			desc: "Precedence test: src over origin when no original",
+			payload: `{
+ "@type": "koral:rewrite",
+ "editor": "termMapper",
+ "operation": "operation:mapping",
+ "src": "preferred-src",
+ "origin": "lowest-origin"
+ }`,
+			want: Rewrite{
+				Editor:    "termMapper",
+				Operation: "operation:mapping",
+				Src:       "preferred-src",
+			},
+		},
+		{
+			desc: "Only legacy fields",
+			payload: `{
+ "@type": "koral:rewrite",
+ "source": "legacy-editor",
+ "operation": "operation:mapping",
+ "origin": "legacy-origin",
+ "_comment": "Legacy rewrite"
+ }`,
+			want: Rewrite{
+				Editor:    "legacy-editor",
+				Operation: "operation:mapping",
+				Src:       "legacy-origin",
+				Comment:   "Legacy rewrite",
+			},
+		},
+		{
+			desc: "Mixed with comment",
+			payload: `{
+ "@type": "koral:rewrite",
+ "editor": "termMapper",
+ "operation": "operation:mapping",
+ "scope": "foundry",
+ "src": "original-source",
+ "_comment": "This is a comment"
+ }`,
+			want: Rewrite{
+				Editor:    "termMapper",
+				Operation: "operation:mapping",
+				Scope:     "foundry",
+				Src:       "original-source",
+				Comment:   "This is a comment",
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			var got Rewrite
+			require.NoError(t, json.Unmarshal([]byte(tc.payload), &got))
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
+// TestRewriteArrayUnmarshal decodes a JSON array that mixes a modern rewrite
+// with a legacy one and checks that both end up in the modern fields.
+func TestRewriteArrayUnmarshal(t *testing.T) {
+	const payload = `[
+ {
+ "@type": "koral:rewrite",
+ "editor": "termMapper",
+ "operation": "operation:mapping",
+ "original": "modern-original"
+ },
+ {
+ "@type": "koral:rewrite",
+ "source": "legacy-editor",
+ "operation": "operation:legacy",
+ "origin": "legacy-origin"
+ }
+ ]`
+
+	var decoded []Rewrite
+	require.NoError(t, json.Unmarshal([]byte(payload), &decoded))
+	require.Len(t, decoded, 2)
+
+	modern, legacy := decoded[0], decoded[1]
+	assert.Equal(t, "termMapper", modern.Editor)
+	assert.Equal(t, "operation:mapping", modern.Operation)
+	assert.Equal(t, "modern-original", modern.Original)
+
+	// Legacy keys: "source" lands in Editor, "origin" lands in Src.
+	assert.Equal(t, "legacy-editor", legacy.Editor)
+	assert.Equal(t, "operation:legacy", legacy.Operation)
+	assert.Equal(t, "legacy-origin", legacy.Src)
+}
+
+// TestRewriteMarshalJSON checks that the custom marshaler writes "@type" and
+// only the modern keys. It marshals a *Rewrite so MarshalJSON is really used.
+func TestRewriteMarshalJSON(t *testing.T) {
+	rewrite := Rewrite{
+		Editor:    "termMapper",
+		Operation: "operation:mapping",
+		Scope:     "foundry",
+		Src:       "source-value",
+		Comment:   "Test comment",
+		Original:  "original-value",
+	}
+
+	data, err := json.Marshal(&rewrite)
+	require.NoError(t, err)
+
+	var result map[string]any
+	require.NoError(t, json.Unmarshal(data, &result))
+
+	assert.Equal(t, "koral:rewrite", result["@type"])
+	assert.Equal(t, "termMapper", result["editor"])
+	assert.Equal(t, "operation:mapping", result["operation"])
+	assert.Equal(t, "foundry", result["scope"])
+	assert.Equal(t, "source-value", result["src"])
+	assert.Equal(t, "Test comment", result["_comment"])
+	assert.Equal(t, "original-value", result["original"])
+
+	// Ensure legacy fields are not present in output
+	assert.NotContains(t, result, "source")
+	assert.NotContains(t, result, "origin")
+}
diff --git a/mapper/mapper.go b/mapper/mapper.go
index 0377c88..424896e 100644
--- a/mapper/mapper.go
+++ b/mapper/mapper.go
@@ -178,17 +178,36 @@
replacement = token.Wrap
}
- // Apply foundry and layer overrides
- if opts.Direction { // true means AtoB
- applyFoundryAndLayerOverrides(pattern, opts.FoundryA, opts.LayerA)
- applyFoundryAndLayerOverrides(replacement, opts.FoundryB, opts.LayerB)
- } else {
- applyFoundryAndLayerOverrides(pattern, opts.FoundryB, opts.LayerB)
- applyFoundryAndLayerOverrides(replacement, opts.FoundryA, opts.LayerA)
+ // Create deep copies of pattern and replacement to avoid modifying the original parsed rules
+ patternBytes, err := parser.SerializeToJSON(pattern)
+ if err != nil {
+ return nil, fmt.Errorf("failed to serialize pattern for copying: %w", err)
+ }
+ patternCopy, err := parser.ParseJSON(patternBytes)
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse pattern copy: %w", err)
}
- // Create matcher and apply replacement
- m, err := matcher.NewMatcher(ast.Pattern{Root: pattern}, ast.Replacement{Root: replacement})
+ replacementBytes, err := parser.SerializeToJSON(replacement)
+ if err != nil {
+ return nil, fmt.Errorf("failed to serialize replacement for copying: %w", err)
+ }
+ replacementCopy, err := parser.ParseJSON(replacementBytes)
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse replacement copy: %w", err)
+ }
+
+ // Apply foundry and layer overrides to the copies
+ if opts.Direction { // true means AtoB
+ applyFoundryAndLayerOverrides(patternCopy, opts.FoundryA, opts.LayerA)
+ applyFoundryAndLayerOverrides(replacementCopy, opts.FoundryB, opts.LayerB)
+ } else {
+ applyFoundryAndLayerOverrides(patternCopy, opts.FoundryB, opts.LayerB)
+ applyFoundryAndLayerOverrides(replacementCopy, opts.FoundryA, opts.LayerA)
+ }
+
+ // Create matcher and apply replacement using the copies
+ m, err := matcher.NewMatcher(ast.Pattern{Root: patternCopy}, ast.Replacement{Root: replacementCopy})
if err != nil {
return nil, fmt.Errorf("failed to create matcher: %w", err)
}
@@ -293,8 +312,38 @@
// Restore rewrites if they existed
if oldRewrites != nil {
- if resultMap, ok := resultData.(map[string]any); ok {
- resultMap["rewrites"] = oldRewrites
+ // Process old rewrites through AST to ensure backward compatibility
+ if rewritesList, ok := oldRewrites.([]any); ok {
+ processedRewrites := make([]any, len(rewritesList))
+ for i, rewriteData := range rewritesList {
+ // Marshal and unmarshal each rewrite to apply backward compatibility
+ rewriteBytes, err := json.Marshal(rewriteData)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal old rewrite %d: %w", i, err)
+ }
+ var rewrite ast.Rewrite
+ if err := json.Unmarshal(rewriteBytes, &rewrite); err != nil {
+ return nil, fmt.Errorf("failed to unmarshal old rewrite %d: %w", i, err)
+ }
+ // Marshal back to get the transformed version
+ transformedBytes, err := json.Marshal(&rewrite)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal transformed rewrite %d: %w", i, err)
+ }
+ var transformedRewrite any
+ if err := json.Unmarshal(transformedBytes, &transformedRewrite); err != nil {
+ return nil, fmt.Errorf("failed to unmarshal transformed rewrite %d: %w", i, err)
+ }
+ processedRewrites[i] = transformedRewrite
+ }
+ if resultMap, ok := resultData.(map[string]any); ok {
+ resultMap["rewrites"] = processedRewrites
+ }
+ } else {
+ // If it's not a list, restore as-is
+ if resultMap, ok := resultData.(map[string]any); ok {
+ resultMap["rewrites"] = oldRewrites
+ }
}
}
diff --git a/mapper/mapper_test.go b/mapper/mapper_test.go
index c97238b..b2fb377 100644
--- a/mapper/mapper_test.go
+++ b/mapper/mapper_test.go
@@ -263,6 +263,149 @@
}`,
expectError: false,
},
+ {
+ name: "Query with legacy rewrite field names",
+ mappingID: "test-mapper",
+ opts: MappingOptions{
+ Direction: AtoB,
+ },
+ input: `{
+ "@type": "koral:token",
+ "rewrites": [
+ {
+ "@type": "koral:rewrite",
+ "_comment": "Legacy rewrite with source instead of editor",
+ "source": "LegacyEditor",
+ "operation": "operation:legacy",
+ "origin": "LegacySource"
+ }
+ ],
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "PIDAT",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`,
+ expected: `{
+ "@type": "koral:token",
+ "rewrites": [
+ {
+ "@type": "koral:rewrite",
+ "_comment": "Legacy rewrite with source instead of editor",
+ "editor": "LegacyEditor",
+ "operation": "operation:legacy",
+ "src": "LegacySource"
+ }
+ ],
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "PIDAT",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "p",
+ "match": "match:eq",
+ "value": "Pdt"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`,
+ },
+ {
+ name: "Query with mixed legacy and modern rewrite fields",
+ mappingID: "test-mapper",
+ opts: MappingOptions{
+ Direction: AtoB,
+ },
+ input: `{
+ "@type": "koral:token",
+ "rewrites": [
+ {
+ "@type": "koral:rewrite",
+ "_comment": "Modern rewrite",
+ "editor": "ModernEditor",
+ "operation": "operation:modern",
+ "original": {
+ "@type": "koral:term",
+ "foundry": "original",
+ "key": "original-key"
+ }
+ },
+ {
+ "@type": "koral:rewrite",
+ "_comment": "Legacy rewrite with precedence test",
+ "editor": "PreferredEditor",
+ "source": "IgnoredSource",
+ "operation": "operation:precedence",
+ "original": "PreferredOriginal",
+ "src": "IgnoredSrc",
+ "origin": "IgnoredOrigin"
+ }
+ ],
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "PIDAT",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`,
+ expected: `{
+ "@type": "koral:token",
+ "rewrites": [
+ {
+ "@type": "koral:rewrite",
+ "_comment": "Modern rewrite",
+ "editor": "ModernEditor",
+ "operation": "operation:modern",
+ "original": {
+ "@type": "koral:term",
+ "foundry": "original",
+ "key": "original-key"
+ }
+ },
+ {
+ "@type": "koral:rewrite",
+ "_comment": "Legacy rewrite with precedence test",
+ "editor": "PreferredEditor",
+ "operation": "operation:precedence",
+ "original": "PreferredOriginal"
+ }
+ ],
+ "wrap": {
+ "@type": "koral:termGroup",
+ "operands": [
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "PIDAT",
+ "layer": "p",
+ "match": "match:eq"
+ },
+ {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "AdjType",
+ "layer": "p",
+ "match": "match:eq",
+ "value": "Pdt"
+ }
+ ],
+ "relation": "relation:and"
+ }
+ }`,
+ },
}
for _, tt := range tests {
diff --git a/parser/parser.go b/parser/parser.go
index 6c3c750..0dc2b03 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -24,7 +24,7 @@
Layer string `json:"layer,omitempty"`
Match string `json:"match,omitempty"`
Value string `json:"value,omitempty"`
- Rewrites []ast.Rewrite `json:"rewrites,omitempty"`
+ Rewrites []ast.Rewrite `json:"-"` // Handle manually
// Store any additional fields
Extra map[string]any `json:"-"`
}
@@ -37,13 +37,52 @@
return err
}
- // Create a temporary struct to unmarshal known fields
- type tempNode rawNode
+ // Create a temporary struct without the problematic fields
+ type tempNode struct {
+ Type string `json:"@type"`
+ Wrap json.RawMessage `json:"wrap,omitempty"`
+ Operands []rawNode `json:"operands,omitempty"`
+ Relation string `json:"relation,omitempty"`
+ Foundry string `json:"foundry,omitempty"`
+ Key string `json:"key,omitempty"`
+ Layer string `json:"layer,omitempty"`
+ Match string `json:"match,omitempty"`
+ Value string `json:"value,omitempty"`
+ }
+
var temp tempNode
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
- *r = rawNode(temp)
+
+ // Copy the fields
+ r.Type = temp.Type
+ r.Wrap = temp.Wrap
+ r.Operands = temp.Operands
+ r.Relation = temp.Relation
+ r.Foundry = temp.Foundry
+ r.Key = temp.Key
+ r.Layer = temp.Layer
+ r.Match = temp.Match
+ r.Value = temp.Value
+
+ // Handle rewrites manually
+ if rewritesData, exists := raw["rewrites"]; exists && rewritesData != nil {
+ if rewritesList, ok := rewritesData.([]any); ok {
+ r.Rewrites = make([]ast.Rewrite, len(rewritesList))
+ for i, rewriteData := range rewritesList {
+ rewriteBytes, err := json.Marshal(rewriteData)
+ if err != nil {
+ return fmt.Errorf("failed to marshal rewrite %d: %w", i, err)
+ }
+ var rewrite ast.Rewrite
+ if err := json.Unmarshal(rewriteBytes, &rewrite); err != nil {
+ return fmt.Errorf("failed to unmarshal rewrite %d: %w", i, err)
+ }
+ r.Rewrites[i] = rewrite
+ }
+ }
+ }
// Store any fields not in the struct in Extra
r.Extra = make(map[string]any)