Add koral:rewrite to query and corpus transformations

Change-Id: I97e3050d39b936256616bdf46203a784de6a3414
diff --git a/mapper/query.go b/mapper/query.go
index 4980f85..9aed2db 100644
--- a/mapper/query.go
+++ b/mapper/query.go
@@ -1,4 +1,4 @@
-package mapper // ApplyQueryMappings applies the specified mapping rules to a JSON object
+package mapper
 
 import (
 	"encoding/json"
@@ -9,9 +9,10 @@
 	"github.com/KorAP/Koral-Mapper/parser"
 )
 
-// ApplyQueryMappings applies the specified mapping rules to a JSON object
+// ApplyQueryMappings transforms a JSON query object using the mapping rules
+// identified by mappingID. The input may be a bare query node or a wrapper
+// object containing a "query" field; both forms are accepted.
 func (m *Mapper) ApplyQueryMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
-	// Validate mapping ID
 	if _, exists := m.mappingLists[mappingID]; !exists {
 		return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
 	}
@@ -20,10 +21,9 @@
 		return m.applyCorpusQueryMappings(mappingID, opts, jsonData)
 	}
 
-	// Get the parsed rules
 	rules := m.parsedQueryRules[mappingID]
 
-	// Check if we have a wrapper object with a "query" field
+	// Detect wrapper: input may be {"query": ...} or a bare node with "@type"
 	var queryData any
 	var hasQueryWrapper bool
 
@@ -34,20 +34,17 @@
 		}
 	}
 
-	// If no query wrapper was found, use the entire input
 	if !hasQueryWrapper {
-		// If the input itself is not a valid query object, return it as is
 		if !isValidQueryObject(jsonData) {
 			return jsonData, nil
 		}
 		queryData = jsonData
 	} else if queryData == nil || !isValidQueryObject(queryData) {
-		// If we have a query wrapper but the query is nil or not a valid object,
-		// return the original data
 		return jsonData, nil
 	}
 
-	// Store rewrites if they exist
+	// Strip pre-existing rewrites before AST conversion so they do not
+	// interfere with matching. They are restored after transformation.
 	var oldRewrites any
 	if queryMap, ok := queryData.(map[string]any); ok {
 		if rewrites, exists := queryMap["rewrites"]; exists {
@@ -56,7 +53,6 @@
 		}
 	}
 
-	// Convert input JSON to AST
 	jsonBytes, err := json.Marshal(queryData)
 	if err != nil {
 		return nil, fmt.Errorf("failed to marshal input JSON: %w", err)
@@ -67,7 +63,7 @@
 		return nil, fmt.Errorf("failed to parse JSON into AST: %w", err)
 	}
 
-	// Store whether the input was a Token
+	// Unwrap Token so matching operates on the inner node; re-wrapped later.
 	isToken := false
 	var tokenWrap ast.Node
 	if token, ok := node.(*ast.Token); ok {
@@ -76,15 +72,9 @@
 		node = tokenWrap
 	}
 
-	// Store original node for rewrite if needed
-	var originalNode ast.Node
-	if opts.AddRewrites {
-		originalNode = node.Clone()
-	}
-
-	// Pre-check foundry/layer overrides to optimize processing
+	// Resolve foundry/layer overrides per direction once, before the rule loop.
 	var patternFoundry, patternLayer, replacementFoundry, replacementLayer string
-	if opts.Direction { // true means AtoB
+	if opts.Direction {
 		patternFoundry, patternLayer = opts.FoundryA, opts.LayerA
 		replacementFoundry, replacementLayer = opts.FoundryB, opts.LayerB
 	} else {
@@ -92,7 +82,8 @@
 		replacementFoundry, replacementLayer = opts.FoundryA, opts.LayerA
 	}
 
-	// Create a pattern cache key for memoization
+	// patternCache memoizes the cloned, override-applied pattern/replacement
+	// per rule index. It is local to this call and not shared across calls.
 	type patternCacheKey struct {
 		ruleIndex     int
 		foundry       string
@@ -101,11 +92,9 @@
 	}
 	patternCache := make(map[patternCacheKey]ast.Node)
 
-	// Apply each rule to the AST
 	for i, rule := range rules {
-		// Create pattern and replacement based on direction
 		var pattern, replacement ast.Node
-		if opts.Direction { // true means AtoB
+		if opts.Direction {
 			pattern = rule.Upper
 			replacement = rule.Lower
 		} else {
@@ -113,7 +102,6 @@
 			replacement = rule.Upper
 		}
 
-		// Extract the inner nodes from the pattern and replacement tokens
 		if token, ok := pattern.(*ast.Token); ok {
 			pattern = token.Wrap
 		}
@@ -121,52 +109,51 @@
 			replacement = token.Wrap
 		}
 
-		// Get or create pattern with overrides
 		patternKey := patternCacheKey{ruleIndex: i, foundry: patternFoundry, layer: patternLayer, isReplacement: false}
 		processedPattern, exists := patternCache[patternKey]
 		if !exists {
-			// Clone pattern only when needed
 			processedPattern = pattern.Clone()
-			// Apply foundry and layer overrides only if they're non-empty
 			if patternFoundry != "" || patternLayer != "" {
 				ast.ApplyFoundryAndLayerOverrides(processedPattern, patternFoundry, patternLayer)
 			}
 			patternCache[patternKey] = processedPattern
 		}
 
-		// Create a temporary matcher to check for actual matches
+		// Probe for a match before cloning the replacement (lazy evaluation)
 		tempMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: &ast.Term{}})
 		if err != nil {
 			return nil, fmt.Errorf("failed to create temporary matcher: %w", err)
 		}
-
-		// Only proceed if there's an actual match
 		if !tempMatcher.Match(node) {
 			continue
 		}
 
-		// Get or create replacement with overrides (lazy evaluation)
 		replacementKey := patternCacheKey{ruleIndex: i, foundry: replacementFoundry, layer: replacementLayer, isReplacement: true}
 		processedReplacement, exists := patternCache[replacementKey]
 		if !exists {
-			// Clone replacement only when we have a match
 			processedReplacement = replacement.Clone()
-			// Apply foundry and layer overrides only if they're non-empty
 			if replacementFoundry != "" || replacementLayer != "" {
 				ast.ApplyFoundryAndLayerOverrides(processedReplacement, replacementFoundry, replacementLayer)
 			}
 			patternCache[replacementKey] = processedReplacement
 		}
 
-		// Create the actual matcher and apply replacement
+		var beforeNode ast.Node
+		if opts.AddRewrites {
+			beforeNode = node.Clone()
+		}
+
 		actualMatcher, err := matcher.NewMatcher(ast.Pattern{Root: processedPattern}, ast.Replacement{Root: processedReplacement})
 		if err != nil {
 			return nil, fmt.Errorf("failed to create matcher: %w", err)
 		}
 		node = actualMatcher.Replace(node)
+
+		if opts.AddRewrites {
+			recordRewrites(node, beforeNode)
+		}
 	}
 
-	// Wrap the result in a token if the input was a token
 	var result ast.Node
 	if isToken {
 		result = &ast.Token{Wrap: node}
@@ -174,45 +161,23 @@
 		result = node
 	}
 
-	// Convert AST back to JSON
 	resultBytes, err := parser.SerializeToJSON(result)
 	if err != nil {
 		return nil, fmt.Errorf("failed to serialize AST to JSON: %w", err)
 	}
 
-	// Parse the JSON string back into
 	var resultData any
 	if err := json.Unmarshal(resultBytes, &resultData); err != nil {
 		return nil, fmt.Errorf("failed to parse result JSON: %w", err)
 	}
 
-	// Add rewrites if enabled and node was changed
-	if opts.AddRewrites && !ast.NodesEqual(node, originalNode) {
-		rewrite := buildQueryRewrite(originalNode, node)
-
-		// Add rewrite to the node
-		if resultMap, ok := resultData.(map[string]any); ok {
-			if wrapMap, ok := resultMap["wrap"].(map[string]any); ok {
-				rewrites, exists := wrapMap["rewrites"]
-				if !exists {
-					rewrites = []any{}
-				}
-				if rewritesList, ok := rewrites.([]any); ok {
-					wrapMap["rewrites"] = append(rewritesList, rewrite)
-				} else {
-					wrapMap["rewrites"] = []any{rewrite}
-				}
-			}
-		}
-	}
-
-	// Restore rewrites if they existed
+	// Restore pre-existing rewrites. The round-trip through ast.Rewrite
+	// normalizes legacy field names (e.g. "source" -> "editor") so the
+	// output always uses the modern schema.
 	if oldRewrites != nil {
-		// Process old rewrites through AST to ensure backward compatibility
 		if rewritesList, ok := oldRewrites.([]any); ok {
 			processedRewrites := make([]any, len(rewritesList))
 			for i, rewriteData := range rewritesList {
-				// Marshal and unmarshal each rewrite to apply backward compatibility
 				rewriteBytes, err := json.Marshal(rewriteData)
 				if err != nil {
 					return nil, fmt.Errorf("failed to marshal old rewrite %d: %w", i, err)
@@ -221,7 +186,6 @@
 				if err := json.Unmarshal(rewriteBytes, &rewrite); err != nil {
 					return nil, fmt.Errorf("failed to unmarshal old rewrite %d: %w", i, err)
 				}
-				// Marshal back to get the transformed version
 				transformedBytes, err := json.Marshal(&rewrite)
 				if err != nil {
 					return nil, fmt.Errorf("failed to marshal transformed rewrite %d: %w", i, err)
@@ -236,14 +200,12 @@
 				resultMap["rewrites"] = processedRewrites
 			}
 		} else {
-			// If it's not a list, restore as-is
 			if resultMap, ok := resultData.(map[string]any); ok {
 				resultMap["rewrites"] = oldRewrites
 			}
 		}
 	}
 
-	// If we had a query wrapper, put the transformed data back in it
 	if hasQueryWrapper {
 		if wrapper, ok := jsonData.(map[string]any); ok {
 			wrapper["query"] = resultData
@@ -254,48 +216,102 @@
 	return resultData, nil
 }
 
-// buildQueryRewrite creates a rewrite entry for a query-level transformation
-// by comparing the original and new AST nodes.
-func buildQueryRewrite(originalNode, newNode ast.Node) map[string]any {
+// recordRewrites compares newNode against its before-snapshot and, if they
+// differ, attaches rewrite entries. Two CatchallNodes with operands get
+// per-operand rewrites paired by index; anything else gets a single rewrite.
+func recordRewrites(newNode, beforeNode ast.Node) {
+	if ast.NodesEqual(newNode, beforeNode) {
+		return
+	}
+
+	// For CatchallNodes with operands (e.g. token sequences), attach
+	// per-operand rewrites so each changed token gets its own annotation.
+	if newCatchall, ok := newNode.(*ast.CatchallNode); ok {
+		if oldCatchall, ok := beforeNode.(*ast.CatchallNode); ok && len(newCatchall.Operands) > 0 {
+			for i, newOp := range newCatchall.Operands {
+				if i >= len(oldCatchall.Operands) {
+					break
+				}
+				oldOp := oldCatchall.Operands[i]
+				recordRewritesForOperand(newOp, oldOp)
+			}
+			return
+		}
+	}
+
+	addRewriteToNode(newNode, beforeNode)
+}
+
+// recordRewritesForOperand handles rewrite recording for a single operand,
+// unwrapping Token nodes so the rewrite attaches to the inner term/termGroup
+// rather than the token wrapper.
+func recordRewritesForOperand(newOp, oldOp ast.Node) {
+	if ast.NodesEqual(newOp, oldOp) {
+		return
+	}
+
+	newInner := newOp
+	oldInner := oldOp
+	if tok, ok := newOp.(*ast.Token); ok {
+		newInner = tok.Wrap
+	}
+	if tok, ok := oldOp.(*ast.Token); ok {
+		oldInner = tok.Wrap
+	}
+
+	if newInner == nil || ast.NodesEqual(newInner, oldInner) {
+		return
+	}
+
+	addRewriteToNode(newInner, oldInner)
+}
+
+// addRewriteToNode creates and attaches a rewrite entry to a node,
+// recording what the node looked like before the change.
+func addRewriteToNode(newNode, originalNode ast.Node) {
+	rw := buildRewrite(originalNode, newNode)
+	ast.AppendRewrite(newNode, rw)
+}
+
+// buildRewrite creates a Rewrite describing what changed between
+// originalNode and newNode. For two terms of the same type it records only
+// the first differing field (checked as foundry, layer, key, value) as a
+// scoped rewrite; otherwise it stores the full original as an object.
+func buildRewrite(originalNode, newNode ast.Node) ast.Rewrite {
 	if term, ok := originalNode.(*ast.Term); ok && ast.IsTermNode(newNode) && originalNode.Type() == newNode.Type() {
 		newTerm := newNode.(*ast.Term)
 		if term.Foundry != newTerm.Foundry {
-			return newRewriteEntry("foundry", term.Foundry)
+			return ast.Rewrite{Editor: RewriteEditor, Scope: "foundry", Original: term.Foundry}
 		}
 		if term.Layer != newTerm.Layer {
-			return newRewriteEntry("layer", term.Layer)
+			return ast.Rewrite{Editor: RewriteEditor, Scope: "layer", Original: term.Layer}
 		}
 		if term.Key != newTerm.Key {
-			return newRewriteEntry("key", term.Key)
+			return ast.Rewrite{Editor: RewriteEditor, Scope: "key", Original: term.Key}
 		}
 		if term.Value != newTerm.Value {
-			return newRewriteEntry("value", term.Value)
+			return ast.Rewrite{Editor: RewriteEditor, Scope: "value", Original: term.Value}
 		}
 	}
 
+	// Structural change: serialize the original as the rewrite value
 	originalBytes, err := parser.SerializeToJSON(originalNode)
 	if err != nil {
-		return newRewriteEntry("", nil)
+		return ast.Rewrite{Editor: RewriteEditor}
 	}
 	var originalJSON any
 	if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
-		return newRewriteEntry("", nil)
+		return ast.Rewrite{Editor: RewriteEditor}
 	}
-	return newRewriteEntry("", originalJSON)
+	return ast.Rewrite{Editor: RewriteEditor, Original: originalJSON}
 }
 
-// isValidQueryObject checks if the query data is a valid object that can be processed
+// isValidQueryObject returns true if data is a JSON object with an @type field.
 func isValidQueryObject(data any) bool {
-	// Check if it's a map
 	queryMap, ok := data.(map[string]any)
 	if !ok {
 		return false
 	}
-
-	// Check if it has the required @type field
-	if _, ok := queryMap["@type"]; !ok {
-		return false
-	}
-
-	return true
+	_, ok = queryMap["@type"]
+	return ok
 }