Fix corpus response parsing when fields are nested in "document"

Change-Id: Ib305c6430e997f5afdf07310844fab6c7f6443ac
diff --git a/mapper/benchmark_test.go b/mapper/benchmark_test.go
index fcf28a6..289cd8c 100644
--- a/mapper/benchmark_test.go
+++ b/mapper/benchmark_test.go
@@ -181,7 +181,7 @@
 func BenchmarkApplyQueryMappingsWorstCase(b *testing.B) {
 	// Create a mapper with many rules
 	manyRules := make([]config.MappingRule, 100)
-	for i := 0; i < 100; i++ {
+	for i := range 100 {
 		ruleChar := string(rune('A' + i%26))
 		manyRules[i] = config.MappingRule("[UNUSED" + ruleChar + "] <> [TARGET" + ruleChar + "]")
 	}
diff --git a/mapper/corpus.go b/mapper/corpus.go
index 9aad21e..f3f2b0c 100644
--- a/mapper/corpus.go
+++ b/mapper/corpus.go
@@ -1,7 +1,9 @@
 package mapper
 
 import (
+	"maps"
 	"regexp"
+	"slices"
 
 	"github.com/KorAP/Koral-Mapper/parser"
 )
@@ -390,12 +392,7 @@
 		return jsonData, nil
 	}
 
-	fieldsRaw, exists := jsonMap["fields"]
-	if !exists {
-		return jsonData, nil
-	}
-
-	fields, ok := fieldsRaw.([]any)
+	fieldsInDocument, fields, ok := extractResponseFieldsContainer(jsonMap)
 	if !ok {
 		return jsonData, nil
 	}
@@ -421,11 +418,52 @@
 		newFields = append(newFields, mapped...)
 	}
 
+	fieldValues := collectResponseFieldValues(fields)
+	newFields = append(newFields, m.matchGroupPatternsAndCollect(fieldValues, rules, opts)...)
+
 	result := shallowCopyMap(jsonMap)
-	result["fields"] = newFields
+	if !fieldsInDocument {
+		result["fields"] = newFields
+		return result, nil
+	}
+
+	if document, ok := jsonMap["document"].(map[string]any); ok {
+		documentCopy := shallowCopyMap(document)
+		documentCopy["fields"] = newFields
+		result["document"] = documentCopy
+	}
+
 	return result, nil
 }
 
+// extractResponseFieldsContainer finds the response field array either at
+// top-level ("fields") or at document level ("document.fields").
+func extractResponseFieldsContainer(jsonMap map[string]any) (bool, []any, bool) {
+	if fieldsRaw, exists := jsonMap["fields"]; exists {
+		if fields, ok := fieldsRaw.([]any); ok {
+			return false, fields, true
+		}
+	}
+
+	documentRaw, exists := jsonMap["document"]
+	if !exists {
+		return false, nil, false
+	}
+	document, ok := documentRaw.(map[string]any)
+	if !ok {
+		return false, nil, false
+	}
+	fieldsRaw, exists := document["fields"]
+	if !exists {
+		return false, nil, false
+	}
+	fields, ok := fieldsRaw.([]any)
+	if !ok {
+		return false, nil, false
+	}
+	return true, fields, true
+}
+
 // matchFieldAndCollect matches a field's key/value against rules and returns mapped entries.
 // For array values, each element is matched individually.
 func (m *Mapper) matchFieldAndCollect(key string, value any, rules []*parser.CorpusMappingResult, opts MappingOptions) []any {
@@ -474,6 +512,118 @@
 	return results
 }
 
+// matchGroupPatternsAndCollect matches group-based rule patterns against the
+// complete set of response field values (e.g. AND combinations across
+// multi-valued textClass fields).
+func (m *Mapper) matchGroupPatternsAndCollect(values map[string][]string, rules []*parser.CorpusMappingResult, opts MappingOptions) []any {
+	var results []any
+
+	for _, rule := range rules {
+		var pattern, replacement parser.CorpusNode
+		if opts.Direction == AtoB {
+			pattern, replacement = rule.Upper, rule.Lower
+		} else {
+			pattern, replacement = rule.Lower, rule.Upper
+		}
+
+		if !patternNeedsAggregateMatching(pattern) {
+			continue
+		}
+		if !matchCorpusPatternAgainstValues(pattern, values) {
+			continue
+		}
+
+		results = append(results, collectReplacementFields(replacement)...)
+	}
+
+	return results
+}
+
+func collectResponseFieldValues(fields []any) map[string][]string {
+	values := make(map[string][]string)
+
+	for _, fieldRaw := range fields {
+		fieldMap, ok := fieldRaw.(map[string]any)
+		if !ok {
+			continue
+		}
+
+		fieldKey, _ := fieldMap["key"].(string)
+		if fieldKey == "" {
+			continue
+		}
+
+		switch v := fieldMap["value"].(type) {
+		case string:
+			values[fieldKey] = append(values[fieldKey], v)
+		case []any:
+			for _, elem := range v {
+				if s, ok := elem.(string); ok {
+					values[fieldKey] = append(values[fieldKey], s)
+				}
+			}
+		}
+	}
+
+	return values
+}
+
+func matchCorpusPatternAgainstValues(pattern parser.CorpusNode, values map[string][]string) bool {
+	switch p := pattern.(type) {
+	case *parser.CorpusField:
+		if p.Key == "" {
+			for key, keyValues := range values {
+				for _, value := range keyValues {
+					if matchCorpusField(p, map[string]any{"key": key, "value": value}) {
+						return true
+					}
+				}
+			}
+			return false
+		}
+		for _, value := range values[p.Key] {
+			if matchCorpusField(p, map[string]any{"key": p.Key, "value": value}) {
+				return true
+			}
+		}
+		return false
+
+	case *parser.CorpusGroup:
+		if p.Operation == "or" {
+			for _, op := range p.Operands {
+				if matchCorpusPatternAgainstValues(op, values) {
+					return true
+				}
+			}
+			return false
+		}
+
+		for _, op := range p.Operands {
+			if !matchCorpusPatternAgainstValues(op, values) {
+				return false
+			}
+		}
+		return true
+	}
+
+	return false
+}
+
+func patternNeedsAggregateMatching(pattern parser.CorpusNode) bool {
+	switch p := pattern.(type) {
+	case *parser.CorpusField:
+		return false
+	case *parser.CorpusGroup:
+		if p.Operation == "and" {
+			return true
+		}
+		if slices.ContainsFunc(p.Operands, patternNeedsAggregateMatching) {
+			return true
+		}
+	}
+	return false
+}
+
 // matchCorpusFieldPattern checks if a single response field matches a pattern.
 // Field patterns match directly. OR group patterns match if any operand matches.
 // AND group patterns cannot match a single field.
@@ -529,9 +679,7 @@
 
 func shallowCopyMap(m map[string]any) map[string]any {
 	result := make(map[string]any, len(m))
-	for k, v := range m {
-		result[k] = v
-	}
+	maps.Copy(result, m)
 	return result
 }
 
@@ -571,4 +719,3 @@
 		}
 	}
 }
-
diff --git a/mapper/corpus_test.go b/mapper/corpus_test.go
index c5e129c..27a1a3c 100644
--- a/mapper/corpus_test.go
+++ b/mapper/corpus_test.go
@@ -1,6 +1,8 @@
 package mapper
 
 import (
+	"encoding/json"
+	"os"
 	"testing"
 
 	"github.com/KorAP/Koral-Mapper/config"
@@ -598,6 +600,45 @@
 	assert.Equal(t, true, mapped2["mapped"])
 }
 
+func TestCorpusResponseWikiDeReKoFixtureEnrichment(t *testing.T) {
+	cfg, err := config.LoadFromSources("", []string{"../mappings/wiki-dereko.yaml"})
+	require.NoError(t, err)
+
+	m, err := NewMapper(cfg.Lists)
+	require.NoError(t, err)
+
+	raw, err := os.ReadFile("../testdata/corpus-response.json")
+	require.NoError(t, err)
+
+	var input map[string]any
+	require.NoError(t, json.Unmarshal(raw, &input))
+
+	result, err := m.ApplyResponseMappings("wiki-dereko", MappingOptions{Direction: BtoA}, input)
+	require.NoError(t, err)
+
+	resultMap := result.(map[string]any)
+	document := resultMap["document"].(map[string]any)
+	fields := document["fields"].([]any)
+
+	var wikiValues []string
+	for _, fieldRaw := range fields {
+		field, ok := fieldRaw.(map[string]any)
+		if !ok {
+			continue
+		}
+		key, _ := field["key"].(string)
+		if key != "wikiCat" {
+			continue
+		}
+		if value, ok := field["value"].(string); ok {
+			wikiValues = append(wikiValues, value)
+		}
+	}
+
+	assert.NotEmpty(t, wikiValues, "expected wiki categories to be enriched from textClass values")
+	assert.Contains(t, wikiValues, "Science")
+}
+
 func TestCorpusResponseRegexMatch(t *testing.T) {
 	m := newCorpusMapper(t, "textClass=wissenschaft.*#regex <> genre=science")
 
diff --git a/mapper/response.go b/mapper/response.go
index 14fd978..e870a29 100644
--- a/mapper/response.go
+++ b/mapper/response.go
@@ -2,6 +2,7 @@
 
 import (
 	"fmt"
+	"maps"
 	"strings"
 
 	"github.com/KorAP/Koral-Mapper/ast"
@@ -143,9 +144,7 @@
 
 	// Create a copy of the input data and update the snippet
 	result := make(map[string]any)
-	for k, v := range jsonMap {
-		result[k] = v
-	}
+	maps.Copy(result, jsonMap)
 	result["snippet"] = processedSnippet
 
 	return result, nil
@@ -242,9 +241,9 @@
 			trimmed := strings.TrimSpace(text)
 
 			if token, ok := tokenByStartPos[textPos]; ok && trimmed != "" && trimmed == token.Text {
-				trimStart := strings.Index(text, trimmed)
-				leadingWS := text[:trimStart]
-				trailingWS := text[trimStart+len(trimmed):]
+				before, after, _ := strings.Cut(text, trimmed)
+				leadingWS := before
+				trailingWS := after
 
 				result.WriteString(leadingWS)