Support corpus mappings
Change-Id: I25e987b0ca668a1cf733424b22edb4f0fca37bf2
diff --git a/mapper/corpus.go b/mapper/corpus.go
new file mode 100644
index 0000000..d45dc92
--- /dev/null
+++ b/mapper/corpus.go
@@ -0,0 +1,344 @@
+package mapper
+
+import (
+ "regexp"
+
+ "github.com/KorAP/Koral-Mapper/parser"
+)
+
+// applyCorpusQueryMappings processes corpus/collection section with corpus rules.
+func (m *Mapper) applyCorpusQueryMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
+ rules := m.parsedCorpusRules[mappingID]
+
+ jsonMap, ok := jsonData.(map[string]any)
+ if !ok {
+ return jsonData, nil
+ }
+
+ // Find corpus or collection attribute
+ corpusKey := ""
+ if _, exists := jsonMap["corpus"]; exists {
+ corpusKey = "corpus"
+ } else if _, exists := jsonMap["collection"]; exists {
+ corpusKey = "collection"
+ }
+
+ if corpusKey == "" {
+ return jsonData, nil
+ }
+
+ corpusData, ok := jsonMap[corpusKey].(map[string]any)
+ if !ok {
+ return jsonData, nil
+ }
+
+ result := shallowCopyMap(jsonMap)
+ rewritten := m.rewriteCorpusNode(corpusData, rules, opts)
+ result[corpusKey] = rewritten
+
+ return result, nil
+}
+
+// rewriteCorpusNode dispatches on the node's "@type": koral:doc and
+// koral:field nodes go to rewriteCorpusDoc, koral:docGroup and
+// koral:fieldGroup nodes go to rewriteCorpusDocGroup. Every other node,
+// including koral:docGroupRef and nodes without a string "@type", is returned
+// as is.
+func (m *Mapper) rewriteCorpusNode(node map[string]any, rules []*parser.CorpusMappingResult, opts MappingOptions) any {
+	kind, _ := node["@type"].(string)
+
+	if kind == "koral:doc" || kind == "koral:field" {
+		return m.rewriteCorpusDoc(node, rules, opts)
+	}
+	if kind == "koral:docGroup" || kind == "koral:fieldGroup" {
+		return m.rewriteCorpusDocGroup(node, rules, opts)
+	}
+
+	// References (koral:docGroupRef) and unknown node types pass through.
+	return node
+}
+
+// rewriteCorpusDoc returns the replacement for the first rule whose source
+// side is a single field matching node, or node itself when no rule applies.
+// The source side is rule.Upper for AtoB and rule.Lower otherwise; rules whose
+// source side is not a *parser.CorpusField are skipped. With opts.AddRewrites
+// set, the replacement is annotated via addCorpusRewrite.
+func (m *Mapper) rewriteCorpusDoc(node map[string]any, rules []*parser.CorpusMappingResult, opts MappingOptions) any {
+	forward := opts.Direction == AtoB
+
+	for _, rule := range rules {
+		var source, target parser.CorpusNode = rule.Lower, rule.Upper
+		if forward {
+			source, target = rule.Upper, rule.Lower
+		}
+
+		field, isField := source.(*parser.CorpusField)
+		if !isField || !matchCorpusField(field, node) {
+			continue
+		}
+
+		// First matching rule wins.
+		out := buildReplacementFromNode(target, node)
+		if opts.AddRewrites {
+			addCorpusRewrite(out, node)
+		}
+		return out
+	}
+
+	return node
+}
+
+// rewriteCorpusDocGroup returns a shallow copy of a group node whose
+// "operands" have each been passed through rewriteCorpusNode. Operands that
+// are not JSON objects are carried over untouched; if "operands" is missing or
+// not an array, the plain copy is returned.
+func (m *Mapper) rewriteCorpusDocGroup(node map[string]any, rules []*parser.CorpusMappingResult, opts MappingOptions) any {
+	group := shallowCopyMap(node)
+
+	children, isList := node["operands"].([]any)
+	if !isList {
+		return group
+	}
+
+	rewritten := make([]any, 0, len(children))
+	for _, child := range children {
+		if childMap, isObject := child.(map[string]any); isObject {
+			rewritten = append(rewritten, m.rewriteCorpusNode(childMap, rules, opts))
+		} else {
+			rewritten = append(rewritten, child)
+		}
+	}
+	group["operands"] = rewritten
+
+	return group
+}
+
+// matchCorpusField reports whether a koral:doc JSON node satisfies pattern.
+//
+// The node matches when all of the following hold:
+//   - its "key" equals pattern.Key;
+//   - its "value" equals pattern.Value, or, for pattern.Type == "regex", the
+//     whole value matches pattern.Value as a regular expression;
+//   - if pattern.Match is set, its "match" equals "match:" + pattern.Match;
+//   - if pattern.Type is set and is not "regex", its "type" is either empty
+//     or equals "type:" + pattern.Type.
+//
+// Missing or non-string node attributes are treated as the empty string. A
+// pattern whose regular expression does not compile never matches.
+func matchCorpusField(pattern *parser.CorpusField, doc map[string]any) bool {
+	if docKey, _ := doc["key"].(string); docKey != pattern.Key {
+		return false
+	}
+
+	docValue, _ := doc["value"].(string)
+	if pattern.Type == "regex" {
+		// Wrap the expression in a non-capturing group before anchoring it.
+		// Without the group, an alternation such as "a|b" would become
+		// "^a|b$" and accept any value starting with "a" or ending in "b".
+		re, err := regexp.Compile("^(?:" + pattern.Value + ")$")
+		if err != nil || !re.MatchString(docValue) {
+			return false
+		}
+	} else if docValue != pattern.Value {
+		return false
+	}
+
+	if pattern.Match != "" {
+		docMatch, _ := doc["match"].(string)
+		if docMatch != "match:"+pattern.Match {
+			return false
+		}
+	}
+
+	// "regex" describes how the rule compares values, not the node's value
+	// type, so it is not checked against the node's "type" attribute.
+	if pattern.Type != "" && pattern.Type != "regex" {
+		docType, _ := doc["type"].(string)
+		if docType != "" && docType != "type:"+pattern.Type {
+			return false
+		}
+	}
+
+	return true
+}
+
+// buildReplacementFromNode renders a replacement CorpusNode as a JSON
+// structure, using originalDoc to fill in what the rule leaves open.
+//
+// A *parser.CorpusField becomes a node with the original's "@type" and the
+// rule's key and value; "match" and "type" come from the rule when it sets
+// them and are otherwise copied from originalDoc if present. A
+// *parser.CorpusGroup becomes a group node whose operands are built
+// recursively against the same originalDoc. Any other node type yields
+// originalDoc itself.
+func buildReplacementFromNode(replacement parser.CorpusNode, originalDoc map[string]any) any {
+	switch r := replacement.(type) {
+	case *parser.CorpusField:
+		result := map[string]any{
+			"@type": originalDoc["@type"],
+			"key":   r.Key,
+			"value": r.Value,
+		}
+
+		if r.Match != "" {
+			result["match"] = "match:" + r.Match
+		} else if m, ok := originalDoc["match"]; ok {
+			result["match"] = m
+		}
+
+		if r.Type != "" {
+			result["type"] = "type:" + r.Type
+		} else if t, ok := originalDoc["type"]; ok {
+			result["type"] = t
+		}
+
+		return result
+
+	case *parser.CorpusGroup:
+		// Stay in the vocabulary of the node being replaced: the operands
+		// inherit the original "@type", so a koral:field must be wrapped in a
+		// koral:fieldGroup rather than a koral:docGroup.
+		groupType := "koral:docGroup"
+		if t, _ := originalDoc["@type"].(string); t == "koral:field" {
+			groupType = "koral:fieldGroup"
+		}
+
+		operands := make([]any, len(r.Operands))
+		for i, op := range r.Operands {
+			operands[i] = buildReplacementFromNode(op, originalDoc)
+		}
+		return map[string]any{
+			"@type":     groupType,
+			"operation": "operation:" + r.Operation,
+			"operands":  operands,
+		}
+
+	default:
+		return originalDoc
+	}
+}
+
+// addCorpusRewrite attaches a single koral:rewrite annotation to replaced,
+// describing how it differs from original. It is a no-op when replaced is not
+// a JSON object. Any existing "rewrites" entry on replaced is overwritten.
+//
+// The annotation is chosen as follows:
+//   - replaced has no string "key" (e.g. the rule expanded a field into a
+//     group): a copy of the whole original node is recorded, without a scope;
+//   - the key changed: scope "key" with the original key;
+//   - otherwise: scope "value" with the original value.
+func addCorpusRewrite(replaced any, original map[string]any) {
+	replacedMap, ok := replaced.(map[string]any)
+	if !ok {
+		return
+	}
+
+	origKey, _ := original["key"].(string)
+	newKey, isField := replacedMap["key"].(string)
+
+	var rewrite map[string]any
+	switch {
+	case !isField:
+		// A structural replacement has no key of its own, so a "key" scope
+		// would be misleading. Record a copy of the complete original node.
+		rewrite = newRewriteEntry("", shallowCopyMap(original))
+	case origKey != newKey && origKey != "":
+		rewrite = newRewriteEntry("key", origKey)
+	default:
+		origValue, _ := original["value"].(string)
+		rewrite = newRewriteEntry("value", origValue)
+	}
+
+	replacedMap["rewrites"] = []any{rewrite}
+}
+
+// applyCorpusResponseMappings processes fields arrays with corpus rules.
+func (m *Mapper) applyCorpusResponseMappings(mappingID string, opts MappingOptions, jsonData any) (any, error) {
+ rules := m.parsedCorpusRules[mappingID]
+
+ jsonMap, ok := jsonData.(map[string]any)
+ if !ok {
+ return jsonData, nil
+ }
+
+ fieldsRaw, exists := jsonMap["fields"]
+ if !exists {
+ return jsonData, nil
+ }
+
+ fields, ok := fieldsRaw.([]any)
+ if !ok {
+ return jsonData, nil
+ }
+
+ var newFields []any
+ for _, fieldRaw := range fields {
+ newFields = append(newFields, fieldRaw)
+
+ fieldMap, ok := fieldRaw.(map[string]any)
+ if !ok {
+ continue
+ }
+
+ atType, _ := fieldMap["@type"].(string)
+ if atType != "koral:field" && atType != "koral:doc" {
+ continue
+ }
+
+ fieldKey, _ := fieldMap["key"].(string)
+ fieldValue := fieldMap["value"]
+
+ mapped := m.matchFieldAndCollect(fieldKey, fieldValue, rules, opts)
+ newFields = append(newFields, mapped...)
+ }
+
+ result := shallowCopyMap(jsonMap)
+ result["fields"] = newFields
+ return result, nil
+}
+
+// matchFieldAndCollect returns the mapped entries for one response field.
+// A string value is matched as is; for an array value each string element is
+// matched on its own, in order, and non-string elements are ignored. Values of
+// any other type produce no entries.
+func (m *Mapper) matchFieldAndCollect(key string, value any, rules []*parser.CorpusMappingResult, opts MappingOptions) []any {
+	// Normalise both accepted shapes to a list of candidate strings.
+	var candidates []string
+	if single, isString := value.(string); isString {
+		candidates = []string{single}
+	} else if list, isList := value.([]any); isList {
+		for _, item := range list {
+			if text, isString := item.(string); isString {
+				candidates = append(candidates, text)
+			}
+		}
+	}
+
+	var mapped []any
+	for _, candidate := range candidates {
+		mapped = append(mapped, m.matchSingleValue(key, candidate, rules, opts)...)
+	}
+	return mapped
+}
+
+// matchSingleValue runs one key/value pair through every rule and gathers the
+// flattened replacement fields of all rules that match. The source side is
+// rule.Upper for AtoB and rule.Lower otherwise; rules whose source side is not
+// a *parser.CorpusField are skipped. Unlike query rewriting, matching does not
+// stop at the first hit.
+func (m *Mapper) matchSingleValue(key, value string, rules []*parser.CorpusMappingResult, opts MappingOptions) []any {
+	// Minimal stand-in for a koral:doc node so matchCorpusField can be reused.
+	probe := map[string]any{"key": key, "value": value}
+	forward := opts.Direction == AtoB
+
+	var mapped []any
+	for _, rule := range rules {
+		var source, target parser.CorpusNode = rule.Lower, rule.Upper
+		if forward {
+			source, target = rule.Upper, rule.Lower
+		}
+
+		field, isField := source.(*parser.CorpusField)
+		if isField && matchCorpusField(field, probe) {
+			mapped = append(mapped, collectReplacementFields(target)...)
+		}
+	}
+
+	return mapped
+}
+
+// collectReplacementFields flattens a replacement node into koral:field
+// entries flagged with "mapped": true. A field yields one entry whose "type"
+// is "type:" + its Type, or "type:string" when Type is empty. A group yields
+// the concatenation of its operands' entries, discarding the operation. Any
+// other node yields nil.
+func collectReplacementFields(node parser.CorpusNode) []any {
+	if group, isGroup := node.(*parser.CorpusGroup); isGroup {
+		var flat []any
+		for _, operand := range group.Operands {
+			flat = append(flat, collectReplacementFields(operand)...)
+		}
+		return flat
+	}
+
+	field, isField := node.(*parser.CorpusField)
+	if !isField {
+		return nil
+	}
+
+	valueType := "type:string"
+	if field.Type != "" {
+		valueType = "type:" + field.Type
+	}
+
+	return []any{map[string]any{
+		"@type":  "koral:field",
+		"key":    field.Key,
+		"value":  field.Value,
+		"mapped": true,
+		"type":   valueType,
+	}}
+}
+
+// shallowCopyMap returns a new map holding the same key/value pairs as m.
+// Values are not cloned, so nested maps and slices stay shared with m. The
+// result is never nil, even when m is.
+func shallowCopyMap(m map[string]any) map[string]any {
+	clone := make(map[string]any, len(m))
+	for key := range m {
+		clone[key] = m[key]
+	}
+	return clone
+}
+
diff --git a/mapper/corpus_test.go b/mapper/corpus_test.go
new file mode 100644
index 0000000..4f607e7
--- /dev/null
+++ b/mapper/corpus_test.go
@@ -0,0 +1,686 @@
+package mapper
+
+import (
+ "testing"
+
+ "github.com/KorAP/Koral-Mapper/config"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// newCorpusMapper builds a Mapper holding a single mapping list with ID
+// "corpus-test" and Type "corpus" made of the given rule strings. The test
+// fails immediately if NewMapper returns an error.
+func newCorpusMapper(t *testing.T, rules ...string) *Mapper {
+ t.Helper()
+ mappingRules := make([]config.MappingRule, len(rules))
+ for i, r := range rules {
+ mappingRules[i] = config.MappingRule(r)
+ }
+ m, err := NewMapper([]config.MappingList{{
+ ID: "corpus-test",
+ Type: "corpus",
+ Mappings: mappingRules,
+ }})
+ require.NoError(t, err)
+ return m
+}
+
+// --- Corpus query mapping tests ---
+
+// TestCorpusQuerySimpleFieldRewrite checks that an AtoB query mapping replaces
+// the key and value of a matching koral:doc while keeping its @type and match.
+func TestCorpusQuerySimpleFieldRewrite(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "novel",
+ "match": "match:eq",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "koral:doc", corpus["@type"])
+ assert.Equal(t, "genre", corpus["key"])
+ assert.Equal(t, "fiction", corpus["value"])
+ assert.Equal(t, "match:eq", corpus["match"])
+}
+
+// TestCorpusQueryNoMatch checks that a koral:doc whose value differs from the
+// rule's value keeps its key and value.
+func TestCorpusQueryNoMatch(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "science",
+ "match": "match:eq",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "textClass", corpus["key"])
+ assert.Equal(t, "science", corpus["value"])
+}
+
+// TestCorpusQueryBtoA checks that with Direction BtoA the rule's right side is
+// matched and rewritten to its left side.
+func TestCorpusQueryBtoA(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:doc",
+ "key": "genre",
+ "value": "fiction",
+ "match": "match:eq",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: BtoA}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "textClass", corpus["key"])
+ assert.Equal(t, "novel", corpus["value"])
+}
+
+// TestCorpusQueryDocGroupRecursive checks that operands of a koral:docGroup are
+// rewritten individually: the matching operand changes, the other one and the
+// group's @type and operation stay as they were.
+func TestCorpusQueryDocGroupRecursive(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": []any{
+ map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "novel",
+ "match": "match:eq",
+ },
+ map[string]any{
+ "@type": "koral:doc",
+ "key": "author",
+ "value": "Fontane",
+ "match": "match:eq",
+ },
+ },
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "koral:docGroup", corpus["@type"])
+ assert.Equal(t, "operation:and", corpus["operation"])
+
+ operands := corpus["operands"].([]any)
+ require.Len(t, operands, 2)
+
+ first := operands[0].(map[string]any)
+ assert.Equal(t, "genre", first["key"])
+ assert.Equal(t, "fiction", first["value"])
+
+ second := operands[1].(map[string]any)
+ assert.Equal(t, "author", second["key"])
+ assert.Equal(t, "Fontane", second["value"])
+}
+
+// TestCorpusQueryDocGroupRefPassthrough checks that a koral:docGroupRef node
+// keeps its @type and ref after query mapping.
+func TestCorpusQueryDocGroupRefPassthrough(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:docGroupRef",
+ "ref": "https://korap.ids-mannheim.de/@ndiewald/MyCorpus",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "koral:docGroupRef", corpus["@type"])
+ assert.Equal(t, "https://korap.ids-mannheim.de/@ndiewald/MyCorpus", corpus["ref"])
+}
+
+// TestCorpusQueryFieldAlias checks that a node typed koral:field is rewritten
+// the same way as a koral:doc.
+func TestCorpusQueryFieldAlias(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:field",
+ "key": "textClass",
+ "value": "novel",
+ "match": "match:eq",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "genre", corpus["key"])
+ assert.Equal(t, "fiction", corpus["value"])
+}
+
+// TestCorpusQueryFieldGroupAlias checks that operands of a koral:fieldGroup
+// are rewritten like those of a koral:docGroup.
+func TestCorpusQueryFieldGroupAlias(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:fieldGroup",
+ "operation": "operation:and",
+ "operands": []any{
+ map[string]any{
+ "@type": "koral:field",
+ "key": "textClass",
+ "value": "novel",
+ },
+ },
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ operands := corpus["operands"].([]any)
+ first := operands[0].(map[string]any)
+ assert.Equal(t, "genre", first["key"])
+}
+
+// TestCorpusQueryCollectionAttribute checks that the tree is also rewritten
+// when it is stored under "collection" instead of "corpus".
+func TestCorpusQueryCollectionAttribute(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "collection": map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "novel",
+ "match": "match:eq",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["collection"].(map[string]any)
+ assert.Equal(t, "genre", corpus["key"])
+ assert.Equal(t, "fiction", corpus["value"])
+}
+
+// TestCorpusQuerySingleToGroupReplacement checks that a rule with a group on
+// its right side turns a matching koral:doc into a koral:docGroup with
+// operation:and and one operand per group member, in rule order.
+func TestCorpusQuerySingleToGroupReplacement(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> (genre=fiction & type=book)")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "novel",
+ "match": "match:eq",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "koral:docGroup", corpus["@type"])
+ assert.Equal(t, "operation:and", corpus["operation"])
+
+ operands := corpus["operands"].([]any)
+ require.Len(t, operands, 2)
+ assert.Equal(t, "genre", operands[0].(map[string]any)["key"])
+ assert.Equal(t, "type", operands[1].(map[string]any)["key"])
+}
+
+// TestCorpusQueryRegexMatch checks that a #regex rule rewrites a node whose
+// value matches the expression.
+func TestCorpusQueryRegexMatch(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=wissenschaft.*#regex <> genre=science")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "wissenschaft-populaer",
+ "match": "match:eq",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "genre", corpus["key"])
+ assert.Equal(t, "science", corpus["value"])
+}
+
+// TestCorpusQueryRegexNoMatch checks that a #regex rule leaves a node alone
+// when its value does not match the expression.
+func TestCorpusQueryRegexNoMatch(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=wissenschaft.*#regex <> genre=science")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "belletristik",
+ "match": "match:eq",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "textClass", corpus["key"])
+ assert.Equal(t, "belletristik", corpus["value"])
+}
+
+// TestCorpusQueryMatchTypeFilter checks that a rule specifying :geq applies to
+// a node with match:geq and that the result carries match:geq.
+func TestCorpusQueryMatchTypeFilter(t *testing.T) {
+ m := newCorpusMapper(t, "pubDate=2020:geq <> yearFrom=2020:geq")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:doc",
+ "key": "pubDate",
+ "value": "2020",
+ "match": "match:geq",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "yearFrom", corpus["key"])
+ assert.Equal(t, "match:geq", corpus["match"])
+}
+
+// TestCorpusQueryMatchTypeFilterNoMatchTest checks that a rule without a match
+// operator still applies to a node with match:geq and that the node's match
+// is carried over to the result.
+func TestCorpusQueryMatchTypeFilterNoMatchTest(t *testing.T) {
+ m := newCorpusMapper(t, "pubDate=2020 <> yearFrom=2020")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:doc",
+ "key": "pubDate",
+ "value": "2020",
+ "match": "match:geq",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "yearFrom", corpus["key"])
+ assert.Equal(t, "match:geq", corpus["match"])
+}
+
+// TestCorpusQueryMatchTypeFilterNoMatch checks that a rule specifying :geq
+// does not apply to a node with match:eq.
+func TestCorpusQueryMatchTypeFilterNoMatch(t *testing.T) {
+ m := newCorpusMapper(t, "pubDate=2020:geq <> yearFrom=2020:geq")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:doc",
+ "key": "pubDate",
+ "value": "2020",
+ "match": "match:eq",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "pubDate", corpus["key"])
+}
+
+// TestCorpusQueryRewriteAnnotation checks that with AddRewrites set the
+// rewritten node carries exactly one rewrites entry of @type koral:rewrite
+// with editor "Koral-Mapper".
+func TestCorpusQueryRewriteAnnotation(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "novel",
+ "match": "match:eq",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB, AddRewrites: true}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "genre", corpus["key"])
+
+ rewrites, ok := corpus["rewrites"].([]any)
+ require.True(t, ok)
+ require.Len(t, rewrites, 1)
+
+ rewrite := rewrites[0].(map[string]any)
+ assert.Equal(t, "koral:rewrite", rewrite["@type"])
+ assert.Equal(t, "Koral-Mapper", rewrite["editor"])
+}
+
+// TestCorpusQueryPreservesMatchTypeFromOriginal checks that match and type of
+// the original node are kept when the rule specifies neither.
+func TestCorpusQueryPreservesMatchTypeFromOriginal(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "novel",
+ "match": "match:contains",
+ "type": "type:string",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "genre", corpus["key"])
+ assert.Equal(t, "match:contains", corpus["match"])
+ assert.Equal(t, "type:string", corpus["type"])
+}
+
+// TestCorpusQueryNoCorpusSection checks that input with neither "corpus" nor
+// "collection" comes back equal to the input.
+func TestCorpusQueryNoCorpusSection(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "query": map[string]any{"@type": "koral:token"},
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+ assert.Equal(t, input, result)
+}
+
+// TestCorpusQueryMultipleRules checks that with two rules each operand of a
+// group is rewritten by the rule matching its own value.
+func TestCorpusQueryMultipleRules(t *testing.T) {
+ m := newCorpusMapper(t,
+ "textClass=novel <> genre=fiction",
+ "textClass=science <> genre=nonfiction",
+ )
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:docGroup",
+ "operation": "operation:or",
+ "operands": []any{
+ map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "novel",
+ },
+ map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "science",
+ },
+ },
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ operands := corpus["operands"].([]any)
+ require.Len(t, operands, 2)
+ assert.Equal(t, "genre", operands[0].(map[string]any)["key"])
+ assert.Equal(t, "fiction", operands[0].(map[string]any)["value"])
+ assert.Equal(t, "genre", operands[1].(map[string]any)["key"])
+ assert.Equal(t, "nonfiction", operands[1].(map[string]any)["value"])
+}
+
+// TestCorpusQueryNestedDocGroups checks that a matching koral:doc inside a
+// group nested in another group is rewritten.
+func TestCorpusQueryNestedDocGroups(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": []any{
+ map[string]any{
+ "@type": "koral:docGroup",
+ "operation": "operation:or",
+ "operands": []any{
+ map[string]any{
+ "@type": "koral:doc",
+ "key": "textClass",
+ "value": "novel",
+ },
+ },
+ },
+ map[string]any{
+ "@type": "koral:doc",
+ "key": "author",
+ "value": "Fontane",
+ },
+ },
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ outerOperands := corpus["operands"].([]any)
+ innerGroup := outerOperands[0].(map[string]any)
+ innerOperands := innerGroup["operands"].([]any)
+ assert.Equal(t, "genre", innerOperands[0].(map[string]any)["key"])
+}
+
+// --- Corpus response mapping tests ---
+
+// TestCorpusResponseSimpleFieldEnrichment checks that a BtoA response mapping
+// keeps the original field and appends a mapped entry (key and value from the
+// rule's left side, "mapped": true) right after it.
+func TestCorpusResponseSimpleFieldEnrichment(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "fields": []any{
+ map[string]any{
+ "@type": "koral:field",
+ "key": "genre",
+ "value": "fiction",
+ "type": "type:string",
+ },
+ },
+ }
+ result, err := m.ApplyResponseMappings("corpus-test", MappingOptions{Direction: BtoA}, input)
+ require.NoError(t, err)
+
+ fields := result.(map[string]any)["fields"].([]any)
+ require.Len(t, fields, 2)
+
+ original := fields[0].(map[string]any)
+ assert.Equal(t, "genre", original["key"])
+
+ mapped := fields[1].(map[string]any)
+ assert.Equal(t, "textClass", mapped["key"])
+ assert.Equal(t, "novel", mapped["value"])
+ assert.Equal(t, true, mapped["mapped"])
+}
+
+// TestCorpusResponseNoMatch checks that no entry is added for a field that
+// matches no rule.
+func TestCorpusResponseNoMatch(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "fields": []any{
+ map[string]any{
+ "@type": "koral:field",
+ "key": "author",
+ "value": "Fontane",
+ "type": "type:string",
+ },
+ },
+ }
+ result, err := m.ApplyResponseMappings("corpus-test", MappingOptions{Direction: BtoA}, input)
+ require.NoError(t, err)
+
+ fields := result.(map[string]any)["fields"].([]any)
+ require.Len(t, fields, 1)
+}
+
+// TestCorpusResponseMultiValuedField checks that each element of an array
+// value is matched on its own and yields its own mapped entry, in element
+// order.
+func TestCorpusResponseMultiValuedField(t *testing.T) {
+ m := newCorpusMapper(t,
+ "textClass=wissenschaft <> genre=science",
+ "textClass=populaerwissenschaft <> genre=popsci",
+ )
+
+ input := map[string]any{
+ "fields": []any{
+ map[string]any{
+ "@type": "koral:field",
+ "key": "textClass",
+ "value": []any{"wissenschaft", "populaerwissenschaft"},
+ "type": "type:keywords",
+ },
+ },
+ }
+ result, err := m.ApplyResponseMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ fields := result.(map[string]any)["fields"].([]any)
+ require.Len(t, fields, 3)
+
+ mapped1 := fields[1].(map[string]any)
+ assert.Equal(t, "genre", mapped1["key"])
+ assert.Equal(t, "science", mapped1["value"])
+ assert.Equal(t, true, mapped1["mapped"])
+
+ mapped2 := fields[2].(map[string]any)
+ assert.Equal(t, "genre", mapped2["key"])
+ assert.Equal(t, "popsci", mapped2["value"])
+ assert.Equal(t, true, mapped2["mapped"])
+}
+
+// TestCorpusResponseRegexMatch checks that a #regex rule adds a mapped entry
+// for a response field whose value matches the expression.
+func TestCorpusResponseRegexMatch(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=wissenschaft.*#regex <> genre=science")
+
+ input := map[string]any{
+ "fields": []any{
+ map[string]any{
+ "@type": "koral:field",
+ "key": "textClass",
+ "value": "wissenschaft-populaer",
+ "type": "type:string",
+ },
+ },
+ }
+ result, err := m.ApplyResponseMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ fields := result.(map[string]any)["fields"].([]any)
+ require.Len(t, fields, 2)
+
+ mapped := fields[1].(map[string]any)
+ assert.Equal(t, "genre", mapped["key"])
+ assert.Equal(t, "science", mapped["value"])
+}
+
+// TestCorpusResponseDocTypeAlias checks that a fields entry typed koral:doc is
+// enriched like a koral:field.
+func TestCorpusResponseDocTypeAlias(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "fields": []any{
+ map[string]any{
+ "@type": "koral:doc",
+ "key": "genre",
+ "value": "fiction",
+ "type": "type:string",
+ },
+ },
+ }
+ result, err := m.ApplyResponseMappings("corpus-test", MappingOptions{Direction: BtoA}, input)
+ require.NoError(t, err)
+
+ fields := result.(map[string]any)["fields"].([]any)
+ require.Len(t, fields, 2)
+
+ mapped := fields[1].(map[string]any)
+ assert.Equal(t, "textClass", mapped["key"])
+}
+
+// TestCorpusResponseGroupReplacement checks that a group on the rule's target
+// side is flattened into one mapped entry per group member, in rule order.
+func TestCorpusResponseGroupReplacement(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> (genre=fiction & type=book)")
+
+ input := map[string]any{
+ "fields": []any{
+ map[string]any{
+ "@type": "koral:field",
+ "key": "textClass",
+ "value": "novel",
+ "type": "type:string",
+ },
+ },
+ }
+ result, err := m.ApplyResponseMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ fields := result.(map[string]any)["fields"].([]any)
+ require.Len(t, fields, 3)
+
+ mapped1 := fields[1].(map[string]any)
+ assert.Equal(t, "genre", mapped1["key"])
+ assert.Equal(t, "fiction", mapped1["value"])
+ assert.Equal(t, true, mapped1["mapped"])
+
+ mapped2 := fields[2].(map[string]any)
+ assert.Equal(t, "type", mapped2["key"])
+ assert.Equal(t, "book", mapped2["value"])
+ assert.Equal(t, true, mapped2["mapped"])
+}
+
+// TestCorpusResponseNoFieldsSection checks that a response without "fields"
+// comes back equal to the input.
+func TestCorpusResponseNoFieldsSection(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "snippet": "<span>test</span>",
+ }
+ result, err := m.ApplyResponseMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+ assert.Equal(t, input, result)
+}
+
+// TestCorpusResponseDirectionAtoB checks that with Direction AtoB a response
+// field matching the rule's left side gets a mapped entry built from the
+// right side.
+func TestCorpusResponseDirectionAtoB(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+
+ input := map[string]any{
+ "fields": []any{
+ map[string]any{
+ "@type": "koral:field",
+ "key": "textClass",
+ "value": "novel",
+ "type": "type:string",
+ },
+ },
+ }
+ result, err := m.ApplyResponseMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ fields := result.(map[string]any)["fields"].([]any)
+ require.Len(t, fields, 2)
+
+ mapped := fields[1].(map[string]any)
+ assert.Equal(t, "genre", mapped["key"])
+ assert.Equal(t, "fiction", mapped["value"])
+}
+
+// TestCorpusQueryValueTypeInReplacement checks that a value type given on the
+// rule's target side (#string) replaces the original node's type (type:date).
+func TestCorpusQueryValueTypeInReplacement(t *testing.T) {
+ m := newCorpusMapper(t, "pubDate=2020-01#date <> publicationYear=2020#string")
+
+ input := map[string]any{
+ "corpus": map[string]any{
+ "@type": "koral:doc",
+ "key": "pubDate",
+ "value": "2020-01",
+ "match": "match:eq",
+ "type": "type:date",
+ },
+ }
+ result, err := m.ApplyQueryMappings("corpus-test", MappingOptions{Direction: AtoB}, input)
+ require.NoError(t, err)
+
+ corpus := result.(map[string]any)["corpus"].(map[string]any)
+ assert.Equal(t, "publicationYear", corpus["key"])
+ assert.Equal(t, "2020", corpus["value"])
+ assert.Equal(t, "type:string", corpus["type"])
+}
+
+// TestCorpusQueryMappingListNotFound checks that ApplyQueryMappings returns an
+// error containing "not found" for an unknown mapping ID.
+func TestCorpusQueryMappingListNotFound(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+ _, err := m.ApplyQueryMappings("nonexistent", MappingOptions{Direction: AtoB}, map[string]any{})
+ assert.Error(t, err)
+ assert.Contains(t, err.Error(), "not found")
+}
+
+// TestCorpusResponseMappingListNotFound checks that ApplyResponseMappings
+// returns an error containing "not found" for an unknown mapping ID.
+func TestCorpusResponseMappingListNotFound(t *testing.T) {
+ m := newCorpusMapper(t, "textClass=novel <> genre=fiction")
+ _, err := m.ApplyResponseMappings("nonexistent", MappingOptions{Direction: AtoB}, map[string]any{})
+ assert.Error(t, err)
+ assert.Contains(t, err.Error(), "not found")
+}
diff --git a/mapper/mapper.go b/mapper/mapper.go
index d11ae01..e55e77a 100644
--- a/mapper/mapper.go
+++ b/mapper/mapper.go
@@ -13,8 +13,25 @@
const (
AtoB Direction = true
BtoA Direction = false
+
+ RewriteEditor = "Koral-Mapper"
)
+// newRewriteEntry builds a koral:rewrite annotation attributed to
+// RewriteEditor. The "scope" entry is set only for a non-empty scope and the
+// "original" entry only for a non-nil original.
+func newRewriteEntry(scope string, original any) map[string]any {
+	entry := map[string]any{"@type": "koral:rewrite"}
+	entry["editor"] = RewriteEditor
+
+	if scope != "" {
+		entry["scope"] = scope
+	}
+	if original != nil {
+		entry["original"] = original
+	}
+
+	return entry
+}
+
// String converts the Direction to its string representation
func (d Direction) String() string {
if d {
@@ -37,15 +54,17 @@
// Mapper handles the application of mapping rules to JSON objects
type Mapper struct {
- mappingLists map[string]*config.MappingList
- parsedRules map[string][]*parser.MappingResult
+ mappingLists map[string]*config.MappingList
+ parsedQueryRules map[string][]*parser.MappingResult
+ parsedCorpusRules map[string][]*parser.CorpusMappingResult
}
// NewMapper creates a new Mapper instance from a list of MappingLists
func NewMapper(lists []config.MappingList) (*Mapper, error) {
m := &Mapper{
- mappingLists: make(map[string]*config.MappingList),
- parsedRules: make(map[string][]*parser.MappingResult),
+ mappingLists: make(map[string]*config.MappingList),
+ parsedQueryRules: make(map[string][]*parser.MappingResult),
+ parsedCorpusRules: make(map[string][]*parser.CorpusMappingResult),
}
// Store mapping lists by ID
@@ -54,16 +73,22 @@
return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
}
- // Create a copy of the list to store
listCopy := list
m.mappingLists[list.ID] = &listCopy
- // Parse the rules immediately
- parsedRules, err := list.ParseMappings()
- if err != nil {
- return nil, fmt.Errorf("failed to parse mappings for list %s: %w", list.ID, err)
+ if list.IsCorpus() {
+ corpusRules, err := list.ParseCorpusMappings()
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse corpus mappings for list %s: %w", list.ID, err)
+ }
+ m.parsedCorpusRules[list.ID] = corpusRules
+ } else {
+ queryRules, err := list.ParseMappings()
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse mappings for list %s: %w", list.ID, err)
+ }
+ m.parsedQueryRules[list.ID] = queryRules
}
- m.parsedRules[list.ID] = parsedRules
}
return m, nil
diff --git a/mapper/mapper_test.go b/mapper/mapper_test.go
index c561663..ed36cd3 100644
--- a/mapper/mapper_test.go
+++ b/mapper/mapper_test.go
@@ -120,7 +120,7 @@
"rewrites": [
{
"@type": "koral:rewrite",
- "editor": "termMapper",
+ "editor": "Koral-Mapper",
"original": {
"@type": "koral:term",
"foundry": "opennlp",
@@ -176,7 +176,7 @@
"rewrites": [
{
"@type": "koral:rewrite",
- "editor": "termMapper",
+ "editor": "Koral-Mapper",
"original": {
"@type": "koral:term",
"foundry": "opennlp",
diff --git a/mapper/query.go b/mapper/query.go
index 73bc880..4980f85 100644
--- a/mapper/query.go
+++ b/mapper/query.go
@@ -16,8 +16,12 @@
return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
}
+ if m.mappingLists[mappingID].IsCorpus() {
+ return m.applyCorpusQueryMappings(mappingID, opts, jsonData)
+ }
+
// Get the parsed rules
- rules := m.parsedRules[mappingID]
+ rules := m.parsedQueryRules[mappingID]
// Check if we have a wrapper object with a "query" field
var queryData any
@@ -184,63 +188,7 @@
// Add rewrites if enabled and node was changed
if opts.AddRewrites && !ast.NodesEqual(node, originalNode) {
- // Create rewrite object
- rewrite := map[string]any{
- "@type": "koral:rewrite",
- "editor": "termMapper",
- }
-
- // Check if the node types are different (structural change)
- if originalNode.Type() != node.Type() {
- // Full node replacement
- originalBytes, err := parser.SerializeToJSON(originalNode)
- if err != nil {
- return nil, fmt.Errorf("failed to serialize original node for rewrite: %w", err)
- }
- var originalJSON any
- if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
- return nil, fmt.Errorf("failed to parse original node JSON for rewrite: %w", err)
- }
- rewrite["original"] = originalJSON
- } else if term, ok := originalNode.(*ast.Term); ok && ast.IsTermNode(node) {
- // Check which attributes changed
- newTerm := node.(*ast.Term)
- if term.Foundry != newTerm.Foundry {
- rewrite["scope"] = "foundry"
- rewrite["original"] = term.Foundry
- } else if term.Layer != newTerm.Layer {
- rewrite["scope"] = "layer"
- rewrite["original"] = term.Layer
- } else if term.Key != newTerm.Key {
- rewrite["scope"] = "key"
- rewrite["original"] = term.Key
- } else if term.Value != newTerm.Value {
- rewrite["scope"] = "value"
- rewrite["original"] = term.Value
- } else {
- // No specific attribute changed, use full node replacement
- originalBytes, err := parser.SerializeToJSON(originalNode)
- if err != nil {
- return nil, fmt.Errorf("failed to serialize original node for rewrite: %w", err)
- }
- var originalJSON any
- if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
- return nil, fmt.Errorf("failed to parse original node JSON for rewrite: %w", err)
- }
- rewrite["original"] = originalJSON
- }
- } else {
- // Full node replacement
- originalBytes, err := parser.SerializeToJSON(originalNode)
- if err != nil {
- return nil, fmt.Errorf("failed to serialize original node for rewrite: %w", err)
- }
- var originalJSON any
- if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
- return nil, fmt.Errorf("failed to parse original node JSON for rewrite: %w", err)
- }
- rewrite["original"] = originalJSON
- }
+ rewrite := buildQueryRewrite(originalNode, node)
// Add rewrite to the node
if resultMap, ok := resultData.(map[string]any); ok {
@@ -306,6 +254,36 @@
return resultData, nil
}
+// buildQueryRewrite creates a rewrite entry for a query-level transformation by comparing the original and new AST nodes: when both are terms it hands newRewriteEntry the name of the first differing attribute (foundry, layer, key, value, checked in that order) together with that attribute's original value.
+// In every other case it hands newRewriteEntry an empty name and the original node round-tripped through JSON; serialization or decoding failures are not reported to the caller and instead produce an entry built from ("", nil).
+func buildQueryRewrite(originalNode, newNode ast.Node) map[string]any {
+ if term, ok := originalNode.(*ast.Term); ok && ast.IsTermNode(newNode) && originalNode.Type() == newNode.Type() {
+ newTerm := newNode.(*ast.Term) // unchecked assertion: presumably ast.IsTermNode implies *ast.Term — TODO confirm, otherwise this panics
+ if term.Foundry != newTerm.Foundry { // only the first differing attribute is reported, later differences are ignored
+ return newRewriteEntry("foundry", term.Foundry)
+ }
+ if term.Layer != newTerm.Layer {
+ return newRewriteEntry("layer", term.Layer)
+ }
+ if term.Key != newTerm.Key {
+ return newRewriteEntry("key", term.Key)
+ }
+ if term.Value != newTerm.Value {
+ return newRewriteEntry("value", term.Value)
+ }
+ } // reached when the nodes are not a term pair, or when none of the four compared attributes differs
+
+ originalBytes, err := parser.SerializeToJSON(originalNode)
+ if err != nil {
+ return newRewriteEntry("", nil) // error is dropped here: the entry carries no original node
+ }
+ var originalJSON any
+ if err := json.Unmarshal(originalBytes, &originalJSON); err != nil {
+ return newRewriteEntry("", nil) // error is dropped here as well, same result as a failed serialization
+ }
+ return newRewriteEntry("", originalJSON) // empty attribute name plus the whole original node as generic decoded JSON
+}
+
// isValidQueryObject checks if the query data is a valid object that can be processed
func isValidQueryObject(data any) bool {
// Check if it's a map
diff --git a/mapper/response.go b/mapper/response.go
index d756edc..ced97f3 100644
--- a/mapper/response.go
+++ b/mapper/response.go
@@ -17,8 +17,12 @@
return nil, fmt.Errorf("mapping list with ID %s not found", mappingID)
}
+ if m.mappingLists[mappingID].IsCorpus() {
+ return m.applyCorpusResponseMappings(mappingID, opts, jsonData)
+ }
+
// Get the parsed rules
- rules := m.parsedRules[mappingID]
+ rules := m.parsedQueryRules[mappingID]
// Check if we have a snippet to process
jsonMap, ok := jsonData.(map[string]any)
diff --git a/mapper/response_test.go b/mapper/response_test.go
index dadba6c..88273c5 100644
--- a/mapper/response_test.go
+++ b/mapper/response_test.go
@@ -74,7 +74,7 @@
require.NoError(t, err)
// Debug: Print what the parsed rules look like
- rules := m.parsedRules["test-mapper"]
+ rules := m.parsedQueryRules["test-mapper"]
t.Logf("Number of parsed rules: %d", len(rules))
for i, rule := range rules {
t.Logf("Rule %d - Upper: %+v", i, rule.Upper)