Add mapping-configuration string builder and parser
Change-Id: I78833fe8666d501846e7ce05d13097e02701a5f4
diff --git a/cmd/koralmapper/cfgparam.go b/cmd/koralmapper/cfgparam.go
new file mode 100644
index 0000000..dc00da5
--- /dev/null
+++ b/cmd/koralmapper/cfgparam.go
@@ -0,0 +1,115 @@
+package main
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/KorAP/Koral-Mapper/config"
+)
+
+// CascadeEntry represents a single mapping configuration parsed from
+// the cfg URL parameter. After parsing, empty override fields are
+// merged with the YAML defaults from the corresponding MappingList.
+type CascadeEntry struct {
+ ID string // mapping list ID; ParseCfgParam rejects IDs not found in its lists argument
+ Direction string // "atob" or "btoa"; ParseCfgParam rejects any other value
+ FoundryA string // override from the cfg entry, else MappingList.FoundryA
+ LayerA string // override from the cfg entry, else MappingList.LayerA
+ FoundryB string // override from the cfg entry, else MappingList.FoundryB
+ LayerB string // override from the cfg entry, else MappingList.LayerB
+}
+
+// ParseCfgParam parses the compact cfg URL parameter into a slice of
+// CascadeEntry structs. Empty override fields are merged with YAML
+// defaults from the matching MappingList.
+//
+// Format: entry (";" entry)*
+//
+// entry = id ":" dir [ ":" foundryA ":" layerA ":" foundryB ":" layerB ]
+//
+// An entry has either 2 fields (all foundry/layer use defaults) or
+// 6 fields (explicit values, empty means use default).
+func ParseCfgParam(raw string, lists []config.MappingList) ([]CascadeEntry, error) {
+ if raw == "" {
+ return nil, nil
+ }
+
+ listsByID := make(map[string]*config.MappingList, len(lists))
+ for i := range lists {
+ listsByID[lists[i].ID] = &lists[i]
+ }
+
+ parts := strings.Split(raw, ";")
+ result := make([]CascadeEntry, 0, len(parts))
+
+ for _, part := range parts {
+ fields := strings.Split(part, ":")
+ n := len(fields)
+
+ if n != 2 && n != 6 {
+ return nil, fmt.Errorf("invalid entry %q: expected 2 or 6 colon-separated fields, got %d", part, n)
+ }
+
+ id := fields[0]
+ dir := fields[1]
+
+ if dir != "atob" && dir != "btoa" {
+ return nil, fmt.Errorf("invalid direction %q in entry %q", dir, part)
+ }
+
+ list, ok := listsByID[id]
+ if !ok {
+ return nil, fmt.Errorf("unknown mapping ID %q", id)
+ }
+
+ ce := CascadeEntry{
+ ID: id,
+ Direction: dir,
+ }
+
+ if n == 6 {
+ ce.FoundryA = fields[2]
+ ce.LayerA = fields[3]
+ ce.FoundryB = fields[4]
+ ce.LayerB = fields[5]
+ }
+
+ if ce.FoundryA == "" {
+ ce.FoundryA = list.FoundryA
+ }
+ if ce.LayerA == "" {
+ ce.LayerA = list.LayerA
+ }
+ if ce.FoundryB == "" {
+ ce.FoundryB = list.FoundryB
+ }
+ if ce.LayerB == "" {
+ ce.LayerB = list.LayerB
+ }
+
+ result = append(result, ce)
+ }
+
+ return result, nil
+}
+
+// BuildCfgParam serialises a slice of CascadeEntry back to the compact
+// cfg string format. Entries with all foundry/layer fields empty use
+// the short 2-field format (id:dir). Entries with any non-empty
+// foundry/layer field use the full 6-field format.
+func BuildCfgParam(entries []CascadeEntry) string {
+ if len(entries) == 0 {
+ return ""
+ }
+
+ parts := make([]string, len(entries))
+ for i, e := range entries {
+ if e.FoundryA == "" && e.LayerA == "" && e.FoundryB == "" && e.LayerB == "" {
+ parts[i] = e.ID + ":" + e.Direction
+ } else {
+ parts[i] = e.ID + ":" + e.Direction + ":" + e.FoundryA + ":" + e.LayerA + ":" + e.FoundryB + ":" + e.LayerB
+ }
+ }
+
+ return strings.Join(parts, ";")
+}
diff --git a/cmd/koralmapper/cfgparam_test.go b/cmd/koralmapper/cfgparam_test.go
new file mode 100644
index 0000000..398869c
--- /dev/null
+++ b/cmd/koralmapper/cfgparam_test.go
@@ -0,0 +1,227 @@
+package main
+
+import (
+ "testing"
+
+ tmconfig "github.com/KorAP/Koral-Mapper/config"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+var cfgTestLists = []tmconfig.MappingList{ // fixture handed to ParseCfgParam by the tests in this file
+ {
+ ID: "stts-upos",
+ FoundryA: "opennlp",
+ LayerA: "p",
+ FoundryB: "upos",
+ LayerB: "p",
+ Mappings: []tmconfig.MappingRule{"[PIDAT] <> [DET]"},
+ },
+ {
+ ID: "other-mapper",
+ FoundryA: "stts",
+ LayerA: "p",
+ FoundryB: "ud",
+ LayerB: "pos", // differs from LayerA, unlike the first list
+ Mappings: []tmconfig.MappingRule{"[A] <> [B]"},
+ },
+ {
+ ID: "corpus-map",
+ Type: "corpus", // this list sets no foundry/layer values, so parsed entries get none from it
+ Mappings: []tmconfig.MappingRule{"textClass=science <> textClass=akademisch"},
+ },
+}
+
+// TestParseCfgParam feeds a table of cfg strings to ParseCfgParam together with
+// cfgTestLists and checks the parsed entries or a substring of the returned error.
+func TestParseCfgParam(t *testing.T) {
+	cases := []struct {
+		desc, cfg, errSubstr string
+		want                 []CascadeEntry
+	}{
+		{
+			desc: "Full 6-field entry",
+			cfg:  "stts-upos:atob:opennlp:p:upos:p",
+			want: []CascadeEntry{
+				{ID: "stts-upos", Direction: "atob", FoundryA: "opennlp", LayerA: "p", FoundryB: "upos", LayerB: "p"},
+			},
+		},
+		{
+			desc: "Short 2-field entry defaults to YAML values",
+			cfg:  "stts-upos:atob",
+			want: []CascadeEntry{
+				{ID: "stts-upos", Direction: "atob", FoundryA: "opennlp", LayerA: "p", FoundryB: "upos", LayerB: "p"},
+			},
+		},
+		{
+			desc: "Short 2-field entry with btoa direction",
+			cfg:  "other-mapper:btoa",
+			want: []CascadeEntry{
+				{ID: "other-mapper", Direction: "btoa", FoundryA: "stts", LayerA: "p", FoundryB: "ud", LayerB: "pos"},
+			},
+		},
+		{
+			desc: "Mixed 2-field and 6-field entries",
+			cfg:  "stts-upos:atob;other-mapper:btoa:stts:p:ud:p",
+			want: []CascadeEntry{
+				{ID: "stts-upos", Direction: "atob", FoundryA: "opennlp", LayerA: "p", FoundryB: "upos", LayerB: "p"},
+				{ID: "other-mapper", Direction: "btoa", FoundryA: "stts", LayerA: "p", FoundryB: "ud", LayerB: "p"},
+			},
+		},
+		{
+			desc: "Empty cfg string returns empty slice",
+			cfg:  "",
+			want: nil,
+		},
+		{
+			desc:      "Unknown mapping ID returns error",
+			cfg:       "unknown-id:atob",
+			errSubstr: "unknown mapping ID",
+		},
+		{
+			desc:      "Second entry has unknown mapping ID",
+			cfg:       "stts-upos:atob;unknown:btoa",
+			errSubstr: "unknown mapping ID",
+		},
+		{
+			desc:      "Malformed entry with 1 field",
+			cfg:       "stts-upos",
+			errSubstr: "invalid entry",
+		},
+		{
+			desc:      "Malformed entry with 3 fields",
+			cfg:       "stts-upos:atob:extra",
+			errSubstr: "invalid entry",
+		},
+		{
+			desc:      "Malformed entry with 4 fields",
+			cfg:       "stts-upos:atob:a:b",
+			errSubstr: "invalid entry",
+		},
+		{
+			desc:      "Malformed entry with 5 fields",
+			cfg:       "stts-upos:atob:a:b:c",
+			errSubstr: "invalid entry",
+		},
+		{
+			desc: "Empty override fields fall back to YAML defaults",
+			cfg:  "stts-upos:atob::::",
+			want: []CascadeEntry{
+				{ID: "stts-upos", Direction: "atob", FoundryA: "opennlp", LayerA: "p", FoundryB: "upos", LayerB: "p"},
+			},
+		},
+		{
+			desc: "Partial overrides merge with YAML defaults",
+			cfg:  "stts-upos:atob:custom::custom:",
+			want: []CascadeEntry{
+				{ID: "stts-upos", Direction: "atob", FoundryA: "custom", LayerA: "p", FoundryB: "custom", LayerB: "p"},
+			},
+		},
+		{
+			desc: "Corpus mapping 2-field entry has no foundry/layer defaults",
+			cfg:  "corpus-map:atob",
+			want: []CascadeEntry{
+				{ID: "corpus-map", Direction: "atob"},
+			},
+		},
+		{
+			desc:      "Invalid direction",
+			cfg:       "stts-upos:invalid",
+			errSubstr: "invalid direction",
+		},
+		{
+			desc: "Three entries with mixed types",
+			cfg:  "stts-upos:atob;corpus-map:atob;other-mapper:btoa",
+			want: []CascadeEntry{
+				{ID: "stts-upos", Direction: "atob", FoundryA: "opennlp", LayerA: "p", FoundryB: "upos", LayerB: "p"},
+				{ID: "corpus-map", Direction: "atob"},
+				{ID: "other-mapper", Direction: "btoa", FoundryA: "stts", LayerA: "p", FoundryB: "ud", LayerB: "pos"},
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			got, err := ParseCfgParam(tc.cfg, cfgTestLists)
+			if tc.errSubstr == "" {
+				require.NoError(t, err)
+				assert.Equal(t, tc.want, got)
+				return
+			}
+			require.Error(t, err)
+			assert.Contains(t, err.Error(), tc.errSubstr)
+		})
+	}
+}
+
+// TestBuildCfgParam checks the string BuildCfgParam produces for a table of
+// entry slices, covering the 2-field form, the 6-field form and empty input.
+func TestBuildCfgParam(t *testing.T) {
+	cases := []struct {
+		desc, want string
+		in         []CascadeEntry
+	}{
+		{
+			desc: "Full 6-field entry",
+			in: []CascadeEntry{
+				{ID: "stts-upos", Direction: "atob", FoundryA: "opennlp", LayerA: "p", FoundryB: "upos", LayerB: "p"},
+			},
+			want: "stts-upos:atob:opennlp:p:upos:p",
+		},
+		{
+			desc: "Short 2-field entry when all foundry/layer empty",
+			in: []CascadeEntry{
+				{ID: "corpus-map", Direction: "atob"},
+			},
+			want: "corpus-map:atob",
+		},
+		{
+			desc: "Multiple entries",
+			in: []CascadeEntry{
+				{ID: "stts-upos", Direction: "atob", FoundryA: "opennlp", LayerA: "p", FoundryB: "upos", LayerB: "p"},
+				{ID: "other-mapper", Direction: "btoa", FoundryA: "stts", LayerA: "p", FoundryB: "ud", LayerB: "p"},
+			},
+			want: "stts-upos:atob:opennlp:p:upos:p;other-mapper:btoa:stts:p:ud:p",
+		},
+		{
+			desc: "Nil slice returns empty string",
+			in:   nil,
+			want: "",
+		},
+		{
+			desc: "Empty slice returns empty string",
+			in:   []CascadeEntry{},
+			want: "",
+		},
+		{
+			desc: "Mixed full and short entries",
+			in: []CascadeEntry{
+				{ID: "stts-upos", Direction: "atob", FoundryA: "opennlp", LayerA: "p", FoundryB: "upos", LayerB: "p"},
+				{ID: "corpus-map", Direction: "atob"},
+			},
+			want: "stts-upos:atob:opennlp:p:upos:p;corpus-map:atob",
+		},
+		{
+			desc: "Entry with some empty foundry/layer fields uses 6-field format",
+			in: []CascadeEntry{
+				{ID: "stts-upos", Direction: "atob", FoundryA: "opennlp"},
+			},
+			want: "stts-upos:atob:opennlp:::",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			assert.Equal(t, tc.want, BuildCfgParam(tc.in))
+		})
+	}
+}
+
+func TestBuildAndParseCfgParamRoundTrip(t *testing.T) {
+ original := "stts-upos:atob:opennlp:p:upos:p;corpus-map:btoa"
+ entries, err := ParseCfgParam(original, cfgTestLists)
+ require.NoError(t, err)
+
+ rebuilt := BuildCfgParam(entries)
+ assert.Equal(t, original, rebuilt)
+}