| Akron | a379f1e | 2026-02-20 08:29:43 +0100 | [diff] [blame] | 1 | package main |
| 2 | |
| 3 | import ( |
| 4 | "fmt" |
| 5 | "strings" |
| 6 | |
| 7 | "github.com/KorAP/Koral-Mapper/config" |
| 8 | ) |
| 9 | |
// CascadeEntry represents a single mapping configuration parsed from
// the cfg URL parameter. After parsing, empty override fields are
// merged with the YAML defaults from the corresponding MappingList.
//
// ParseCfgParam fills the foundry/layer fields only for annotation
// mappings and the FieldA/FieldB fields only for corpus mappings; the
// other group is left empty.
type CascadeEntry struct {
	// ID is the identifier of the mapping list this entry refers to.
	ID string
	// Direction is the mapping direction, either "atob" or "btoa".
	Direction string

	// FoundryA and LayerA are the A-side foundry/layer of an
	// annotation mapping.
	FoundryA string
	LayerA   string
	// FoundryB and LayerB are the B-side foundry/layer of an
	// annotation mapping.
	FoundryB string
	LayerB   string

	// FieldA and FieldB are the A-side and B-side field names of a
	// corpus mapping.
	FieldA string
	FieldB string
}
| 23 | |
| 24 | // ParseCfgParam parses the compact cfg URL parameter into a slice of |
| 25 | // CascadeEntry structs. Empty override fields are merged with YAML |
| 26 | // defaults from the matching MappingList. |
| 27 | // |
| 28 | // Format: entry (";" entry)* |
| 29 | // |
| 30 | // entry = id ":" dir [ ":" foundryA ":" layerA ":" foundryB ":" layerB ] |
| Akron | 4131026 | 2026-02-23 18:58:53 +0100 | [diff] [blame^] | 31 | // | id ":" dir [ ":" fieldA ":" fieldB ] |
| Akron | a379f1e | 2026-02-20 08:29:43 +0100 | [diff] [blame] | 32 | // |
| Akron | 4131026 | 2026-02-23 18:58:53 +0100 | [diff] [blame^] | 33 | // Annotation entries have either 2 fields (all foundry/layer use defaults) |
| 34 | // or 6 fields (explicit values, empty means use default). |
| 35 | // Corpus entries have either 2 fields (all field overrides use defaults) |
| 36 | // or 4 fields (explicit values, empty means use default). |
| Akron | a379f1e | 2026-02-20 08:29:43 +0100 | [diff] [blame] | 37 | func ParseCfgParam(raw string, lists []config.MappingList) ([]CascadeEntry, error) { |
| 38 | if raw == "" { |
| 39 | return nil, nil |
| 40 | } |
| 41 | |
| 42 | listsByID := make(map[string]*config.MappingList, len(lists)) |
| 43 | for i := range lists { |
| 44 | listsByID[lists[i].ID] = &lists[i] |
| 45 | } |
| 46 | |
| 47 | parts := strings.Split(raw, ";") |
| 48 | result := make([]CascadeEntry, 0, len(parts)) |
| 49 | |
| 50 | for _, part := range parts { |
| 51 | fields := strings.Split(part, ":") |
| 52 | n := len(fields) |
| Akron | 4131026 | 2026-02-23 18:58:53 +0100 | [diff] [blame^] | 53 | if n < 2 { |
| 54 | return nil, fmt.Errorf("invalid entry %q: expected at least 2 colon-separated fields, got %d", part, n) |
| Akron | a379f1e | 2026-02-20 08:29:43 +0100 | [diff] [blame] | 55 | } |
| 56 | |
| 57 | id := fields[0] |
| 58 | dir := fields[1] |
| 59 | |
| 60 | if dir != "atob" && dir != "btoa" { |
| 61 | return nil, fmt.Errorf("invalid direction %q in entry %q", dir, part) |
| 62 | } |
| 63 | |
| 64 | list, ok := listsByID[id] |
| 65 | if !ok { |
| 66 | return nil, fmt.Errorf("unknown mapping ID %q", id) |
| 67 | } |
| Akron | 4131026 | 2026-02-23 18:58:53 +0100 | [diff] [blame^] | 68 | isCorpus := list.IsCorpus() |
| 69 | |
| 70 | if isCorpus { |
| 71 | if n != 2 && n != 4 { |
| 72 | return nil, fmt.Errorf("invalid corpus entry %q: expected 2 or 4 colon-separated fields, got %d", part, n) |
| 73 | } |
| 74 | } else if n != 2 && n != 6 { |
| 75 | return nil, fmt.Errorf("invalid annotation entry %q: expected 2 or 6 colon-separated fields, got %d", part, n) |
| 76 | } |
| Akron | a379f1e | 2026-02-20 08:29:43 +0100 | [diff] [blame] | 77 | |
| 78 | ce := CascadeEntry{ |
| 79 | ID: id, |
| 80 | Direction: dir, |
| 81 | } |
| 82 | |
| Akron | 4131026 | 2026-02-23 18:58:53 +0100 | [diff] [blame^] | 83 | if isCorpus { |
| 84 | if n == 4 { |
| 85 | ce.FieldA = fields[2] |
| 86 | ce.FieldB = fields[3] |
| 87 | } |
| 88 | if ce.FieldA == "" { |
| 89 | ce.FieldA = list.FieldA |
| 90 | } |
| 91 | if ce.FieldB == "" { |
| 92 | ce.FieldB = list.FieldB |
| 93 | } |
| 94 | } else { |
| 95 | if n == 6 { |
| 96 | ce.FoundryA = fields[2] |
| 97 | ce.LayerA = fields[3] |
| 98 | ce.FoundryB = fields[4] |
| 99 | ce.LayerB = fields[5] |
| 100 | } |
| Akron | a379f1e | 2026-02-20 08:29:43 +0100 | [diff] [blame] | 101 | |
| Akron | 4131026 | 2026-02-23 18:58:53 +0100 | [diff] [blame^] | 102 | if ce.FoundryA == "" { |
| 103 | ce.FoundryA = list.FoundryA |
| 104 | } |
| 105 | if ce.LayerA == "" { |
| 106 | ce.LayerA = list.LayerA |
| 107 | } |
| 108 | if ce.FoundryB == "" { |
| 109 | ce.FoundryB = list.FoundryB |
| 110 | } |
| 111 | if ce.LayerB == "" { |
| 112 | ce.LayerB = list.LayerB |
| 113 | } |
| Akron | a379f1e | 2026-02-20 08:29:43 +0100 | [diff] [blame] | 114 | } |
| 115 | |
| 116 | result = append(result, ce) |
| 117 | } |
| 118 | |
| 119 | return result, nil |
| 120 | } |
| 121 | |
| 122 | // BuildCfgParam serialises a slice of CascadeEntry back to the compact |
| Akron | 4131026 | 2026-02-23 18:58:53 +0100 | [diff] [blame^] | 123 | // cfg string format. Entries with all override fields empty use the |
| 124 | // short 2-field format (id:dir). Entries with any non-empty |
| 125 | // foundry/layer field use the full 6-field annotation format. |
| 126 | // Entries with any non-empty fieldA/fieldB use the full 4-field |
| 127 | // corpus format. |
| Akron | a379f1e | 2026-02-20 08:29:43 +0100 | [diff] [blame] | 128 | func BuildCfgParam(entries []CascadeEntry) string { |
| 129 | if len(entries) == 0 { |
| 130 | return "" |
| 131 | } |
| 132 | |
| 133 | parts := make([]string, len(entries)) |
| 134 | for i, e := range entries { |
| Akron | 4131026 | 2026-02-23 18:58:53 +0100 | [diff] [blame^] | 135 | if e.FoundryA == "" && e.LayerA == "" && e.FoundryB == "" && e.LayerB == "" && e.FieldA == "" && e.FieldB == "" { |
| Akron | a379f1e | 2026-02-20 08:29:43 +0100 | [diff] [blame] | 136 | parts[i] = e.ID + ":" + e.Direction |
| Akron | 4131026 | 2026-02-23 18:58:53 +0100 | [diff] [blame^] | 137 | } else if e.FoundryA == "" && e.LayerA == "" && e.FoundryB == "" && e.LayerB == "" { |
| 138 | parts[i] = e.ID + ":" + e.Direction + ":" + e.FieldA + ":" + e.FieldB |
| Akron | a379f1e | 2026-02-20 08:29:43 +0100 | [diff] [blame] | 139 | } else { |
| 140 | parts[i] = e.ID + ":" + e.Direction + ":" + e.FoundryA + ":" + e.LayerA + ":" + e.FoundryB + ":" + e.LayerB |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | return strings.Join(parts, ";") |
| 145 | } |