Support corpus mappings
Change-Id: I25e987b0ca668a1cf733424b22edb4f0fca37bf2
diff --git a/config/config.go b/config/config.go
index d245461..cdc1e0f 100644
--- a/config/config.go
+++ b/config/config.go
@@ -24,6 +24,7 @@
// MappingList represents a list of mapping rules with metadata
type MappingList struct {
ID string `yaml:"id"`
+ Type string `yaml:"type,omitempty"` // "annotation" (default) or "corpus"
Description string `yaml:"desc,omitempty"`
FoundryA string `yaml:"foundryA,omitempty"`
LayerA string `yaml:"layerA,omitempty"`
@@ -32,6 +33,28 @@
Mappings []MappingRule `yaml:"mappings"`
}
+// IsCorpus reports whether the list's Type is exactly "corpus"; an empty Type (or any other value) yields false.
+func (list *MappingList) IsCorpus() bool {
+ return list.Type == "corpus"
+}
+
+// ParseCorpusMappings parses every rule in Mappings as a corpus rule, in order, without checking Type; on the first empty or unparsable rule it returns nil and an error naming the rule index and list ID.
+func (list *MappingList) ParseCorpusMappings() ([]*parser.CorpusMappingResult, error) {
+ corpusParser := parser.NewCorpusParser() // one parser instance is reused for all rules
+ results := make([]*parser.CorpusMappingResult, len(list.Mappings)) // one slot per rule, filled by index
+ for i, rule := range list.Mappings {
+ if rule == "" { // reject empty rules here, before they reach the parser
+ return nil, fmt.Errorf("empty corpus mapping rule at index %d in list '%s'", i, list.ID)
+ }
+ result, err := corpusParser.ParseMapping(string(rule))
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse corpus mapping rule %d in list '%s': %w", i, list.ID, err) // %w keeps the parser error unwrappable
+ }
+ results[i] = result
+ }
+ return results, nil
+}
+
// MappingConfig represents the root configuration containing multiple mapping lists
type MappingConfig struct {
SDK string `yaml:"sdk,omitempty"`
diff --git a/config/config_test.go b/config/config_test.go
index cc14416..845ebdd 100644
--- a/config/config_test.go
+++ b/config/config_test.go
@@ -868,3 +868,81 @@
assert.Equal(t, defaultServer, config.Server)
assert.Equal(t, defaultServiceURL, config.ServiceURL)
}
+
+func TestCorpusMappingListType(t *testing.T) {
+ content := `
+lists:
+- id: corpus-class-mapping
+ type: corpus
+ desc: Maps textClass values to genre field
+ mappings:
+ - "textClass=novel <> genre=fiction"
+ - "textClass=science <> genre=nonfiction"
+- id: annotation-mapper
+ mappings:
+ - "[A] <> [B]"
+`
+ tmpfile, err := os.CreateTemp("", "config-corpus-*.yaml")
+ require.NoError(t, err)
+ defer os.Remove(tmpfile.Name()) // best-effort cleanup; the removal error is ignored
+
+ _, err = tmpfile.WriteString(content)
+ require.NoError(t, err)
+ err = tmpfile.Close()
+ require.NoError(t, err)
+
+ config, err := LoadFromSources(tmpfile.Name(), nil)
+ require.NoError(t, err)
+ require.Len(t, config.Lists, 2)
+ // An explicit "type: corpus" is loaded into Type and recognized by IsCorpus.
+ assert.Equal(t, "corpus", config.Lists[0].Type)
+ assert.True(t, config.Lists[0].IsCorpus())
+ // A list without a type key keeps Type empty and is not treated as corpus.
+ assert.Equal(t, "", config.Lists[1].Type)
+ assert.False(t, config.Lists[1].IsCorpus())
+}
+
+func TestParseCorpusMappings(t *testing.T) {
+ list := &MappingList{
+ ID: "test-corpus",
+ Type: "corpus",
+ Mappings: []MappingRule{
+ "textClass=novel <> genre=fiction",
+ "(textClass=novel & pubDate=2020:geq#date) <> genre=recentfiction",
+ },
+ }
+
+ results, err := list.ParseCorpusMappings()
+ require.NoError(t, err)
+ require.Len(t, results, 2) // one result per input rule
+
+ // Simple field=value rule: both Upper and Lower must be set.
+ require.NotNil(t, results[0].Upper)
+ require.NotNil(t, results[0].Lower)
+
+ // Parenthesized group rule joined with "&": both Upper and Lower must be set.
+ require.NotNil(t, results[1].Upper)
+ require.NotNil(t, results[1].Lower)
+}
+
+func TestParseCorpusMappingsErrors(t *testing.T) {
+ list := &MappingList{
+ ID: "test-corpus",
+ Type: "corpus",
+ Mappings: []MappingRule{""}, // a single empty rule
+ }
+
+ _, err := list.ParseCorpusMappings()
+ require.Error(t, err) // require, not assert: the next line dereferences err and would panic on nil
+ assert.Contains(t, err.Error(), "empty corpus mapping rule")
+ // Second case: a non-empty rule that the corpus parser is expected to reject.
+ list2 := &MappingList{
+ ID: "test-corpus",
+ Type: "corpus",
+ Mappings: []MappingRule{"invalid rule without separator"},
+ }
+
+ _, err = list2.ParseCorpusMappings()
+ require.Error(t, err) // require, not assert: the next line dereferences err and would panic on nil
+ assert.Contains(t, err.Error(), "failed to parse corpus mapping rule")
+}