Support loading multiple mapping files
Change-Id: I3c6caaa3c4c3434dfacfb842f0407250b5e980f0
diff --git a/README.md b/README.md
index c28f50b..c44e080 100644
--- a/README.md
+++ b/README.md
@@ -15,17 +15,29 @@
## Usage
```bash
-termmapper -c config.yaml -p 8080 -l info
+termmapper -c config.yaml -m extra-mapper1.yaml -m extra-mapper2.yaml
```
-Command line options:
-- `--config` or `-c`: YAML configuration file containing mapping directives (required)
+
+### Command Line Options
+
+- `--config` or `-c`: YAML configuration file containing mapping directives and global settings (optional)
+- `--mappings` or `-m`: Individual YAML mapping files to load (can be used multiple times, optional)
- `--port` or `-p`: Port to listen on (default: 8080)
- `--log-level` or `-l`: Log level (debug, info, warn, error) (default: info)
- `--help` or `-h`: Show help message
-## Configuration File Format
+**Note**: At least one mapping source (`-c` or `-m`) must be provided.
-Mapping rules are defined in a YAML configuration file.
+## Configuration
+
+KoralPipe-TermMapper supports loading configuration from multiple sources:
+
+1. **Main Configuration File** (`-c`): Contains global settings (SDK, server endpoints) and optional mapping lists
+2. **Individual Mapping Files** (`-m`): Each file contains a single mapping list; the flag can be specified multiple times
+
+The main configuration provides global settings, and all mapping lists from both sources are combined. Duplicate mapping IDs across all sources will result in an error.
+
+The main configuration file can contain global settings and mapping lists (used with the `-c` flag):
```yaml
# Optional: Custom SDK endpoint for Kalamar plugin integration
@@ -34,7 +46,7 @@
# Optional: Custom server endpoint for Kalamar plugin integration
server: "https://custom.example.com/"
-# Mapping lists (same format as standard format)
+# Optional: Mapping lists (same format as individual mapping files)
lists:
- id: mapping-list-id
foundryA: source-foundry
@@ -46,12 +58,25 @@
- "[pattern2] <> [replacement2]"
```
-The `sdk` and `server` fields are optional and override the default endpoints used for Kalamar plugin integration:
+Mapping files contain a single mapping list each (used with the `-m` flag):
+
+```yaml
+id: mapping-list-id
+foundryA: source-foundry
+layerA: source-layer
+foundryB: target-foundry
+layerB: target-layer
+mappings:
+ - "[pattern1] <> [replacement1]"
+ - "[pattern2] <> [replacement2]"
+```
+
+The `sdk` and `server` fields in the main configuration file are optional and override the default endpoints used for Kalamar plugin integration:
- **`sdk`**: Custom SDK JavaScript file URL (default: `https://korap.ids-mannheim.de/js/korap-plugin-latest.js`)
- **`server`**: Custom server endpoint URL (default: `https://korap.ids-mannheim.de/`)
-These values are applied during configuration parsing and affect the HTML plugin page served at the root endpoint (`/`).
+These values are applied during configuration parsing and affect the HTML plugin page served at the root endpoint (`/`). When using only individual mapping files (`-m` flags), default values are used.
### Mapping Rules
@@ -143,10 +168,10 @@
- [x] Support for rewrites
- [x] Web service
- [x] JSON script for Kalamar integration
+- [x] Integration of multiple mapping files
- [ ] Support for negation
- [ ] Support multiple mappings (by having a check list)
- [ ] Response rewriting
-- [ ] Integration of mapping files
## COPYRIGHT AND LICENSE
diff --git a/cmd/termmapper/main.go b/cmd/termmapper/main.go
index ec8b93d..2a2f7e3 100644
--- a/cmd/termmapper/main.go
+++ b/cmd/termmapper/main.go
@@ -21,9 +21,10 @@
)
type appConfig struct {
- Port int `kong:"short='p',default='8080',help='Port to listen on'"`
- Config string `kong:"short='c',required,help='YAML configuration file containing mapping directives'"`
- LogLevel string `kong:"short='l',default='info',help='Log level (debug, info, warn, error)'"`
+ Port int `kong:"short='p',default='8080',help='Port to listen on'"`
+ Config string `kong:"short='c',help='YAML configuration file containing mapping directives and global settings'"`
+ Mappings []string `kong:"short='m',help='Individual YAML mapping files to load'"`
+ LogLevel string `kong:"short='l',default='info',help='Log level (debug, info, warn, error)'"`
}
// TemplateData holds data for the Kalamar plugin template
@@ -73,11 +74,16 @@
// Parse command line flags
cfg := parseConfig()
+ // Validate command line arguments
+ if cfg.Config == "" && len(cfg.Mappings) == 0 {
+ log.Fatal().Msg("At least one configuration source must be provided: use -c for main config file or -m for mapping files")
+ }
+
// Set up logging
setupLogger(cfg.LogLevel)
- // Load configuration file
- yamlConfig, err := config.LoadConfig(cfg.Config)
+ // Load configuration from multiple sources
+ yamlConfig, err := config.LoadFromSources(cfg.Config, cfg.Mappings)
if err != nil {
log.Fatal().Err(err).Msg("Failed to load configuration")
}
@@ -293,8 +299,8 @@
<dt><tt><strong>GET</strong> /:map</tt></dt>
<dd><small>Kalamar integration</small></dd>
-
- <dt><tt><strong>POST</strong> /:map/query</tt></dt>
+
+ <dt><tt><strong>POST</strong> /:map/query</tt></dt>
<dd><small>Transform JSON query objects using term mapping rules</small></dd>
</dl>
diff --git a/cmd/termmapper/main_test.go b/cmd/termmapper/main_test.go
index 292d3d9..c045e0b 100644
--- a/cmd/termmapper/main_test.go
+++ b/cmd/termmapper/main_test.go
@@ -6,6 +6,7 @@
"io"
"net/http"
"net/http/httptest"
+ "os"
"testing"
tmconfig "github.com/KorAP/KoralPipe-TermMapper/config"
@@ -385,3 +386,192 @@
})
}
}
+
+func TestMultipleMappingFiles(t *testing.T) {
+ // Create two temporary single-list mapping files, in the format used with the -m flag
+ mappingFile1Content := `
+id: test-mapper-1
+foundryA: opennlp
+layerA: p
+foundryB: upos
+layerB: p
+mappings:
+ - "[PIDAT] <> [DET & AdjType=Pdt]"
+ - "[PAV] <> [ADV & PronType=Dem]"
+`
+ mappingFile1, err := os.CreateTemp("", "mapping1-*.yaml")
+ require.NoError(t, err)
+ defer os.Remove(mappingFile1.Name())
+
+ _, err = mappingFile1.WriteString(mappingFile1Content)
+ require.NoError(t, err)
+ err = mappingFile1.Close()
+ require.NoError(t, err)
+
+ mappingFile2Content := `
+id: test-mapper-2
+foundryA: stts
+layerA: p
+foundryB: upos
+layerB: p
+mappings:
+ - "[DET] <> [PRON]"
+ - "[ADJ] <> [NOUN]"
+`
+ mappingFile2, err := os.CreateTemp("", "mapping2-*.yaml")
+ require.NoError(t, err)
+ defer os.Remove(mappingFile2.Name())
+
+ _, err = mappingFile2.WriteString(mappingFile2Content)
+ require.NoError(t, err)
+ err = mappingFile2.Close()
+ require.NoError(t, err)
+
+ // Load both mapping files without a main config file (empty configFile argument)
+ config, err := tmconfig.LoadFromSources("", []string{mappingFile1.Name(), mappingFile2.Name()})
+ require.NoError(t, err)
+
+ // Build the mapper from the merged mapping lists
+ m, err := mapper.NewMapper(config.Lists)
+ require.NoError(t, err)
+
+ // Register the routes on a fresh fiber app so requests can be sent via app.Test
+ app := fiber.New()
+ setupRoutes(app, m, config)
+
+ // One query per mapping list: each list must be reachable under its own ID
+ testCases := []struct {
+ name string
+ mapID string
+ input string
+ expectGroup bool
+ expectedKey string
+ }{
+ {
+ name: "test-mapper-1 with complex mapping",
+ mapID: "test-mapper-1",
+ input: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "opennlp",
+ "key": "PIDAT",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`,
+ expectGroup: true, // This mapping creates a termGroup because of "&"
+ expectedKey: "DET", // The first operand should be DET
+ },
+ {
+ name: "test-mapper-2 with simple mapping",
+ mapID: "test-mapper-2",
+ input: `{
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "foundry": "stts",
+ "key": "DET",
+ "layer": "p",
+ "match": "match:eq"
+ }
+ }`,
+ expectGroup: false, // This mapping creates a simple term
+ expectedKey: "PRON",
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ req := httptest.NewRequest(http.MethodPost, "/"+tc.mapID+"/query?dir=atob", bytes.NewBufferString(tc.input))
+ req.Header.Set("Content-Type", "application/json")
+
+ resp, err := app.Test(req)
+ require.NoError(t, err)
+ defer resp.Body.Close()
+
+ assert.Equal(t, http.StatusOK, resp.StatusCode)
+
+ var result map[string]interface{}
+ err = json.NewDecoder(resp.Body).Decode(&result)
+ require.NoError(t, err)
+
+ // Inspect the "wrap" node of the response to see whether the mapping was applied
+ wrap := result["wrap"].(map[string]interface{})
+ if tc.expectGroup {
+ // Term group expected: compare the key of the first operand
+ assert.Equal(t, "koral:termGroup", wrap["@type"])
+ operands := wrap["operands"].([]interface{})
+ require.Greater(t, len(operands), 0)
+ firstOperand := operands[0].(map[string]interface{})
+ assert.Equal(t, tc.expectedKey, firstOperand["key"])
+ } else {
+ // Plain term expected: compare the key on the wrap node itself
+ assert.Equal(t, "koral:term", wrap["@type"])
+ assert.Equal(t, tc.expectedKey, wrap["key"])
+ }
+ })
+ }
+}
+
+func TestCombinedConfigAndMappingFiles(t *testing.T) {
+ // Main config file with custom sdk/server values and one mapping list ("main-mapper")
+ mainConfigContent := `
+sdk: "https://custom.example.com/sdk.js"
+server: "https://custom.example.com/"
+lists:
+- id: main-mapper
+ foundryA: opennlp
+ layerA: p
+ mappings:
+ - "[A] <> [B]"
+`
+ mainConfigFile, err := os.CreateTemp("", "main-config-*.yaml")
+ require.NoError(t, err)
+ defer os.Remove(mainConfigFile.Name())
+
+ _, err = mainConfigFile.WriteString(mainConfigContent)
+ require.NoError(t, err)
+ err = mainConfigFile.Close()
+ require.NoError(t, err)
+
+ // Single-list mapping file ("additional-mapper"), in the format used with the -m flag
+ mappingFileContent := `
+id: additional-mapper
+foundryA: stts
+layerA: p
+mappings:
+ - "[C] <> [D]"
+`
+ mappingFile, err := os.CreateTemp("", "mapping-*.yaml")
+ require.NoError(t, err)
+ defer os.Remove(mappingFile.Name())
+
+ _, err = mappingFile.WriteString(mappingFileContent)
+ require.NoError(t, err)
+ err = mappingFile.Close()
+ require.NoError(t, err)
+
+ // Merge the main config file with the additional mapping file
+ config, err := tmconfig.LoadFromSources(mainConfigFile.Name(), []string{mappingFile.Name()})
+ require.NoError(t, err)
+
+ // Both lists must be present after merging (their order is not checked)
+ require.Len(t, config.Lists, 2)
+
+ ids := make([]string, len(config.Lists))
+ for i, list := range config.Lists {
+ ids[i] = list.ID
+ }
+ assert.Contains(t, ids, "main-mapper")
+ assert.Contains(t, ids, "additional-mapper")
+
+ // The sdk/server values of the main config must survive the merge
+ assert.Equal(t, "https://custom.example.com/sdk.js", config.SDK)
+ assert.Equal(t, "https://custom.example.com/", config.Server)
+
+ // The merged lists must be accepted by mapper.NewMapper
+ m, err := mapper.NewMapper(config.Lists)
+ require.NoError(t, err)
+ require.NotNil(t, m)
+}
diff --git a/config/config.go b/config/config.go
index 128c063..46d6639 100644
--- a/config/config.go
+++ b/config/config.go
@@ -34,44 +34,107 @@
Lists []MappingList `yaml:"lists,omitempty"`
}
-// LoadConfig loads a YAML configuration file and returns a Config object
-func LoadConfig(filename string) (*MappingConfig, error) {
- data, err := os.ReadFile(filename)
- if err != nil {
- return nil, fmt.Errorf("failed to read config file: %w", err)
- }
+// LoadFromSources loads configuration from multiple sources and merges them:
+// - configFile: optional main file with sdk/server settings and optional lists (or a bare legacy list)
+// - mappingFiles: optional files that each hold exactly one mapping list
+// It fails on unreadable, empty or unparsable files, duplicate list IDs, no lists at all, or failed validation
+func LoadFromSources(configFile string, mappingFiles []string) (*MappingConfig, error) {
+ var allLists []MappingList
+ var globalConfig MappingConfig
- // Check for empty file
- if len(data) == 0 {
- return nil, fmt.Errorf("EOF: config file is empty")
- }
+ // Track seen IDs across all sources to detect duplicates
+ seenIDs := make(map[string]bool)
- // Try to unmarshal as new format first (object with optional sdk/server and lists)
- var config MappingConfig
- if err := yaml.Unmarshal(data, &config); err == nil && len(config.Lists) > 0 {
- // Successfully parsed as new format with lists field
- if err := validateMappingLists(config.Lists); err != nil {
- return nil, err
+ // Load main configuration file if provided
+ if configFile != "" {
+ data, err := os.ReadFile(configFile)
+ if err != nil {
+ return nil, fmt.Errorf("failed to read config file '%s': %w", configFile, err)
+ }
- // Apply defaults if not specified
- applyDefaults(&config)
- return &config, nil
+
+ if len(data) == 0 {
+ return nil, fmt.Errorf("EOF: config file '%s' is empty", configFile)
+ }
+
+ // Try the object format first; "lists" is optional here because lists may come from mapping files instead
+ if err := yaml.Unmarshal(data, &globalConfig); err == nil {
+ // Parsed as object format: register whatever lists it declares (possibly none)
+ for _, list := range globalConfig.Lists {
+ if seenIDs[list.ID] {
+ return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
+ }
+ seenIDs[list.ID] = true
+ }
+ allLists = append(allLists, globalConfig.Lists...)
+ } else {
+ // Fall back to old format (direct list)
+ var lists []MappingList
+ if err := yaml.Unmarshal(data, &lists); err != nil {
+ return nil, fmt.Errorf("failed to parse YAML config file '%s': %w", configFile, err)
+ }
+
+ for _, list := range lists {
+ if seenIDs[list.ID] {
+ return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
+ }
+ seenIDs[list.ID] = true
+ }
+ allLists = append(allLists, lists...)
+ // Clear the lists from globalConfig since we got them from the old format
+ globalConfig.Lists = nil
+ }
+ }
- // Fall back to old format (direct list)
- var lists []MappingList
- if err := yaml.Unmarshal(data, &lists); err != nil {
- return nil, fmt.Errorf("failed to parse YAML: %w", err)
+ // Load individual mapping files (one mapping list per file)
+ for _, file := range mappingFiles {
+ data, err := os.ReadFile(file)
+ if err != nil {
+ return nil, fmt.Errorf("failed to read mapping file '%s': %w", file, err)
+ }
+
+ if len(data) == 0 {
+ return nil, fmt.Errorf("EOF: mapping file '%s' is empty", file)
+ }
+
+ var list MappingList
+ if err := yaml.Unmarshal(data, &list); err != nil {
+ return nil, fmt.Errorf("failed to parse YAML mapping file '%s': %w", file, err)
+ }
+
+ if seenIDs[list.ID] {
+ return nil, fmt.Errorf("duplicate mapping list ID found: %s", list.ID)
+ }
+ seenIDs[list.ID] = true
+ allLists = append(allLists, list)
 }
- if err := validateMappingLists(lists); err != nil {
+ // Reject the call when no source contributed a mapping list
+ if len(allLists) == 0 {
+ return nil, fmt.Errorf("no mapping lists found: provide either a config file (-c) with lists or mapping files (-m)")
+ }
+
+ // Validate all mapping lists
+ if err := validateMappingLists(allLists); err != nil {
return nil, err
}
- config = MappingConfig{Lists: lists}
+ // Combine the global sdk/server settings with the lists collected from all sources
+ result := &MappingConfig{
+ SDK: globalConfig.SDK,
+ Server: globalConfig.Server,
+ Lists: allLists,
+ }
+
// Apply defaults if not specified
- applyDefaults(&config)
- return &config, nil
+ applyDefaults(result)
+
+ return result, nil
+}
+
+// LoadConfig loads a single YAML configuration file by calling LoadFromSources(filename, nil).
+// Deprecated: Use LoadFromSources for new code.
+func LoadConfig(filename string) (*MappingConfig, error) {
+ return LoadFromSources(filename, nil)
}
// applyDefaults sets default values for SDK and Server if they are empty
diff --git a/config/config_test.go b/config/config_test.go
index 6519f82..f2678fb 100644
--- a/config/config_test.go
+++ b/config/config_test.go
@@ -691,3 +691,181 @@
})
}
}
+
+func TestLoadFromSources(t *testing.T) {
+ // Main config file: custom sdk/server values plus one mapping list ("main-mapper")
+ mainConfigContent := `
+sdk: "https://custom.example.com/sdk.js"
+server: "https://custom.example.com/"
+lists:
+- id: main-mapper
+ mappings:
+ - "[A] <> [B]"
+`
+ mainConfigFile, err := os.CreateTemp("", "main-config-*.yaml")
+ require.NoError(t, err)
+ defer os.Remove(mainConfigFile.Name())
+
+ _, err = mainConfigFile.WriteString(mainConfigContent)
+ require.NoError(t, err)
+ err = mainConfigFile.Close()
+ require.NoError(t, err)
+
+ // Two single-list mapping files ("mapper-1" and "mapper-2")
+ mappingFile1Content := `
+id: mapper-1
+foundryA: opennlp
+layerA: p
+mappings:
+ - "[C] <> [D]"
+`
+ mappingFile1, err := os.CreateTemp("", "mapping1-*.yaml")
+ require.NoError(t, err)
+ defer os.Remove(mappingFile1.Name())
+
+ _, err = mappingFile1.WriteString(mappingFile1Content)
+ require.NoError(t, err)
+ err = mappingFile1.Close()
+ require.NoError(t, err)
+
+ mappingFile2Content := `
+id: mapper-2
+foundryB: upos
+layerB: p
+mappings:
+ - "[E] <> [F]"
+`
+ mappingFile2, err := os.CreateTemp("", "mapping2-*.yaml")
+ require.NoError(t, err)
+ defer os.Remove(mappingFile2.Name())
+
+ _, err = mappingFile2.WriteString(mappingFile2Content)
+ require.NoError(t, err)
+ err = mappingFile2.Close()
+ require.NoError(t, err)
+
+ tests := []struct {
+ name string
+ configFile string
+ mappingFiles []string
+ wantErr bool
+ expectedIDs []string
+ }{
+ {
+ name: "Main config only",
+ configFile: mainConfigFile.Name(),
+ mappingFiles: []string{},
+ wantErr: false,
+ expectedIDs: []string{"main-mapper"},
+ },
+ {
+ name: "Mapping files only",
+ configFile: "",
+ mappingFiles: []string{mappingFile1.Name(), mappingFile2.Name()},
+ wantErr: false,
+ expectedIDs: []string{"mapper-1", "mapper-2"},
+ },
+ {
+ name: "Main config and mapping files",
+ configFile: mainConfigFile.Name(),
+ mappingFiles: []string{mappingFile1.Name(), mappingFile2.Name()},
+ wantErr: false,
+ expectedIDs: []string{"main-mapper", "mapper-1", "mapper-2"},
+ },
+ {
+ name: "No configuration sources",
+ configFile: "",
+ mappingFiles: []string{},
+ wantErr: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ config, err := LoadFromSources(tt.configFile, tt.mappingFiles)
+ if tt.wantErr {
+ require.Error(t, err)
+ return
+ }
+
+ require.NoError(t, err)
+ require.NotNil(t, config)
+
+ // The loaded lists must match the expected IDs in number and content (order is not checked)
+ require.Len(t, config.Lists, len(tt.expectedIDs))
+ actualIDs := make([]string, len(config.Lists))
+ for i, list := range config.Lists {
+ actualIDs[i] = list.ID
+ }
+ for _, expectedID := range tt.expectedIDs {
+ assert.Contains(t, actualIDs, expectedID)
+ }
+
+ // Check that SDK and Server are set (either from config or defaults)
+ assert.NotEmpty(t, config.SDK)
+ assert.NotEmpty(t, config.Server)
+ })
+ }
+}
+
+func TestLoadFromSourcesWithDefaults(t *testing.T) {
+ // Without a main config file, SDK and Server must fall back to defaultSDK and defaultServer
+ mappingFileContent := `
+id: test-mapper
+mappings:
+ - "[A] <> [B]"
+`
+ mappingFile, err := os.CreateTemp("", "mapping-*.yaml")
+ require.NoError(t, err)
+ defer os.Remove(mappingFile.Name())
+
+ _, err = mappingFile.WriteString(mappingFileContent)
+ require.NoError(t, err)
+ err = mappingFile.Close()
+ require.NoError(t, err)
+
+ config, err := LoadFromSources("", []string{mappingFile.Name()})
+ require.NoError(t, err)
+
+ // Defaults must be applied and the single mapping list must be loaded
+ assert.Equal(t, defaultSDK, config.SDK)
+ assert.Equal(t, defaultServer, config.Server)
+ require.Len(t, config.Lists, 1)
+ assert.Equal(t, "test-mapper", config.Lists[0].ID)
+}
+
+func TestLoadFromSourcesDuplicateIDs(t *testing.T) {
+ // The main config and the mapping file both declare the list ID "duplicate-id"; loading must fail
+ configContent := `
+lists:
+- id: duplicate-id
+ mappings:
+ - "[A] <> [B]"
+`
+ configFile, err := os.CreateTemp("", "config-*.yaml")
+ require.NoError(t, err)
+ defer os.Remove(configFile.Name())
+
+ _, err = configFile.WriteString(configContent)
+ require.NoError(t, err)
+ err = configFile.Close()
+ require.NoError(t, err)
+
+ mappingContent := `
+id: duplicate-id
+mappings:
+ - "[C] <> [D]"
+`
+ mappingFile, err := os.CreateTemp("", "mapping-*.yaml")
+ require.NoError(t, err)
+ defer os.Remove(mappingFile.Name())
+
+ _, err = mappingFile.WriteString(mappingContent)
+ require.NoError(t, err)
+ err = mappingFile.Close()
+ require.NoError(t, err)
+
+ _, err = LoadFromSources(configFile.Name(), []string{mappingFile.Name()})
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), "duplicate mapping list ID found: duplicate-id")
+}