Sanitize file paths before config loading
Change-Id: Id27ef65ec0575fd4bc9ddfc4927f4f3e685eced2
diff --git a/README.md b/README.md
index 0b71410..780f3bd 100644
--- a/README.md
+++ b/README.md
@@ -65,6 +65,11 @@
# Optional: Maximum requests per minute per IP for rate limiting (default: 100)
rateLimit: 100
+# Optional: Base path for file loading confinement (default: current working directory).
+# All config and mapping file paths must resolve within this directory or /tmp.
+# Set to "/" to allow loading from anywhere on the filesystem.
+basePath: "/opt/koralmapper"
+
# Optional: Mapping lists (same format as individual mapping files)
lists:
- id: mapping-list-id
@@ -94,7 +99,7 @@
Command line arguments take precedence over configuration file values:
-The `sdk`, `stylesheet`, `server`, `port`, and `loglevel` fields in the main configuration file are optional and override the following default values:
+The `sdk`, `stylesheet`, `server`, `port`, `loglevel`, and `basePath` fields in the main configuration file are optional and override the following default values:
- **`sdk`**: Custom SDK JavaScript file URL (default: `https://korap.ids-mannheim.de/js/korap-plugin-latest.js`)
- **`stylesheet`**: Kalamar stylesheet URL for the config page (default: `https://korap.ids-mannheim.de/css/kalamar-plugin-latest.css`)
@@ -103,6 +108,7 @@
- **`loglevel`**: Log level (default: `warn`)
- **`serviceURL`**: Service URL of the KoralMapper (default: `https://korap.ids-mannheim.de/plugin/koralmapper`)
- **`rateLimit`**: Maximum number of requests per minute per IP address (default: `100`). When the limit is exceeded, the server responds with HTTP 429 (Too Many Requests).
+- **`basePath`**: Directory tree for file loading confinement (default: current working directory). Configuration and mapping files must resolve within this path or the system temp directory. Set to `"/"` to disable confinement. This prevents path traversal attacks (CWE-22).
These values are applied during configuration parsing. When using only individual mapping files (`-m` flags), default values are used unless overridden by command line arguments.
@@ -119,6 +125,7 @@
- `KORAL_MAPPER_LOG_LEVEL`: Overrides `loglevel`
- `KORAL_MAPPER_PORT`: Overrides `port` (integer)
- `KORAL_MAPPER_RATE_LIMIT`: Overrides `rateLimit` (integer, requests per minute per IP)
+- `KORAL_MAPPER_BASE_PATH`: Overrides `basePath` (directory path for file loading confinement)
Environment variable values take precedence over values from the configuration file.
diff --git a/cmd/koralmapper/main.go b/cmd/koralmapper/main.go
index 9fc6f5d..eac2984 100644
--- a/cmd/koralmapper/main.go
+++ b/cmd/koralmapper/main.go
@@ -222,6 +222,16 @@
}
func main() {
+ // Confine config file loading to the current working directory tree
+ // (path traversal prevention). Can be overridden via the "basePath"
+ // YAML field or the KORAL_MAPPER_BASE_PATH environment variable.
+ // In Docker (WORKDIR /), the default "/" naturally allows all paths.
+ cwd, err := os.Getwd()
+ if err != nil {
+ log.Fatal().Err(err).Msg("Failed to determine working directory")
+ }
+ config.AllowedBasePath = cwd
+
// Parse command line flags
cfg := parseConfig()
@@ -242,6 +252,11 @@
log.Fatal().Err(err).Msg("Failed to load configuration")
}
+ // Apply basePath from config/env if specified (overrides CWD default)
+ if yamlConfig.BasePath != "" {
+ config.AllowedBasePath = yamlConfig.BasePath
+ }
+
finalPort := yamlConfig.Port
finalLogLevel := yamlConfig.LogLevel
diff --git a/config/config.go b/config/config.go
index 2ad6c43..7fa600e 100644
--- a/config/config.go
+++ b/config/config.go
@@ -3,7 +3,9 @@
import (
"fmt"
"os"
+ "path/filepath"
"strconv"
+ "strings"
"github.com/KorAP/Koral-Mapper/ast"
"github.com/KorAP/Koral-Mapper/parser"
@@ -95,12 +97,66 @@
Server string `yaml:"server,omitempty"`
ServiceURL string `yaml:"serviceURL,omitempty"`
CookieName string `yaml:"cookieName,omitempty"`
+ BasePath string `yaml:"basePath,omitempty"` // restricts config file loading to this directory tree
Port int `yaml:"port,omitempty"`
LogLevel string `yaml:"loglevel,omitempty"`
RateLimit int `yaml:"rateLimit,omitempty"` // max requests per minute per IP (0 = use default 100)
Lists []MappingList `yaml:"lists,omitempty"`
}
+// AllowedBasePath restricts file loading to a specific directory tree.
+// When set, all file paths must resolve to a location at or below this
+// directory (or under the system temp directory). Defaults to the CWD at
+// application startup; can be overridden via the "basePath" YAML config
+// field or the KORAL_MAPPER_BASE_PATH environment variable. In Docker
+// (WORKDIR /), the default "/" naturally allows all paths.
+var AllowedBasePath string
+
+// isWithinDir checks whether absPath is at or below the given directory.
+// Uses a trailing-separator comparison to avoid prefix false positives
+// (e.g. /home/user must not match /home/username).
+func isWithinDir(absPath, dir string) bool {
+ if dir == "/" {
+ return true
+ }
+ return absPath == dir || strings.HasPrefix(absPath, dir+string(filepath.Separator))
+}
+
+// sanitizeFilePath cleans a file path, resolves it to an absolute path, and
+// (when AllowedBasePath is set) verifies it resides at or below the allowed
+// base directory or the system temp directory. This prevents path
+// traversal attacks by ensuring os.ReadFile never receives
+// unsanitized user input and cannot access files outside the application's
+// working tree.
+func sanitizeFilePath(path string) (string, error) {
+ if path == "" {
+ return "", fmt.Errorf("empty file path")
+ }
+
+ // Clean the path to remove redundant separators and resolve "." and ".."
+ cleaned := filepath.Clean(path)
+
+ // Convert to absolute path so all traversal is resolved against the CWD
+ absPath, err := filepath.Abs(cleaned)
+ if err != nil {
+ return "", fmt.Errorf("failed to resolve absolute path for '%s': %w", path, err)
+ }
+
+ // If a base path is configured, confine access to that tree or temp dir
+ if AllowedBasePath != "" {
+ base := filepath.Clean(AllowedBasePath)
+ tmpDir := filepath.Clean(os.TempDir())
+
+ if !isWithinDir(absPath, base) && !isWithinDir(absPath, tmpDir) {
+ return "", fmt.Errorf(
+ "path traversal detected: '%s' resolves to '%s' which is outside the allowed base '%s'",
+ path, absPath, base)
+ }
+ }
+
+ return absPath, nil
+}
+
// LoadFromSources loads configuration from multiple sources and merges them:
// - A main configuration file (optional) containing global settings and lists
// - Individual mapping files (optional) containing single mapping lists each
@@ -114,7 +170,11 @@
// Load main configuration file if provided
if configFile != "" {
- data, err := os.ReadFile(configFile)
+ safePath, err := sanitizeFilePath(configFile)
+ if err != nil {
+ return nil, err
+ }
+ data, err := os.ReadFile(safePath) // #nosec G304 -- path sanitized above
if err != nil {
return nil, fmt.Errorf("failed to read config file '%s': %w", configFile, err)
}
@@ -154,7 +214,11 @@
// Load individual mapping files
for _, file := range mappingFiles {
- data, err := os.ReadFile(file)
+ safePath, err := sanitizeFilePath(file)
+ if err != nil {
+ return nil, err
+ }
+ data, err := os.ReadFile(safePath) // #nosec G304 -- path sanitized above
if err != nil {
log.Error().Err(err).Str("file", file).Msg("Failed to read mapping file")
continue
@@ -195,6 +259,7 @@
Stylesheet: globalConfig.Stylesheet,
Server: globalConfig.Server,
ServiceURL: globalConfig.ServiceURL,
+ BasePath: globalConfig.BasePath,
Port: globalConfig.Port,
LogLevel: globalConfig.LogLevel,
RateLimit: globalConfig.RateLimit,
@@ -246,6 +311,7 @@
"KORAL_MAPPER_SERVICE_URL": &config.ServiceURL,
"KORAL_MAPPER_COOKIE_NAME": &config.CookieName,
"KORAL_MAPPER_LOG_LEVEL": &config.LogLevel,
+ "KORAL_MAPPER_BASE_PATH": &config.BasePath,
}
for envKey, field := range envMappings {
diff --git a/config/config_test.go b/config/config_test.go
index e2d02f3..67aa821 100644
--- a/config/config_test.go
+++ b/config/config_test.go
@@ -3,6 +3,7 @@
import (
"bytes"
"os"
+ "path/filepath"
"testing"
"github.com/KorAP/Koral-Mapper/ast"
@@ -1061,6 +1062,36 @@
})
}
+func TestBasePathEnvOverride(t *testing.T) {
+ t.Setenv("KORAL_MAPPER_BASE_PATH", "/custom/base/path")
+
+ cfg := &MappingConfig{BasePath: "from-config"}
+ ApplyEnvOverrides(cfg)
+
+ assert.Equal(t, "/custom/base/path", cfg.BasePath)
+}
+
+func TestBasePathFromYAML(t *testing.T) {
+ content := `
+basePath: "/opt/koralmapper"
+lists:
+ - id: test-mapper
+ mappings:
+ - "[A] <> [B]"
+`
+ tmpfile, err := os.CreateTemp("", "config-basepath-*.yaml")
+ require.NoError(t, err)
+ defer os.Remove(tmpfile.Name())
+
+ _, err = tmpfile.WriteString(content)
+ require.NoError(t, err)
+ require.NoError(t, tmpfile.Close())
+
+ cfg, err := LoadFromSources(tmpfile.Name(), nil)
+ require.NoError(t, err)
+ assert.Equal(t, "/opt/koralmapper", cfg.BasePath)
+}
+
func TestEnvOverridesInLoadFromSources(t *testing.T) {
envKeys := []string{
"KORAL_MAPPER_SERVER",
@@ -1229,3 +1260,186 @@
assert.Equal(t, 200, cfg.RateLimit,
"KORAL_MAPPER_RATE_LIMIT env var should override YAML value")
}
+
+func TestSanitizeFilePathRejectsOutsideBase(t *testing.T) {
+ // Set base to a specific directory and verify paths outside are rejected
+ tmpDir, err := os.MkdirTemp("", "koral-base-*")
+ require.NoError(t, err)
+ defer os.RemoveAll(tmpDir)
+
+ origBase := AllowedBasePath
+ defer func() { AllowedBasePath = origBase }()
+ AllowedBasePath = tmpDir
+
+ tests := []struct {
+ name string
+ input string
+ wantErr bool
+ }{
+ {
+ name: "Path within base is accepted",
+ input: filepath.Join(tmpDir, "config.yaml"),
+ wantErr: false,
+ },
+ {
+ name: "Path outside base is rejected",
+ input: "/etc/passwd",
+ wantErr: true,
+ },
+ {
+ name: "Traversal escaping base and tmp is rejected",
+ input: "/etc/passwd",
+ wantErr: true,
+ },
+ {
+ name: "Empty path is rejected",
+ input: "",
+ wantErr: true,
+ },
+ {
+ name: "Subdirectory within base is accepted",
+ input: filepath.Join(tmpDir, "sub", "dir", "file.yaml"),
+ wantErr: false,
+ },
+ {
+ name: "Relative path within base is rejected when CWD differs",
+ input: "config.yaml",
+ wantErr: true, // resolves against CWD, not base
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result, err := sanitizeFilePath(tt.input)
+ if tt.wantErr {
+ require.Error(t, err)
+ return
+ }
+ require.NoError(t, err)
+ assert.True(t, filepath.IsAbs(result),
+ "sanitized path should be absolute, got: %s", result)
+ assert.NotContains(t, result, "..")
+ })
+ }
+}
+
+func TestSanitizeFilePathTraversalToPasswd(t *testing.T) {
+ // Verify /etc/passwd cannot be accessed via traversal
+ cwd, err := os.Getwd()
+ require.NoError(t, err)
+
+ origBase := AllowedBasePath
+ defer func() { AllowedBasePath = origBase }()
+ AllowedBasePath = cwd
+
+ _, err = sanitizeFilePath("../../../etc/passwd")
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), "path traversal detected")
+}
+
+func TestSanitizeFilePathWithDockerRoot(t *testing.T) {
+ // In Docker the WORKDIR is "/" -- all absolute paths should be valid
+ origBase := AllowedBasePath
+ defer func() { AllowedBasePath = origBase }()
+ AllowedBasePath = "/"
+
+ result, err := sanitizeFilePath("/mappings/stts-upos.yaml")
+ require.NoError(t, err)
+ assert.Equal(t, "/mappings/stts-upos.yaml", result)
+
+ // Even deeply nested paths work when base is /
+ result, err = sanitizeFilePath("/etc/ssl/certs/ca-certificates.crt")
+ require.NoError(t, err)
+ assert.Equal(t, "/etc/ssl/certs/ca-certificates.crt", result)
+}
+
+func TestSanitizeFilePathPrefixFalsePositive(t *testing.T) {
+ // Ensure /home/user does not match /home/username
+ origBase := AllowedBasePath
+ defer func() { AllowedBasePath = origBase }()
+ AllowedBasePath = "/home/user"
+
+ _, err := sanitizeFilePath("/home/username/secret.yaml")
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), "path traversal detected")
+}
+
+func TestLoadFromSourcesRejectsTraversal(t *testing.T) {
+ origBase := AllowedBasePath
+ defer func() { AllowedBasePath = origBase }()
+
+ cwd, err := os.Getwd()
+ require.NoError(t, err)
+ AllowedBasePath = cwd
+
+ // Config file traversal should be rejected
+ _, err = LoadFromSources("../../../etc/passwd", nil)
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), "path traversal detected")
+
+ // Mapping file traversal should be rejected
+ _, err = LoadFromSources("", []string{"../../../etc/passwd"})
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), "path traversal detected")
+}
+
+func TestValidPathsStillWork(t *testing.T) {
+ content := `
+id: test-mapper
+mappings:
+ - "[A] <> [B]"
+`
+ tmpDir, err := os.MkdirTemp("", "koral-test-*")
+ require.NoError(t, err)
+ defer os.RemoveAll(tmpDir)
+
+ origBase := AllowedBasePath
+ defer func() { AllowedBasePath = origBase }()
+ AllowedBasePath = tmpDir
+
+ subDir := filepath.Join(tmpDir, "subdir")
+ require.NoError(t, os.Mkdir(subDir, 0755))
+
+ tmpfile, err := os.CreateTemp(subDir, "mapping-*.yaml")
+ require.NoError(t, err)
+
+ _, err = tmpfile.WriteString(content)
+ require.NoError(t, err)
+ require.NoError(t, tmpfile.Close())
+
+ cfg, err := LoadFromSources("", []string{tmpfile.Name()})
+ require.NoError(t, err)
+ require.Len(t, cfg.Lists, 1)
+ assert.Equal(t, "test-mapper", cfg.Lists[0].ID)
+}
+
+func TestRelativePathWithTraversalWithinBase(t *testing.T) {
+ // Paths with ".." that still resolve within the base should work
+ content := `
+id: traversal-test-mapper
+mappings:
+ - "[A] <> [B]"
+`
+ tmpDir, err := os.MkdirTemp("", "koral-traversal-*")
+ require.NoError(t, err)
+ defer os.RemoveAll(tmpDir)
+
+ origBase := AllowedBasePath
+ defer func() { AllowedBasePath = origBase }()
+ AllowedBasePath = tmpDir
+
+ // Create file at tmpDir/config.yaml
+ configPath := filepath.Join(tmpDir, "config.yaml")
+ require.NoError(t, os.WriteFile(configPath, []byte(content), 0644))
+
+ // Reference via a traversal path: tmpDir/subdir/../config.yaml
+ // This resolves to tmpDir/config.yaml which is within the base
+ subDir := filepath.Join(tmpDir, "subdir")
+ require.NoError(t, os.Mkdir(subDir, 0755))
+ traversalPath := filepath.Join(subDir, "..", "config.yaml")
+
+ cfg, err := LoadFromSources("", []string{traversalPath})
+ require.NoError(t, err)
+ require.Len(t, cfg.Lists, 1)
+ assert.Equal(t, "traversal-test-mapper", cfg.Lists[0].ID)
+}