Sanitize file paths before config loading

Change-Id: Id27ef65ec0575fd4bc9ddfc4927f4f3e685eced2
diff --git a/README.md b/README.md
index 0b71410..780f3bd 100644
--- a/README.md
+++ b/README.md
@@ -65,6 +65,11 @@
 # Optional: Maximum requests per minute per IP for rate limiting (default: 100)
 rateLimit: 100
 
+# Optional: Base path for file loading confinement (default: current working directory).
+# All config and mapping file paths must resolve within this directory or the system temp directory.
+# Set to "/" to allow loading from anywhere on the filesystem.
+basePath: "/opt/koralmapper"
+
 # Optional: Mapping lists (same format as individual mapping files)
 lists:
   - id: mapping-list-id
@@ -94,7 +99,7 @@
 
 Command line arguments take precedence over configuration file values:
 
-The `sdk`, `stylesheet`, `server`, `port`, and `loglevel` fields in the main configuration file are optional and override the following default values:
+The `sdk`, `stylesheet`, `server`, `port`, `loglevel`, and `basePath` fields in the main configuration file are optional and override the following default values:
 
 - **`sdk`**: Custom SDK JavaScript file URL (default: `https://korap.ids-mannheim.de/js/korap-plugin-latest.js`)
 - **`stylesheet`**: Kalamar stylesheet URL for the config page (default: `https://korap.ids-mannheim.de/css/kalamar-plugin-latest.css`)
@@ -103,6 +108,7 @@
 - **`loglevel`**: Log level (default: `warn`)
 - **`serviceURL`**: Service URL of the KoralMapper (default: `https://korap.ids-mannheim.de/plugin/koralmapper`)
 - **`rateLimit`**: Maximum number of requests per minute per IP address (default: `100`). When the limit is exceeded, the server responds with HTTP 429 (Too Many Requests).
+- **`basePath`**: Directory tree for file loading confinement (default: current working directory). Configuration and mapping files must resolve within this path or the system temp directory. Set to `"/"` to disable confinement. This mitigates path traversal attacks (CWE-22).
 
 These values are applied during configuration parsing. When using only individual mapping files (`-m` flags), default values are used unless overridden by command line arguments.
 
@@ -119,6 +125,7 @@
 - `KORAL_MAPPER_LOG_LEVEL`: Overrides `loglevel`
 - `KORAL_MAPPER_PORT`: Overrides `port` (integer)
 - `KORAL_MAPPER_RATE_LIMIT`: Overrides `rateLimit` (integer, requests per minute per IP)
+- `KORAL_MAPPER_BASE_PATH`: Overrides `basePath` (directory path for file loading confinement)
 
 Environment variable values take precedence over values from the configuration file.
 
diff --git a/cmd/koralmapper/main.go b/cmd/koralmapper/main.go
index 9fc6f5d..eac2984 100644
--- a/cmd/koralmapper/main.go
+++ b/cmd/koralmapper/main.go
@@ -222,6 +222,16 @@
 }
 
 func main() {
+	// Confine config file loading to the current working directory tree
+	// (path traversal prevention). Can be overridden via the "basePath"
+	// YAML field or the KORAL_MAPPER_BASE_PATH environment variable.
+	// In Docker (WORKDIR /), the default "/" naturally allows all paths.
+	cwd, err := os.Getwd()
+	if err != nil {
+		log.Fatal().Err(err).Msg("Failed to determine working directory")
+	}
+	config.AllowedBasePath = cwd
+
 	// Parse command line flags
 	cfg := parseConfig()
 
@@ -242,6 +252,11 @@
 		log.Fatal().Err(err).Msg("Failed to load configuration")
 	}
 
+	// Apply basePath from config/env for later loads. NOTE(review): the load above already ran against the CWD default.
+	if yamlConfig.BasePath != "" {
+		config.AllowedBasePath = yamlConfig.BasePath
+	}
+
 	finalPort := yamlConfig.Port
 	finalLogLevel := yamlConfig.LogLevel
 
diff --git a/config/config.go b/config/config.go
index 2ad6c43..7fa600e 100644
--- a/config/config.go
+++ b/config/config.go
@@ -3,7 +3,9 @@
 import (
 	"fmt"
 	"os"
+	"path/filepath"
 	"strconv"
+	"strings"
 
 	"github.com/KorAP/Koral-Mapper/ast"
 	"github.com/KorAP/Koral-Mapper/parser"
@@ -95,12 +97,66 @@
 	Server     string        `yaml:"server,omitempty"`
 	ServiceURL string        `yaml:"serviceURL,omitempty"`
 	CookieName string        `yaml:"cookieName,omitempty"`
+	BasePath   string        `yaml:"basePath,omitempty"` // restricts config file loading to this directory tree
 	Port       int           `yaml:"port,omitempty"`
 	LogLevel   string        `yaml:"loglevel,omitempty"`
 	RateLimit  int           `yaml:"rateLimit,omitempty"` // max requests per minute per IP (0 = use default 100)
 	Lists      []MappingList `yaml:"lists,omitempty"`
 }
 
+// AllowedBasePath restricts file loading to a specific directory tree.
+// When non-empty, every loaded file path must resolve to a location at or
+// below this directory or the system temp directory; when empty, no
+// confinement is applied. The application sets it to the CWD at startup
+// and may override it via the "basePath" YAML config field or the
+// KORAL_MAPPER_BASE_PATH environment variable. A value of "/" allows all paths.
+var AllowedBasePath string
+
+// isWithinDir reports whether absPath is at or below dir, comparing whole path
+// elements so that /home/user does not match /home/username.
+func isWithinDir(absPath, dir string) bool {
+	rel, err := filepath.Rel(dir, absPath)
+	if err != nil {
+		return false // no relative path exists (e.g. different volumes)
+	}
+	return rel != ".." && !strings.HasPrefix(rel, ".."+string(filepath.Separator))
+}
+
+// sanitizeFilePath resolves path to a cleaned absolute path and, when
+// AllowedBasePath is non-empty, requires it to lie at or below that base
+// (anchored at the CWD if relative) or the system temp directory. An existing
+// file must also pass the check after symlink resolution, so a link inside
+// the base cannot expose a file outside it.
+func sanitizeFilePath(path string) (string, error) {
+	if path == "" {
+		return "", fmt.Errorf("empty file path")
+	}
+	absPath, err := filepath.Abs(path) // Abs also cleans "." and ".." elements
+	if err != nil {
+		return "", fmt.Errorf("failed to resolve absolute path for '%s': %w", path, err)
+	}
+	if AllowedBasePath == "" {
+		return absPath, nil
+	}
+	base, err := filepath.Abs(AllowedBasePath)
+	if err != nil {
+		return "", fmt.Errorf("failed to resolve base path '%s': %w", AllowedBasePath, err)
+	}
+	realPath, realErr := filepath.EvalSymlinks(absPath) // fails for missing files: lexical check only
+	lexOK, realOK := false, realErr != nil
+	for _, root := range []string{base, filepath.Clean(os.TempDir())} {
+		lexOK = lexOK || isWithinDir(absPath, root)
+		if r, err := filepath.EvalSymlinks(root); err == nil {
+			root = r
+		}
+		realOK = realOK || isWithinDir(realPath, root)
+	}
+	if !lexOK || !realOK {
+		return "", fmt.Errorf("path traversal detected: '%s' resolves to '%s' which is outside the allowed base '%s'", path, absPath, base)
+	}
+	return absPath, nil
+}
+
 // LoadFromSources loads configuration from multiple sources and merges them:
 // - A main configuration file (optional) containing global settings and lists
 // - Individual mapping files (optional) containing single mapping lists each
@@ -114,7 +170,11 @@
 
 	// Load main configuration file if provided
 	if configFile != "" {
-		data, err := os.ReadFile(configFile)
+		safePath, err := sanitizeFilePath(configFile)
+		if err != nil {
+			return nil, err
+		}
+		data, err := os.ReadFile(safePath) // #nosec G304 -- path sanitized above
 		if err != nil {
 			return nil, fmt.Errorf("failed to read config file '%s': %w", configFile, err)
 		}
@@ -154,7 +214,11 @@
 
 	// Load individual mapping files
 	for _, file := range mappingFiles {
-		data, err := os.ReadFile(file)
+		safePath, err := sanitizeFilePath(file)
+		if err != nil {
+			return nil, err
+		}
+		data, err := os.ReadFile(safePath) // #nosec G304 -- path sanitized above
 		if err != nil {
 			log.Error().Err(err).Str("file", file).Msg("Failed to read mapping file")
 			continue
@@ -195,6 +259,7 @@
 		Stylesheet: globalConfig.Stylesheet,
 		Server:     globalConfig.Server,
 		ServiceURL: globalConfig.ServiceURL,
+		BasePath:   globalConfig.BasePath,
 		Port:       globalConfig.Port,
 		LogLevel:   globalConfig.LogLevel,
 		RateLimit:  globalConfig.RateLimit,
@@ -246,6 +311,7 @@
 		"KORAL_MAPPER_SERVICE_URL": &config.ServiceURL,
 		"KORAL_MAPPER_COOKIE_NAME": &config.CookieName,
 		"KORAL_MAPPER_LOG_LEVEL":   &config.LogLevel,
+		"KORAL_MAPPER_BASE_PATH":   &config.BasePath,
 	}
 
 	for envKey, field := range envMappings {
diff --git a/config/config_test.go b/config/config_test.go
index e2d02f3..67aa821 100644
--- a/config/config_test.go
+++ b/config/config_test.go
@@ -3,6 +3,7 @@
 import (
 	"bytes"
 	"os"
+	"path/filepath"
 	"testing"
 
 	"github.com/KorAP/Koral-Mapper/ast"
@@ -1061,6 +1062,36 @@
 	})
 }
 
+func TestBasePathEnvOverride(t *testing.T) {
+	// The environment variable must win over a value already on the struct.
+	const fromEnv = "/custom/base/path"
+	t.Setenv("KORAL_MAPPER_BASE_PATH", fromEnv)
+	target := &MappingConfig{BasePath: "from-config"}
+	ApplyEnvOverrides(target)
+	assert.Equal(t, fromEnv, target.BasePath)
+}
+
+func TestBasePathFromYAML(t *testing.T) {
+	const yamlDoc = `
+basePath: "/opt/koralmapper"
+lists:
+  - id: test-mapper
+    mappings:
+      - "[A] <> [B]"
+`
+	// Write the document to a temp file; it is removed when the test returns.
+	cfgFile, err := os.CreateTemp("", "config-basepath-*.yaml")
+	require.NoError(t, err)
+	defer os.Remove(cfgFile.Name())
+	_, err = cfgFile.WriteString(yamlDoc)
+	require.NoError(t, err)
+	require.NoError(t, cfgFile.Close())
+	// The basePath field must come back unchanged from the loader.
+	loaded, err := LoadFromSources(cfgFile.Name(), nil)
+	require.NoError(t, err)
+	assert.Equal(t, "/opt/koralmapper", loaded.BasePath)
+}
+
 func TestEnvOverridesInLoadFromSources(t *testing.T) {
 	envKeys := []string{
 		"KORAL_MAPPER_SERVER",
@@ -1229,3 +1260,186 @@
 	assert.Equal(t, 200, cfg.RateLimit,
 		"KORAL_MAPPER_RATE_LIMIT env var should override YAML value")
 }
+
+func TestSanitizeFilePathRejectsOutsideBase(t *testing.T) {
+	// Confine loading to a fresh directory and check which paths are accepted.
+	tmpDir, err := os.MkdirTemp("", "koral-base-*")
+	require.NoError(t, err)
+	defer os.RemoveAll(tmpDir)
+	cwd, err := os.Getwd()
+	require.NoError(t, err)
+	origBase := AllowedBasePath
+	defer func() { AllowedBasePath = origBase }()
+	AllowedBasePath = tmpDir
+	tests := []struct {
+		name    string
+		input   string
+		wantErr bool
+	}{
+		{
+			name:    "Path within base is accepted",
+			input:   filepath.Join(tmpDir, "config.yaml"),
+			wantErr: false,
+		},
+		{
+			name:    "Path outside base is rejected",
+			input:   "/etc/passwd",
+			wantErr: true,
+		},
+		{
+			name:    "Traversal escaping base and tmp is rejected",
+			input:   tmpDir + "/../../etc/passwd", // raw ".." elements: two levels up leaves both base and temp dir
+			wantErr: true,
+		},
+		{
+			name:    "Empty path is rejected",
+			input:   "",
+			wantErr: true,
+		},
+		{
+			name:    "Subdirectory within base is accepted",
+			input:   filepath.Join(tmpDir, "sub", "dir", "file.yaml"),
+			wantErr: false,
+		},
+		{
+			name:    "Relative path is resolved against CWD, not base",
+			input:   "config.yaml",
+			wantErr: !isWithinDir(cwd, filepath.Clean(os.TempDir())), // accepted only if the CWD itself lies in the temp dir
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := sanitizeFilePath(tt.input)
+			if tt.wantErr {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+			assert.True(t, filepath.IsAbs(result),
+				"sanitized path should be absolute, got: %s", result)
+			assert.NotContains(t, result, "..")
+		})
+	}
+}
+
+func TestSanitizeFilePathTraversalToPasswd(t *testing.T) {
+	// Use enough ".." elements to climb from any realistic CWD to the root, so
+	// the path resolves to /etc/passwd itself and not to a sibling of the CWD.
+	cwd, err := os.Getwd()
+	require.NoError(t, err)
+	origBase := AllowedBasePath
+	defer func() { AllowedBasePath = origBase }()
+	AllowedBasePath = cwd
+
+	_, err = sanitizeFilePath("../../../../../../../../../../../../../../../../etc/passwd")
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "path traversal detected")
+}
+
+func TestSanitizeFilePathWithDockerRoot(t *testing.T) {
+	// With the Docker default base "/", every absolute path is accepted
+	// and returned unchanged, however deeply nested.
+	saved := AllowedBasePath
+	defer func() { AllowedBasePath = saved }()
+	AllowedBasePath = "/"
+	for _, want := range []string{
+		"/mappings/stts-upos.yaml",
+		"/etc/ssl/certs/ca-certificates.crt",
+	} {
+		got, err := sanitizeFilePath(want)
+		require.NoError(t, err)
+		assert.Equal(t, want, got)
+	}
+}
+
+func TestSanitizeFilePathPrefixFalsePositive(t *testing.T) {
+	// A sibling directory sharing the base's name as a prefix is outside it.
+	saved := AllowedBasePath
+	defer func() { AllowedBasePath = saved }()
+	AllowedBasePath = "/home/user"
+
+	_, gotErr := sanitizeFilePath("/home/username/secret.yaml")
+	require.Error(t, gotErr)
+	assert.Contains(t, gotErr.Error(), "path traversal detected")
+}
+
+func TestLoadFromSourcesRejectsTraversal(t *testing.T) {
+	origBase := AllowedBasePath
+	defer func() { AllowedBasePath = origBase }()
+	cwd, err := os.Getwd()
+	require.NoError(t, err)
+	AllowedBasePath = cwd
+	// Enough ".." elements to reach the root from any realistic CWD depth.
+	const traversal = "../../../../../../../../../../../../../../../../etc/passwd"
+	// Config file traversal should be rejected
+	_, err = LoadFromSources(traversal, nil)
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "path traversal detected")
+
+	// Mapping file traversal should be rejected
+	_, err = LoadFromSources("", []string{traversal})
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "path traversal detected")
+}
+
+func TestValidPathsStillWork(t *testing.T) {
+	// A mapping file in a subdirectory of the allowed base must load normally.
+	const mappingYAML = `
+id: test-mapper
+mappings:
+  - "[A] <> [B]"
+`
+	baseDir, err := os.MkdirTemp("", "koral-test-*")
+	require.NoError(t, err)
+	defer os.RemoveAll(baseDir)
+
+	savedBase := AllowedBasePath
+	defer func() { AllowedBasePath = savedBase }()
+	AllowedBasePath = baseDir
+
+	nested := filepath.Join(baseDir, "subdir")
+	require.NoError(t, os.Mkdir(nested, 0755))
+
+	mappingFile, err := os.CreateTemp(nested, "mapping-*.yaml")
+	require.NoError(t, err)
+	_, err = mappingFile.WriteString(mappingYAML)
+	require.NoError(t, err)
+	require.NoError(t, mappingFile.Close())
+
+	loaded, err := LoadFromSources("", []string{mappingFile.Name()})
+	require.NoError(t, err)
+	require.Len(t, loaded.Lists, 1)
+	assert.Equal(t, "test-mapper", loaded.Lists[0].ID)
+}
+
+func TestRelativePathWithTraversalWithinBase(t *testing.T) {
+	// Paths with ".." that still resolve within the base should work
+	content := `
+id: traversal-test-mapper
+mappings:
+  - "[A] <> [B]"
+`
+	tmpDir, err := os.MkdirTemp("", "koral-traversal-*")
+	require.NoError(t, err)
+	defer os.RemoveAll(tmpDir)
+
+	origBase := AllowedBasePath
+	defer func() { AllowedBasePath = origBase }()
+	AllowedBasePath = tmpDir
+
+	// Create file at tmpDir/config.yaml
+	configPath := filepath.Join(tmpDir, "config.yaml")
+	require.NoError(t, os.WriteFile(configPath, []byte(content), 0644))
+
+	// Reference it as tmpDir/subdir/../config.yaml. The path is concatenated by
+	// hand: filepath.Join on the whole path would clean the ".." away up front.
+	subDir := filepath.Join(tmpDir, "subdir")
+	require.NoError(t, os.Mkdir(subDir, 0755))
+	traversalPath := subDir + string(filepath.Separator) + filepath.Join("..", "config.yaml")
+
+	cfg, err := LoadFromSources("", []string{traversalPath})
+	require.NoError(t, err)
+	require.Len(t, cfg.Lists, 1)
+	assert.Equal(t, "traversal-test-mapper", cfg.Lists[0].ID)
+}