Run Docker containers as a non-root user and fix model caching
Change-Id: I3ad60ef259b35579902d68a0f949184d87d0067b
diff --git a/Dockerfile b/Dockerfile
index 3f7be4d..0aa6026 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -64,8 +64,21 @@
ENV TEMP="/app/tmp"
ENV TMP="/app/tmp"
-# Make entrypoint executable
-RUN chmod +x /docker-entrypoint.sh
+# Add non-root user with a fixed UID/GID so ownership on mounted volumes is
+# stable across rebuilds; home is /app so $HOME points at a writable directory
+RUN groupadd -r -g 10001 appuser && \
+ useradd -r -u 10001 -g appuser -d /app appuser
+
+# Make entrypoint executable and set permissions
+RUN chmod +x /docker-entrypoint.sh && \
+ chmod +x /app/download_with_progress.py
+
+# Change ownership of app directories to appuser
+RUN chown -R appuser:appuser /app /local /docker-entrypoint.sh
+
+# Switch to non-root user; the numeric form lets runtimes that only accept a
+# numeric UID (e.g. Kubernetes runAsNonRoot) verify the user is not root
+USER 10001:10001
# Define the entry point
ENTRYPOINT ["/docker-entrypoint.sh"]
diff --git a/Dockerfile.with-models b/Dockerfile.with-models
index 39c7bd4..f6f64c5 100644
--- a/Dockerfile.with-models
+++ b/Dockerfile.with-models
@@ -66,6 +66,7 @@
COPY systems /app/systems
COPY my_utils /app/my_utils
COPY docker-entrypoint.sh /docker-entrypoint.sh
+COPY download_with_progress.py /app/download_with_progress.py
# Set environment variables
ENV VIRTUAL_ENV=/app/venv
@@ -89,8 +90,21 @@
ENV TEMP="/app/tmp"
ENV TMP="/app/tmp"
-# Make entrypoint executable
-RUN chmod +x /docker-entrypoint.sh
+# Add non-root user with a fixed UID/GID so ownership on mounted volumes is
+# stable across rebuilds; home is /app so $HOME points at a writable directory
+RUN groupadd -r -g 10001 appuser && \
+ useradd -r -u 10001 -g appuser -d /app appuser
+
+# Make entrypoint executable and set permissions
+RUN chmod +x /docker-entrypoint.sh && \
+ chmod +x /app/download_with_progress.py
+
+# Change ownership of app directories to appuser
+RUN chown -R appuser:appuser /app /local /docker-entrypoint.sh
+
+# Switch to non-root user; the numeric form lets runtimes that only accept a
+# numeric UID (e.g. Kubernetes runAsNonRoot) verify the user is not root
+USER 10001:10001
# Define the entry point
ENTRYPOINT ["/docker-entrypoint.sh"]
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
index 1ada4a7..5fb9b5e 100755
--- a/docker-entrypoint.sh
+++ b/docker-entrypoint.sh
@@ -96,16 +96,35 @@
if [ -w "$MODEL_DIR" ]; then
# Download and install to /local/models with progress
if python /app/download_with_progress.py "$model_name" 2>&1 | tee /tmp/spacy_download.log >&2; then
- # Try to move the installed model to /local/models for persistence
+ # Extract and flatten the model structure for persistence
SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])")
INSTALLED_MODEL="$SITE_PACKAGES/$model_name"
if [ -d "$INSTALLED_MODEL" ]; then
- echo "Moving model to $MODEL_PATH for persistence..." >&2
- mv "$INSTALLED_MODEL" "$MODEL_PATH" 2>/dev/null || true
- # Create symlink back
- ln -sf "$MODEL_PATH" "$INSTALLED_MODEL" 2>/dev/null || true
- echo "Model saved to $MODEL_PATH" >&2
+ echo "Extracting model to $MODEL_PATH for persistence..." >&2
+
+ # Find the actual model directory (e.g., de_core_news_lg-3.8.0)
+ VERSIONED_DIR=$(find "$INSTALLED_MODEL" -maxdepth 1 -type d -name "${model_name}-*" | head -1)
+
+ if [ -n "$VERSIONED_DIR" ] && [ -f "$VERSIONED_DIR/config.cfg" ]; then
+ # Copy the versioned model directory contents to MODEL_PATH.
+ # "dir/." (rather than "dir/*") also copies hidden files.
+ mkdir -p "$MODEL_PATH"
+ if cp -r "$VERSIONED_DIR"/. "$MODEL_PATH/"; then
+ # Set permissions so user can modify the model files
+ chmod -R a+rwX "$MODEL_PATH" 2>/dev/null || true
+ echo "Model extracted to $MODEL_PATH" >&2
+ else
+ # Remove the partial copy so an incomplete model is not left in the persistent directory
+ echo "Warning: Failed to copy model to $MODEL_PATH; it will not be persisted" >&2
+ rm -rf "$MODEL_PATH" 2>/dev/null || true
+ fi
+ else
+ # Fallback: just move the whole package
+ echo "Warning: Could not find versioned model directory, moving package as-is" >&2
+ mv "$INSTALLED_MODEL" "$MODEL_PATH" 2>/dev/null || true
+ chmod -R a+rwX "$MODEL_PATH" 2>/dev/null || true
+ fi
fi
return 0
else
diff --git a/preload-models.sh b/preload-models.sh
index ad770f8..d2523e8 100755
--- a/preload-models.sh
+++ b/preload-models.sh
@@ -71,6 +71,10 @@
"
if [ -d "$TARGET_DIR/$MODEL_NAME" ]; then
+ # Grant every user read/write; "X" adds execute only to directories and already-executable files
+ echo "Setting permissions..."
+ chmod -R a+rwX "${TARGET_DIR}/${MODEL_NAME}"
+
echo ""
echo "✓ Model $MODEL_NAME successfully preloaded to $TARGET_DIR/$MODEL_NAME"
echo ""