blob: 96a0775acb7dc227c74ed559c4c5a85245676461 [file] [log] [blame]
# Dockerfile with pre-installed models
# Build: docker build -f Dockerfile.with-models -t korap/conllu-spacy:with-models .

# Multi-stage Docker build for size optimization.
#
# Builder stage: installs the compilers, creates the virtual environment in
# /app/venv, and pre-downloads the spaCy model(s) into /local/models. Only
# /app/venv and /local/models are copied into the production stage, so nothing
# else installed here ends up in the final image.
FROM python:3.12-slim-bookworm AS builder

# Install build dependencies (C/C++ toolchain).
# --no-install-recommends avoids pulling in optional packages; libc6-dev is
# listed explicitly so the C library headers are present regardless.
RUN apt-get update && apt-get install -y --no-install-recommends \
        g++ \
        gcc \
        libc6-dev \
    && rm -rf /var/lib/apt/lists/*

# Set environment variables.
# PYTHONPATH adds the working directory to the module search path. (It used to
# be the literal string "PYTHONPATH:.", which additionally put a non-existent
# relative directory named "PYTHONPATH" on the path.)
ENV PIP_CACHE_DIR="/tmp/.cache/pip" \
    PYTHONPATH="."
ENV VIRTUAL_ENV=/app/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Set the working directory and copy requirements first, so the dependency
# layers below are rebuilt only when requirements.txt changes.
WORKDIR /app
COPY requirements.txt /app/requirements.txt

# Install Python dependencies in the virtual environment (/app/venv).
RUN python -m venv venv
RUN venv/bin/pip install --upgrade pip
RUN venv/bin/pip install -r requirements.txt

# Target directory for the pre-downloaded spaCy models.
RUN mkdir -p /local/models

# Download the default model (de_core_news_lg) and move its package directory
# out of site-packages into /local/models. Download and move happen in one
# layer, matching the optional recipes below.
RUN venv/bin/python -m spacy download de_core_news_lg --no-cache-dir && \
    MODEL_PATH=$(venv/bin/python -c "import site; print(site.getsitepackages()[0] + '/de_core_news_lg')") && \
    mv "$MODEL_PATH" /local/models/de_core_news_lg

# Optionally download additional models.
# Uncomment to include medium model:
# RUN venv/bin/python -m spacy download de_core_news_md --no-cache-dir && \
#     MODEL_PATH=$(venv/bin/python -c "import site; print(site.getsitepackages()[0] + '/de_core_news_md')") && \
#     mv "$MODEL_PATH" /local/models/de_core_news_md

# Uncomment to include small model:
# RUN venv/bin/python -m spacy download de_core_news_sm --no-cache-dir && \
#     MODEL_PATH=$(venv/bin/python -c "import site; print(site.getsitepackages()[0] + '/de_core_news_sm')") && \
#     mv "$MODEL_PATH" /local/models/de_core_news_sm

# Production stage: runtime image containing the virtual environment and the
# pre-downloaded models from the builder stage plus the application code,
# running as the unprivileged user "appuser".
FROM python:3.12-slim-bookworm AS production

# Install minimal runtime dependencies.
# --no-install-recommends keeps optional packages out of the image;
# ca-certificates is listed explicitly so wget can still verify HTTPS hosts.
# Cleanup runs in the same layer so the apt caches never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Add non-root user FIRST (before copying files) so that COPY --chown can
# resolve the user and group names.
RUN groupadd -r appuser && useradd -r -g appuser appuser

# Copy virtual environment from builder and set ownership immediately
# (avoids a separate chown layer that would duplicate the files).
COPY --from=builder --chown=appuser:appuser /app/venv /app/venv

# Copy pre-downloaded models with correct ownership.
COPY --from=builder --chown=appuser:appuser /local/models /local/models

# Copy application code with correct ownership.
COPY --chown=appuser:appuser lib /app/lib
COPY --chown=appuser:appuser systems /app/systems
COPY --chown=appuser:appuser my_utils /app/my_utils
COPY --chown=appuser:appuser download_with_progress.py /app/download_with_progress.py
COPY --chown=appuser:appuser list_spacy_models.py /app/list_spacy_models.py
COPY --chown=appuser:appuser docker-entrypoint.sh /docker-entrypoint.sh

# Set environment variables: put the virtual environment first on PATH and add
# the working directory to the Python module search path. (PYTHONPATH used to
# be the literal string "PYTHONPATH:.", which additionally put a non-existent
# relative directory named "PYTHONPATH" on the path.)
ENV VIRTUAL_ENV=/app/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
ENV PYTHONPATH="."

# spaCy processing configuration (defaults; can be overridden at run time,
# e.g. with `docker run -e SPACY_N_PROCESS=4 ...`).
ENV SPACY_USE_DEPENDENCIES="True" \
    SPACY_USE_GERMALEMMA="True" \
    SPACY_PARSE_TIMEOUT="30" \
    SPACY_MAX_SENTENCE_LENGTH="500" \
    SPACY_N_PROCESS="10" \
    SPACY_BATCH_SIZE="2000" \
    SPACY_CHUNK_SIZE="20000"

WORKDIR /app

# Create writable directories with correct ownership and make the entrypoint
# and helper scripts executable. This must run before USER, while still root.
RUN mkdir -p "/app/logs" "/app/tmp" && \
    chown -R appuser:appuser "/app/logs" "/app/tmp" && \
    chmod +x /docker-entrypoint.sh && \
    chmod +x /app/download_with_progress.py && \
    chmod +x /app/list_spacy_models.py

# Set temp directories to use the app directory instead of system /tmp.
ENV TMPDIR="/app/tmp" \
    TEMP="/app/tmp" \
    TMP="/app/tmp"

# Switch to non-root user.
USER appuser

# Define the entry point (exec form, so the script runs as PID 1).
ENTRYPOINT ["/docker-entrypoint.sh"]