# Dockerfile with pre-installed models
# Build: docker build -f Dockerfile.with-models -t korap/conllu-spacy:with-models .

# Multi-stage Docker build for size optimization.
# Builder stage: installs the Python dependencies into a virtual environment
# (/app/venv) and pre-downloads the spaCy model(s) into /local/models.
FROM python:3.12-slim-bookworm AS builder

# Install build dependencies for Python packages that compile native code.
# libc6-dev is listed explicitly so the C headers are present even though
# recommended packages are not installed.
RUN apt-get update && apt-get install -y --no-install-recommends \
        g++ \
        gcc \
        libc6-dev \
    && rm -rf /var/lib/apt/lists/*

# Set environment variables.
# PYTHONPATH is "." (the working directory); the former value
# "PYTHONPATH:." lacked the "$" and therefore put a literal directory named
# "PYTHONPATH" on the import path.
ENV PIP_CACHE_DIR="/tmp/.cache/pip" \
    PYTHONPATH="." \
    VIRTUAL_ENV="/app/venv"
# PATH needs its own ENV instruction: variables assigned in an ENV
# instruction are only available for substitution in later instructions.
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Set the working directory
WORKDIR /app

# Create the virtual environment and upgrade pip before copying
# requirements.txt, so that editing the requirements does not invalidate
# these layers.
RUN python -m venv "$VIRTUAL_ENV" \
    && "$VIRTUAL_ENV/bin/pip" install --upgrade pip

# Install Python dependencies in the virtual environment
COPY requirements.txt /app/requirements.txt
RUN "$VIRTUAL_ENV/bin/pip" install -r requirements.txt

# Download the default model (de_core_news_lg) and move its package directory
# out of site-packages into /local/models. Download and move form one logical
# step, so they share a single layer; the chain stops at the first failure.
RUN mkdir -p /local/models \
    && "$VIRTUAL_ENV/bin/python" -m spacy download de_core_news_lg --no-cache-dir \
    && MODEL_PATH=$("$VIRTUAL_ENV/bin/python" -c "import site; print(site.getsitepackages()[0] + '/de_core_news_lg')") \
    && mv "$MODEL_PATH" /local/models/de_core_news_lg

# Optionally download additional models
# Uncomment to include medium model:
# RUN "$VIRTUAL_ENV/bin/python" -m spacy download de_core_news_md --no-cache-dir \
#     && MODEL_PATH=$("$VIRTUAL_ENV/bin/python" -c "import site; print(site.getsitepackages()[0] + '/de_core_news_md')") \
#     && mv "$MODEL_PATH" /local/models/de_core_news_md

# Uncomment to include small model:
# RUN "$VIRTUAL_ENV/bin/python" -m spacy download de_core_news_sm --no-cache-dir \
#     && MODEL_PATH=$("$VIRTUAL_ENV/bin/python" -c "import site; print(site.getsitepackages()[0] + '/de_core_news_sm')") \
#     && mv "$MODEL_PATH" /local/models/de_core_news_sm

# Production stage: runtime image that receives the virtual environment and
# the pre-downloaded models from the builder stage plus the application code.
FROM python:3.12-slim-bookworm AS production

# Install minimal runtime dependencies.
# ca-certificates is listed explicitly so that wget keeps its HTTPS trust
# roots even though recommended packages are not installed.
# NOTE(review): wget is presumably needed by the entrypoint or the download
# helper script - confirm, and drop it if unused.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Add non-root user FIRST (before copying files) so --chown can refer to it
RUN groupadd -r appuser && useradd -r -g appuser appuser

# Copy virtual environment from builder and set ownership immediately
COPY --from=builder --chown=appuser:appuser /app/venv /app/venv

# Copy pre-downloaded models with correct ownership
COPY --from=builder --chown=appuser:appuser /local/models /local/models

# Copy application code with correct ownership
COPY --chown=appuser:appuser lib /app/lib
COPY --chown=appuser:appuser systems /app/systems
COPY --chown=appuser:appuser my_utils /app/my_utils
COPY --chown=appuser:appuser download_with_progress.py /app/download_with_progress.py
COPY --chown=appuser:appuser list_spacy_models.py /app/list_spacy_models.py
COPY --chown=appuser:appuser docker-entrypoint.sh /docker-entrypoint.sh

# Set environment variables.
# PYTHONPATH is "." (the working directory); the former value
# "PYTHONPATH:." lacked the "$" and therefore put a literal directory named
# "PYTHONPATH" on the import path.
ENV VIRTUAL_ENV="/app/venv" \
    PYTHONPATH="."
# PATH needs its own ENV instruction: variables assigned in an ENV
# instruction are only available for substitution in later instructions.
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# spaCy processing configuration (defaults; can be overridden at run time,
# e.g. with `docker run -e NAME=value`)
ENV SPACY_USE_DEPENDENCIES="True" \
    SPACY_USE_GERMALEMMA="True" \
    SPACY_PARSE_TIMEOUT="30" \
    SPACY_MAX_SENTENCE_LENGTH="500" \
    SPACY_N_PROCESS="10" \
    SPACY_BATCH_SIZE="2000" \
    SPACY_CHUNK_SIZE="20000"

WORKDIR /app

# Create the writable directories with correct ownership and make the
# entrypoint and helper scripts executable (must run as root, hence before
# the USER switch below)
RUN mkdir -p "/app/logs" "/app/tmp" \
    && chown -R appuser:appuser "/app/logs" "/app/tmp" \
    && chmod +x \
        /docker-entrypoint.sh \
        /app/download_with_progress.py \
        /app/list_spacy_models.py

# Set temp directories to use app directory instead of system /tmp
ENV TMPDIR="/app/tmp" \
    TEMP="/app/tmp" \
    TMP="/app/tmp"

# Switch to non-root user
USER appuser

# Define the entry point
ENTRYPOINT ["/docker-entrypoint.sh"]