# Dockerfile with pre-installed models
# Build: docker build -f Dockerfile.with-models -t korap/conllu-spacy:with-models .
| 3 | |
# Multi-stage Docker build for size optimization.
# Stage "builder": installs the compiler toolchain needed to build Python
# packages; only selected artifacts are copied out of this stage later.
FROM python:3.12-slim-bookworm AS builder

# Install build dependencies.
# --no-install-recommends keeps the toolchain minimal; libc6-dev is listed
# explicitly so the C library headers are present even though recommended
# packages are skipped. The apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        g++ \
        gcc \
        libc6-dev \
    && rm -rf /var/lib/apt/lists/*
| 12 | |
# Build-stage environment.
# PIP_CACHE_DIR: where pip keeps its download cache during the build.
# PYTHONPATH:    the current working directory. The previous value
#                "PYTHONPATH:." contained the literal word PYTHONPATH (a
#                missing "$"), which only added a bogus relative path entry.
ENV PIP_CACHE_DIR="/tmp/.cache/pip" \
    PYTHONPATH="."
# VIRTUAL_ENV must be defined in its own instruction before PATH refers to it:
# variables set within a single ENV instruction are not visible to each other.
ENV VIRTUAL_ENV=/app/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
| 18 | |
# Work inside /app and bring in the dependency manifest only, so the
# dependency layers depend on requirements.txt and not on application code.
WORKDIR /app
COPY requirements.txt ./

# Create the virtual environment at $VIRTUAL_ENV (/app/venv), upgrade pip
# inside it, then install the pinned requirements into it.
RUN python -m venv "$VIRTUAL_ENV" \
    && "$VIRTUAL_ENV/bin/pip" install --upgrade pip \
    && "$VIRTUAL_ENV/bin/pip" install -r requirements.txt
| 27 | |
# Download the default spaCy model (de_core_news_lg) and relocate its package
# directory from the virtual environment's site-packages to /local/models.
# Download and move share a single RUN so the model is not stored twice
# (once per layer) in the builder's layer cache.
# The relative venv/ paths rely on the working directory being /app.
RUN mkdir -p /local/models \
    && venv/bin/python -m spacy download de_core_news_lg --no-cache-dir \
    && MODEL_PATH=$(venv/bin/python -c "import site; print(site.getsitepackages()[0] + '/de_core_news_lg')") \
    && mv "$MODEL_PATH" /local/models/de_core_news_lg
| 37 | |
| 38 | # Optionally download additional models |
| 39 | # Uncomment to include medium model: |
| 40 | # RUN venv/bin/python -m spacy download de_core_news_md --no-cache-dir && \ |
| 41 | # MODEL_PATH=$(venv/bin/python -c "import site; print(site.getsitepackages()[0] + '/de_core_news_md')") && \ |
| 42 | # mv "$MODEL_PATH" /local/models/de_core_news_md |
| 43 | |
| 44 | # Uncomment to include small model: |
| 45 | # RUN venv/bin/python -m spacy download de_core_news_sm --no-cache-dir && \ |
| 46 | # MODEL_PATH=$(venv/bin/python -c "import site; print(site.getsitepackages()[0] + '/de_core_news_sm')") && \ |
| 47 | # mv "$MODEL_PATH" /local/models/de_core_news_sm |
| 48 | |
# Production stage: runtime image without the compiler toolchain.
FROM python:3.12-slim-bookworm AS production

# Install minimal runtime dependencies.
# --no-install-recommends avoids pulling in optional packages; ca-certificates
# is named explicitly so HTTPS downloads via wget keep working without relying
# on recommended packages. No separate "apt-get clean" is needed: the official
# Debian-based images clean the package archive automatically, and the lists
# are removed in this same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && rm -rf /var/lib/apt/lists/*
| 57 | |
WORKDIR /app

# Create the unprivileged runtime user BEFORE copying any files, so the COPY
# instructions below can assign ownership directly via --chown. A recursive
# "chown -R" in a later RUN would rewrite every file of the virtual
# environment and the models into an additional layer, roughly doubling the
# image size. Only the (still empty) directories are chowned here.
RUN groupadd -r appuser && useradd -r -g appuser appuser \
    && mkdir -p /app/logs /app/tmp /local/models \
    && chown appuser:appuser /app /app/logs /app/tmp /local /local/models

# Copy virtual environment from builder
COPY --from=builder --chown=appuser:appuser /app/venv /app/venv

# Copy pre-downloaded models
COPY --from=builder --chown=appuser:appuser /local/models /local/models

# Copy application code
COPY --chown=appuser:appuser lib /app/lib
COPY --chown=appuser:appuser systems /app/systems
COPY --chown=appuser:appuser my_utils /app/my_utils
COPY --chown=appuser:appuser docker-entrypoint.sh /docker-entrypoint.sh
COPY --chown=appuser:appuser download_with_progress.py /app/download_with_progress.py

# Make the entrypoint and the download helper executable (two small files,
# so the extra layer is negligible).
RUN chmod +x /docker-entrypoint.sh /app/download_with_progress.py

# Runtime environment.
# VIRTUAL_ENV is set in its own instruction because PATH refers to it.
ENV VIRTUAL_ENV=/app/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# Search the current working directory for Python modules. The previous value
# "PYTHONPATH:." contained the literal word PYTHONPATH (a missing "$"), which
# only added a bogus relative path entry.
ENV PYTHONPATH="."

# spaCy processing configuration (defaults; can be overridden at run time,
# e.g. with "docker run -e SPACY_N_PROCESS=4 ...").
ENV SPACY_USE_DEPENDENCIES="True" \
    SPACY_USE_GERMALEMMA="True" \
    SPACY_PARSE_TIMEOUT="30" \
    SPACY_MAX_SENTENCE_LENGTH="500" \
    SPACY_N_PROCESS="10" \
    SPACY_BATCH_SIZE="2000" \
    SPACY_CHUNK_SIZE="20000"

# Set temp directories to use the app directory instead of system /tmp
ENV TMPDIR="/app/tmp" \
    TEMP="/app/tmp" \
    TMP="/app/tmp"

# Switch to non-root user
USER appuser

# Define the entry point
ENTRYPOINT ["/docker-entrypoint.sh"]