# Multi-stage Docker build for size optimization.
#
# Stage 1 ("builder") installs a compiler toolchain and builds the Python
# virtual environment, including the spaCy model.
# Stage 2 ("production") copies only the finished virtual environment and the
# application code, so the compilers never reach the final image.

# ---------------------------------------------------------------------------
# Builder stage
# ---------------------------------------------------------------------------
FROM python:3.12-slim-bookworm AS builder

# Install build dependencies (C/C++ compilers for packages that build from
# source) and drop the apt package lists in the same layer.
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    && rm -rf /var/lib/apt/lists/*

# Set environment variables.
# PYTHONPATH is "." (the working directory). The previous value
# "PYTHONPATH:." was missing the "$" and therefore added a literal directory
# named "PYTHONPATH" to the module search path.
ENV PIP_CACHE_DIR="/tmp/.cache/pip" \
    PYTHONPATH="."
ENV VIRTUAL_ENV=/app/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Set the working directory and copy requirements
WORKDIR /app
COPY requirements.txt /app/requirements.txt

# Install Python dependencies in the virtual environment (/app/venv)
RUN python -m venv venv
RUN venv/bin/pip install --upgrade pip
RUN venv/bin/pip install -r requirements.txt
# Download the large German spaCy model into the virtual environment
RUN venv/bin/python -m spacy download de_core_news_lg

# ---------------------------------------------------------------------------
# Production stage
# ---------------------------------------------------------------------------
FROM python:3.12-slim-bookworm AS production

# No additional OS packages are needed at runtime. The former
# "apt-get update && apt-get install -y" step named no packages and only cost
# build time, so it was removed. If a runtime package becomes necessary, add:
#   RUN apt-get update && apt-get install -y <package> \
#       && rm -rf /var/lib/apt/lists/*

# Copy virtual environment from builder
COPY --from=builder /app/venv /app/venv

# Copy application code
COPY lib /app/lib
COPY systems /app/systems
COPY my_utils /app/my_utils

# Set environment variables: put the virtual environment's executables first
# on PATH so that "python" resolves to /app/venv/bin/python.
ENV VIRTUAL_ENV=/app/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# "." resolves against the working directory (/app, see WORKDIR below), which
# is where lib/ and my_utils/ are copied to.
ENV PYTHONPATH="."

# spaCy processing configuration.
# NOTE(review): these are presumably read by the application code under
# /app/systems; their exact semantics are not visible in this file.
ENV SPACY_USE_DEPENDENCIES="True"
ENV SPACY_USE_GERMALEMMA="True"
ENV SPACY_PARSE_TIMEOUT="30"
ENV SPACY_MAX_SENTENCE_LENGTH="500"
ENV SPACY_N_PROCESS="10"
ENV SPACY_BATCH_SIZE="2000"
ENV SPACY_CHUNK_SIZE="20000"

WORKDIR /app
RUN mkdir -p "/app/logs" "/app/tmp"

# Set temp directories to use the app directory instead of the system /tmp
ENV TMPDIR="/app/tmp"
ENV TEMP="/app/tmp"
ENV TMP="/app/tmp"

# Define the default command (exec form, so no shell wraps the process)
CMD ["python", "/app/systems/parse_spacy_pipe.py"]