# Source blob: ceeae9cc023fbd60314e3a84d53f10ef97d31db7
# Multi-stage Docker build for size optimization.
#
# Stage 1 (builder): creates the virtual environment at /app/venv, installs
# the Python requirements into it and downloads the de_core_news_lg spaCy
# model. The compilers installed here stay in this stage; only /app/venv is
# copied out of it later.
FROM python:3.12-slim-bookworm AS builder

# Install build dependencies.
# --no-install-recommends keeps apt from pulling in optional packages.
# libc6-dev is listed explicitly so that the C library headers do not depend
# on being brought in indirectly by another package.
RUN apt-get update && apt-get install -y --no-install-recommends \
        g++ \
        gcc \
        libc6-dev \
    && rm -rf /var/lib/apt/lists/*

# Set environment variables.
# PYTHONPATH adds the current directory to the module search path. It must
# not contain the bare word "PYTHONPATH" (a missing "$" would add a literal
# directory of that name to the search path).
ENV PIP_CACHE_DIR="/tmp/.cache/pip" \
    PYTHONPATH="."
# PATH is set in its own ENV instruction: a variable assigned in an ENV
# instruction is only visible to the instructions that follow it.
ENV VIRTUAL_ENV=/app/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Set the working directory and copy requirements.
# Only requirements.txt is copied, so the dependency layers below are reused
# until that file changes.
WORKDIR /app
COPY requirements.txt /app/requirements.txt

# Install Python dependencies in the virtual environment.
RUN python -m venv venv \
    && venv/bin/pip install --upgrade pip
RUN venv/bin/pip install -r requirements.txt
RUN venv/bin/python -m spacy download de_core_news_lg

# Production stage: runtime image that carries the prebuilt virtual
# environment and the application code, without the builder's compilers.
FROM python:3.12-slim-bookworm AS production

# No additional OS packages are installed in this stage, so no apt-get step
# is needed (an "apt-get install" without package names only adds a layer and
# a network round trip).

# Set environment variables.
# PATH is set in its own ENV instruction: a variable assigned in an ENV
# instruction is only visible to the instructions that follow it.
ENV VIRTUAL_ENV=/app/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# Adds the current directory (WORKDIR /app by default) to the module search
# path. It must not contain the bare word "PYTHONPATH" (a missing "$" would
# add a literal directory of that name).
ENV PYTHONPATH="."

# spaCy processing configuration.
# Presumably read by the application at start-up; the consuming code is not
# part of this file. Override with "docker run -e NAME=value" as needed.
ENV SPACY_USE_DEPENDENCIES="True" \
    SPACY_USE_GERMALEMMA="True" \
    SPACY_PARSE_TIMEOUT="30" \
    SPACY_MAX_SENTENCE_LENGTH="500" \
    SPACY_N_PROCESS="10" \
    SPACY_BATCH_SIZE="2000" \
    SPACY_CHUNK_SIZE="20000"

# Set temp directories to use app directory instead of system /tmp
ENV TMPDIR="/app/tmp" \
    TEMP="/app/tmp" \
    TMP="/app/tmp"

WORKDIR /app
RUN mkdir -p "/app/logs" "/app/tmp"

# Copy virtual environment from builder.
# The path is the same as in the builder stage, matching VIRTUAL_ENV above.
COPY --from=builder /app/venv /app/venv

# Copy application code.
# Kept after the stable layers above so that a source change only rebuilds
# these COPY layers.
COPY lib /app/lib
COPY systems /app/systems
COPY my_utils /app/my_utils

# NOTE(review): no USER instruction is set, so the container runs as root.
# Consider adding a dedicated non-root user once it is confirmed that
# /app/logs, /app/tmp and any mounted volumes are writable for that user.

# Define the entry point
CMD ["python", "/app/systems/parse_spacy_pipe.py"]