# Dockerfile with pre-installed models
# Build: docker build -f Dockerfile.with-models -t korap/conllu-spacy:with-models .

# Multi-stage Docker build for size optimization
FROM python:3.12-slim-bookworm AS builder

# Install build dependencies.
# The compilers live only in this stage; the production stage starts from a
# fresh base image and copies just the finished virtual environment and models.
RUN apt-get update && apt-get install -y --no-install-recommends \
    g++ \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Set environment variables.
# PYTHONPATH is "." (the working directory). The previous value
# "PYTHONPATH:." did not expand any variable; it added a literal directory
# named "PYTHONPATH" to the module search path.
ENV PYTHONPATH="." \
    VIRTUAL_ENV=/app/venv
# PATH is set in its own instruction so that $VIRTUAL_ENV is already defined
# when Docker performs the substitution.
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Set the working directory and copy requirements
WORKDIR /app
COPY requirements.txt /app/requirements.txt

# Create the virtual environment and install Python dependencies in one layer.
# --no-cache-dir keeps pip's download cache out of the layer.
RUN python -m venv venv \
    && venv/bin/pip install --no-cache-dir --upgrade pip \
    && venv/bin/pip install --no-cache-dir -r requirements.txt

# Download the default model (de_core_news_lg) and move it out of the virtual
# environment's site-packages into /local/models.
# Download and move share one RUN so the model is stored in a single layer
# rather than once where pip installed it and again at its final location.
RUN mkdir -p /local/models \
    && venv/bin/python -m spacy download de_core_news_lg --no-cache-dir \
    && MODEL_PATH=$(venv/bin/python -c "import site; print(site.getsitepackages()[0] + '/de_core_news_lg')") \
    && mv "$MODEL_PATH" /local/models/de_core_news_lg

# Optionally download additional models
# Uncomment to include medium model:
# RUN venv/bin/python -m spacy download de_core_news_md --no-cache-dir && \
#     MODEL_PATH=$(venv/bin/python -c "import site; print(site.getsitepackages()[0] + '/de_core_news_md')") && \
#     mv "$MODEL_PATH" /local/models/de_core_news_md

# Uncomment to include small model:
# RUN venv/bin/python -m spacy download de_core_news_sm --no-cache-dir && \
#     MODEL_PATH=$(venv/bin/python -c "import site; print(site.getsitepackages()[0] + '/de_core_news_sm')") && \
#     mv "$MODEL_PATH" /local/models/de_core_news_sm
48
49# Production stage
FROM python:3.12-slim-bookworm AS production

# Install minimal runtime dependencies.
# ca-certificates is listed explicitly so HTTPS downloads with wget do not
# depend on packages that would otherwise arrive only as "recommended".
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy virtual environment from builder
COPY --from=builder /app/venv /app/venv

# Copy pre-downloaded models
COPY --from=builder /local/models /local/models

# Copy application code
COPY lib /app/lib
COPY systems /app/systems
COPY my_utils /app/my_utils
COPY docker-entrypoint.sh /docker-entrypoint.sh

# Set environment variables.
# PYTHONPATH is "." (the working directory). The previous value
# "PYTHONPATH:." did not expand any variable; it added a literal directory
# named "PYTHONPATH" to the module search path.
ENV VIRTUAL_ENV=/app/venv \
    PYTHONPATH="."
# Separate instruction so $VIRTUAL_ENV is defined when PATH is substituted.
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# spaCy processing configuration defaults; each can be overridden at run time
# with `docker run -e NAME=value`.
ENV SPACY_USE_DEPENDENCIES="True" \
    SPACY_USE_GERMALEMMA="True" \
    SPACY_PARSE_TIMEOUT="30" \
    SPACY_MAX_SENTENCE_LENGTH="500" \
    SPACY_N_PROCESS="10" \
    SPACY_BATCH_SIZE="2000" \
    SPACY_CHUNK_SIZE="20000"

WORKDIR /app

# Create the log and temp directories and make the entrypoint executable in a
# single layer.
RUN mkdir -p "/app/logs" "/app/tmp" \
    && chmod +x /docker-entrypoint.sh

# Set temp directories to use app directory instead of system /tmp
ENV TMPDIR="/app/tmp" \
    TEMP="/app/tmp" \
    TMP="/app/tmp"

# NOTE(review): no USER instruction is set, so the container runs as root.
# Consider adding a dedicated non-root user once it is confirmed which paths
# docker-entrypoint.sh needs to write to (e.g. /local/models, /app/logs).

# Define the entry point
ENTRYPOINT ["/docker-entrypoint.sh"]