blob: f6f64c5eeff03353ae3d845acb49e263e28b7295 [file] [log] [blame]
# Dockerfile with pre-installed models
# Build: docker build -f Dockerfile.with-models -t korap/conllu-spacy:with-models .

# Multi-stage Docker build for size optimization
# Builder stage: creates the virtual environment in /app/venv, installs the
# Python requirements into it and places the default spaCy model under
# /local/models. Only those two directories are copied out of this stage.
FROM python:3.12-slim-bookworm AS builder

# Install build dependencies.
# --no-install-recommends keeps the toolchain install minimal; libc6-dev is
# listed explicitly so the C headers do not depend on "recommended" packages.
RUN apt-get update && apt-get install -y --no-install-recommends \
        g++ \
        gcc \
        libc6-dev \
    && rm -rf /var/lib/apt/lists/*

# Set environment variables.
# PYTHONPATH is "." (the working directory). The earlier value "PYTHONPATH:."
# put the literal word PYTHONPATH on the search path instead of expanding a
# variable; "." was the only entry that ever took effect.
ENV PIP_CACHE_DIR="/tmp/.cache/pip" \
    PYTHONPATH="."
ENV VIRTUAL_ENV=/app/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Set the working directory and copy requirements first, so the dependency
# layers below are reused from cache until requirements.txt changes.
WORKDIR /app
COPY requirements.txt /app/requirements.txt

# Install Python dependencies in virtual environment
RUN python -m venv venv
RUN venv/bin/pip install --upgrade pip
RUN venv/bin/pip install -r requirements.txt

# Download the default model (de_core_news_lg)
RUN venv/bin/python -m spacy download de_core_news_lg --no-cache-dir

# Move the model package out of the venv's site-packages into /local/models,
# the directory the production stage copies the models from.
RUN mkdir -p /local/models && \
    MODEL_PATH=$(venv/bin/python -c "import site; print(site.getsitepackages()[0] + '/de_core_news_lg')") && \
    mv "$MODEL_PATH" /local/models/de_core_news_lg

# Optionally download additional models
# Uncomment to include medium model:
# RUN venv/bin/python -m spacy download de_core_news_md --no-cache-dir && \
#     MODEL_PATH=$(venv/bin/python -c "import site; print(site.getsitepackages()[0] + '/de_core_news_md')") && \
#     mv "$MODEL_PATH" /local/models/de_core_news_md

# Uncomment to include small model:
# RUN venv/bin/python -m spacy download de_core_news_sm --no-cache-dir && \
#     MODEL_PATH=$(venv/bin/python -c "import site; print(site.getsitepackages()[0] + '/de_core_news_sm')") && \
#     mv "$MODEL_PATH" /local/models/de_core_news_sm

# Production stage: slim runtime image that receives the virtual environment
# and the pre-downloaded models from the builder stage and runs the
# entrypoint script as the unprivileged user "appuser".
FROM python:3.12-slim-bookworm AS production

# Install minimal runtime dependencies.
# ca-certificates is listed explicitly because --no-install-recommends would
# otherwise not guarantee it, and wget needs it to verify HTTPS downloads.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Set environment variables.
# PATH is set in its own instruction because $VIRTUAL_ENV is only expanded
# once it has been defined by an earlier ENV.
ENV VIRTUAL_ENV=/app/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# "." is the working directory (/app). The earlier value "PYTHONPATH:." put
# the literal word PYTHONPATH on the search path instead of a variable.
ENV PYTHONPATH="."

# spaCy processing configuration
ENV SPACY_USE_DEPENDENCIES="True" \
    SPACY_USE_GERMALEMMA="True" \
    SPACY_PARSE_TIMEOUT="30" \
    SPACY_MAX_SENTENCE_LENGTH="500" \
    SPACY_N_PROCESS="10" \
    SPACY_BATCH_SIZE="2000" \
    SPACY_CHUNK_SIZE="20000"

# Set temp directories to use app directory instead of system /tmp
ENV TMPDIR="/app/tmp" \
    TEMP="/app/tmp" \
    TMP="/app/tmp"

WORKDIR /app

# Add the non-root user and create the directories it must own BEFORE any
# large COPY. Ownership is then assigned by COPY --chown below; a recursive
# "RUN chown -R" after the copies would write every file a second time into
# a new layer and roughly double the size of the venv and model data.
RUN groupadd -r appuser && useradd -r -g appuser appuser \
    && mkdir -p /app/logs /app/tmp /local/models \
    && chown appuser:appuser /app /app/logs /app/tmp /local /local/models

# Copy virtual environment from builder
COPY --from=builder --chown=appuser:appuser /app/venv /app/venv

# Copy pre-downloaded models
COPY --from=builder --chown=appuser:appuser /local/models /local/models

# Copy application code
COPY --chown=appuser:appuser lib /app/lib
COPY --chown=appuser:appuser systems /app/systems
COPY --chown=appuser:appuser my_utils /app/my_utils
COPY --chown=appuser:appuser docker-entrypoint.sh /docker-entrypoint.sh
COPY --chown=appuser:appuser download_with_progress.py /app/download_with_progress.py

# Make the entrypoint and the download helper executable
RUN chmod +x /docker-entrypoint.sh /app/download_with_progress.py

# Switch to non-root user
USER appuser

# Define the entry point
ENTRYPOINT ["/docker-entrypoint.sh"]