Initial import

Change-Id: I6315233ee1bfbdf7cc985cb336d0df7a10274189
diff --git a/Dockerfile.with-models b/Dockerfile.with-models
new file mode 100644
index 0000000..39c7bd4
--- /dev/null
+++ b/Dockerfile.with-models
@@ -0,0 +1,96 @@
+# Dockerfile with pre-installed models
+# Build: docker build -f Dockerfile.with-models -t korap/conllu-spacy:with-models .
+
+# Multi-stage Docker build for size optimization
+FROM python:3.12-slim-bookworm AS builder
+
+# Install build dependencies (C/C++ compilers), skipping recommended extras
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    g++ \
+    gcc \
+    && rm -rf /var/lib/apt/lists/*
+
+# pip cache dir; PYTHONPATH adds the current directory to Python's import path
+ENV PIP_CACHE_DIR="/tmp/.cache/pip" \
+    PYTHONPATH="."
+ENV VIRTUAL_ENV=/app/venv
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+# Stage the dependency manifest inside the working directory
+WORKDIR /app
+COPY requirements.txt ./requirements.txt
+
+# Create the virtual environment, refresh pip, then install the requirements
+RUN python -m venv /app/venv
+RUN /app/venv/bin/pip install --upgrade pip
+RUN /app/venv/bin/pip install -r /app/requirements.txt
+
+# Staging directory for the spaCy models that the production stage copies
+RUN mkdir -p /local/models
+
+# Fetch the default model (de_core_news_lg) into the virtual environment
+RUN /app/venv/bin/python -m spacy download de_core_news_lg --no-cache-dir
+
+# Relocate the installed model package out of site-packages into /local/models
+RUN SITE_PACKAGES=$(/app/venv/bin/python -c "import site; print(site.getsitepackages()[0])") && \
+    mv "$SITE_PACKAGES/de_core_news_lg" /local/models/de_core_news_lg
+
+# Optional additional models
+# Uncomment to include medium model:
+# RUN /app/venv/bin/python -m spacy download de_core_news_md --no-cache-dir && \
+#     SITE_PACKAGES=$(/app/venv/bin/python -c "import site; print(site.getsitepackages()[0])") && \
+#     mv "$SITE_PACKAGES/de_core_news_md" /local/models/de_core_news_md
+
+# Uncomment to include small model:
+# RUN /app/venv/bin/python -m spacy download de_core_news_sm --no-cache-dir && \
+#     SITE_PACKAGES=$(/app/venv/bin/python -c "import site; print(site.getsitepackages()[0])") && \
+#     mv "$SITE_PACKAGES/de_core_news_sm" /local/models/de_core_news_sm
+
+# Production stage
+FROM python:3.12-slim-bookworm AS production
+
+# Install minimal runtime dependencies; skip recommended packages to keep the image small
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    wget \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Bring the fully installed virtual environment over from the builder stage
+COPY --from=builder /app/venv/ /app/venv/
+
+# Bring over the models that the builder stage placed in /local/models
+COPY --from=builder /local/models/ /local/models/
+
+# Application sources and the container entrypoint script
+COPY lib/ /app/lib/
+COPY systems/ /app/systems/
+COPY my_utils/ /app/my_utils/
+COPY docker-entrypoint.sh /docker-entrypoint.sh
+
+# Put the virtual environment first on PATH; add the current directory to Python's import path
+ENV VIRTUAL_ENV=/app/venv
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+ENV PYTHONPATH="."
+
+# spaCy processing configuration (defaults; can be overridden with `docker run -e`)
+ENV SPACY_USE_DEPENDENCIES="True" \
+    SPACY_USE_GERMALEMMA="True" \
+    SPACY_PARSE_TIMEOUT="30" \
+    SPACY_MAX_SENTENCE_LENGTH="500" \
+    SPACY_N_PROCESS="10" \
+    SPACY_BATCH_SIZE="2000" \
+    SPACY_CHUNK_SIZE="20000"
+
+# Run from /app and create the log and scratch directories
+WORKDIR /app
+RUN mkdir -p /app/logs /app/tmp
+
+# Point the temporary-file variables at /app/tmp rather than the system /tmp
+ENV TMPDIR="/app/tmp" \
+    TEMP="/app/tmp" \
+    TMP="/app/tmp"
+
+# Ensure the entrypoint script carries the executable bit
+RUN chmod +x /docker-entrypoint.sh
+
+ENTRYPOINT ["/docker-entrypoint.sh"]