Add Dockerfile and GitLab CI Pipeline

Change-Id: Iafef7b6388667cf2d45c79a4a0e5d0eea224afba
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..14af3e2
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,42 @@
+# Test data files
+*.i5.xml
+*.zip
+!share/*.jar
+
+# Build artifacts
+blib/
+local/
+Makefile
+MYMETA*
+pm_to_blib
+pod2htmd.tmp
+
+# Git and version control
+.git/
+.github/
+.gitignore
+
+# IDE files
+.idea/
+*.swp
+*~
+
+# Temporary and output files
+*.xml
+*.tar.gz
+*.tar.xz
+err
+out
+out.txt
+new
+old
+a/
+b/
+BNC/
+
+# CI/CD
+.gitlab-ci-local/
+.gitlab-ci-local-variables.yml*
+
+# Documentation: intentionally not ignored, so Readme.pod stays in the
+# build context and is copied into the image by "COPY .".
diff --git a/.gitignore b/.gitignore
index 4b3dd5a..93c04be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,13 @@
 blib
 MYMETA*
 Makefile
-Dockerfile
+!Dockerfile
 pm_to_blib
 \#*
 *~
 .*
 !.gitignore
 !.github
+!.dockerignore
+!.gitlab-ci.yml
 /sandbox
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000..b4276d5
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,80 @@
+# Builds the tei2korapxml Docker image, slims it with mintoolkit/mint and,
+# for release tags, pushes the slimmed image to Docker Hub.
+image: docker:latest
+
+variables:
+  # Quoted: CI/CD variable values are strings, not YAML booleans.
+  FF_NETWORK_PER_BUILD: 'true'
+
+services:
+  - name: docker:dind
+    command: [--dns=127.0.0.11]
+
+stages:
+  - build
+  - deploy
+
+# Build the full image (:$VID-large), slim it to :$VID and keep the slimmed
+# image as an xz-compressed artifact for the deploy stage.
+build-docker:
+  rules:
+    # Tag pipelines run automatically and use the tag as image version.
+    - if: $CI_COMMIT_TAG =~ /.+/
+      variables:
+        VID: $CI_COMMIT_TAG
+    # Everything else is a manual job. The ref slug is used instead of the
+    # raw branch name, which may contain characters such as "/" that are
+    # not allowed in a Docker tag or an artifact file name.
+    - when: manual
+      variables:
+        VID: $CI_COMMIT_REF_SLUG-$CI_COMMIT_SHORT_SHA
+  stage: build
+  before_script:
+    - apk update
+    # Install xz explicitly (as push-dockerhub does) rather than relying on
+    # the base image to ship an xz that supports the -T0/-M/-9 options.
+    - apk add --no-cache git xz
+  script:
+    - docker build -f Dockerfile -t korap/tei2korapxml:$VID-large --target tei2korapxml .
+    # Slim the image with mint; --exec exercises both the Perl script and the
+    # bundled tokenizer jar. NOTE(review): "|| true" ignores a failed
+    # slimming run although "docker save" below needs its result - confirm.
+    - >-
+      docker run --rm -v /var/run/docker.sock:/var/run/docker.sock
+      mintoolkit/mint --crt-api-version 1.46 build --http-probe=false
+      --exec='PERL5LIB=/tei2korapxml/lib /tei2korapxml/script/tei2korapxml -v || test $? -eq 2 && java -jar /tei2korapxml/share/KorAP-Tokenizer-2.3.0-standalone.jar -V'
+      --include-path=/tei2korapxml/lib
+      --include-path=/usr/local/share/perl5
+      --include-path=/usr/share/perl5
+      --include-path=/usr/lib/perl5
+      --tag korap/tei2korapxml:$VID
+      --tag korap/tei2korapxml:latest
+      korap/tei2korapxml:$VID-large || true
+    - ARTIFACT=tei2korapxml-${VID}.tar.xz
+    - docker save korap/tei2korapxml:$VID | xz -T0 -M16G -9 > "$ARTIFACT"
+  artifacts:
+    paths:
+      - tei2korapxml-*.tar.xz
+
+# Manually triggered for tags starting with "v": load the slimmed image from
+# the build artifact and push it as :$CI_COMMIT_TAG and :latest.
+push-dockerhub:
+  stage: deploy
+  needs:
+    - job: build-docker
+      artifacts: true
+  dependencies:
+    - build-docker
+  rules:
+    - if: $CI_COMMIT_TAG =~ /^v.+/
+      when: manual
+    - when: never
+  script:
+    - apk update
+    - apk add --no-cache xz
+    - echo "$DOCKERHUB_TOKEN" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
+    - ARTIFACT=tei2korapxml-${CI_COMMIT_TAG}.tar.xz
+    - xz -d -c "$ARTIFACT" | docker load
+    - docker tag korap/tei2korapxml:$CI_COMMIT_TAG korap/tei2korapxml:latest
+    - docker push korap/tei2korapxml:$CI_COMMIT_TAG
+    - docker push korap/tei2korapxml:latest
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b817149
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,94 @@
+# Image for tei2korapxml (TEI P5 to KorAP-XML converter) on Alpine Linux.
+# The stage name is what "docker build --target tei2korapxml" refers to.
+FROM alpine:latest AS tei2korapxml
+
+# Runtime packages (Perl, Java 21 JRE, libxml bindings) plus the toolchain
+# needed to build the CPAN dependencies. --no-cache fetches the package
+# index on the fly, so no separate "apk update" is needed.
+RUN apk add --no-cache git \
+            perl \
+            perl-io-socket-ssl \
+            perl-dev \
+            g++ \
+            make \
+            wget \
+            perl-doc \
+            libxml2-dev \
+            perl-xml-libxml \
+            perl-module-pluggable \
+            openjdk21-jre \
+            curl
+
+# Install cpm (faster CPAN module installer)
+RUN curl -fsSL https://raw.githubusercontent.com/kupietz/cpm/main/cpm > /bin/cpm && chmod a+x /bin/cpm
+
+# Copy repository respecting .dockerignore
+COPY . /tei2korapxml
+
+WORKDIR /tei2korapxml
+
+# Install build-time dependencies required by Makefile.PL
+RUN cpm install --test -g File::ShareDir::Install
+
+# Install all Perl module dependencies from Makefile.PL
+RUN cpm install --test -g \
+    File::ShareDir \
+    File::Share \
+    XML::CompactTree::XS \
+    XML::LibXML::Reader \
+    IO::Compress::Zip \
+    IO::Uncompress::Unzip \
+    Log::Any \
+    Time::Progress \
+    XML::Loy
+
+# Run Makefile.PL and install (this will install share files properly)
+RUN perl Makefile.PL && make install
+
+# Remove build-only tools from the final filesystem. This runs in its own
+# layer, so the layers created above keep their size; the slimming step
+# described at the end of this file produces the small image.
+RUN rm /bin/cpm && \
+    apk del git \
+            perl-dev \
+            perl-doc \
+            g++ \
+            wget \
+            libxml2-dev \
+            curl && \
+    rm -rf /root/.cpanm \
+           /root/.perl-cpm \
+           /usr/local/share/man
+
+# Create non-root user for security
+RUN addgroup -S korap && \
+    adduser -S tei2korapxml -G korap && \
+    chown -R tei2korapxml:korap /tei2korapxml
+
+USER tei2korapxml
+
+# Set up entrypoint (the script is committed with its executable bit set)
+COPY docker-entrypoint.sh /usr/local/bin/
+ENTRYPOINT ["docker-entrypoint.sh"]
+
+# Default command shows help
+CMD ["--help"]
+
+LABEL description="Docker Image for tei2korapxml - TEI P5 to KorAP-XML converter"
+LABEL maintainer="korap@ids-mannheim.de"
+LABEL repository="https://github.com/KorAP/KorAP-XML-TEI"
+
+# Build command:
+# docker build -f Dockerfile -t korap/tei2korapxml:x.xx-large .
+
+# Slimming with mintoolkit/mint (https://github.com/mintoolkit/mint),
+# mirroring the invocation used in .gitlab-ci.yml:
+# mint --crt-api-version 1.46 build --http-probe=false \
+#      --exec='PERL5LIB=/tei2korapxml/lib /tei2korapxml/script/tei2korapxml -v || test $? -eq 2 && java -jar /tei2korapxml/share/KorAP-Tokenizer-2.3.0-standalone.jar -V' \
+#      --include-path=/tei2korapxml/lib \
+#      --include-path=/usr/local/share/perl5 \
+#      --include-path=/usr/share/perl5 \
+#      --include-path=/usr/lib/perl5 \
+#      --tag korap/tei2korapxml:x.xx \
+#      --tag korap/tei2korapxml:latest \
+#      korap/tei2korapxml:x.xx-large
diff --git a/Readme.pod b/Readme.pod
index 074db86..4a058c3 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -74,6 +74,44 @@
 
 =head1 INSTALLATION
 
+=head2 Docker (Recommended)
+
+The easiest way to use C<tei2korapxml> is via Docker, which bundles all dependencies
+(Perl 5.42, Java 21, and required libraries) in a single container image.
+
+B<Pull from Docker Hub:>
+
+  $ docker pull korap/tei2korapxml:latest
+
+B<Usage examples:>
+
+  # Convert a file
+  $ docker run --rm -v $(pwd):/data korap/tei2korapxml:latest \
+      -s -tk /data/input.i5.xml > output.zip
+
+  # Convert from stdin
+  $ cat input.i5.xml | docker run --rm -i korap/tei2korapxml:latest \
+     -s -tk - > output.zip
+
+  # Using docker-compose
+  $ docker-compose run --rm tei2korapxml -s -tk input.i5.xml > output.zip
+
+B<Build locally:>
+
+  $ docker build -t korap/tei2korapxml:latest .
+
+For a slimmed-down image (using L<mintoolkit|https://github.com/mintoolkit/mint>):
+
+  $ docker build -t korap/tei2korapxml:large .
+  $ mint --crt-api-version 1.46 build --http-probe=false \
+    --exec='PERL5LIB=/tei2korapxml/lib /tei2korapxml/script/tei2korapxml -v || test $? -eq 2 && java -jar /tei2korapxml/share/KorAP-Tokenizer-2.3.0-standalone.jar -V' \
+    --include-path=/tei2korapxml/lib --include-path=/usr/local/share/perl5 \
+    --include-path=/usr/share/perl5 --include-path=/usr/lib/perl5 \
+    --tag korap/tei2korapxml:latest \
+    korap/tei2korapxml:large
+
+=head2 Traditional Installation
+
 C<tei2korapxml> requires C<libxml2-dev> bindings and L<File::ShareDir::Install> to be installed.
 When these requirements are met, the preferred way to install the script is
 to use L<cpanm|App::cpanminus>.
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
new file mode 100755
index 0000000..81d9073
--- /dev/null
+++ b/docker-entrypoint.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -e
+
+# Container entrypoint: replace this shell (exec) with the tei2korapxml
+# script installed in /usr/local/bin, forwarding all arguments unchanged.
+exec /usr/local/bin/tei2korapxml "$@"
diff --git a/lib/KorAP/XML/TEI/Zipper.pm b/lib/KorAP/XML/TEI/Zipper.pm
index f4f43af..8ff5f0d 100644
--- a/lib/KorAP/XML/TEI/Zipper.pm
+++ b/lib/KorAP/XML/TEI/Zipper.pm
@@ -2,7 +2,7 @@
 use strict;
 use warnings;
 use Log::Any qw($log);
-use IO::Compress::Zip qw($ZipError :constants);
+use IO::Compress::Zip qw($ZipError :constants :zip_method);
 use Scalar::Util 'blessed';
 
 # man IO::Compress::Zip