CI: Speed up and actually test

Change-Id: Icee4b0731abe246c1bc552559b699f557a03aa14
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a9dbf2d..6f15dec 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -14,27 +14,31 @@
     - venv/
 
 before_script:
-  - perl -v
   - python -m venv venv
   - source venv/bin/activate
-  - export PYTHONPATH=PYTHONPATH:.
-  - venv/bin/pip install --upgrade pip
-  - pip install -r requirements.txt
-  - python -m spacy download de_core_news_lg
-  - apt update -y
-  - apt-get install -y cpanminus git
-  - cpanm https://github.com/KorAP/KorAP-XML-CoNLL-U.git
+  - export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}."  # append the project root to any existing search path
+  - venv/bin/pip install --upgrade pip --quiet
+  - pip install -r requirements.txt --quiet
+  - python -m spacy download de_core_news_lg --quiet 2>&1 | tail -5  # only the last 5 lines reach the job log
 
 test-job:
   stage: test
   artifacts:
     paths:
-      - rei.spacy.zip
-      - rei.spacy.conllu
+      - rei.test.spacy.conllu.xz
+      - logs/
+    when: always  # upload artifacts even when the job fails
   script:
-    - mkdir logs
-    - korapxml2conllu rei.zip  | python ./systems/parse_spacy_pipe.py  | tee rei.spacy.conllu | conllu2korapxml > rei.spacy.zip
-    - (! grep -E '^0' ./rei.spacy.conllu)
+    - mkdir -p logs
+    - echo "Testing with 100,000 lines from rei.test.conllu"
+    - cat rei.test.conllu | python ./systems/parse_spacy_pipe.py --corpus_name rei_test | tee rei.test.spacy.conllu | tail -20  # stdout only, so stderr cannot end up in the output file
+    - echo "Checking output validity..."
+    - test -s rei.test.spacy.conllu || (echo "Output file is empty!" && exit 1)
+    - (! grep -E '^0\s' rei.test.spacy.conllu) || (echo "Found invalid line starting with 0!" && exit 1)
+    - echo "Compressing output..."
+    - xz rei.test.spacy.conllu
+    - ls -lh rei.test.spacy.conllu.xz
+    - echo "Test passed successfully"
 
 # Build Docker image with morphological features and dependency parsing
 build-docker:
@@ -44,28 +48,30 @@
     - docker:24.0.5-dind
   variables:
     DOCKER_TLS_CERTDIR: "/certs"
   before_script:
     - docker info
-    # Determine image tag based on git ref
-    - |
-      if [ -n "$CI_COMMIT_TAG" ]; then
-        export IMAGE_TAG="$CI_COMMIT_TAG"
-        export IMAGE_NAME="conllu2spacy-$CI_COMMIT_TAG"
-      else
-        export IMAGE_TAG="$CI_COMMIT_SHORT_SHA"
-        export IMAGE_NAME="conllu2spacy-$CI_COMMIT_SHORT_SHA"
-      fi
-    - echo "Building image with tag $IMAGE_TAG"
-    - echo "Image name will be $IMAGE_NAME"
+    - export IMAGE_TAG="${CI_COMMIT_TAG:-$CI_COMMIT_SHORT_SHA}"  # evaluated by the shell; a `variables:` entry cannot apply a ":-" default
+    - export IMAGE_NAME="conllu2spacy-${IMAGE_TAG}"
+    - echo "Building image with tag ${IMAGE_TAG}"
+    - echo "Image name will be ${IMAGE_NAME}"
   script:
-    - docker build -t korap/conllu2spacy:$IMAGE_TAG .
-    - docker save korap/conllu2spacy:$IMAGE_TAG | xz > $IMAGE_NAME.tar.xz
-    - ls -lh $IMAGE_NAME.tar.xz
+    - docker build -t korap/conllu2spacy:${IMAGE_TAG} . 2>&1 | tail -20
+    - echo "Testing Docker image with rei.test.conllu..."
+    - docker run --rm -i -v $(pwd):/data korap/conllu2spacy:${IMAGE_TAG} < rei.test.conllu 2>&1 > rei.test.docker.conllu | tail -20  # redirections are opened by the job shell, so paths are workspace-relative; stderr goes to the log, stdout to the file
+    - test -s rei.test.docker.conllu || (echo "Docker output is empty!" && exit 1)
+    - (! grep -E '^0\s' rei.test.docker.conllu) || (echo "Docker test failed invalid output!" && exit 1)
+    - echo "Docker test passed, compressing output..."
+    - xz rei.test.docker.conllu
+    - ls -lh rei.test.docker.conllu.xz
+    - echo "Saving Docker image..."
+    - docker save korap/conllu2spacy:${IMAGE_TAG} | xz > ${IMAGE_NAME}.tar.xz
+    - ls -lh ${IMAGE_NAME}.tar.xz
   artifacts:
     paths:
       - "*.tar.xz"
+      - "rei.test.docker.conllu.xz"
     expire_in: 1 week
-    name: "docker-image-$IMAGE_TAG"
+    name: "docker-image-${CI_COMMIT_REF_SLUG}-${CI_COMMIT_SHORT_SHA}"  # expanded by the runner, which cannot see shell-exported IMAGE_TAG
   rules:
     - if: $CI_COMMIT_TAG
       when: always