Dockerize and rename to conllu-cmc
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 63792b4..e59adec 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -2,22 +2,109 @@
stages:
- test
- - package
+ - build
+ - deploy
cache:
paths:
- node_modules/
-before_script:
- - npm install
-
+# Run npm tests
test:
stage: test
+ before_script:
+ - npm install
script:
- npm test
+# Test Docker image build
+test-docker-image:
+ image: docker:latest
+ stage: test
+ variables:
+ FF_NETWORK_PER_BUILD: "true"
+ services:
+ - name: docker:dind
+ alias: docker
+ command: ["--dns=127.0.0.11", "--dns=8.8.8.8"]
+ before_script:
+ - apk update
+ - apk add --no-cache bash
+ script:
+ - docker build -t korap/conllu-cmc:test .
+ - echo "Testing help display..."
+ - docker run --rm korap/conllu-cmc:test --help
+ - echo "Testing basic annotation..."
+ - echo -e "1\tš\t_\t_\t_\t_\t_\t_\t_\t_" | docker run --rm -i korap/conllu-cmc:test
+
+# Build Docker image
+build-docker-image:
+ image: docker:latest
+ stage: build
+ variables:
+ FF_NETWORK_PER_BUILD: "true"
+ services:
+ - name: docker:dind
+ alias: docker
+ command: ["--dns=127.0.0.11", "--dns=8.8.8.8"]
+ rules:
+ - if: $CI_COMMIT_TAG =~ /.+/
+ variables:
+ VID: $CI_COMMIT_TAG
+ - when: manual
+ variables:
+ VID_ALT: $CI_COMMIT_REF_SLUG-$CI_COMMIT_SHORT_SHA  # slug is always set and tag-safe (no "/")
+ VID: snapshot
+ cache:
+ key: conllu-cmc
+ paths:
+ - cache/
+ before_script:
+ - mkdir -p cache
+ - apk update
+ - apk add --no-cache bash xz
+ script:
+ - docker build -t korap/conllu-cmc:latest .
+ - docker tag korap/conllu-cmc:latest korap/conllu-cmc:$VID
+ - |
+ if [ -n "$VID_ALT" ]; then
+ docker tag korap/conllu-cmc:latest korap/conllu-cmc:$VID_ALT
+ fi
+ - docker save korap/conllu-cmc | xz -T0 > conllu-cmc.xz
+ artifacts:
+ paths:
+ - conllu-cmc.xz
+
+# Deploy to Docker Hub
+deploy-to-docker-hub:
+ image: docker:latest
+ stage: deploy
+ variables:
+ FF_NETWORK_PER_BUILD: "true"
+ services:
+ - name: docker:dind
+ alias: docker
+ command: ["--dns=127.0.0.11", "--dns=8.8.8.8"]
+ rules:
+ - if: $CI_COMMIT_TAG =~ /.+/
+ - when: manual
+ dependencies:
+ - build-docker-image
+ before_script:
+ - echo "Checking Docker Hub credentials..."
+ - test -n "$DOCKER_HUB_USER" || (echo "ERROR: DOCKER_HUB_USER is not set" && exit 1)
+ - test -n "$DOCKER_HUB_PASSWORD" || (echo "ERROR: DOCKER_HUB_PASSWORD is not set" && exit 1)
+ - echo "Logging in as user: $DOCKER_HUB_USER"
+ - echo "$DOCKER_HUB_PASSWORD" | docker login -u "$DOCKER_HUB_USER" --password-stdin
+ script:
+ - xz -d -c conllu-cmc.xz | docker load
+ - docker push --all-tags korap/conllu-cmc
+
+# Build standalone binaries
package:
- stage: package
+ stage: build
+ before_script:
+ - npm install
script:
- npm run pkg-linux
- npm run pkg-macos
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..250006e
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,45 @@
+# Multi-stage Docker build for size optimization
+FROM node:alpine AS builder
+
+# Set the working directory
+WORKDIR /app
+
+# Copy package files first (for better layer caching)
+COPY package*.json ./
+
+# Install production dependencies only (--omit=dev replaces the deprecated --only=production)
+RUN npm ci --omit=dev
+
+# Production stage
+FROM node:alpine AS production
+
+# metadata
+LABEL maintainer="Marc Kupietz <kupietz@ids-mannheim.de>"
+
+# Install minimal runtime dependencies
+RUN apk add --no-cache --update \
+ shadow \
+ && rm -rf /var/cache/apk/*
+
+# Add non-root user
+RUN groupadd -r appuser && useradd -r -g appuser appuser
+
+# Set the working directory
+WORKDIR /app
+
+# Copy node_modules from builder
+COPY --from=builder --chown=appuser:appuser /app/node_modules /app/node_modules
+
+# Copy application source
+COPY --chown=appuser:appuser package.json /app/
+COPY --chown=appuser:appuser src /app/src
+
+# Copy entry point
+COPY --chown=appuser:appuser docker-entrypoint.sh /docker-entrypoint.sh
+RUN chmod +x /docker-entrypoint.sh
+
+# Switch to non-root user
+USER appuser
+
+# Define the entry point
+ENTRYPOINT ["/docker-entrypoint.sh"]
diff --git a/Readme.md b/Readme.md
index 5c7a9f4..19b8e93 100644
--- a/Readme.md
+++ b/Readme.md
@@ -1,23 +1,49 @@
-# conllu2cmc
+# conllu-cmc
+
+[Docker Hub: korap/conllu-cmc](https://hub.docker.com/r/korap/conllu-cmc)
Reads CoNLL-U format from stdin and annotates emojis, emoticons, hashtags, URLs, email addresses, action words, @names, and Wikipedia emoji templates with their corresponding STTS-IBK POS tag (Beißwenger/Bartsch/Evert/Würzner 2016). Writes CoNLL-U format to stdout.
-## Usage
+## Docker Usage
```shell
-korapxml2conllu kyc.zip | conllu2cmc
+# Annotate CoNLL-U input
+korapxml2conllu kyc.zip | docker run --rm -i korap/conllu-cmc
+
+# With sparse output (only annotated lines)
+korapxml2conllu kyc.zip | docker run --rm -i korap/conllu-cmc -s
+
+# Generate KorAP-XML zip with CMC annotations
+korapxml2conllu kyc.zip | docker run --rm -i korap/conllu-cmc -s | conllu2korapxml > kyc.cmc.zip
+
+# Show help
+docker run --rm korap/conllu-cmc --help
+```
+
+## Local Usage
+
+```shell
+korapxml2conllu kyc.zip | conllu-cmc
```
### Generate KorAP-XML zip with CMC annotations
```shell
-korapxml2conllu kyc.zip | conllu2cmc -s | conllu2korapxml > kyc.cmc.zip
+korapxml2conllu kyc.zip | conllu-cmc -s | conllu2korapxml > kyc.cmc.zip
```
## Installation
+### Docker (recommended)
+
```shell
-npm install 'git+https://gitlab.ids-mannheim.de/KorAP/korap-conllu-cmc.git'
+docker pull korap/conllu-cmc
+```
+
+### npm
+
+```shell
+npm install 'git+https://gitlab.ids-mannheim.de/KorAP/conllu-cmc-docker.git'
```
### Build from source
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
new file mode 100644
index 0000000..990e4ad
--- /dev/null
+++ b/docker-entrypoint.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+set -e
+
+# Run conllu-cmc with all passed arguments
+exec node /app/src/index.js "$@"
diff --git a/package.json b/package.json
index 10cc2b3..fdfb198 100644
--- a/package.json
+++ b/package.json
@@ -1,13 +1,15 @@
{
- "name": "conllu2cmc",
+ "name": "conllu-cmc",
"version": "1.0.0",
"description": "Reads CoNLL-U format from stdin and annotates emojis, emoticons, hashtags, URLs, email addresses, @addresses, and action words. Writes CoNLL-U format to stdout.",
"main": "src/index.js",
- "bin": "src/index.js",
+ "bin": {
+ "conllu-cmc": "src/index.js"
+ },
"scripts": {
- "pkg-linux": "pkg src/index.js --public -t node18-linux-x64 -o bin/linux/conllu2cmc && chmod +x bin/linux/conllu2cmc",
- "pkg-macos": "pkg src/index.js --public -t node18-macos-x64 -o bin/macos/conllu2cmc",
- "pkg-win": "pkg src/index.js --public -t node18-win-x64 -o bin/win/conllu2cmc",
+ "pkg-linux": "pkg src/index.js --public -t node18-linux-x64 -o bin/linux/conllu-cmc && chmod +x bin/linux/conllu-cmc",
+ "pkg-macos": "pkg src/index.js --public -t node18-macos-x64 -o bin/macos/conllu-cmc",
+ "pkg-win": "pkg src/index.js --public -t node18-win-x64 -o bin/win/conllu-cmc",
"pkg-all": "npm run pkg-linux && npm run pkg-macos && npm run pkg-win",
"pkg": "pkg",
"test": "jest",
diff --git a/src/index.js b/src/index.js
index 370a2ba..cd7f11f 100755
--- a/src/index.js
+++ b/src/index.js
@@ -15,12 +15,12 @@
const sections = [
{
- header: 'conllu2cmc',
+ header: 'conllu-cmc',
content: 'Reads CoNLL-U format from stdin and annotates emojis, emoticons, hashtags, URLs, email addresses, @addresses, and action words. Writes CoNLL-U format to stdout.'
},
{
header: 'Synopsis',
- content: '$ conllu2cmc [-s] < input.conllu > output.conllu'
+ content: '$ conllu-cmc [-s] < input.conllu > output.conllu'
},
{
header: 'Options',