Add Cutter, more spaCy models, and the Stanford Tokenizer only
Change-Id: I6ae4c014298d9c07e62850d39408b77cb145a9cd
diff --git a/Dockerfile b/Dockerfile
index fd276af..2d92c07 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -147,7 +147,7 @@
COPY nnsplit_bench /euralex/nnsplit_bench/
-RUN apt-get install -y cargo
+RUN apt-get update && apt-get install -y cargo
RUN cd ./nnsplit_bench && \
cargo build --release
@@ -192,6 +192,10 @@
RUN echo "SpaCy" && python3 ./spacy/spacy_tok.py example.txt
+# Sentence splitter
+RUN python3 -m spacy download de_core_news_sm && \
+ python3 -m spacy download de_dep_news_trf
+
###########################
# Install Stanford parser #
@@ -215,6 +219,17 @@
-file example.txt
+##################
+# Install Cutter #
+##################
+
+RUN pip3 install cutter-ng
+
+COPY cutter /euralex/cutter/
+
+RUN echo "Cutter\n" && python3 ./cutter/cutter.py nosent example.txt
+
+
#################
# Install Datok #
#################