Added Stanford CoreNLP Tokenizer Change-Id: Ib9debb34cb9a66fad041cb584d641e155b46a347

commit: c261642121b09f09e62bf913951371597b244638 [log] [tgz]
author: Akron <nils@diewald-online.de> Mon Mar 07 09:19:38 2022 +0100
committer: Akron <nils@diewald-online.de> Mon Mar 07 09:19:38 2022 +0100
tree: 8944f8f024dcbb632f2b5b113ccacba522138adb
parent: eb590da1b40b64a86602c0b50a46727521437cd3 [diff] [blame]
diff --git a/Dockerfile b/Dockerfile
index c734929..fd276af 100644
--- a/Dockerfile
+++ b/Dockerfile

@@ -193,6 +193,28 @@
 RUN echo "SpaCy" && python3 ./spacy/spacy_tok.py example.txt
 
 
+############################
+# Install Stanford CoreNLP #
+############################
+
+# Following https://stanfordnlp.github.io/CoreNLP/index.html
+
+# Pin CoreNLP to 4.4.0 instead of "latest": the directory and jar names used below hard-code that version.
+# Fetch, unpack and delete the archive in one layer so the zip is not kept in the image; the Maven URL is quoted because it contains '?'.
+RUN wget https://nlp.stanford.edu/software/stanford-corenlp-4.4.0.zip && \
+    unzip stanford-corenlp-4.4.0.zip && \
+    rm stanford-corenlp-4.4.0.zip && \
+    wget "https://search.maven.org/remotecontent?filepath=edu/stanford/nlp/stanford-corenlp/4.4.0/stanford-corenlp-4.4.0-models-german.jar" \
+        -O stanford-corenlp-4.4.0/stanford-corenlp-4.4.0-models-german.jar
+
+# Smoke test: tokenize example.txt as German with CoreNLP. Original note: "Run with threads!" (presumably a TODO to enable multi-threading - confirm).
+RUN echo "StanfordNLP" && \
+    java -cp "/euralex/stanford-corenlp-4.4.0/*" \
+        edu.stanford.nlp.pipeline.StanfordCoreNLP \
+        -annotators tokenize -tokenize.language=german \
+        -file example.txt
+
+
 #################
 # Install Datok #
 #################
commit	c261642121b09f09e62bf913951371597b244638	[log] [tgz]
author	Akron <nils@diewald-online.de>	Mon Mar 07 09:19:38 2022 +0100
committer	Akron <nils@diewald-online.de>	Mon Mar 07 09:19:38 2022 +0100
tree	8944f8f024dcbb632f2b5b113ccacba522138adb
parent	eb590da1b40b64a86602c0b50a46727521437cd3 [diff] [blame]