Add Stanford CoreNLP tokenizer
Change-Id: Ib9debb34cb9a66fad041cb584d641e155b46a347
diff --git a/benchmarks/benchmark.pl b/benchmarks/benchmark.pl
index 987a0a5..9ad5bd9 100644
--- a/benchmarks/benchmark.pl
+++ b/benchmarks/benchmark.pl
@@ -78,6 +78,14 @@
},
SpaCy => sub {
system 'python3 ./spacy/spacy_tok.py ./corpus/'.$FILE.' > /dev/null'
+ },
+ Stanford => sub {
+ system 'CLASSPATH=/euralex/stanford-corenlp-4.4.0/* java edu.stanford.nlp.pipeline.StanfordCoreNLP ' .
+ '-props german -annotators tokenize,ssplit,mwt -tokenize.language=german -file ./corpus/' . $FILE
+ },
+ Stanford_t4 => sub {
+ system 'CLASSPATH=/euralex/stanford-corenlp-4.4.0/* java edu.stanford.nlp.pipeline.StanfordCoreNLP ' .
+ '-props german -annotators tokenize,ssplit,mwt -tokenize.language=german -threads=4 -file ./corpus/' . $FILE
}
};
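
For a quick standalone check of the command the new entries run, the invocation can be tried outside the benchmark harness. The following is a minimal Perl sketch only: the corpus file name is assumed, while the CLASSPATH, the /euralex install prefix, and the annotator settings are taken verbatim from the diff above.

    #!/usr/bin/env perl
    # Sketch: run the Stanford CoreNLP command from the new benchmark
    # entry once and report its exit status. Paths follow the diff
    # above; adjust them to the local setup.
    use strict;
    use warnings;

    my $file = './corpus/wikipedia.txt';   # corpus file name assumed

    my $cmd = 'CLASSPATH=/euralex/stanford-corenlp-4.4.0/* java edu.stanford.nlp.pipeline.StanfordCoreNLP '
            . '-props german -annotators tokenize,ssplit,mwt -tokenize.language=german -file ' . $file;

    system($cmd) == 0
        or die "Stanford CoreNLP run failed: $?";
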
@@ -100,6 +108,8 @@
#delete $models->{'nnsplit'};
#delete $models->{'elephant'};
#delete $models->{'SpaCy'};
+#delete $models->{'Stanford'};
+#delete $models->{'Stanford_t4'};
my $t0 = Benchmark->new;
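
For context, the surrounding script appears to time each entry of the $models hash with the core Benchmark module (hence the Benchmark->new call in the second hunk). Below is a minimal sketch of that pattern, reduced to the new single-threaded Stanford entry; the loop structure and the corpus file name are assumptions for illustration, not the script's actual code.

    use strict;
    use warnings;
    use Benchmark;   # exports timediff and timestr by default

    my $FILE = 'wikipedia.txt';   # corpus file name assumed

    # Reduced $models hash containing only the new Stanford entry.
    my $models = {
      Stanford => sub {
        system 'CLASSPATH=/euralex/stanford-corenlp-4.4.0/* java edu.stanford.nlp.pipeline.StanfordCoreNLP ' .
          '-props german -annotators tokenize,ssplit,mwt -tokenize.language=german -file ./corpus/' . $FILE
      }
    };

    # Time each model sub with Benchmark->new / timediff, matching the
    # "my $t0 = Benchmark->new" pattern visible in the second hunk.
    foreach my $name (sort keys %$models) {
      my $t0 = Benchmark->new;
      $models->{$name}->();
      my $t1 = Benchmark->new;
      print "$name: ", timestr(timediff($t1, $t0)), "\n";
    }
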