Update benchmarks including cutter
Change-Id: Iea11045afbf548cc81f14b31f2848e935d1f9956
diff --git a/Readme.md b/Readme.md
index 2fba873..00dd987 100644
--- a/Readme.md
+++ b/Readme.md
@@ -33,7 +33,10 @@
## `benchmark.pl`
Performance measurements of the tools. See the tools section for some
-remarks to take into account.
+remarks to take into account. Accepts two numeric parameters:
+
+- The number of times the example file is duplicated
+- The number of iterations to run
## `empirist.pl`
diff --git a/benchmarks/benchmark.pl b/benchmarks/benchmark.pl
index 9dcc170..6b74456 100644
--- a/benchmarks/benchmark.pl
+++ b/benchmarks/benchmark.pl
@@ -135,36 +135,36 @@
},
};
-#delete $models->{'SoMaJo'};
-#delete $models->{'SoMaJo_p2'};
-#delete $models->{'SoMaJo_p4'};
-#delete $models->{'SoMaJo_p8'};
-#delete $models->{'Datok_matok'};
-#delete $models->{'Datok_datok'};
-#delete $models->{'OpenNLP_Simple'};
-#delete $models->{'OpenNLP_Tokenizer_de-ud-gsd'};
-#delete $models->{'OpenNLP_Sentence_de-ud-gsd'};
-#delete $models->{'TreeTagger'};
-#delete $models->{'deep-eos_bi-lstm-de'};
-#delete $models->{'deep-eos_cnn-de'};
-#delete $models->{'deep-eos_lstm-de'};
-#delete $models->{'JTok'};
-#delete $models->{'KorAP-Tokenizer'};
-#delete $models->{'Syntok_tokenizer'};
-#delete $models->{'Syntok_segmenter'};
-#delete $models->{'Waste'};
-#delete $models->{'nnsplit'};
-#delete $models->{'elephant'};
-#delete $models->{'Stanford'};
-#delete $models->{'Stanford_t2'};
-#delete $models->{'Stanford_t4'};
-#delete $models->{'Stanford_t8'};
-delete $models->{'Stanford_tokonly'};
-delete $models->{'cutter'};
-delete $models->{'spacy_tok'};
-delete $models->{'spacy_sentencizer'};
-delete $models->{'spacy_dep'};
-delete $models->{'spacy_stat'};
+delete $models->{'SoMaJo'};
+delete $models->{'SoMaJo_p2'};
+delete $models->{'SoMaJo_p4'};
+delete $models->{'SoMaJo_p8'};
+delete $models->{'Datok_matok'};
+delete $models->{'Datok_datok'};
+delete $models->{'OpenNLP_Simple'};
+delete $models->{'OpenNLP_Tokenizer_de-ud-gsd'};
+delete $models->{'OpenNLP_Sentence_de-ud-gsd'};
+delete $models->{'TreeTagger'};
+delete $models->{'deep-eos_bi-lstm-de'};
+delete $models->{'deep-eos_cnn-de'};
+delete $models->{'deep-eos_lstm-de'};
+delete $models->{'JTok'};
+delete $models->{'KorAP-Tokenizer'};
+delete $models->{'Syntok_tokenizer'};
+delete $models->{'Syntok_segmenter'};
+delete $models->{'Waste'};
+delete $models->{'nnsplit'};
+delete $models->{'elephant'};
+delete $models->{'Stanford'};
+delete $models->{'Stanford_t2'};
+delete $models->{'Stanford_t4'};
+delete $models->{'Stanford_t8'};
+#delete $models->{'Stanford_tokonly'};
+#delete $models->{'cutter'};
+#delete $models->{'spacy_tok'};
+#delete $models->{'spacy_sentencizer'};
+#delete $models->{'spacy_dep'};
+#delete $models->{'spacy_stat'};
diff --git a/benchmarks/empirist.pl b/benchmarks/empirist.pl
index 9c063c7..86e21dd 100644
--- a/benchmarks/empirist.pl
+++ b/benchmarks/empirist.pl
@@ -59,6 +59,10 @@
my $raw = $gold_path . $_[1] . '/raw/' . $_[0];
system 'python3 ./spacy/spacy_tok.py ' . $raw . ' > ' . $empirist_path . $_[1] . '/spacy/' . $_[0];
},
+ cutter => sub {
+ my $raw = $gold_path . $_[1] . '/raw/' . $_[0];
+ system 'python3 ./cutter/cutter.py nosent ' . $raw . ' > ' . $empirist_path . $_[1] . '/cutter/' . $_[0];
+ },
stanford => sub {
my $raw = $gold_path . $_[1] . '/raw/' . $_[0];
system 'CLASSPATH=/euralex/stanford-corenlp-4.4.0/* java edu.stanford.nlp.pipeline.StanfordCoreNLP ' .
@@ -80,6 +84,7 @@
# delete $tools{stanford};
# delete $tools{spacy};
# delete $tools{elephant};
+# delete $tools{cutter};
# Create project folders
foreach (keys %tools) {
diff --git a/benchmarks/ud_tokens.pl b/benchmarks/ud_tokens.pl
index 7173d04..6e30ef1 100644
--- a/benchmarks/ud_tokens.pl
+++ b/benchmarks/ud_tokens.pl
@@ -52,6 +52,9 @@
spacy => sub {
system 'python3 ./spacy/spacy_tok.py ' . $raw . ' > ' . $ud_path . '/spacy/' . $base;
},
+ cutter => sub {
+ system 'python3 ./cutter/cutter.py nosent ' . $raw . ' > ' . $ud_path . '/cutter/' . $base;
+ },
somajo => sub {
system 'somajo-tokenizer ' . $raw . ' 2> /dev/null > ' . $ud_path . '/somajo/' . $base;
},
@@ -75,6 +78,7 @@
# delete $tools{stanford};
# delete $tools{elephant};
# delete $tools{spacy};
+# delete $tools{cutter};
# Create project folders
foreach (keys %tools) {