Upgrade repo to latest version: default --gld_token_type to CoNLLUP_Token and add Tiger test-set usage examples
diff --git a/systems/evaluate.py b/systems/evaluate.py
index 11ffd51..839cd37 100644
--- a/systems/evaluate.py
+++ b/systems/evaluate.py
@@ -44,8 +44,8 @@
match, err, symbol = 0, 0, []
y_gld, y_pred, mistakes = [], [], []
for i, gld_tok in enumerate(gld.tokens):
- # sys_lemma = tree_tagger_fixes.get(sys.tokens[i].lemma, sys.tokens[i].lemma) # Omit TreeTagger "errors" because of article lemma disagreement
sys_lemma = sys.tokens[i].lemma
+ # sys_lemma = tree_tagger_fixes.get(sys.tokens[i].lemma, sys.tokens[i].lemma) # Omit TreeTagger "errors" because of article lemma disagreement
y_gld.append(gld_tok.pos_tag)
y_pred.append(sys_lemma)
if gld_tok.lemma == sys_lemma:
@@ -87,41 +87,57 @@
********** TIGER CORPUS ALL ************
- python systems/evaluate.py -t Turku --corpus_name Tiger\
+ python systems/evaluate.py -t Turku --corpus_name Tiger --gld_token_type CoNLL09_Token \
--sys_file /home/daza/datasets/TIGER_conll/tiger_turku_parsed.conllu \
--gld_file /home/daza/datasets/TIGER_conll/tiger_release_aug07.corrected.16012013.conll09
- python systems/evaluate.py -t SpaCy --corpus_name Tiger\
+ python systems/evaluate.py -t SpaCy --corpus_name Tiger --gld_token_type CoNLL09_Token \
--sys_file /home/daza/datasets/TIGER_conll/tiger_spacy_parsed.conllu \
--gld_file /home/daza/datasets/TIGER_conll/tiger_release_aug07.corrected.16012013.conll09
- python systems/evaluate.py -t RNNTagger --corpus_name Tiger\
+ python systems/evaluate.py -t RNNTagger --corpus_name Tiger --gld_token_type CoNLL09_Token \
--sys_file /home/daza/datasets/TIGER_conll/tiger_all.parsed.RNNTagger.conll \
--gld_file /home/daza/datasets/TIGER_conll/tiger_release_aug07.corrected.16012013.conll09
- python systems/evaluate.py -t TreeTagger --corpus_name Tiger\
+ python systems/evaluate.py -t TreeTagger --corpus_name Tiger --gld_token_type CoNLL09_Token \
--sys_file /home/daza/datasets/TIGER_conll/tiger_all.parsed.TreeTagger.conll \
--gld_file /home/daza/datasets/TIGER_conll/tiger_release_aug07.corrected.16012013.conll09
+
+ ********** TIGER CORPUS TEST ************
+
+ python systems/evaluate.py -t SpaCy --corpus_name TigerTestOld \
+ --sys_file /home/daza/datasets/TIGER_conll/tiger_spacy_parsed.test.conllu \
+ --gld_file /home/daza/datasets/TIGER_conll/data_splits/test/Tiger.OldOrth.test.conll
+
+ python systems/evaluate.py -t SpaCy --corpus_name TigerTestNew \
+ --sys_file /home/daza/datasets/TIGER_conll/Tiger.NewOrth.test.spacy_parsed.conllu \
+ --gld_file /home/daza/datasets/TIGER_conll/data_splits/test/Tiger.NewOrth.test.conll
+
+
+ python systems/evaluate.py -t Turku --corpus_name TigerTestNew \
+ --sys_file /home/daza/datasets/TIGER_conll/sys_outputs/Tiger.NewOrth.test.turku_parsed.conllu \
+ --gld_file /home/daza/datasets/TIGER_conll/data_splits/test/Tiger.NewOrth.test.conll
+
********** UNIVERSAL DEPENDENCIES TEST-SET ************
- python systems/evaluate.py -t Turku --gld_token_type CoNLLUP_Token --corpus_name DE_GSD\
+ python systems/evaluate.py -t Turku --corpus_name DE_GSD \
--sys_file /home/daza/datasets/ud-treebanks-v2.2/UD_German-GSD/de_gsd-ud-test.conllu.parsed.0.conllu \
--gld_file /home/daza/datasets/ud-treebanks-v2.2/UD_German-GSD/de_gsd-ud-test.conllu
- python systems/evaluate.py -t SpaCyGL --gld_token_type CoNLLUP_Token --corpus_name DE_GSD\
+ python systems/evaluate.py -t SpaCyGL --corpus_name DE_GSD \
--sys_file /home/daza/datasets/ud-treebanks-v2.2/UD_German-GSD/de_gsd-ud-test.parsed.germalemma.conllu \
--gld_file /home/daza/datasets/ud-treebanks-v2.2/UD_German-GSD/de_gsd-ud-test.conllu
- python systems/evaluate.py -t SpaCy --gld_token_type CoNLLUP_Token --corpus_name DE_GSD\
+ python systems/evaluate.py -t SpaCy --corpus_name DE_GSD \
--sys_file /home/daza/datasets/ud-treebanks-v2.2/UD_German-GSD/de_gsd-ud-test.parsed.conllu \
--gld_file /home/daza/datasets/ud-treebanks-v2.2/UD_German-GSD/de_gsd-ud-test.conllu
- python systems/evaluate.py -t RNNTagger --gld_token_type CoNLLUP_Token --corpus_name DE_GSD\
+ python systems/evaluate.py -t RNNTagger --corpus_name DE_GSD \
--sys_file /home/daza/datasets/ud-treebanks-v2.2/UD_German-GSD/de_gsd-ud-test.RNNtagger.parsed.conll \
--gld_file /home/daza/datasets/ud-treebanks-v2.2/UD_German-GSD/de_gsd-ud-test.conllu
- python systems/evaluate.py -t TreeTagger --gld_token_type CoNLLUP_Token --corpus_name DE_GSD\
+ python systems/evaluate.py -t TreeTagger --corpus_name DE_GSD \
--sys_file /home/daza/datasets/ud-treebanks-v2.2/UD_German-GSD/de_gsd-ud-test.treetagger.parsed.conll \
--gld_file /home/daza/datasets/ud-treebanks-v2.2/UD_German-GSD/de_gsd-ud-test.conllu
@@ -133,9 +149,9 @@
parser = argparse.ArgumentParser()
parser.add_argument("-s", "--sys_file", help="System output in CoNLL-U Format", required=True)
parser.add_argument("-g", "--gld_file", help="Gold Labels to evaluate in CoNLL-U Format", required=True)
- parser.add_argument("-t", "--type_sys", help="Which system produced the outputs", default="system")
parser.add_argument("-c", "--corpus_name", help="Corpus Name for Gold Labels", required=True)
- parser.add_argument("-gtt", "--gld_token_type", help="CoNLL Format of the Gold Data", default="CoNLL09_Token")
+ parser.add_argument("-t", "--type_sys", help="Which system produced the outputs", default="system")
+ parser.add_argument("-gtt", "--gld_token_type", help="CoNLL Format of the Gold Data", default="CoNLLUP_Token")
parser.add_argument("-cs", "--comment_str", help="CoNLL Format of comentaries inside the file", default="#")
args = parser.parse_args()