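Start token IDs at 1 in CoNLL-U output

The CoNLL-U ID column is 1-based, so emit ix+1 instead of the raw
0-based enumerate index for both the GermaLemma and the pure spaCy branch.

Resolves #3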
diff --git a/systems/parse_spacy_pipe.py b/systems/parse_spacy_pipe.py
index 36f6de3..0b66e4b 100644
--- a/systems/parse_spacy_pipe.py
+++ b/systems/parse_spacy_pipe.py
@@ -24,9 +24,9 @@
conll_lines = anno_obj.metadata # Then we want: [ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC]
for ix, token in enumerate(spacy_doc):
if use_germalemma == "True":
- content = (str(ix), token.text, find_germalemma(token.text, token.tag_, token.lemma_), token.pos_, token.tag_, "_", "_", "_", "_", "_")
+ content = (str(ix+1), token.text, find_germalemma(token.text, token.tag_, token.lemma_), token.pos_, token.tag_, "_", "_", "_", "_", "_")
else:
- content = (str(ix), token.text, token.lemma_, token.pos_, token.tag_, "_", "_", "_", "_", "_") # Pure SpaCy!
+ content = (str(ix+1), token.text, token.lemma_, token.pos_, token.tag_, "_", "_", "_", "_", "_") # Pure SpaCy!
conll_lines.append("\t".join(content))
return "\n".join(conll_lines)