Let token IDs start with 1 in CoNLL-U output. Resolves #3.

commit: d0afe173025b8836e415d843dde13488c6b70c02 [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Thu Feb 01 14:48:43 2024 +0100
committer: Marc Kupietz <kupietz@ids-mannheim.de> Thu Feb 01 14:48:43 2024 +0100
tree: aed6f9df83b2c58646816fb469eb3704ef49b36a
parent: fc34de6fea2619ed1768860b04c08f67adf97aab [diff] [blame]
diff --git a/systems/parse_spacy_pipe.py b/systems/parse_spacy_pipe.py
index 36f6de3..0b66e4b 100644
--- a/systems/parse_spacy_pipe.py
+++ b/systems/parse_spacy_pipe.py

@@ -24,9 +24,9 @@
 	conll_lines = anno_obj.metadata # Then we want: [ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC]
 	for ix, token in enumerate(spacy_doc):
 		if use_germalemma == "True":
-			content = (str(ix), token.text, find_germalemma(token.text, token.tag_, token.lemma_), token.pos_, token.tag_, "_", "_", "_", "_", "_")
+			content = (str(ix+1), token.text, find_germalemma(token.text, token.tag_, token.lemma_), token.pos_, token.tag_, "_", "_", "_", "_", "_")
 		else:
-			content = (str(ix), token.text, token.lemma_, token.pos_, token.tag_, "_", "_", "_", "_", "_") # Pure SpaCy!
+			content = (str(ix+1), token.text, token.lemma_, token.pos_, token.tag_, "_", "_", "_", "_", "_") # Pure SpaCy!
 		conll_lines.append("\t".join(content))
 	return "\n".join(conll_lines)
commit	d0afe173025b8836e415d843dde13488c6b70c02	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Thu Feb 01 14:48:43 2024 +0100
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Thu Feb 01 14:48:43 2024 +0100
tree	aed6f9df83b2c58646816fb469eb3704ef49b36a
parent	fc34de6fea2619ed1768860b04c08f67adf97aab [diff] [blame]