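Upgrade repo to latest version: add TIGER NewOrth test usage example to conll_to_tok.py and write output to <src_file>.sep.tok when a sentence separator is set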
diff --git a/my_utils/conll_to_tok.py b/my_utils/conll_to_tok.py
index d5656e8..2dbe2ed 100644
--- a/my_utils/conll_to_tok.py
+++ b/my_utils/conll_to_tok.py
@@ -12,11 +12,21 @@
-ss "</S>" \
--token_type CoNLL09_Token
+ *** GERMAN UNIVERSAL DEPS TEST ***
+
python my_utils/conll_to_tok.py \
-s /home/daza/datasets/ud-treebanks-v2.2/UD_German-GSD/de_gsd-ud-test.conllu \
-ss "</S>" \
--token_type CoNLLUP_Token
+ *** TIGER TEST NEW ORTH ***
+
+ python my_utils/conll_to_tok.py \
+ -s /home/daza/datasets/TIGER_conll/data_splits/test/Tiger.NewOrth.test.conll \
+ -ss "</S>" \
+ --token_type CoNLLUP_Token
+
+
For RNNTagger
python my_utils/conll_to_tok.py \
-s /home/daza/datasets/TIGER_conll/tiger_release_aug07.corrected.16012013.conll09 \
@@ -34,7 +44,10 @@
parser.add_argument("-c", "--com_str", help="Skip line if it starts with this string (comment market)", default="# ")
args = parser.parse_args()
- output_file = open(f"{args.src_file}.tok","w")
+ if args.sent_sep == "":
+ output_file = open(f"{args.src_file}.tok","w")
+ else:
+ output_file = open(f"{args.src_file}.sep.tok","w")
for conll_obj in read_conll_generator(args.src_file, token_class=get_token_type(args.token_type), comment_str=args.com_str):
for tok in conll_obj.tokens: