Successfully evaluated several taggers
diff --git a/my_utils/conll_to_tok.py b/my_utils/conll_to_tok.py
new file mode 100644
index 0000000..9d77543
--- /dev/null
+++ b/my_utils/conll_to_tok.py
@@ -0,0 +1,27 @@
+import argparse
+from lib.CoNLL_Annotation import read_conll_generator, CoNLL09_Token
+
+# TODO: Parallelize this for HUGE Files: All sentences can be processed independently
+
+if __name__ == "__main__":
+	"""
+		EXAMPLE:
+		For TreeTagger:
+			python my_utils/conll_to_tok.py -s /vol/netapp/daza/datasets/TIGER_conll/tiger_release_aug07.corrected.16012013.conll09 -ss "</S>"
+			
+		For RNNTagger:
+			python my_utils/conll_to_tok.py -s /vol/netapp/daza/datasets/TIGER_conll/tiger_release_aug07.corrected.16012013.conll09
+	"""
+	
+	parser = argparse.ArgumentParser()
+	parser.add_argument("-s", "--src_file", help="CoNLLU File to Convert into the .tok input for RNNTagger/TreeTagger", required=True)
+	parser.add_argument("-ss", "--sent_sep", help="Special separator to distinguish sentence boundaries", default="")
+	args = parser.parse_args()
+	
+	output_file = open(f"{args.src_file}.tok","w")
+	
+	for conll_obj in read_conll_generator(args.src_file, token_class=CoNLL09_Token):
+		for tok in conll_obj.tokens:
+			output_file.write(tok.word+"\n")
+	output_file.write(args.sent_sep+"\n")
+	output_file.close()
\ No newline at end of file
diff --git a/my_utils/file_utils.py b/my_utils/file_utils.py
index 317cef0..baa6eb6 100644
--- a/my_utils/file_utils.py
+++ b/my_utils/file_utils.py
@@ -12,6 +12,19 @@
             yield line
 
 
+def get_file_text_chunk(line_generator, chunk_size, token_class):
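+    """Read up to chunk_size sentences; return (plain sentences, gold PoS-tag sequences, file_has_next)."""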
+    file_has_next = True
+    chunk, n_sents = read_conll(line_generator, chunk_size, token_class)
+    if n_sents == 0: file_has_next = False
+    sents, gld, meta = [], [], []
+    for anno in chunk:
+        if len(anno.metadata) > 0: meta.append("\n".join(anno.metadata))
+        sents.append(anno.get_sentence())
+        gld.append(anno.get_pos_tags())
+    return sents, gld, file_has_next
+
+
 def get_file_chunk(line_generator, chunk_size, token_class):
     file_has_next = True
     chunk, n_sents = read_conll(line_generator, chunk_size, token_class)
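
For reference, a minimal sketch of how get_file_text_chunk can be driven from an evaluation script. The iter_lines helper, the file path, and the chunk size below are illustrative assumptions only (the repository's own line generator in my_utils/file_utils.py would normally be used); the rest follows the signatures added in this commit.

    from lib.CoNLL_Annotation import CoNLL09_Token
    from my_utils.file_utils import get_file_text_chunk

    def iter_lines(path):
        # stand-in for the line generator already defined in file_utils.py
        with open(path) as f:
            for line in f:
                yield line

    line_gen = iter_lines("tiger_release_aug07.corrected.16012013.conll09")
    file_has_next, n_sents, n_toks = True, 0, 0
    while file_has_next:
        # each call consumes up to 1000 sentences and reports whether more remain
        sents, gld, file_has_next = get_file_text_chunk(line_gen, 1000, CoNLL09_Token)
        n_sents += len(sents)
        n_toks += sum(len(tags) for tags in gld)  # assumes one tag sequence per sentence
    print(n_sents, "sentences /", n_toks, "gold PoS tags read")

The tagger output produced from the .tok file written by conll_to_tok.py (one token per line, sentences separated by the optional -ss string) can then be aligned against gld sentence by sentence.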