Successfully evaluated several taggers
diff --git a/my_utils/file_utils.py b/my_utils/file_utils.py
index 317cef0..baa6eb6 100644
--- a/my_utils/file_utils.py
+++ b/my_utils/file_utils.py
@@ -12,6 +12,18 @@
yield line
+def get_file_text_chunk(line_generator, chunk_size, token_class):
+    """Return (sentences, POS-tag lists, file_has_next) for one read_conll chunk."""
+    chunk, n_sents = read_conll(line_generator, chunk_size, token_class)
+    # Per-sentence metadata is not part of the return value, so it is not gathered.
+    sents, gld = [], []
+    for anno in chunk:
+        sents.append(anno.get_sentence())
+        gld.append(anno.get_pos_tags())
+    # read_conll reporting zero sentences is treated as "nothing further to read".
+    return sents, gld, n_sents != 0
+
+
def get_file_chunk(line_generator, chunk_size, token_class):
file_has_next = True
chunk, n_sents = read_conll(line_generator, chunk_size, token_class)