Added Parsing and Evaluation of Lemmas using Tiger Corpus
diff --git a/DeReKo/my_utils.py b/DeReKo/my_utils.py
new file mode 100644
index 0000000..eaa724a
--- /dev/null
+++ b/DeReKo/my_utils.py
@@ -0,0 +1,44 @@
+import requests, logging
+from CoNLL_Annotation import read_conll, read_conll_generator
+
+logger = logging.getLogger(__name__)
+
+
def file_generator(file_path):
    """Lazily yield the lines of a text file, one at a time.

    The file is decoded explicitly as UTF-8 instead of relying on the
    platform's default encoding, so non-ASCII text is read the same way on
    every machine.

    Args:
        file_path: Path of the file to read.

    Yields:
        Every line of the file, in order and unmodified: the line
        terminator is kept and blank lines are yielded as well.
    """
    with open(file_path, "r", encoding="utf-8") as data_file:
        logger.info("Reading instances from lines in file at: %s", file_path)
        # No emptiness filter is needed: a line read from a file is never
        # the empty string (a blank line still carries its newline).
        yield from data_file
+
+
def get_file_chunk(line_generator, chunk_size, token_class):
    """Read the next chunk via ``read_conll`` and serialize it back to text.

    For every annotation in the chunk the output contains, in order: the
    metadata lines joined by newlines (or a single empty line when the
    annotation has no metadata), one line per token as produced by
    ``get_conllU_line()``, and a terminating empty line.

    Args:
        line_generator: Line source handed to ``read_conll``.
        chunk_size: Chunk size forwarded to ``read_conll``.
        token_class: Token class forwarded to ``read_conll``.

    Returns:
        A tuple ``(raw_text, file_has_next, n_sents)``. ``raw_text`` is the
        serialized chunk, ``n_sents`` is the count reported by
        ``read_conll`` and ``file_has_next`` is ``False`` only when that
        count is zero.
    """
    chunk, n_sents = read_conll(line_generator, chunk_size, token_class)

    # Collect the pieces and join once at the end rather than growing a
    # string with ``+=`` inside the nested loops.
    pieces = []
    for anno in chunk:
        if len(anno.metadata) > 0:
            pieces.append("\n".join(anno.metadata) + "\n")
        else:
            pieces.append("\n")
        pieces.extend(tok.get_conllU_line() + "\n" for tok in anno.tokens)
        pieces.append("\n")

    return "".join(pieces), n_sents != 0, n_sents
+
+
def turku_parse_file(raw_text, filename, chunk_ix,
                     server_url="http://localhost:7689/", timeout=None):
    """Send one chunk of text to the parser server and save the reply.

    The reply is written to ``<stem>.parsed.<chunk_ix>.conllu``, where
    ``<stem>`` is ``filename`` with everything from the first dot of its
    base name removed. Only the base name is split, so dots in the
    directory part of the path (``./data/x.conllu``, ``corpus.v2/x.conllu``)
    no longer truncate the output path.

    Args:
        raw_text: Text to parse; it is sent UTF-8 encoded as the POST body.
        filename: Path of the input file, used only to derive the output
            file name.
        chunk_ix: Index of the chunk, embedded in the output file name and
            in the log message.
        server_url: URL of the parser server.
        timeout: Optional timeout in seconds passed to ``requests.post``;
            ``None`` waits indefinitely.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            that an error page is never saved as if it were a parse.
    """
    import os  # local import keeps this function self-contained

    directory, base_name = os.path.split(filename)
    stem = os.path.join(directory, base_name.split(".")[0])
    out_file_str = f"{stem}.parsed.{chunk_ix}.conllu"
    # For each file make a request to obtain the parse back
    logger.info("Sending Request %s to Parser Server...", chunk_ix)
    response = requests.post(
        server_url,
        data=raw_text.encode("utf-8"),
        timeout=timeout,
    )
    response.raise_for_status()
    response_to_file(response.text, out_file_str)
+
+
+
def response_to_file(response_str, fname):
    """Write ``response_str`` to the file ``fname``, replacing its content.

    The file is opened in a ``with`` block so the handle is closed even if
    the write fails, and it is encoded as UTF-8 regardless of the
    platform's default encoding.

    Args:
        response_str: Text to write.
        fname: Path of the file to create or overwrite.
    """
    with open(fname, "w", encoding="utf-8") as fout:
        fout.write(response_str)