blob: eaa724ac1b60b629a85b686ce406291fb4c4ec31 [file] [log] [blame]
import logging
import os

import requests

from CoNLL_Annotation import read_conll, read_conll_generator
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def file_generator(file_path):
    """Lazily yield the lines of a text file, one at a time.

    The file is opened explicitly as UTF-8 so that reading does not depend
    on the platform's default locale encoding. Every line is yielded
    unchanged: line terminators are kept and blank lines are not filtered.

    Args:
        file_path: Path of the text file to read.

    Yields:
        Each line of the file as a ``str``, in file order.
    """
    with open(file_path, "r", encoding="utf-8") as data_file:
        # Logged lazily: this runs when the first line is requested, not
        # when the generator object is created.
        logger.info("Reading instances from lines in file at: %s", file_path)
        # Iterating a file object never produces an empty string (a blank
        # line is "\n"), so no emptiness filter is needed here.
        yield from data_file
def get_file_chunk(line_generator, chunk_size, token_class):
    """Read the next chunk of sentences and serialize it back to text.

    The three arguments are forwarded unchanged to ``read_conll``. Each
    annotation it returns is rendered as:

    * its metadata entries joined by newlines and followed by a newline,
      or a single blank line when the annotation has no metadata;
    * one line per token, taken from ``token.get_conllU_line()``;
    * a terminating blank line.

    Args:
        line_generator: Source of input lines, passed to ``read_conll``.
        chunk_size: Chunk size, passed to ``read_conll``.
        token_class: Token class, passed to ``read_conll``.

    Returns:
        A tuple ``(raw_text, file_has_next, n_sents)``. ``file_has_next``
        is False only when ``read_conll`` reported zero sentences.
    """
    annotations, n_sents = read_conll(line_generator, chunk_size, token_class)

    pieces = []
    for annotation in annotations:
        if len(annotation.metadata) > 0:
            pieces.append("\n".join(annotation.metadata) + "\n")
        else:
            pieces.append("\n")
        pieces.extend(
            token.get_conllU_line() + "\n" for token in annotation.tokens
        )
        pieces.append("\n")

    return "".join(pieces), n_sents != 0, n_sents
def turku_parse_file(raw_text, filename, chunk_ix):
    """Send one chunk of text to the parser server and save the response.

    The response body is written to ``<stem>.parsed.<chunk_ix>.conllu``,
    where ``<stem>`` is ``filename`` with everything from the first dot of
    its *base name* onwards removed. The directory part is kept intact, so
    paths such as ``./data/corpus.conllu`` or ``runs.v2/corpus.conllu``
    produce an output file next to the input instead of a truncated path.

    Args:
        raw_text: Text to parse; sent UTF-8 encoded as the POST body.
        filename: Path of the input file, used to derive the output name.
        chunk_ix: Index of this chunk, embedded in the output file name.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Only strip the extension from the base name; dots in directory
    # components (".", "..", "dir.v2") must not truncate the path.
    directory, base_name = os.path.split(filename)
    stem = os.path.join(directory, base_name.split(".")[0])
    out_file_str = f"{stem}.parsed.{chunk_ix}.conllu"

    # For each file make a request to obtain the parse back
    logger.info("Sending Request %s to Parser Server...", chunk_ix)
    response = requests.post("http://localhost:7689/", data=raw_text.encode('utf-8'))
    # Fail loudly rather than saving an HTTP error page as if it were a parse.
    response.raise_for_status()
    response_to_file(response.text, out_file_str)
def response_to_file(response_str, fname):
    """Write a string to a file, replacing any existing content.

    The file is written as UTF-8 regardless of the platform's default
    encoding, and the handle is closed even if the write fails.

    Args:
        response_str: Text to write.
        fname: Path of the output file; created or truncated.
    """
    with open(fname, "w", encoding="utf-8") as fout:
        fout.write(response_str)