blob: 317cef0439b169de7f460775532a1a8280b92bad [file] [log] [blame]
daza972aabc2020-09-01 16:41:30 +02001import requests, logging
daza0498a6a2020-10-06 12:03:12 +02002from lib.CoNLL_Annotation import read_conll, read_conll_generator
daza972aabc2020-09-01 16:41:30 +02003
4logger = logging.getLogger(__name__)
5
6
def file_generator(file_path):
    """Lazily yield the raw lines of the file at *file_path*.

    Lines keep their trailing newline; only empty strings are skipped
    (blank lines, which separate CoNLL sentences, are still yielded
    since they contain a newline character).
    """
    with open(file_path, "r") as data_file:
        logger.info("Reading instances from lines in file at: %s", file_path)
        for line in data_file:
            if line:
                yield line
13
14
def get_file_chunk(line_generator, chunk_size, token_class):
    """Read up to *chunk_size* sentences from *line_generator* and re-serialize
    them as one CoNLL-U text chunk.

    Args:
        line_generator: iterator of raw CoNLL lines (e.g. from file_generator).
        chunk_size: maximum number of sentences to consume for this chunk.
        token_class: token class handed through to read_conll for parsing.

    Returns:
        A tuple (raw_text, file_has_next, n_sents) where raw_text is the
        CoNLL-U serialization of the chunk, file_has_next is False once
        read_conll produced zero sentences (input exhausted), and n_sents
        is the number of sentences in the chunk.
    """
    chunk, n_sents = read_conll(line_generator, chunk_size, token_class)
    file_has_next = n_sents != 0
    # Accumulate pieces in a list and join once: avoids the quadratic cost
    # of repeated string concatenation on large chunks.
    parts = []
    for anno in chunk:
        if anno.metadata:
            parts.append("\n".join(anno.metadata) + "\n")
        else:
            # No metadata: keep a leading blank line so the layout matches
            # the metadata-bearing case.
            parts.append("\n")
        for tok in anno.tokens:
            parts.append(tok.get_conllU_line() + "\n")
        # Blank line terminates each sentence in CoNLL-U.
        parts.append("\n")
    return "".join(parts), file_has_next, n_sents
29
30
def turku_parse_file(raw_text, filename, chunk_ix, parser_url="http://localhost:7689/"):
    """POST one CoNLL-U text chunk to the Turku parser server and write the
    parsed result to ``<basename>.parsed.<chunk_ix>.conllu``.

    Args:
        raw_text: the chunk's CoNLL-U text to be parsed.
        filename: source file name; everything before the first dot is used
            as the output basename (NOTE(review): a dot in a directory name
            would truncate the path — confirm callers pass plain names).
        chunk_ix: running chunk index, embedded in the output file name.
        parser_url: parser endpoint; defaults to the original hard-coded
            local server address, so existing callers are unaffected.

    Raises:
        requests.HTTPError: if the server answers with an error status,
            instead of silently writing the error page to the output file.
    """
    basename = filename.split(".")[0]
    out_file_str = f"{basename}.parsed.{chunk_ix}.conllu"
    # For each file make a request to obtain the parse back.
    # Lazy %-style args avoid formatting when INFO logging is disabled.
    logger.info("Sending Request %s to Parser Server...", chunk_ix)
    response = requests.post(parser_url, data=raw_text.encode('utf-8'))
    # Fail fast on HTTP errors so a 500 body never masquerades as a parse.
    response.raise_for_status()
    response_to_file(response.text, out_file_str)
38
39
40
def response_to_file(response_str, fname):
    """Write the parser server's response text to *fname*.

    Uses a context manager so the file handle is closed even if the write
    raises (the original leaked the handle on error), and writes UTF-8
    explicitly so non-ASCII CoNLL tokens are not at the mercy of the
    platform's locale encoding.

    Args:
        response_str: text to persist (the parsed CoNLL-U chunk).
        fname: destination path; an existing file is overwritten.
    """
    with open(fname, "w", encoding="utf-8") as fout:
        fout.write(response_str)