blob: baa6eb60d43fa0aa32297e1a3a4c162a9c4f2571 [file] [log] [blame]
daza972aabc2020-09-01 16:41:30 +02001import requests, logging
daza0498a6a2020-10-06 12:03:12 +02002from lib.CoNLL_Annotation import read_conll, read_conll_generator
daza972aabc2020-09-01 16:41:30 +02003
# Module-level logger, named after this module per the standard logging convention.
logger = logging.getLogger(__name__)
5
6
def file_generator(file_path):
    """Lazily yield the lines of ``file_path`` one at a time.

    Blank separator lines (a bare ``"\\n"``) are yielded as well: the
    downstream CoNLL reader relies on them as sentence boundaries.

    Args:
        file_path: Path of the text file to read.

    Yields:
        str: Each line of the file, trailing newline included.
    """
    with open(file_path, "r") as data_file:
        logger.info("Reading instances from lines in file at: %s", file_path)
        for line in data_file:
            # NOTE: iterating a file object never produces an empty string
            # (every line keeps its "\n"; a final newline-less line is still
            # non-empty), so the original `if not line: continue` guard was
            # dead code and has been removed — behavior is unchanged.
            yield line
13
14
def get_file_text_chunk(line_generator, chunk_size, token_class):
    """Consume up to ``chunk_size`` sentences and return them as parallel lists.

    Args:
        line_generator: Iterator over raw CoNLL file lines.
        chunk_size: Maximum number of sentences to read in this chunk.
        token_class: Token class handed through to ``read_conll``.

    Returns:
        tuple: ``(sentences, gold_pos_tags, file_has_next)`` where the first
        two are parallel lists and ``file_has_next`` is ``False`` once the
        generator produced no sentences.
    """
    annotations, n_sents = read_conll(line_generator, chunk_size, token_class)
    file_has_next = n_sents != 0
    sentences, gold_tags, metadata_blobs = [], [], []
    for annotation in annotations:
        if len(annotation.metadata) > 0:
            metadata_blobs.append("\n".join(annotation.metadata))
        sentences.append(annotation.get_sentence())
        gold_tags.append(annotation.get_pos_tags())
    # metadata_blobs is collected but (as in the original) not returned.
    return sentences, gold_tags, file_has_next
25
26
def get_file_chunk(line_generator, chunk_size, token_class):
    """Consume up to ``chunk_size`` sentences and re-render them as CoNLL-U text.

    Args:
        line_generator: Iterator over raw CoNLL file lines.
        chunk_size: Maximum number of sentences to read in this chunk.
        token_class: Token class handed through to ``read_conll``.

    Returns:
        tuple: ``(raw_text, file_has_next, n_sents)`` — the chunk serialized
        back to a CoNLL-U string, whether more data remains, and the number
        of sentences read.
    """
    chunk, n_sents = read_conll(line_generator, chunk_size, token_class)
    file_has_next = n_sents != 0
    # Accumulate pieces in a list and join once at the end: the original
    # repeated `raw_text += ...` which is quadratic in the chunk size.
    pieces = []
    for anno in chunk:
        if len(anno.metadata) > 0:
            pieces.append("\n".join(anno.metadata) + "\n")
        else:
            pieces.append("\n")
        for tok in anno.tokens:
            pieces.append(tok.get_conllU_line() + "\n")
        pieces.append("\n")  # blank line terminates each sentence block
    return "".join(pieces), file_has_next, n_sents
41
42
def turku_parse_file(raw_text, filename, chunk_ix, timeout=None):
    """POST a CoNLL-U chunk to the local Turku parser and save the response.

    Args:
        raw_text: CoNLL-U text chunk to be parsed.
        filename: Original input filename; everything before the first dot
            becomes the stem of the output filename.
        chunk_ix: Index of this chunk, used in the log line and output name.
        timeout: Seconds to wait for the parser server before raising
            ``requests.Timeout``. Defaults to ``None`` (wait forever),
            matching the original behavior; callers should pass a value so a
            dead server cannot hang the pipeline indefinitely.

    Side effects:
        Writes the server's response text to
        ``"<stem>.parsed.<chunk_ix>.conllu"`` in the working directory.
    """
    stem = filename.split(".")[0]
    out_file_str = f"{stem}.parsed.{chunk_ix}.conllu"
    # Lazy %-style args: the message is only formatted if INFO is enabled.
    logger.info("Sending Request %s to Parser Server...", chunk_ix)
    response = requests.post(
        "http://localhost:7689/",
        data=raw_text.encode("utf-8"),
        timeout=timeout,
    )
    response_to_file(response.text, out_file_str)
50
51
52
def response_to_file(response_str, fname):
    """Write ``response_str`` to ``fname``, overwriting any existing file.

    Uses a context manager so the handle is closed even if the write raises
    (the original open/write/close sequence leaked the handle on error), and
    pins UTF-8 so the output does not depend on the platform locale.

    Args:
        response_str: Text to write (the parser server's response body).
        fname: Path of the output file.
    """
    with open(fname, "w", encoding="utf-8") as fout:
        fout.write(response_str)