blob: f2898a97c7f9ba19f7429016c6675b03e7aebfca [file] [log] [blame]
Akronb329d272020-11-13 12:45:26 +01001package de.ids_mannheim.korap.plkexport;
2
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;

import java.nio.charset.StandardCharsets;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Properties;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.Version;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.ResponseBuilder;
import javax.ws.rs.core.Response.Status;
import javax.ws.rs.core.StreamingOutput;

import org.glassfish.jersey.media.sse.EventOutput;
import org.glassfish.jersey.media.sse.OutboundEvent;

import static de.ids_mannheim.korap.plkexport.Util.*;
35
Akron1d36eb52020-11-13 17:52:26 +010036/**
37 * Base class for collecting matches and header information
38 * for exporters implementing the Exporter interface.
39 */
Akron1d36eb52020-11-13 17:52:26 +010040public class MatchAggregator {
Akronb329d272020-11-13 12:45:26 +010041
Akron35881012020-11-24 20:05:06 +010042 private final Properties prop = ExWSConf.properties(null);
43
Akron984fe8f2020-11-25 15:21:37 +010044 private static final ObjectMapper mapper = new ObjectMapper();
Akronb329d272020-11-13 12:45:26 +010045
Akron984fe8f2020-11-25 15:21:37 +010046 // In-memory and persistant writer for data
Akron1d36eb52020-11-13 17:52:26 +010047 private Writer writer;
Akron1d36eb52020-11-13 17:52:26 +010048 private File file;
Akron984fe8f2020-11-25 15:21:37 +010049
50 // Meta information for result exports
Akronfddd0582020-11-17 09:49:54 +010051 private JsonNode meta, query, collection;
Akron820dc642020-11-19 13:11:50 +010052 private String fname, queryString, corpusQueryString, src;
Akronc1c18242020-11-18 18:24:12 +010053 private boolean timeExceeded = false;
Akron984fe8f2020-11-25 15:21:37 +010054
55 // Result calculations (partially for progress)
56 private int totalResults = -1,
57 maxResults = -1,
58 fetchedResults = 0;
59
60 // Event writer for progress
Akronaf145eb2020-11-24 16:55:47 +010061 private EventOutput evOut;
Akron984fe8f2020-11-25 15:21:37 +010062
63
64 /**
65 * MimeType of the exporter -
66 * defaults to "text/plain" but
67 * should be overwritten.
68 */
Akrone57937b2020-11-17 08:49:31 +010069 public String getMimeType() {
70 return "text/plain";
71 };
72
Akron984fe8f2020-11-25 15:21:37 +010073
74 /**
75 * Suffix of the exported file -
76 * defaults to "txt" but
77 * should be overwritten.
78 */
Akrone57937b2020-11-17 08:49:31 +010079 public String getSuffix() {
80 return "txt";
81 };
82
Akron984fe8f2020-11-25 15:21:37 +010083
84 /**
85 * Total results of exportable matches.
86 */
Akronc1c18242020-11-18 18:24:12 +010087 public int getTotalResults() {
88 return this.totalResults;
89 };
Akron984fe8f2020-11-25 15:21:37 +010090
Akron62d90a32020-11-18 20:45:38 +010091
Akron984fe8f2020-11-25 15:21:37 +010092 /**
93 * Indicator if time was exceeded when
94 * fetching all matches. This means
95 * that "totalResults" needs
96 * to be treated as a minimum value.
97 */
Akronc1c18242020-11-18 18:24:12 +010098 public boolean hasTimeExceeded() {
99 return this.timeExceeded;
100 };
Akron984fe8f2020-11-25 15:21:37 +0100101
Akronc1c18242020-11-18 18:24:12 +0100102
Akron984fe8f2020-11-25 15:21:37 +0100103 /**
104 * Set the file name of the file to
105 * be exported.
106 */
Akron74122712020-11-17 09:41:21 +0100107 public void setFileName (String fname) {
Akron876017d2020-11-17 09:19:24 +0100108 this.fname = fname;
109 };
110
Akron984fe8f2020-11-25 15:21:37 +0100111
112 /**
113 * Get the file name of the file to
114 * be exported.
115 */
Akron74122712020-11-17 09:41:21 +0100116 public String getFileName () {
117 String s = this.fname;
118 if (s == null)
119 s = this.queryString;
120 if (s == null)
121 return "export";
122 return sanitizeFileName(s);
Akron876017d2020-11-17 09:19:24 +0100123 };
124
Akron984fe8f2020-11-25 15:21:37 +0100125
126 /**
127 * Set the query string.
128 */
Akron74122712020-11-17 09:41:21 +0100129 public void setQueryString (String query) {
130 this.queryString = query;
131 };
132
Akron984fe8f2020-11-25 15:21:37 +0100133
134 /**
135 * Get the query string.
136 */
Akron74122712020-11-17 09:41:21 +0100137 public String getQueryString () {
138 return this.queryString;
139 };
Akrond2072ee2020-11-17 16:12:41 +0100140
Akron984fe8f2020-11-25 15:21:37 +0100141
142 /**
143 * Set the corpus query string.
144 */
Akrond2072ee2020-11-17 16:12:41 +0100145 public void setCorpusQueryString (String query) {
146 this.corpusQueryString = query;
147 };
148
Akron984fe8f2020-11-25 15:21:37 +0100149
150 /**
151 * Get the corpus query string.
152 */
Akrond2072ee2020-11-17 16:12:41 +0100153 public String getCorpusQueryString () {
154 return this.corpusQueryString;
155 };
Akron820dc642020-11-19 13:11:50 +0100156
Akron984fe8f2020-11-25 15:21:37 +0100157
158 /**
159 * Set the source information.
160 */
Akron820dc642020-11-19 13:11:50 +0100161 public void setSource (String host, String path) {
162 StringBuilder s = new StringBuilder(32);
163 if (host != null)
164 s.append(host);
165
166 if (path != null && path.length() > 0)
167 s.append('/').append(path);
168
169 this.src = s.toString();
170 };
171
Akron984fe8f2020-11-25 15:21:37 +0100172
173 /**
174 * Get the source information.
175 */
Akron820dc642020-11-19 13:11:50 +0100176 public String getSource () {
177 return this.src;
178 };
Akron984fe8f2020-11-25 15:21:37 +0100179
Akron74122712020-11-17 09:41:21 +0100180
Akron984fe8f2020-11-25 15:21:37 +0100181 /**
182 * Set the meta JSON blob.
183 */
Akron1d36eb52020-11-13 17:52:26 +0100184 public void setMeta (JsonNode meta) {
185 this.meta = meta;
186 };
Akroneedac912020-11-16 12:54:42 +0100187
Akron984fe8f2020-11-25 15:21:37 +0100188
189 /**
190 * Get the meta JSON blob.
191 */
Akroneedac912020-11-16 12:54:42 +0100192 public JsonNode getMeta () {
193 return this.meta;
194 };
Akron984fe8f2020-11-25 15:21:37 +0100195
Akroneedac912020-11-16 12:54:42 +0100196
Akron984fe8f2020-11-25 15:21:37 +0100197 /**
198 * Set the query JSON blob.
199 */
Akron1d36eb52020-11-13 17:52:26 +0100200 public void setQuery (JsonNode query) {
201 this.query = query;
202 };
203
Akron62d90a32020-11-18 20:45:38 +0100204
Akron984fe8f2020-11-25 15:21:37 +0100205 /**
206 * Get the query JSON blob.
207 */
Akroneedac912020-11-16 12:54:42 +0100208 public JsonNode getQuery () {
209 return this.query;
210 };
211
Akron984fe8f2020-11-25 15:21:37 +0100212
213 /**
214 * Set the collection JSON blob.
215 */
Akron1d36eb52020-11-13 17:52:26 +0100216 public void setCollection (JsonNode collection) {
217 this.collection = collection;
218 };
219
Akron984fe8f2020-11-25 15:21:37 +0100220
221 /**
222 * Get the collection JSON blob.
223 */
Akroneedac912020-11-16 12:54:42 +0100224 public JsonNode getCollection () {
225 return this.collection;
226 };
227
Akron984fe8f2020-11-25 15:21:37 +0100228
229 /**
230 * Set the maximum results to be fetched.
231 *
232 * This needs to be set prior to the first
233 * "addMatch" so it can be taken into account.
234 */
235 public void setMaxResults (int maxResults) {
236 this.maxResults = maxResults;
237 };
238
239
240 /**
241 * Get the maximum results to be fetched.
242 */
243 public int getMaxResults () {
244 return this.maxResults;
245 };
246
247
248 /**
249 * Get the export ID which is the pointer
250 * to where the system can find the temporary
251 * generated file.
252 */
Akron35881012020-11-24 20:05:06 +0100253 public String getExportID () {
254 if (this.file == null)
255 return "";
256 return this.file.getName();
257 };
258
Akron984fe8f2020-11-25 15:21:37 +0100259
Akron35881012020-11-24 20:05:06 +0100260 /**
261 * Set the file based on the export ID
262 */
263 public void setFile (String exportID) {
264 this.file = new File(
265 this.getFileDirectory(),
266 exportID
267 );
268 }
Akron984fe8f2020-11-25 15:21:37 +0100269
Akron35881012020-11-24 20:05:06 +0100270
Akron984fe8f2020-11-25 15:21:37 +0100271 /**
272 * Write header for exportation.
273 *
274 * Should be overwritten.
275 */
Akron1d36eb52020-11-13 17:52:26 +0100276 public void writeHeader (Writer w) throws IOException { };
Akron984fe8f2020-11-25 15:21:37 +0100277
278
279 /**
280 * Write footer for exportation.
281 *
282 * Should be overwritten.
283 */
Akron1d36eb52020-11-13 17:52:26 +0100284 public void writeFooter (Writer w) throws IOException { };
Akron984fe8f2020-11-25 15:21:37 +0100285
286
287 /**
288 * Write a single match.
289 *
290 * Should be overwritten.
291 */
Akron1d36eb52020-11-13 17:52:26 +0100292 public void addMatch (JsonNode n, Writer w) throws IOException { };
Akrond0b1cfe2020-11-20 19:26:52 +0100293
Akron984fe8f2020-11-25 15:21:37 +0100294
295 /**
296 * Set the event stream for progress feedback.
297 */
Akronaf145eb2020-11-24 16:55:47 +0100298 public void setSse (EventOutput eventOutput) {
299 this.evOut = eventOutput;
Akrond0b1cfe2020-11-20 19:26:52 +0100300 };
Akron35881012020-11-24 20:05:06 +0100301
Akronaf145eb2020-11-24 16:55:47 +0100302
Akronb329d272020-11-13 12:45:26 +0100303 /**
Akron984fe8f2020-11-25 15:21:37 +0100304 * Force the creation of a file, even when only
305 * a few matches are requested.
Akron35881012020-11-24 20:05:06 +0100306 */
307 public void forceFile () {
308
309 // Open file if not already opened
310 if (this.file == null) {
311
312 try {
313
314 File dir = getFileDirectory();
315
316 // Create temporary file
317 this.file = File.createTempFile(
318 "idsexp-", "." + this.getSuffix(),
319 dir
320 );
321
Akron35881012020-11-24 20:05:06 +0100322 String s = null;
323
Akron984fe8f2020-11-25 15:21:37 +0100324 // Take temporary data from the in-memory writer
Akron35881012020-11-24 20:05:06 +0100325 if (writer != null)
326 s = writer.toString();
327
Akron984fe8f2020-11-25 15:21:37 +0100328 // Establish persistant writer
Akron35881012020-11-24 20:05:06 +0100329 writer = new BufferedWriter(new FileWriter(this.file, true));
330
Akron984fe8f2020-11-25 15:21:37 +0100331 // Add in-memory string
Akron35881012020-11-24 20:05:06 +0100332 if (s != null)
333 writer.write(s);
334
335 }
Akron35881012020-11-24 20:05:06 +0100336
Akron984fe8f2020-11-25 15:21:37 +0100337 // If data can't be stored on disk, the writer will
338 // rely on in-memory data, which may or may not work in
339 // different contexts.
340 catch (IOException e) {
Akron35881012020-11-24 20:05:06 +0100341 return;
342 };
343 };
Akron984fe8f2020-11-25 15:21:37 +0100344 };
Akron35881012020-11-24 20:05:06 +0100345
Akron984fe8f2020-11-25 15:21:37 +0100346
Akron35881012020-11-24 20:05:06 +0100347 /**
Akron984fe8f2020-11-25 15:21:37 +0100348 * Parse initial JSON file to get header information
349 * and initial matches.
Akronb329d272020-11-13 12:45:26 +0100350 */
Akron62d90a32020-11-18 20:45:38 +0100351 public boolean init (String resp) throws IOException, JsonParseException {
Akron1d36eb52020-11-13 17:52:26 +0100352
Akronc51327b2020-11-13 15:21:26 +0100353 if (resp == null)
Akron62d90a32020-11-18 20:45:38 +0100354 return false;
355
Akronb329d272020-11-13 12:45:26 +0100356 JsonParser parser = mapper.getFactory().createParser(resp);
Akron984fe8f2020-11-25 15:21:37 +0100357 JsonNode root = mapper.readTree(parser);
Akron62d90a32020-11-18 20:45:38 +0100358
Akron984fe8f2020-11-25 15:21:37 +0100359 if (root == null)
Akron62d90a32020-11-18 20:45:38 +0100360 return false;
Akronb329d272020-11-13 12:45:26 +0100361
Akron984fe8f2020-11-25 15:21:37 +0100362 JsonNode meta = root.get("meta");
Akronc1c18242020-11-18 18:24:12 +0100363 this.setMeta(meta);
Akron984fe8f2020-11-25 15:21:37 +0100364 this.setQuery(root.get("query"));
365 this.setCollection(root.get("collection"));
Akronb329d272020-11-13 12:45:26 +0100366
Akronc1c18242020-11-18 18:24:12 +0100367 if (meta != null) {
368 if (meta.has("totalResults")) {
369 this.totalResults = meta.get("totalResults").asInt();
370 if (meta.has("timeExceeded")) {
371 this.timeExceeded = meta.get("timeExceeded").asBoolean();
372 };
373 };
374 };
375
Akronba3ea112020-11-24 22:40:18 +0100376 // In case the writer is already set (e.g. forceFile() was issued),
377 // write in the header
378 if (writer == null) {
379 this.file = null;
380 writer = new StringWriter();
381 };
Akronb329d272020-11-13 12:45:26 +0100382
Akron984fe8f2020-11-25 15:21:37 +0100383 // Write header to exporter
Akron1d36eb52020-11-13 17:52:26 +0100384 this.writeHeader(writer);
Akronb329d272020-11-13 12:45:26 +0100385
Akron984fe8f2020-11-25 15:21:37 +0100386 // Go on by iterating through matches
387 return this.iterateThroughMatches(root.get("matches"));
Akronb329d272020-11-13 12:45:26 +0100388 };
389
Akron984fe8f2020-11-25 15:21:37 +0100390
391 /**
392 * Finalize the export stream.
393 */
394 public Exporter finish() throws IOException {
395 this.writeFooter(this.writer);
396 this.writer.close();
397 return (Exporter) this;
398 };
Akronc51327b2020-11-13 15:21:26 +0100399
Akron984fe8f2020-11-25 15:21:37 +0100400
Akronb329d272020-11-13 12:45:26 +0100401 /**
402 * Append more matches to the result set.
403 */
Akron62d90a32020-11-18 20:45:38 +0100404 public boolean appendMatches (String resp) throws IOException {
Akronb329d272020-11-13 12:45:26 +0100405
Akron35881012020-11-24 20:05:06 +0100406 // Demand creation of a file
407 this.forceFile();
Akronb329d272020-11-13 12:45:26 +0100408
409 JsonParser parser = mapper.getFactory().createParser(resp);
Akron984fe8f2020-11-25 15:21:37 +0100410 JsonNode root = mapper.readTree(parser);
Akronb329d272020-11-13 12:45:26 +0100411
Akron984fe8f2020-11-25 15:21:37 +0100412 if (root == null)
Akron62d90a32020-11-18 20:45:38 +0100413 return false;
Akron1d36eb52020-11-13 17:52:26 +0100414
Akron984fe8f2020-11-25 15:21:37 +0100415 return this.iterateThroughMatches(root.get("matches"));
Akronb329d272020-11-13 12:45:26 +0100416 };
417
Akron35881012020-11-24 20:05:06 +0100418
Akronb329d272020-11-13 12:45:26 +0100419 /**
Akron1d36eb52020-11-13 17:52:26 +0100420 * Serve response entity, either as a string or as a file.
Akronb329d272020-11-13 12:45:26 +0100421 */
Akron1d36eb52020-11-13 17:52:26 +0100422 public ResponseBuilder serve () {
Akronb329d272020-11-13 12:45:26 +0100423
Akron35881012020-11-24 20:05:06 +0100424 ResponseBuilder rb;
Akrone57937b2020-11-17 08:49:31 +0100425
Akron35881012020-11-24 20:05:06 +0100426 if (this.file == null) {
Akrone57937b2020-11-17 08:49:31 +0100427
Akron35881012020-11-24 20:05:06 +0100428 // Serve stream
429 rb = Response.ok(writer.toString());
Akron1d36eb52020-11-13 17:52:26 +0100430 }
Akron35881012020-11-24 20:05:06 +0100431 else if (this.file.exists()) {
Akron1d36eb52020-11-13 17:52:26 +0100432
Akron55def632020-11-26 16:00:02 +0100433 // Serve the file and delete after serving
434 final File expFile = this.file;
435 try {
436 final InputStream in = new FileInputStream(this.file);
437
438 // Remove file after output is streamed
439 StreamingOutput output = new StreamingOutput() {
440 @Override
441 public void write(OutputStream out)
442 throws IOException {
443
444 // Write file data in output stream
445 int length;
446 byte[] buffer = new byte[1024];
447 while ((length = in.read(buffer)) != -1) {
448 out.write(buffer, 0, length);
449 }
450 out.flush(); // Important!
451 in.close();
452
453 // When done, delete the file
454 expFile.delete();
455 }
456 };
457
458 // Serve file
459 rb = Response.ok(output);
460 }
461
462 catch (Exception e) {
463 // File problematic
Akrona3839bb2020-11-26 16:07:31 +0100464 return Response.status(Status.NOT_FOUND);
Akron55def632020-11-26 16:00:02 +0100465 };
Akron35881012020-11-24 20:05:06 +0100466 }
467 else {
468 // File doesn't exist
Akrona3839bb2020-11-26 16:07:31 +0100469 return Response.status(Status.NOT_FOUND);
Akronb329d272020-11-13 12:45:26 +0100470 };
Akrond0b1cfe2020-11-20 19:26:52 +0100471
Akron35881012020-11-24 20:05:06 +0100472 return rb
473 .type(this.getMimeType())
474 .header(
475 "Content-Disposition",
476 "attachment; filename=" +
477 this.getFileName() +
478 '.' +
479 this.getSuffix()
480 );
Akronb329d272020-11-13 12:45:26 +0100481 };
Akron62d90a32020-11-18 20:45:38 +0100482
Akron984fe8f2020-11-25 15:21:37 +0100483
484 /*
485 * Iterate through all matches
486 */
487 private boolean iterateThroughMatches (JsonNode mNodes)
488 throws IOException {
Akron62d90a32020-11-18 20:45:38 +0100489
Akron984fe8f2020-11-25 15:21:37 +0100490 // Send progress information
Akrond0b1cfe2020-11-20 19:26:52 +0100491 this.sendProgress();
492
Akron62d90a32020-11-18 20:45:38 +0100493 if (mNodes == null)
494 return false;
495
496 // Iterate over the results of the current file
497 Iterator<JsonNode> mNode = mNodes.elements();
498 while (mNode.hasNext()) {
Akron984fe8f2020-11-25 15:21:37 +0100499
500 // Stop if all relevant matches are fetched
Akron62d90a32020-11-18 20:45:38 +0100501 if (this.maxResults > 0 &&
Akron3524b8c2020-11-19 01:19:02 +0100502 this.fetchedResults >= this.maxResults) {
Akron62d90a32020-11-18 20:45:38 +0100503 return false;
504 };
Akron3524b8c2020-11-19 01:19:02 +0100505 this.addMatch(mNode.next(), writer);
506 this.fetchedResults++;
Akron62d90a32020-11-18 20:45:38 +0100507 };
508 return true;
509 };
Akron984fe8f2020-11-25 15:21:37 +0100510
511
512 /*
513 * Get the directory where all temporary files are stored.
514 */
515 private File getFileDirectory () {
516
517 String fileDir = prop.getProperty(
518 "conf.file_dir",
519 System.getProperty("java.io.tmpdir")
520 );
521
522 File dir = new File(fileDir);
523
524 // Create directory if not yet existing
525 if (!dir.exists()) {
526 dir.mkdir();
527 }
528
529 // Directory is unwritable - fallback
530 else if (!dir.canWrite()) {
531 fileDir = System.getProperty("java.io.tmpdir");
532 System.err.println("Unable to write to directory");
533 System.err.println("Fallback to " + fileDir);
534 dir = new File(fileDir);
535 };
536 return dir;
537 };
538
539
540 /*
541 * Send a single progress event to the event stream.
542 */
543 private void sendProgress () {
544
545 if (this.evOut == null || this.maxResults == 0)
546 return;
547
548 if (this.evOut.isClosed())
549 return;
550
551 int calc = (int) Math.ceil(((double) this.fetchedResults / this.maxResults) * 100);
552
553 final OutboundEvent.Builder eventBuilder = new OutboundEvent.Builder();
554 eventBuilder.name("Progress");
555 eventBuilder.data(String.valueOf(calc));
556
557 try {
558 this.evOut.write(eventBuilder.build());
559 }
560 catch (IOException e) {
561 return;
562 };
563 };
Akronb329d272020-11-13 12:45:26 +0100564};