blob: 093f802f362d2632c323195853a51996cbd4341b [file] [log] [blame]
hebastae2bd6e42020-03-03 19:07:20 +01001package de.ids_mannheim.korap.plkexport;
2
hebastae0496762020-03-30 00:11:01 +02003import java.io.IOException;
Akronbb076c72020-09-28 14:39:15 +02004import java.io.StringWriter;
Akronb87af2f2020-09-24 14:38:02 +02005import java.io.InputStream;
Akron69cd35d2020-11-20 13:17:31 +01006import java.lang.Thread;
Akronbb076c72020-09-28 14:39:15 +02007import java.util.HashMap;
Akronc931cd02020-09-15 10:54:17 +02008import java.util.Properties;
Akron69cd35d2020-11-20 13:17:31 +01009import java.util.Base64;
Akrona578eb02020-11-26 12:34:17 +010010import java.util.ResourceBundle;
11import java.util.Locale;
Akron669d0bb2020-10-02 13:12:01 +020012import java.util.regex.Matcher;
13import java.util.regex.Pattern;
hebastaff955d62020-05-27 15:59:14 +020014
Marc Kupietza2a17972022-03-27 12:03:48 +020015import jakarta.ws.rs.BadRequestException;
16import jakarta.ws.rs.WebApplicationException;
17import jakarta.ws.rs.FormParam;
18import jakarta.ws.rs.QueryParam;
19import jakarta.ws.rs.PathParam;
20import jakarta.ws.rs.POST;
21import jakarta.ws.rs.GET;
22import jakarta.ws.rs.Path;
23import jakarta.ws.rs.Produces;
24import jakarta.ws.rs.Consumes;
25import jakarta.ws.rs.client.Client;
26import jakarta.ws.rs.client.ClientBuilder;
27import jakarta.ws.rs.client.WebTarget;
28import jakarta.ws.rs.client.Invocation;
29import jakarta.ws.rs.core.Context;
30import jakarta.ws.rs.core.UriBuilder;
31import jakarta.ws.rs.core.MediaType;
32import jakarta.ws.rs.core.Response;
33import jakarta.ws.rs.core.Response.ResponseBuilder;
34import jakarta.ws.rs.core.Response.Status;
Akronaf145eb2020-11-24 16:55:47 +010035
Akron35881012020-11-24 20:05:06 +010036import static org.apache.commons.io.FilenameUtils.getExtension;
37
Akrona241a472021-04-27 18:55:58 +020038import org.tinylog.Logger;
39
Akronaf145eb2020-11-24 16:55:47 +010040import org.glassfish.jersey.media.sse.EventOutput;
41import org.glassfish.jersey.media.sse.OutboundEvent;
42import org.glassfish.jersey.media.sse.SseFeature;
43
Marc Kupietza2a17972022-03-27 12:03:48 +020044import jakarta.servlet.http.Cookie;
45import jakarta.servlet.http.HttpServletRequest;
Akrona578eb02020-11-26 12:34:17 +010046import org.glassfish.jersey.server.ContainerRequest;
hebastaff955d62020-05-27 15:59:14 +020047
Akrone34bd4f2020-09-18 10:46:01 +020048import static de.ids_mannheim.korap.plkexport.Util.*;
49
Akronbb076c72020-09-28 14:39:15 +020050// Template engine
51import freemarker.template.Configuration;
52import freemarker.template.Template;
53
hebastafed41942020-11-02 10:00:36 +010054/**
Akron984fe8f2020-11-25 15:21:37 +010055 * TODO:
Akron18c627d2020-11-26 22:09:08 +010056 * - Rename "is cutted" to "truncated".
Akron83f39942021-03-22 16:37:25 +010057 * - Restructure:
58 * - Load the exporter objects as part of
59 * the MatchAggregator instead of full
60 * inheritance.
Akron55def632020-11-26 16:00:02 +010061 * - Switch Cookie mechanism to container req for
62 * better testing capabilities.
63 * - Test ExWsConf.
Akron984fe8f2020-11-25 15:21:37 +010064 * - Do not expect all meta data per match.
Akron69cd35d2020-11-20 13:17:31 +010065 * - Upgrade default pageSize to 50.
Akronea77cb42020-11-18 14:04:21 +010066 * - Add loading marker.
Akrone2db9dd2020-11-19 13:35:12 +010067 * - Add infos to JsonExporter.
Akron18c627d2020-11-26 22:09:08 +010068 * - e.g. q & cq string representation.
Akron984fe8f2020-11-25 15:21:37 +010069 * - Check pageSize after init (so pageSize is not
70 * greater than what the server supports).
71 * - Restrict CORS to meaningful sources.
72 * - Add arbitrary information for RTF header
Akron984fe8f2020-11-25 15:21:37 +010073 * - Add information regarding max_exp_limit
74 * to export form.
75 * - Maybe set matches from parent window
76 * (if available) as export default (if
77 * smaller than max_exp_limit)
78 * - IDS-internal user should be allowed 100.000
79 * matches per export, while external users
80 * should be limited to 10.000.
81 * - Add 1000-separator to numbers.
Akrona578eb02020-11-26 12:34:17 +010082 * - Get the list of available locales based
83 * on the given ResourceBundle.
84 * - Check for q/ql definition in JS.
Akron984fe8f2020-11-25 15:21:37 +010085 *
86 * IDEAS:
Akron18c627d2020-11-26 22:09:08 +010087 * - Support more granular exporter-specific options
88 * - Create a template mechanism for RTF export.
89 * - Support CSV separator and quote symbol change.
Akron984fe8f2020-11-25 15:21:37 +010090 * - Prettify VC in RTF export (maybe similar to
91 * the visualisation in Kalamar)
hebastafed41942020-11-02 10:00:36 +010092 */
hebastaa66693a2020-07-19 16:51:28 +020093
hebastae0496762020-03-30 00:11:01 +020094@Path("/")
Akron34360792020-11-20 15:06:00 +010095public class Service {
hebastae2bd6e42020-03-03 19:07:20 +010096
Akron35881012020-11-24 20:05:06 +010097 private Properties prop = ExWSConf.properties(null);
Akronb87af2f2020-09-24 14:38:02 +020098
Akronbb076c72020-09-28 14:39:15 +020099 private final ClassLoader cl = Thread.currentThread().getContextClassLoader();
100
101 InputStream is = cl.getResourceAsStream("assets/export.js");
102 private final String exportJsStr = streamToString(is);
Akron7f242bb2020-09-28 20:19:11 +0200103
Helge605cf6d2023-03-24 16:04:19 +0100104 Configuration cfg = new Configuration(Configuration.VERSION_2_3_32);
105
Akron7f242bb2020-09-28 20:19:11 +0200106 {
Akron34360792020-11-20 15:06:00 +0100107 cfg.setClassForTemplateLoading(Service.class, "/assets/templates");
Akron7f242bb2020-09-28 20:19:11 +0200108 cfg.setDefaultEncoding("UTF-8");
109 }
Akronbe6777b2020-10-02 11:35:03 +0200110
111 private final static String octets =
112 "(?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})";
113
114 private final static String ipre =
115 octets + "\\." + octets + "\\." + octets + "\\." + octets;
Akron669d0bb2020-10-02 13:12:01 +0200116
117 private static Pattern authrep = Pattern.compile("\"auth\":\"([^\"]+?)\"");
118
119 private final static Base64.Decoder b64Dec = Base64.getDecoder();
120
Akrona241a472021-04-27 18:55:58 +0200121
Akron669d0bb2020-10-02 13:12:01 +0200122 @Context
Akrona578eb02020-11-26 12:34:17 +0100123 private HttpServletRequest servletReq;
124
125 @Context
126 private ContainerRequest req;
Akrone57937b2020-11-17 08:49:31 +0100127
Akron984fe8f2020-11-25 15:21:37 +0100128 /*
129 * Private method to run the export,
130 * either static or streaming
131 */
Akron35881012020-11-24 20:05:06 +0100132 private Exporter export(String fname,
Akronb91c3032020-11-23 19:24:40 +0100133 String format,
134 String q,
135 String cq,
136 String ql,
137 String cutoffStr,
138 int hitc,
Akronaf145eb2020-11-24 16:55:47 +0100139 EventOutput eventOutput
140 ) throws WebApplicationException {
Akrond0b1cfe2020-11-20 19:26:52 +0100141
Akron984fe8f2020-11-25 15:21:37 +0100142 // These parameters are mandatory
Akron90799f12020-09-17 17:10:34 +0200143 String[][] params = {
Akron90799f12020-09-17 17:10:34 +0200144 { "format", format },
145 { "q", q },
146 { "ql", ql }
147 };
hebastae0496762020-03-30 00:11:01 +0200148
Akron984fe8f2020-11-25 15:21:37 +0100149 // Check that all mandatory parameters are available
hebastaff955d62020-05-27 15:59:14 +0200150 for (int i = 0; i < params.length; i++) {
151 if (params[i][1] == null || params[i][1].trim().isEmpty())
Akronb91c3032020-11-23 19:24:40 +0100152 throw new WebApplicationException(
153 responseForm(Status.BAD_REQUEST,
154 "Parameter " + "\""
155 + params[i][0] + "\"" +
156 " is missing or empty"));
Akron232248d2020-11-11 18:55:10 +0100157 };
Akron232248d2020-11-11 18:55:10 +0100158
159 // Retrieve cutoff value
hebastafed41942020-11-02 10:00:36 +0100160 boolean cutoff = false;
Akron62d90a32020-11-18 20:45:38 +0100161 if (cutoffStr != null && (
162 cutoffStr.equals("true") ||
163 cutoffStr.equals("1"))
164 ) {
Akron232248d2020-11-11 18:55:10 +0100165 cutoff = true;
166 };
Akronaf145eb2020-11-24 16:55:47 +0100167
Akron62d90a32020-11-18 20:45:38 +0100168 // Load configuration values
Akron69cd35d2020-11-20 13:17:31 +0100169 String scheme = prop.getProperty("api.scheme", "https");
170 String port = prop.getProperty("api.port", "8089");
171 String host = prop.getProperty("api.host", "localhost");
172 String path = prop.getProperty("api.path", "");
Akron5405dec2020-11-26 20:39:24 +0100173 String source = prop.getProperty("api.source");
Akron69cd35d2020-11-20 13:17:31 +0100174 int pageSize = Integer.parseInt(prop.getProperty("conf.page_size", "5"));
175 int maxResults = Integer.parseInt(prop.getProperty("conf.max_exp_limit", "10000"));
hebastaa66693a2020-07-19 16:51:28 +0200176
Akron62d90a32020-11-18 20:45:38 +0100177 // Adjust the number of requested hits
Akron99fda9e2020-11-20 13:45:02 +0100178 if (hitc > 0 && hitc < maxResults)
Akron62d90a32020-11-18 20:45:38 +0100179 maxResults = hitc;
Akron99fda9e2020-11-20 13:45:02 +0100180
181 // If less than pageSize results are requested - dont't fetch more
182 if (maxResults < pageSize)
183 pageSize = maxResults;
Akronb91c3032020-11-23 19:24:40 +0100184
185 ResponseBuilder builder = null;
186 Client client = ClientBuilder.newClient();
187
Akrone57937b2020-11-17 08:49:31 +0100188 // Create initial search uri
Akronbfd5fb82020-09-15 14:28:25 +0200189 UriBuilder uri = UriBuilder.fromPath("/api/v1.0/search")
190 .host(host)
191 .port(Integer.parseInt(port))
Akron480a9082020-09-16 13:14:09 +0200192 .scheme(scheme)
Akronbfd5fb82020-09-15 14:28:25 +0200193 .queryParam("q", q)
Akron984fe8f2020-11-25 15:21:37 +0100194 .queryParam("context", "40-t,40-t")
Akronbfd5fb82020-09-15 14:28:25 +0200195 .queryParam("ql", ql)
Akron99fda9e2020-11-20 13:45:02 +0100196 .queryParam("count", pageSize)
Akronbfd5fb82020-09-15 14:28:25 +0200197 ;
Akron68f221b2020-09-29 23:15:43 +0200198
Akron984fe8f2020-11-25 15:21:37 +0100199 // Not yet supported:
200 // .queryParam("context", "sentence")
201
Akron0bdcab22020-11-19 18:36:16 +0100202 if (cq != null && cq.length() > 0)
Akrond2072ee2020-11-17 16:12:41 +0100203 uri = uri.queryParam("cq", cq);
Akrond2072ee2020-11-17 16:12:41 +0100204
Akron984fe8f2020-11-25 15:21:37 +0100205 if (path != "")
Akron68f221b2020-09-29 23:15:43 +0200206 uri = uri.path(path);
Akron69cd35d2020-11-20 13:17:31 +0100207
Akronbe6777b2020-10-02 11:35:03 +0200208 // Get client IP, in case service is behind a proxy
Akron669d0bb2020-10-02 13:12:01 +0200209 // Get auth (temporarily) via Session riding
Akron984fe8f2020-11-25 15:21:37 +0100210 String xff = "", auth = "";
Akrona578eb02020-11-26 12:34:17 +0100211 if (servletReq != null) {
212 xff = getClientIP(servletReq.getHeader("X-Forwarded-For"));
Akron62d90a32020-11-18 20:45:38 +0100213 if (xff == "")
Akrona578eb02020-11-26 12:34:17 +0100214 xff = servletReq.getRemoteAddr();
Akron669d0bb2020-10-02 13:12:01 +0200215
Akrona578eb02020-11-26 12:34:17 +0100216 auth = authFromCookie(servletReq);
Akronbe6777b2020-10-02 11:35:03 +0200217 };
Akron669d0bb2020-10-02 13:12:01 +0200218
Akron328f9ea2020-09-22 17:11:22 +0200219 String resp;
hebastafed41942020-11-02 10:00:36 +0100220 WebTarget resource;
Akron696c0632020-11-12 15:30:51 +0100221 Invocation.Builder reqBuilder;
Akron69cd35d2020-11-20 13:17:31 +0100222
Akron328f9ea2020-09-22 17:11:22 +0200223 try {
hebastafed41942020-11-02 10:00:36 +0100224 resource = client.target(uri.build());
Akron696c0632020-11-12 15:30:51 +0100225 reqBuilder = resource.request(MediaType.APPLICATION_JSON);
226 resp = authBuilder(reqBuilder, xff, auth).get(String.class);
Akron984fe8f2020-11-25 15:21:37 +0100227 }
228
229 catch (Exception e) {
Akron328f9ea2020-09-22 17:11:22 +0200230 throw new WebApplicationException(
Akron7f242bb2020-09-28 20:19:11 +0200231 responseForm(Status.BAD_GATEWAY, "Unable to reach Backend")
Akron328f9ea2020-09-22 17:11:22 +0200232 );
233 }
hebastae0496762020-03-30 00:11:01 +0200234
Akron984fe8f2020-11-25 15:21:37 +0100235 // Get and initialize exporter based on requested format
Akron35881012020-11-24 20:05:06 +0100236 Exporter exp = getExporter(format);
Akron62d90a32020-11-18 20:45:38 +0100237 exp.setMaxResults(maxResults);
Akron74122712020-11-17 09:41:21 +0100238 exp.setQueryString(q);
Akrond2072ee2020-11-17 16:12:41 +0100239 exp.setCorpusQueryString(cq);
Akron5405dec2020-11-26 20:39:24 +0100240 if (source != null)
241 exp.setSource(source);
242 else
243 exp.setSource(host, path);
Akron62d90a32020-11-18 20:45:38 +0100244
Akron984fe8f2020-11-25 15:21:37 +0100245 // Set filename
246 if (fname != null)
Akron74122712020-11-17 09:41:21 +0100247 exp.setFileName(fname);
Akron99fda9e2020-11-20 13:45:02 +0100248
Akron984fe8f2020-11-25 15:21:37 +0100249 // Set progress mechanism, if passed
Akron35881012020-11-24 20:05:06 +0100250 if (eventOutput != null) {
Akronaf145eb2020-11-24 16:55:47 +0100251 exp.setSse(eventOutput);
Akron984fe8f2020-11-25 15:21:37 +0100252
253 // Progress requires the creation
254 // of temporary files
Akron35881012020-11-24 20:05:06 +0100255 exp.forceFile();
256 };
Akrond0b1cfe2020-11-20 19:26:52 +0100257
Akron984fe8f2020-11-25 15:21:37 +0100258 // Initialize export with meta data
259 // and first matches
Akronacc9f7a2020-11-17 17:21:40 +0100260 try {
Akron984fe8f2020-11-25 15:21:37 +0100261
262 // TODO:
263 // Check return value.
Akronacc9f7a2020-11-17 17:21:40 +0100264 exp.init(resp);
Akron984fe8f2020-11-25 15:21:37 +0100265 }
266
267 catch (Exception e) {
Akrona241a472021-04-27 18:55:58 +0200268 Logger.error(e);
Akron60a16282021-04-28 20:01:11 +0200269 String err = e.getMessage();
270 if (err == null) {
271 err = "Unable to initialize export";
272 };
273
Akronacc9f7a2020-11-17 17:21:40 +0100274 throw new WebApplicationException(
275 responseForm(
276 Status.INTERNAL_SERVER_ERROR,
Akron60a16282021-04-28 20:01:11 +0200277 err
Akronacc9f7a2020-11-17 17:21:40 +0100278 )
279 );
Akron62d90a32020-11-18 20:45:38 +0100280 };
Akrond2072ee2020-11-17 16:12:41 +0100281
Akron69cd35d2020-11-20 13:17:31 +0100282 // Calculate how many results to fetch
Akron62d90a32020-11-18 20:45:38 +0100283 int fetchCount = exp.getTotalResults();
Akron984fe8f2020-11-25 15:21:37 +0100284 if (exp.hasTimeExceeded() || fetchCount > maxResults)
Akron62d90a32020-11-18 20:45:38 +0100285 fetchCount = maxResults;
Akrond0b1cfe2020-11-20 19:26:52 +0100286
Akron984fe8f2020-11-25 15:21:37 +0100287 // fetchCount may be different to maxResults now,
288 // so reset after init (for accurate progress)
Akronb91c3032020-11-23 19:24:40 +0100289 exp.setMaxResults(fetchCount);
Akron62d90a32020-11-18 20:45:38 +0100290
Akrond3df5b02020-11-12 18:23:17 +0100291 // If only one page should be exported there is no need
Akron984fe8f2020-11-25 15:21:37 +0100292 // for a temporary export file, unless progress is
293 // requested. In case all matches are already fetched,
294 // stop here as well.
295 if (cutoff || fetchCount <= pageSize) {
296
Akron35881012020-11-24 20:05:06 +0100297 try {
Akron984fe8f2020-11-25 15:21:37 +0100298
299 // Close all export writers
Akron35881012020-11-24 20:05:06 +0100300 exp.finish();
Akron984fe8f2020-11-25 15:21:37 +0100301 }
302
303 catch (Exception e) {
Akrona241a472021-04-27 18:55:58 +0200304 Logger.error(e);
Akron60a16282021-04-28 20:01:11 +0200305 String err = e.getMessage();
306 if (err == null) {
307 err = "Unable to finish export";
308 };
309
Akron35881012020-11-24 20:05:06 +0100310 throw new WebApplicationException(
311 responseForm(
312 Status.INTERNAL_SERVER_ERROR,
Akron60a16282021-04-28 20:01:11 +0200313 err
Akron35881012020-11-24 20:05:06 +0100314 )
315 );
316 };
Akronb91c3032020-11-23 19:24:40 +0100317 return exp;
Akrond0b1cfe2020-11-20 19:26:52 +0100318 };
hebastae0496762020-03-30 00:11:01 +0200319
Akron984fe8f2020-11-25 15:21:37 +0100320 /*
321 * Page through all results
322 */
hebastaa66693a2020-07-19 16:51:28 +0200323
Akrond0b1cfe2020-11-20 19:26:52 +0100324 // It's not important anymore to get totalResults
325 uri.queryParam("cutoff", "true");
Akrond3df5b02020-11-12 18:23:17 +0100326
Akrond0b1cfe2020-11-20 19:26:52 +0100327 // Set offset for paging as a template
328 uri.queryParam("offset", "{offset}");
Akron96907962020-11-12 18:47:07 +0100329
Akrond0b1cfe2020-11-20 19:26:52 +0100330 try {
Akronacc9f7a2020-11-17 17:21:40 +0100331
Akrond0b1cfe2020-11-20 19:26:52 +0100332 // Iterate over all results
333 for (int i = pageSize; i <= fetchCount; i+=pageSize) {
Akron984fe8f2020-11-25 15:21:37 +0100334
Akrond0b1cfe2020-11-20 19:26:52 +0100335 resource = client.target(uri.build(i));
336 reqBuilder = resource.request(MediaType.APPLICATION_JSON);
337 resp = authBuilder(reqBuilder, xff, auth).get(String.class);
Akron62d90a32020-11-18 20:45:38 +0100338
Akrond0b1cfe2020-11-20 19:26:52 +0100339 // Stop when no more matches are allowed
340 if (!exp.appendMatches(resp))
341 break;
342 }
Akron35881012020-11-24 20:05:06 +0100343
Akron984fe8f2020-11-25 15:21:37 +0100344 // Close all export writers
Akron35881012020-11-24 20:05:06 +0100345 exp.finish();
346
Akron984fe8f2020-11-25 15:21:37 +0100347 }
348
349 catch (Exception e) {
Akrona241a472021-04-27 18:55:58 +0200350 Logger.error(e);
Akron60a16282021-04-28 20:01:11 +0200351 String err = e.getMessage();
352 if (err == null) {
353 err = "Unable to iterate through results";
354 };
Akrond0b1cfe2020-11-20 19:26:52 +0100355 throw new WebApplicationException(
356 responseForm(
357 Status.INTERNAL_SERVER_ERROR,
Akron60a16282021-04-28 20:01:11 +0200358 err
Akrond0b1cfe2020-11-20 19:26:52 +0100359 )
360 );
361 };
Akronb91c3032020-11-23 19:24:40 +0100362
363 return exp;
364 };
365
366
367 /**
Akron984fe8f2020-11-25 15:21:37 +0100368 * WebService that retrieves data from the Kustvakt
369 * Webservice and returns response in different formats.
370 *
371 * Returns an octet stream.
Akronb91c3032020-11-23 19:24:40 +0100372 *
373 * @param fname
374 * file name
375 * @param format
Akron984fe8f2020-11-25 15:21:37 +0100376 * the file format value
Akronb91c3032020-11-23 19:24:40 +0100377 * @param q
378 * the query
Akron984fe8f2020-11-25 15:21:37 +0100379 * @param cq
380 * the corpus query
Akronb91c3032020-11-23 19:24:40 +0100381 * @param ql
382 * the query language
383 * @param cutoff
Akron984fe8f2020-11-25 15:21:37 +0100384 * Only export the first page
385 * @param hitc
386 * Number of matches to fetch
Akronb91c3032020-11-23 19:24:40 +0100387 */
388 @POST
389 @Path("export")
390 @Produces(MediaType.APPLICATION_OCTET_STREAM)
391 public Response staticExport (
392 @FormParam("fname") String fname,
393 @FormParam("format") String format,
394 @FormParam("q") String q,
395 @FormParam("cq") String cq,
396 @FormParam("ql") String ql,
397 @FormParam("cutoff") String cutoffStr,
398 @FormParam("hitc") int hitc
Akronb91c3032020-11-23 19:24:40 +0100399 ) throws IOException {
400
Akronaf145eb2020-11-24 16:55:47 +0100401 Exporter exp = export(fname, format, q, cq, ql, cutoffStr, hitc, null);
Akrond0b1cfe2020-11-20 19:26:52 +0100402
403 return exp.serve().build();
Akron96907962020-11-12 18:47:07 +0100404 };
Akronaf145eb2020-11-24 16:55:47 +0100405
Akron96907962020-11-12 18:47:07 +0100406
Akronb91c3032020-11-23 19:24:40 +0100407 /**
Akron984fe8f2020-11-25 15:21:37 +0100408 * WebService that retrieves data from the Kustvakt
409 * Webservice and returns response in different formats.
410 *
411 * Returns an event stream.
412 *
413 * @param fname
414 * file name
415 * @param format
416 * the file format value
417 * @param q
418 * the query
419 * @param cq
420 * the corpus query
421 * @param ql
422 * the query language
423 * @param cutoff
424 * Only export the first page
425 * @param hitc
426 * Number of matches to fetch
Akronb91c3032020-11-23 19:24:40 +0100427 */
Akronb87af2f2020-09-24 14:38:02 +0200428 @GET
Akron83f39942021-03-22 16:37:25 +0100429 @Path("export")
Akronaf145eb2020-11-24 16:55:47 +0100430 @Produces(SseFeature.SERVER_SENT_EVENTS)
431 @Consumes(SseFeature.SERVER_SENT_EVENTS)
432 public Response progressExport(
Akronb91c3032020-11-23 19:24:40 +0100433 @QueryParam("fname") String fname,
434 @QueryParam("format") String format,
435 @QueryParam("q") String q,
436 @QueryParam("cq") String cq,
437 @QueryParam("ql") String ql,
438 @QueryParam("cutoff") String cutoffStr,
439 @QueryParam("hitc") int hitc
440 ) throws InterruptedException {
Akrond0b1cfe2020-11-20 19:26:52 +0100441
Akron984fe8f2020-11-25 15:21:37 +0100442 // See
443 // https://www.baeldung.com/java-ee-jax-rs-sse
444 // https://www.howopensource.com/2016/01/java-sse-chat-example/
445 // https://csetutorials.com/jersey-sse-tutorial.html
446 // https://eclipse-ee4j.github.io/jersey.github.io/documentation/latest/sse.html
Akronaf145eb2020-11-24 16:55:47 +0100447
448 final EventOutput eventOutput = new EventOutput();
Akrond0b1cfe2020-11-20 19:26:52 +0100449
Akronb91c3032020-11-23 19:24:40 +0100450 // Send initial event
Akronaf145eb2020-11-24 16:55:47 +0100451 if (eventOutput.isClosed())
452 return Response.ok("EventSource closed").build();
Akronb91c3032020-11-23 19:24:40 +0100453
Marc Kupietza2a17972022-03-27 12:03:48 +0200454 Thread t = new Thread(
Akron984fe8f2020-11-25 15:21:37 +0100455 new Runnable() {
456
Akronaf145eb2020-11-24 16:55:47 +0100457 @Override
458 public void run() {
459 final OutboundEvent.Builder eventBuilder = new OutboundEvent.Builder();
460 try {
461 eventBuilder.name("Process");
462 eventBuilder.data("init");
463 eventOutput.write(eventBuilder.build());
464 Exporter exp = export(
Akron984fe8f2020-11-25 15:21:37 +0100465 fname, format, q, cq, ql, cutoffStr, hitc, eventOutput
Akronaf145eb2020-11-24 16:55:47 +0100466 );
Akron35881012020-11-24 20:05:06 +0100467
Akronaf145eb2020-11-24 16:55:47 +0100468 if (eventOutput.isClosed())
469 return;
Akronba3ea112020-11-24 22:40:18 +0100470
Akronaf145eb2020-11-24 16:55:47 +0100471 eventBuilder.name("Relocate");
Akronba3ea112020-11-24 22:40:18 +0100472 eventBuilder.data(exp.getExportID() + ";" + exp.getFileName());
Akronaf145eb2020-11-24 16:55:47 +0100473 eventOutput.write(eventBuilder.build());
Akronba3ea112020-11-24 22:40:18 +0100474
Akron984fe8f2020-11-25 15:21:37 +0100475 }
476
477 catch (Exception e) {
Akronaf145eb2020-11-24 16:55:47 +0100478 try {
479 if (eventOutput.isClosed())
480 return;
Akron984fe8f2020-11-25 15:21:37 +0100481
Akronaf145eb2020-11-24 16:55:47 +0100482 eventBuilder.name("Error");
483 eventBuilder.data(e.getMessage());
484 eventOutput.write(eventBuilder.build());
Akron984fe8f2020-11-25 15:21:37 +0100485 }
486
487 catch (IOException ioe) {
Akrona241a472021-04-27 18:55:58 +0200488 Logger.error(ioe);
Akron984fe8f2020-11-25 15:21:37 +0100489 throw new RuntimeException(
490 "Error when writing event output.", ioe
491 );
Akronaf145eb2020-11-24 16:55:47 +0100492 };
Akron984fe8f2020-11-25 15:21:37 +0100493 }
494
495 finally {
Akronaf145eb2020-11-24 16:55:47 +0100496 try {
497 if (eventOutput.isClosed())
498 return;
Akrond0b1cfe2020-11-20 19:26:52 +0100499
Akronaf145eb2020-11-24 16:55:47 +0100500 eventBuilder.name("Process");
501 eventBuilder.data("done");
502 eventOutput.write(eventBuilder.build());
503 eventOutput.close();
Marc Kupietza2a17972022-03-27 12:03:48 +0200504 }
Akron984fe8f2020-11-25 15:21:37 +0100505
506 catch (IOException ioClose) {
Akrona241a472021-04-27 18:55:58 +0200507 Logger.error(ioClose);
Akron984fe8f2020-11-25 15:21:37 +0100508 throw new RuntimeException(
509 "Error when closing the event output.", ioClose
510 );
Akronaf145eb2020-11-24 16:55:47 +0100511 }
512 };
513 return;
514 }
Marc Kupietza2a17972022-03-27 12:03:48 +0200515 });
516 t.start();
517// t.join();
Akronaf145eb2020-11-24 16:55:47 +0100518
Akron1cbe7982021-04-27 17:18:30 +0200519 String origin = prop.getProperty("server.origin","*");
520 if (servletReq != null) {
521 // This is temporary to allow for session riding
522 origin = servletReq.getHeader("Origin");
523 };
524
Marc Kupietza2a17972022-03-27 12:03:48 +0200525 return Response.ok(eventOutput, String.valueOf(SseFeature.SERVER_SENT_EVENTS_TYPE))
Akron1cbe7982021-04-27 17:18:30 +0200526 .header("Access-Control-Allow-Origin", origin)
527 .header("Access-Control-Allow-Credentials", "true")
528 .header("Vary","Origin")
Akronaf145eb2020-11-24 16:55:47 +0100529 .build();
Akrond0b1cfe2020-11-20 19:26:52 +0100530 };
531
Akron35881012020-11-24 20:05:06 +0100532
533 /**
Akron984fe8f2020-11-25 15:21:37 +0100534 * Relocation target to which the event
Akron35881012020-11-24 20:05:06 +0100535 * stream points to.
Akron984fe8f2020-11-25 15:21:37 +0100536 *
537 * Returns an octet stream.
538 *
539 * @param fname
540 * file name
541 * @param file
542 * the file to fetch
Akron35881012020-11-24 20:05:06 +0100543 */
544 @GET
545 @Path("export/{file}")
546 @Produces(MediaType.APPLICATION_OCTET_STREAM)
547 public Response fileExport(
548 @PathParam("file") String fileStr,
549 @QueryParam("fname") String fname
550 ) {
551
552 String format = getExtension(fileStr);
Akron984fe8f2020-11-25 15:21:37 +0100553
Akron35881012020-11-24 20:05:06 +0100554 // Get exporter object
555 Exporter exp = getExporter(format);
Akron984fe8f2020-11-25 15:21:37 +0100556
557 if (fname != null)
Akron35881012020-11-24 20:05:06 +0100558 exp.setFileName(fname);
Akron984fe8f2020-11-25 15:21:37 +0100559
Akron35881012020-11-24 20:05:06 +0100560 exp.setFile(fileStr);
561
562 // Return without init
563 return exp.serve().build();
564 };
565
Akrond0b1cfe2020-11-20 19:26:52 +0100566
Akron984fe8f2020-11-25 15:21:37 +0100567 /**
568 * The export form.
569 *
570 * Returns a HTML file.
571 */
Akrond0b1cfe2020-11-20 19:26:52 +0100572 @GET
Akronb87af2f2020-09-24 14:38:02 +0200573 @Path("export")
574 @Produces(MediaType.TEXT_HTML)
575 public Response exportHTML () {
Akron7f242bb2020-09-28 20:19:11 +0200576 return responseForm();
577 };
Akronbb076c72020-09-28 14:39:15 +0200578
Akron7f242bb2020-09-28 20:19:11 +0200579
Akron984fe8f2020-11-25 15:21:37 +0100580 /**
581 * The export script.
582 *
583 * Returns a static JavaScript file.
584 */
Akron96907962020-11-12 18:47:07 +0100585 @GET
586 @Path("export.js")
587 @Produces("application/javascript")
588 public Response exportJavascript () {
589 return Response
590 .ok(exportJsStr, "application/javascript")
591 .build();
592 };
Akron35881012020-11-24 20:05:06 +0100593
Akron984fe8f2020-11-25 15:21:37 +0100594
595 /*
596 * Get exporter object by format
597 */
Akron35881012020-11-24 20:05:06 +0100598 private Exporter getExporter (String format) {
599 // Choose the correct exporter
600 if (format.equals("json"))
601 return new JsonExporter();
602 else if (format.equals("csv"))
603 return new CsvExporter();
604
605 return new RtfExporter();
606 };
607
Akron7f242bb2020-09-28 20:19:11 +0200608
Akron984fe8f2020-11-25 15:21:37 +0100609 /*
610 * Decorate request with auth headers
611 */
612 private Invocation.Builder authBuilder (
613 Invocation.Builder reqBuilder,
614 String xff,
615 String auth
616 ) {
617
618 if (xff != "")
Akron696c0632020-11-12 15:30:51 +0100619 reqBuilder = reqBuilder.header("X-Forwarded-For", xff);
Akron984fe8f2020-11-25 15:21:37 +0100620
621 if (auth != "")
Akron696c0632020-11-12 15:30:51 +0100622 reqBuilder = reqBuilder.header("Authorization", auth);
Akron696c0632020-11-12 15:30:51 +0100623
624 return reqBuilder;
625 };
626
Akron96907962020-11-12 18:47:07 +0100627
Akron984fe8f2020-11-25 15:21:37 +0100628 /*
629 * Get authorization token from cookie
630 */
Akron54b30ed2020-11-13 10:35:35 +0100631 private String authFromCookie (HttpServletRequest r) {
632
633 // This is a temporary solution using session riding - only
634 // valid for the time being
635 Cookie[] cookies = r.getCookies();
Akron69cd35d2020-11-20 13:17:31 +0100636
637 if (cookies == null)
638 return "";
639
Akronb4d3b2a2023-11-30 13:55:21 +0100640 String cookieName = prop.getProperty("cookie.name", "");
Akron54b30ed2020-11-13 10:35:35 +0100641
642 // Iterate through all cookies for a Kalamar session
643 for (int i = 0; i < cookies.length; i++) {
Akron1cbe7982021-04-27 17:18:30 +0200644
Akronb4d3b2a2023-11-30 13:55:21 +0100645 // Check the valid name and ignore irrelevant cookies
646 if (cookieName == "") {
647 if (!cookies[i].getName().equals("kalamar")) {
648 continue;
649 }
650 } else if (!cookies[i].getName().equals(cookieName)) {
Akron54b30ed2020-11-13 10:35:35 +0100651 continue;
Akronb4d3b2a2023-11-30 13:55:21 +0100652 };
Akron54b30ed2020-11-13 10:35:35 +0100653
654 // Get the value
655 String b64 = cookies[i].getValue();
656 String[] b64Parts = b64.split("--", 2);
657 if (b64Parts.length == 2) {
658 // Read the payload
659 String payload = new String(b64Dec.decode(b64Parts[0]));
660 if (payload != "") {
661 Matcher m = authrep.matcher(payload);
662 if (m.find()) {
663 return m.group(1);
664 };
665 };
666 };
667 };
Akron54b30ed2020-11-13 10:35:35 +0100668 return "";
669 };
670
671
Akron96907962020-11-12 18:47:07 +0100672 /*
673 * Response with form template.
674 */
675 private Response responseForm () {
676 return responseForm(null, null);
Akron54b30ed2020-11-13 10:35:35 +0100677 };
Akron96907962020-11-12 18:47:07 +0100678
679
680 /*
681 * Response with form template.
682 *
683 * Accepts an error code and message.
684 */
Akron7f242bb2020-09-28 20:19:11 +0200685 private Response responseForm (Status code, String msg) {
Akronbb076c72020-09-28 14:39:15 +0200686 StringWriter out = new StringWriter();
687 HashMap<String, Object> templateData = new HashMap<String, Object>();
688
Akron69cd35d2020-11-20 13:17:31 +0100689 // Build uri for assets
690 String scheme = prop.getProperty("asset.scheme", "https");
691 String port = prop.getProperty("asset.port", "");
692 String host = prop.getProperty("asset.host", "korap.ids-mannheim.de");
693 String path = prop.getProperty("asset.path", "");
Akrona77f2f02020-11-26 16:37:48 +0100694 String defaultHitc = prop.getProperty("conf.default_hitc", "100");
695 int maxHitc = Integer.parseInt(prop.getProperty("conf.max_exp_limit", "10000"));
Akronbb076c72020-09-28 14:39:15 +0200696
697 UriBuilder uri = UriBuilder.fromPath("")
698 .host(host)
Akron68f221b2020-09-29 23:15:43 +0200699 .scheme(scheme);
700
Akron69cd35d2020-11-20 13:17:31 +0100701 if (path != "")
Akron68f221b2020-09-29 23:15:43 +0200702 uri = uri.path(path);
Akronbb076c72020-09-28 14:39:15 +0200703
Akron69cd35d2020-11-20 13:17:31 +0100704 if (port != "")
Akronbb076c72020-09-28 14:39:15 +0200705 uri = uri.port(Integer.parseInt(port));
Akronbb076c72020-09-28 14:39:15 +0200706
707 templateData.put("assetPath", uri.build());
Akrona77f2f02020-11-26 16:37:48 +0100708 templateData.put("defaultHitc", defaultHitc);
709 templateData.put("maxHitc", maxHitc);
Akronbb076c72020-09-28 14:39:15 +0200710
Akron69cd35d2020-11-20 13:17:31 +0100711 // There is an error code to pass
Akron7f242bb2020-09-28 20:19:11 +0200712 if (code != null) {
713 templateData.put("code", code.getStatusCode());
714 templateData.put("msg", msg);
715 };
716
Akrona578eb02020-11-26 12:34:17 +0100717 try {
718 templateData.put("dict", this.getDictionary());
719
720 } catch (Exception e) {
Akrona241a472021-04-27 18:55:58 +0200721 Logger.error(e);
Akrona578eb02020-11-26 12:34:17 +0100722 return Response
723 .ok(new String("Dictionary not found"))
724 .status(Status.INTERNAL_SERVER_ERROR)
725 .build();
726 };
727
Akron7f242bb2020-09-28 20:19:11 +0200728 // Generate template
Akronbb076c72020-09-28 14:39:15 +0200729 try {
730 Template template = cfg.getTemplate("export.ftl");
Marc Kupietz88525882022-03-26 22:28:36 +0100731 template.setLocale(getPreferredSupportedLocale());
Akronbb076c72020-09-28 14:39:15 +0200732 template.process(templateData, out);
733 }
Akron69cd35d2020-11-20 13:17:31 +0100734
735 // Unable to find template
Akronbb076c72020-09-28 14:39:15 +0200736 catch (Exception e) {
Akrona241a472021-04-27 18:55:58 +0200737 Logger.error(e);
Akronbb076c72020-09-28 14:39:15 +0200738 return Response
739 .ok(new String("Template not found"))
740 .status(Status.INTERNAL_SERVER_ERROR)
741 .build();
Akron54b30ed2020-11-13 10:35:35 +0100742 };
hebastae2bd6e42020-03-03 19:07:20 +0100743
Akron7f242bb2020-09-28 20:19:11 +0200744 ResponseBuilder resp = Response.ok(out.toString(), "text/html");
745
Akron69cd35d2020-11-20 13:17:31 +0100746 if (code != null)
Akron7f242bb2020-09-28 20:19:11 +0200747 resp = resp.status(code);
Akron7f242bb2020-09-28 20:19:11 +0200748
749 return resp.build();
Akroneedac912020-11-16 12:54:42 +0100750 };
Akronbe6777b2020-10-02 11:35:03 +0200751
752
753 /*
Akron69cd35d2020-11-20 13:17:31 +0100754 * Get the origin user IP.
755 *
Akronbe6777b2020-10-02 11:35:03 +0200756 * This function is a simplification of
757 * Mojolicious::Plugin::ClientIP
758 */
759 protected static String getClientIP (String xff) {
760 if (xff == null) {
761 return "";
762 };
763
764 String[] ips = xff.split("\\s*,\\s*");
765
Akron69cd35d2020-11-20 13:17:31 +0100766 for (int i = ips.length - 1; i >= 0; i--) {
767 if (ips[i].matches(ipre))
Akronbe6777b2020-10-02 11:35:03 +0200768 return ips[i];
Akronbe6777b2020-10-02 11:35:03 +0200769 };
770
771 return "";
Akron232248d2020-11-11 18:55:10 +0100772 };
Akrona578eb02020-11-26 12:34:17 +0100773
774
775 /*
776 * Load dictionary for a chosen locale as a resource bundle
777 */
Marc Kupietz88525882022-03-26 22:28:36 +0100778 private ResourceBundle getDictionary() throws IOException {
779 return ResourceBundle.getBundle(
780 "locales/export", getPreferredSupportedLocale()
781 );
782 }
Akrona578eb02020-11-26 12:34:17 +0100783
Marc Kupietz88525882022-03-26 22:28:36 +0100784 private Locale getPreferredSupportedLocale() throws IOException {
785 Locale fallback = new Locale("en");
Akrona578eb02020-11-26 12:34:17 +0100786
787 if (req != null) {
Akrona578eb02020-11-26 12:34:17 +0100788 for (Locale l : req.getAcceptableLanguages()) {
789 switch (l.getLanguage()) {
Marc Kupietz88525882022-03-26 22:28:36 +0100790 case "de":
791 return (l);
792 case "en":
793 return (l);
794 }
795 }
796 }
Akrona578eb02020-11-26 12:34:17 +0100797
Marc Kupietz88525882022-03-26 22:28:36 +0100798 return fallback;
799 }
800
801}