blob: 13db3ac473cdb19cc9a7fb5752057aa837a7a90f [file] [log] [blame]
margaretha5b708792023-05-12 16:55:29 +02001package de.ids_mannheim.korap.core.service;
margaretha20f31232018-07-09 17:49:39 +02002
margaretha0ddaed62021-01-21 17:10:33 +01003import java.io.BufferedReader;
4import java.io.InputStreamReader;
5import java.io.OutputStream;
6import java.net.HttpURLConnection;
7import java.net.URL;
margaretha20f31232018-07-09 17:49:39 +02008import java.util.ArrayList;
margarethaa85965d2018-12-19 15:58:21 +01009import java.util.List;
margaretha20f31232018-07-09 17:49:39 +020010import java.util.Set;
11import java.util.regex.Pattern;
12
margaretha137d4ee2019-12-13 14:49:15 +010013import org.apache.http.HttpStatus;
margaretha20f31232018-07-09 17:49:39 +020014import org.apache.logging.log4j.LogManager;
15import org.apache.logging.log4j.Logger;
16import org.springframework.beans.factory.annotation.Autowired;
17import org.springframework.stereotype.Service;
18
margaretha2544cdf2019-07-08 11:39:43 +020019import com.fasterxml.jackson.databind.JsonNode;
margaretha137d4ee2019-12-13 14:49:15 +010020import com.fasterxml.jackson.databind.node.ArrayNode;
21import com.fasterxml.jackson.databind.node.ObjectNode;
margaretha20f31232018-07-09 17:49:39 +020022
margaretha5a5b9952023-06-12 12:46:36 +020023import de.ids_mannheim.korap.config.KustvaktCacheable;
margaretha20f31232018-07-09 17:49:39 +020024import de.ids_mannheim.korap.config.KustvaktConfiguration;
25import de.ids_mannheim.korap.exceptions.KustvaktException;
26import de.ids_mannheim.korap.exceptions.StatusCodes;
margaretha20f31232018-07-09 17:49:39 +020027import de.ids_mannheim.korap.query.serialize.MetaQueryBuilder;
28import de.ids_mannheim.korap.query.serialize.QuerySerializer;
margaretha137d4ee2019-12-13 14:49:15 +010029import de.ids_mannheim.korap.response.Notifications;
margaretha398f4722019-01-09 19:07:20 +010030import de.ids_mannheim.korap.rewrite.RewriteHandler;
margaretha20f31232018-07-09 17:49:39 +020031import de.ids_mannheim.korap.user.User;
32import de.ids_mannheim.korap.user.User.CorpusAccess;
margaretha2544cdf2019-07-08 11:39:43 +020033import de.ids_mannheim.korap.utils.JsonUtils;
margaretha20f31232018-07-09 17:49:39 +020034import de.ids_mannheim.korap.web.ClientsHandler;
margarethadb5da372023-09-01 11:02:52 +020035import jakarta.annotation.PostConstruct;
margaretha96c309d2023-08-16 12:24:12 +020036import jakarta.ws.rs.core.HttpHeaders;
37import jakarta.ws.rs.core.MultivaluedHashMap;
38import jakarta.ws.rs.core.MultivaluedMap;
39import jakarta.ws.rs.core.UriBuilder;
margaretha20f31232018-07-09 17:49:39 +020040
41@Service
margaretha35e1ca22023-11-16 22:00:01 +010042public class SearchService extends BasicService {
43
44 public class TotalResultCache extends KustvaktCacheable {
45
margaretha5a5b9952023-06-12 12:46:36 +020046 public TotalResultCache () {
margaretha35e1ca22023-11-16 22:00:01 +010047 super("total_results", "key:hashedKoralQuery");
margaretha5a5b9952023-06-12 12:46:36 +020048 }
49 }
margaretha79d738c2018-12-17 16:45:47 +010050 private static final boolean DEBUG = false;
51
margaretha20f31232018-07-09 17:49:39 +020052 private static Logger jlog = LogManager.getLogger(SearchService.class);
53
54 @Autowired
margaretha4dee07a2022-05-27 11:45:28 +020055 private SearchNetworkEndpoint searchNetwork;
margaretha20f31232018-07-09 17:49:39 +020056
57 private ClientsHandler graphDBhandler;
margaretha35e1ca22023-11-16 22:00:01 +010058
margaretha5a5b9952023-06-12 12:46:36 +020059 private TotalResultCache totalResultCache;
margaretha9b754d72025-04-01 12:46:18 +020060
61 @Autowired
62 protected RewriteHandler rewriteHandler;
margaretha20f31232018-07-09 17:49:39 +020063
margaretha66061bb2025-06-23 11:11:18 +020064 @Autowired
65 protected KustvaktConfiguration config;
66
margaretha20f31232018-07-09 17:49:39 +020067 @PostConstruct
68 private void doPostConstruct () {
margaretha20f31232018-07-09 17:49:39 +020069 UriBuilder builder = UriBuilder.fromUri("http://10.0.10.13").port(9997);
70 this.graphDBhandler = new ClientsHandler(builder.build());
margaretha35e1ca22023-11-16 22:00:01 +010071
margaretha5a5b9952023-06-12 12:46:36 +020072 totalResultCache = new TotalResultCache();
margaretha20f31232018-07-09 17:49:39 +020073 }
74
margarethae1228ab2021-02-22 11:51:38 +010075 public String getKrillVersion () {
76 return searchKrill.getIndex().getVersion();
77
78 }
margaretha35e1ca22023-11-16 22:00:01 +010079
margaretha7926adc2018-08-30 13:45:33 +020080 @SuppressWarnings("unchecked")
margaretha20f31232018-07-09 17:49:39 +020081 public String serializeQuery (String q, String ql, String v, String cq,
82 Integer pageIndex, Integer startPage, Integer pageLength,
margaretha279ad6e2025-08-14 11:25:48 +020083 String context, Boolean cutoff, boolean accessRewriteDisabled,
84 double apiVersion)
margaretha2544cdf2019-07-08 11:39:43 +020085 throws KustvaktException {
margaretha279ad6e2025-08-14 11:25:48 +020086 QuerySerializer ss = new QuerySerializer(apiVersion).setQuery(q, ql, v);
margaretha35e1ca22023-11-16 22:00:01 +010087 if (cq != null)
88 ss.setCollection(cq);
margaretha20f31232018-07-09 17:49:39 +020089
90 MetaQueryBuilder meta = new MetaQueryBuilder();
margaretha35e1ca22023-11-16 22:00:01 +010091 if (pageIndex != null)
92 meta.addEntry("startIndex", pageIndex);
margaretha20f31232018-07-09 17:49:39 +020093 if (pageIndex == null && startPage != null)
94 meta.addEntry("startPage", startPage);
margaretha35e1ca22023-11-16 22:00:01 +010095 if (pageLength != null)
96 meta.addEntry("count", pageLength);
97 if (context != null)
98 meta.setSpanContext(context);
margaretha20f31232018-07-09 17:49:39 +020099 meta.addEntry("cutOff", cutoff);
100
101 ss.setMeta(meta.raw());
margaretha34954472018-10-24 20:05:17 +0200102 // return ss.toJSON();
103
104 String query = ss.toJSON();
105 query = rewriteHandler.processQuery(ss.toJSON(), null);
106 return query;
margaretha20f31232018-07-09 17:49:39 +0200107 }
108
margaretha2544cdf2019-07-08 11:39:43 +0200109 public String search (String jsonld, String username, HttpHeaders headers)
110 throws KustvaktException {
111
112 User user = createUser(username, headers);
113
margaretha35e1ca22023-11-16 22:00:01 +0100114 JsonNode node = JsonUtils.readTree(jsonld);
margaretha2544cdf2019-07-08 11:39:43 +0200115 node = node.at("/meta/snippets");
margaretha35e1ca22023-11-16 22:00:01 +0100116 if (node != null && node.asBoolean()) {
margaretha2544cdf2019-07-08 11:39:43 +0200117 user.setCorpusAccess(CorpusAccess.ALL);
118 }
margaretha35e1ca22023-11-16 22:00:01 +0100119
margaretha2544cdf2019-07-08 11:39:43 +0200120 String query = this.rewriteHandler.processQuery(jsonld, user);
margaretha20f31232018-07-09 17:49:39 +0200121 // MH: todo: should be possible to add the meta part to
122 // the query serialization
123 // User user = controller.getUser(ctx.getUsername());
124 // jsonld = this.processor.processQuery(jsonld, user);
margaretha2544cdf2019-07-08 11:39:43 +0200125 return searchKrill.search(query);
margaretha20f31232018-07-09 17:49:39 +0200126 }
127
margaretha7926adc2018-08-30 13:45:33 +0200128 @SuppressWarnings("unchecked")
margaretha279ad6e2025-08-14 11:25:48 +0200129 public String search (double requestedVersion, String engine,
130 String username, HttpHeaders headers,
margaretha66061bb2025-06-23 11:11:18 +0200131 String q, String ql, String v, List<String> cqList, String fields,
132 String pipes, String responsePipes, Integer pageIndex,
133 Integer pageInteger, String ctx, Integer pageLength, Boolean cutoff,
134 boolean accessRewriteDisabled, boolean showTokens,
135 boolean showSnippet) throws KustvaktException {
margaretha20f31232018-07-09 17:49:39 +0200136
margarethac1db9132019-08-28 11:32:04 +0200137 if (pageInteger != null && pageInteger < 1) {
138 throw new KustvaktException(StatusCodes.INVALID_ARGUMENT,
139 "page must start from 1", "page");
140 }
margaretha35e1ca22023-11-16 22:00:01 +0100141
margaretha20f31232018-07-09 17:49:39 +0200142 User user = createUser(username, headers);
margaretha79d738c2018-12-17 16:45:47 +0100143 CorpusAccess corpusAccess = user.getCorpusAccess();
margaretha35e1ca22023-11-16 22:00:01 +0100144
margaretha2544cdf2019-07-08 11:39:43 +0200145 // EM: TODO: check if requested fields are public metadata. Currently
146 // it is not needed because all metadata are public.
margaretha35e1ca22023-11-16 22:00:01 +0100147 if (accessRewriteDisabled) {
margaretha2544cdf2019-07-08 11:39:43 +0200148 corpusAccess = CorpusAccess.ALL;
149 user.setCorpusAccess(CorpusAccess.ALL);
150 }
margaretha35e1ca22023-11-16 22:00:01 +0100151
margaretha279ad6e2025-08-14 11:25:48 +0200152 QuerySerializer serializer = new QuerySerializer(requestedVersion);
margaretha20f31232018-07-09 17:49:39 +0200153 serializer.setQuery(q, ql, v);
margarethaed2ee242019-12-12 17:34:18 +0100154 String cq = combineMultipleCorpusQuery(cqList);
margaretha35e1ca22023-11-16 22:00:01 +0100155 if (cq != null)
156 serializer.setCollection(cq);
margaretha20f31232018-07-09 17:49:39 +0200157
margaretha85967642019-11-13 13:35:33 +0100158 List<String> fieldList = convertFieldsToList(fields);
159 handleNonPublicFields(fieldList, accessRewriteDisabled, serializer);
margaretha35e1ca22023-11-16 22:00:01 +0100160
margaretha20f31232018-07-09 17:49:39 +0200161 MetaQueryBuilder meta = createMetaQuery(pageIndex, pageInteger, ctx,
margaretha35e1ca22023-11-16 22:00:01 +0100162 pageLength, cutoff, corpusAccess, fieldList,
163 accessRewriteDisabled, showTokens, showSnippet);
margaretha20f31232018-07-09 17:49:39 +0200164 serializer.setMeta(meta.raw());
margaretha35e1ca22023-11-16 22:00:01 +0100165
margaretha20f31232018-07-09 17:49:39 +0200166 // There is an error in query processing
167 // - either query, corpus or meta
168 if (serializer.hasErrors()) {
169 throw new KustvaktException(serializer.toJSON());
170 }
171
margaretha1d1c73e2019-12-03 18:07:28 +0100172 String query = serializer.toJSON();
margaretha35e1ca22023-11-16 22:00:01 +0100173
margaretha9c6493e2022-01-25 11:54:29 +0100174 if (accessRewriteDisabled && showTokens) {
margarethad0f36512025-07-04 11:06:10 +0200175 query = addWarning(query, StatusCodes.NOT_ALLOWED,
margaretha9c6493e2022-01-25 11:54:29 +0100176 "Tokens cannot be shown without access.");
margaretha9c6493e2022-01-25 11:54:29 +0100177 }
margaretha35e1ca22023-11-16 22:00:01 +0100178
margaretha66061bb2025-06-23 11:11:18 +0200179 // Query pipe rewrite
180 query = runPipes(query, pipes);
margaretha35e1ca22023-11-16 22:00:01 +0100181
margaretha1d1c73e2019-12-03 18:07:28 +0100182 query = this.rewriteHandler.processQuery(query, user);
margaretha35e1ca22023-11-16 22:00:01 +0100183 if (DEBUG) {
margaretha79d738c2018-12-17 16:45:47 +0100184 jlog.debug("the serialized query " + query);
185 }
margaretha20f31232018-07-09 17:49:39 +0200186
margaretha5a5b9952023-06-12 12:46:36 +0200187 int hashedKoralQuery = createTotalResultCacheKey(query);
188 boolean hasCutOff = hasCutOff(query);
margarethae6ebfbc2024-10-07 08:39:42 +0200189 if (config.isTotalResultCacheEnabled() && !hasCutOff) {
margaretha35e1ca22023-11-16 22:00:01 +0100190 query = precheckTotalResultCache(hashedKoralQuery, query);
margaretha5a5b9952023-06-12 12:46:36 +0200191 }
192
margaretha35e1ca22023-11-16 22:00:01 +0100193 KustvaktConfiguration.BACKENDS searchEngine = this.config
194 .chooseBackend(engine);
margaretha20f31232018-07-09 17:49:39 +0200195 String result;
margaretha4dee07a2022-05-27 11:45:28 +0200196 if (searchEngine.equals(KustvaktConfiguration.BACKENDS.NEO4J)) {
margaretha20f31232018-07-09 17:49:39 +0200197 result = searchNeo4J(query, pageLength, meta, false);
198 }
margaretha4dee07a2022-05-27 11:45:28 +0200199 else if (searchEngine.equals(KustvaktConfiguration.BACKENDS.NETWORK)) {
200 result = searchNetwork.search(query);
201 }
margaretha20f31232018-07-09 17:49:39 +0200202 else {
203 result = searchKrill.search(query);
204 }
margaretha34954472018-10-24 20:05:17 +0200205 // jlog.debug("Query result: " + result);
margaretha66061bb2025-06-23 11:11:18 +0200206
margarethae6ebfbc2024-10-07 08:39:42 +0200207 if (config.isTotalResultCacheEnabled()) {
208 result = afterCheckTotalResultCache(hashedKoralQuery, result);
209 }
210
margaretha5a5b9952023-06-12 12:46:36 +0200211 if (!hasCutOff) {
212 result = removeCutOff(result);
213 }
margaretha66061bb2025-06-23 11:11:18 +0200214
215 // Response pipe rewrite
216 result = runPipes(result, responsePipes);
margaretha20f31232018-07-09 17:49:39 +0200217 return result;
218
219 }
margaretha66061bb2025-06-23 11:11:18 +0200220
margaretha5a5b9952023-06-12 12:46:36 +0200221 private String removeCutOff (String result) throws KustvaktException {
222 ObjectNode resultNode = (ObjectNode) JsonUtils.readTree(result);
223 ObjectNode meta = (ObjectNode) resultNode.at("/meta");
224 meta.remove("cutOff");
225 return resultNode.toString();
226 }
227
margaretha35e1ca22023-11-16 22:00:01 +0100228 public int createTotalResultCacheKey (String query)
229 throws KustvaktException {
margaretha5a5b9952023-06-12 12:46:36 +0200230 ObjectNode queryNode = (ObjectNode) JsonUtils.readTree(query);
231 queryNode.remove("meta");
margarethae6ebfbc2024-10-07 08:39:42 +0200232 return queryNode.toString().hashCode();
margaretha5a5b9952023-06-12 12:46:36 +0200233 }
234
235 private String afterCheckTotalResultCache (int hashedKoralQuery,
236 String result) throws KustvaktException {
margaretha35e1ca22023-11-16 22:00:01 +0100237
238 String totalResults = (String) totalResultCache
239 .getCacheValue(hashedKoralQuery);
margaretha5a5b9952023-06-12 12:46:36 +0200240 if (totalResults != null) {
241 ObjectNode queryNode = (ObjectNode) JsonUtils.readTree(result);
242 ObjectNode meta = (ObjectNode) queryNode.at("/meta");
243 if (meta.isMissingNode()) {
Marc Kupietz824f2042023-09-18 14:50:21 +0200244 queryNode.put("totalResults", Integer.valueOf(totalResults));
margaretha5a5b9952023-06-12 12:46:36 +0200245 }
246 else {
Marc Kupietz824f2042023-09-18 14:50:21 +0200247 meta.put("totalResults", Integer.valueOf(totalResults));
margaretha5a5b9952023-06-12 12:46:36 +0200248 }
249 result = queryNode.toString();
250 }
251 else {
252 JsonNode node = JsonUtils.readTree(result);
253 totalResults = node.at("/meta/totalResults").asText();
margarethabf6b0582024-10-24 09:43:49 +0200254 boolean timeExceeded = node.at("/meta/timeExceeded").asBoolean();
255
256 if (!timeExceeded && totalResults != null && !totalResults.isEmpty()
margaretha35e1ca22023-11-16 22:00:01 +0100257 && Integer.parseInt(totalResults) > 0)
margaretha5a5b9952023-06-12 12:46:36 +0200258 totalResultCache.storeInCache(hashedKoralQuery, totalResults);
259 }
260 return result;
261 }
262
263 public String precheckTotalResultCache (int hashedKoralQuery, String query)
264 throws KustvaktException {
margaretha35e1ca22023-11-16 22:00:01 +0100265 String totalResults = (String) totalResultCache
266 .getCacheValue(hashedKoralQuery);
margaretha5a5b9952023-06-12 12:46:36 +0200267 if (totalResults != null) {
268 // add cutoff
269 ObjectNode queryNode = (ObjectNode) JsonUtils.readTree(query);
270 ObjectNode meta = (ObjectNode) queryNode.at("/meta");
271 meta.put("cutOff", "true");
272 query = queryNode.toString();
273 }
274 return query;
275 }
margaretha35e1ca22023-11-16 22:00:01 +0100276
margaretha5a5b9952023-06-12 12:46:36 +0200277 private boolean hasCutOff (String query) throws KustvaktException {
278 JsonNode queryNode = JsonUtils.readTree(query);
279 JsonNode cutOff = queryNode.at("/meta/cutOff");
280 if (cutOff.isMissingNode()) {
281 return false;
282 }
283 else {
284 return true;
285 }
286 }
margaretha20f31232018-07-09 17:49:39 +0200287
margaretha137d4ee2019-12-13 14:49:15 +0100288 /**
289 * Pipes are service URLs for modifying KoralQuery. A POST request
290 * with Content-Type application/json will be sent for each pipe.
margaretha35e1ca22023-11-16 22:00:01 +0100291 * Kustvakt expects a KoralQuery in JSON format as the pipe
292 * response.
margaretha137d4ee2019-12-13 14:49:15 +0100293 *
margaretha35e1ca22023-11-16 22:00:01 +0100294 * @param query
295 * the original koral query
margaretha66061bb2025-06-23 11:11:18 +0200296 * @param pipes
margaretha35e1ca22023-11-16 22:00:01 +0100297 * the pipe service URLs
298 * @param serializer
299 * the query serializer
margaretha137d4ee2019-12-13 14:49:15 +0100300 * @return a modified koral query
margaretha35e1ca22023-11-16 22:00:01 +0100301 * @throws KustvaktException
margaretha137d4ee2019-12-13 14:49:15 +0100302 */
margaretha66061bb2025-06-23 11:11:18 +0200303 private String runPipes (String query, String pipes)
margaretha35e1ca22023-11-16 22:00:01 +0100304 throws KustvaktException {
margarethad0f36512025-07-04 11:06:10 +0200305 if (config.getPipeHost().isEmpty()) {
306 addWarning(query, StatusCodes.NOT_SUPPORTED,
307 "Pipe is not supported.");
308 return query;
309 }
310
margaretha66061bb2025-06-23 11:11:18 +0200311 if (pipes != null && !pipes.isEmpty()) {
312 String[] pipeArray = pipes.split(",");
313
margaretha35e1ca22023-11-16 22:00:01 +0100314 for (int i = 0; i < pipeArray.length; i++) {
margaretha137d4ee2019-12-13 14:49:15 +0100315 String pipeURL = pipeArray[i];
margaretha66061bb2025-06-23 11:11:18 +0200316 if (pipeURL.startsWith(config.getPipeHost())) {
317 try {
318 URL url = new URL(pipeURL);
319 HttpURLConnection connection = (HttpURLConnection) url
320 .openConnection();
321 connection.setRequestMethod("POST");
322 connection.setRequestProperty("Content-Type",
323 "application/json; charset=UTF-8");
324 connection.setRequestProperty("Accept", "application/json");
325 connection.setDoOutput(true);
326 OutputStream os = connection.getOutputStream();
327 byte[] input = query.getBytes("utf-8");
328 os.write(input, 0, input.length);
329
330 String entity = null;
331 if (connection.getResponseCode() == HttpStatus.SC_OK) {
332 BufferedReader br = new BufferedReader(
333 new InputStreamReader(
334 connection.getInputStream(), "utf-8"));
335 StringBuilder response = new StringBuilder();
336 String responseLine = null;
337 while ((responseLine = br.readLine()) != null) {
338 response.append(responseLine.trim());
339 }
340 entity = response.toString();
margaretha137d4ee2019-12-13 14:49:15 +0100341 }
margaretha66061bb2025-06-23 11:11:18 +0200342
343 if (entity != null && !entity.isEmpty()) {
344 query = entity;
345 }
346 else {
347 query = handlePipeError(query, pipeURL,
348 connection.getResponseCode() + " "
349 + connection.getResponseMessage());
350 }
margaretha0ddaed62021-01-21 17:10:33 +0100351 }
margaretha66061bb2025-06-23 11:11:18 +0200352 catch (Exception e) {
353 query = handlePipeError(query, pipeURL, e.getMessage());
margaretha137d4ee2019-12-13 14:49:15 +0100354 }
355 }
margaretha66061bb2025-06-23 11:11:18 +0200356 else {
357 query = handlePipeError(query, pipeURL,
358 "Unrecognized pipe URL");
margaretha137d4ee2019-12-13 14:49:15 +0100359 }
margaretha1d1c73e2019-12-03 18:07:28 +0100360 }
margaretha66061bb2025-06-23 11:11:18 +0200361 }
margaretha1d1c73e2019-12-03 18:07:28 +0100362 return query;
363 }
margaretha35e1ca22023-11-16 22:00:01 +0100364
365 private String handlePipeError (String query, String url, String message)
366 throws KustvaktException {
margaretha35e1ca22023-11-16 22:00:01 +0100367
margarethad0f36512025-07-04 11:06:10 +0200368 query = addWarning(query, StatusCodes.PIPE_FAILED, "Pipe failed", url, message);
margaretha35e1ca22023-11-16 22:00:01 +0100369 return query;
margaretha9c6493e2022-01-25 11:54:29 +0100370 }
margaretha9c6493e2022-01-25 11:54:29 +0100371
margarethad0f36512025-07-04 11:06:10 +0200372 private String addWarning (String query, int statusCode,
373 String warningMessage, String ... terms) throws KustvaktException {
374 Notifications n = new Notifications();
375 n.addWarning(statusCode, warningMessage, terms);
376 JsonNode warning = n.toJsonNode();
377
margaretha66061bb2025-06-23 11:11:18 +0200378 ObjectNode node = null;
379 try {
380 node = (ObjectNode) JsonUtils.readTree(query);
381 }
382 catch (Exception e) {
383 throw new KustvaktException(StatusCodes.DESERIALIZATION_FAILED,
384 "Invalid JSON format");
385 }
margaretha35e1ca22023-11-16 22:00:01 +0100386 if (node.has("warnings")) {
margaretha137d4ee2019-12-13 14:49:15 +0100387 warning = warning.at("/warnings/0");
388 ArrayNode arrayNode = (ArrayNode) node.get("warnings");
389 arrayNode.add(warning);
390 node.set("warnings", arrayNode);
391 }
margaretha35e1ca22023-11-16 22:00:01 +0100392 else {
margaretha137d4ee2019-12-13 14:49:15 +0100393 node.setAll((ObjectNode) warning);
394 }
margaretha9c6493e2022-01-25 11:54:29 +0100395 return node.toString();
margaretha137d4ee2019-12-13 14:49:15 +0100396 }
margaretha1d1c73e2019-12-03 18:07:28 +0100397
margaretha85967642019-11-13 13:35:33 +0100398 private void handleNonPublicFields (List<String> fieldList,
399 boolean accessRewriteDisabled, QuerySerializer serializer) {
margaretha35e1ca22023-11-16 22:00:01 +0100400 List<String> nonPublicFields = new ArrayList<>();
margaretha85967642019-11-13 13:35:33 +0100401 nonPublicFields.add("snippet");
margaretha35e1ca22023-11-16 22:00:01 +0100402
margaretha85967642019-11-13 13:35:33 +0100403 List<String> ignoredFields = new ArrayList<>();
404 if (accessRewriteDisabled && !fieldList.isEmpty()) {
405 for (String field : fieldList) {
406 if (nonPublicFields.contains(field)) {
407 ignoredFields.add(field);
408 }
409 }
410 if (!ignoredFields.isEmpty()) {
411 serializer.addWarning(StatusCodes.NON_PUBLIC_FIELD_IGNORED,
412 "The requested non public fields are ignored",
413 ignoredFields);
414 }
415 }
416 }
margaretha35e1ca22023-11-16 22:00:01 +0100417
margaretha20f31232018-07-09 17:49:39 +0200418 private MetaQueryBuilder createMetaQuery (Integer pageIndex,
margaretha35e1ca22023-11-16 22:00:01 +0100419 Integer pageInteger, String ctx, Integer pageLength, Boolean cutoff,
420 CorpusAccess corpusAccess, List<String> fieldList,
421 boolean accessRewriteDisabled, boolean showTokens,
422 boolean showSnippet) {
margaretha20f31232018-07-09 17:49:39 +0200423 MetaQueryBuilder meta = new MetaQueryBuilder();
424 meta.addEntry("startIndex", pageIndex);
425 meta.addEntry("startPage", pageInteger);
426 meta.setSpanContext(ctx);
427 meta.addEntry("count", pageLength);
428 // todo: what happened to cutoff?
429 meta.addEntry("cutOff", cutoff);
margarethaac85ae12023-05-08 11:09:13 +0200430 meta.addEntry("snippets", (showSnippet && !accessRewriteDisabled));
margaretha9c6493e2022-01-25 11:54:29 +0100431 if (!accessRewriteDisabled) {
432 meta.addEntry("tokens", showTokens);
433 }
margaretha35e1ca22023-11-16 22:00:01 +0100434
margaretha20f31232018-07-09 17:49:39 +0200435 // meta.addMeta(pageIndex, pageInteger, pageLength, ctx,
436 // cutoff);
437 // fixme: should only apply to CQL queries per default!
438 // meta.addEntry("itemsPerResource", 1);
margaretha35e1ca22023-11-16 22:00:01 +0100439
440 if (corpusAccess.equals(CorpusAccess.FREE)) {
margaretha245198d2025-06-18 12:19:59 +0200441 meta.addEntry("timeout", config.getGuestTimeout());
margaretha79d738c2018-12-17 16:45:47 +0100442 }
margaretha35e1ca22023-11-16 22:00:01 +0100443 else {
margaretha245198d2025-06-18 12:19:59 +0200444 meta.addEntry("timeout", config.getLoginTimeout());
margaretha79d738c2018-12-17 16:45:47 +0100445 }
margaretha35e1ca22023-11-16 22:00:01 +0100446
447 if (fieldList != null && !fieldList.isEmpty()) {
margarethaa85965d2018-12-19 15:58:21 +0100448 meta.addEntry("fields", fieldList);
449 }
margaretha20f31232018-07-09 17:49:39 +0200450 return meta;
451 }
452
margaretha852a0f62019-02-19 12:14:30 +0100453 private List<String> convertFieldsToList (String fields) {
margaretha85967642019-11-13 13:35:33 +0100454 if (fields != null && !fields.isEmpty()) {
455 String[] fieldArray = fields.split(",");
456 List<String> fieldList = new ArrayList<>(fieldArray.length);
457 for (String field : fieldArray) {
458 fieldList.add(field.trim());
459 }
460 return fieldList;
margaretha852a0f62019-02-19 12:14:30 +0100461 }
margaretha85967642019-11-13 13:35:33 +0100462 else {
463 return new ArrayList<>();
464 }
margaretha852a0f62019-02-19 12:14:30 +0100465 }
margaretha35e1ca22023-11-16 22:00:01 +0100466
margaretha20f31232018-07-09 17:49:39 +0200467 private String searchNeo4J (String query, int pageLength,
468 MetaQueryBuilder meta, boolean raw) throws KustvaktException {
469
470 if (raw) {
471 throw new KustvaktException(StatusCodes.ILLEGAL_ARGUMENT,
472 "raw not supported!");
473 }
474
abcpro1d912a2c2022-11-07 17:10:50 +0000475 MultivaluedMap<String, String> map = new MultivaluedHashMap<String, String>();
margaretha20f31232018-07-09 17:49:39 +0200476 map.add("q", query);
477 map.add("count", String.valueOf(pageLength));
478 map.add("lctxs", String.valueOf(meta.getSpanContext().getLeftSize()));
479 map.add("rctxs", String.valueOf(meta.getSpanContext().getRightSize()));
480 return this.graphDBhandler.getResponse(map, "distKwic");
481
482 }
483
margaretha351f7692019-02-06 19:36:52 +0100484 private Pattern determineAvailabilityPattern (User user) {
margaretha34954472018-10-24 20:05:17 +0200485 Pattern p = null;
486 if (user != null) {
487 CorpusAccess corpusAccess = user.getCorpusAccess();
488 switch (corpusAccess) {
489 case PUB:
490 p = config.getPublicLicensePattern();
491 break;
492 case ALL:
493 p = config.getAllLicensePattern();
494 break;
495 default: // FREE
496 p = config.getFreeLicensePattern();
497 break;
498 }
margaretha20f31232018-07-09 17:49:39 +0200499 }
margaretha351f7692019-02-06 19:36:52 +0100500 return p;
501 }
margaretha35e1ca22023-11-16 22:00:01 +0100502
503 public String retrieveMatchInfo (String corpusId, String docId,
504 String textId, String matchId, boolean info, Set<String> foundries,
505 String username, HttpHeaders headers, Set<String> layers,
506 boolean spans, boolean snippet, boolean tokens,
margarethac7f8f802024-06-05 12:52:45 +0200507 boolean sentenceExpansion, boolean highlights, boolean isDeprecated)
margaretha35e1ca22023-11-16 22:00:01 +0100508 throws KustvaktException {
509 String matchid = searchKrill.getMatchId(corpusId, docId, textId,
510 matchId);
margaretha351f7692019-02-06 19:36:52 +0100511
512 User user = createUser(username, headers);
513 Pattern p = determineAvailabilityPattern(user);
Akron70e28ca2020-04-01 09:24:46 +0200514
margaretha35e1ca22023-11-16 22:00:01 +0100515 // boolean match_only = foundries == null || foundries.isEmpty();
margaretha20f31232018-07-09 17:49:39 +0200516 String results;
margaretha35e1ca22023-11-16 22:00:01 +0100517 // try {
margaretha20f31232018-07-09 17:49:39 +0200518
Akron17fbad72023-04-05 15:46:21 +0200519 ArrayList<String> foundryList = null;
520 ArrayList<String> layerList = null;
margaretha20f31232018-07-09 17:49:39 +0200521
Akron17fbad72023-04-05 15:46:21 +0200522 if (foundries != null && !foundries.isEmpty()) {
523 foundryList = new ArrayList<String>();
524 layerList = new ArrayList<String>();
525 // EM: now without user, just list all foundries and
526 // layers
527 if (foundries.contains("*")) {
528 foundryList = config.getFoundries();
529 layerList = config.getLayers();
margaretha20f31232018-07-09 17:49:39 +0200530 }
531 else {
Akron17fbad72023-04-05 15:46:21 +0200532 foundryList.addAll(foundries);
533 layerList.addAll(layers);
margaretha20f31232018-07-09 17:49:39 +0200534 }
margaretha35e1ca22023-11-16 22:00:01 +0100535 }
536 else {
Akron17fbad72023-04-05 15:46:21 +0200537 sentenceExpansion = false;
538 spans = false;
539 info = false;
540 highlights = true;
541 };
margaretha35e1ca22023-11-16 22:00:01 +0100542
543 results = searchKrill.getMatch(matchid, info, foundryList, layerList,
margarethac7f8f802024-06-05 12:52:45 +0200544 spans, snippet, tokens, highlights, sentenceExpansion, p, isDeprecated);
margaretha35e1ca22023-11-16 22:00:01 +0100545 // }
546 // catch (Exception e) {
547 // jlog.error("Exception in the MatchInfo service encountered!", e);
548 // throw new KustvaktException(StatusCodes.ILLEGAL_ARGUMENT,
549 // e.getMessage());
550 // }
551 if (DEBUG) {
margarethaa85965d2018-12-19 15:58:21 +0100552 jlog.debug("MatchInfo results: " + results);
553 }
margaretha20f31232018-07-09 17:49:39 +0200554 return results;
555 }
556
557 public String retrieveDocMetadata (String corpusId, String docId,
margaretha852a0f62019-02-19 12:14:30 +0100558 String textId, String fields, String username, HttpHeaders headers)
margaretha351f7692019-02-06 19:36:52 +0100559 throws KustvaktException {
margaretha852a0f62019-02-19 12:14:30 +0100560 List<String> fieldList = null;
margaretha35e1ca22023-11-16 22:00:01 +0100561 if (fields != null && !fields.isEmpty()) {
margaretha852a0f62019-02-19 12:14:30 +0100562 fieldList = convertFieldsToList(fields);
563 }
564 Pattern p = null;
margaretha35e1ca22023-11-16 22:00:01 +0100565 if (config.isMetadataRestricted()) {
margaretha852a0f62019-02-19 12:14:30 +0100566 User user = createUser(username, headers);
567 p = determineAvailabilityPattern(user);
568 }
margaretha20f31232018-07-09 17:49:39 +0200569 String textSigle = searchKrill.getTextSigle(corpusId, docId, textId);
margaretha852a0f62019-02-19 12:14:30 +0100570 return searchKrill.getFields(textSigle, fieldList, p);
margaretha20f31232018-07-09 17:49:39 +0200571 }
margaretha35e1ca22023-11-16 22:00:01 +0100572
margaretha20f31232018-07-09 17:49:39 +0200573 public String getCollocationBase (String query) throws KustvaktException {
574 return graphDBhandler.getResponse("distCollo", "q", query);
575 }
margaretha35e1ca22023-11-16 22:00:01 +0100576
margaretha78f90802022-03-28 08:23:20 +0200577 public void closeIndexReader () throws KustvaktException {
578 searchKrill.closeIndexReader();
margaretha3d55b002019-03-19 12:00:44 +0100579 }
Akron48e51b92020-06-02 14:50:27 +0200580
581 /**
582 * Return the fingerprint of the latest index revision.
583 */
584 public String getIndexFingerprint () {
585 return searchKrill.getIndexFingerprint();
586 }
margaretha35e1ca22023-11-16 22:00:01 +0100587
margaretha5a5b9952023-06-12 12:46:36 +0200588 public TotalResultCache getTotalResultCache () {
589 return totalResultCache;
590 }
margaretha20f31232018-07-09 17:49:39 +0200591}