blob: 72373dbcb48643195f6950ec3360c2ccdfa9b038 [file] [log] [blame]
Nils Diewaldc925b492013-12-03 23:56:10 +00001package de.ids_mannheim.korap;
2
Nils Diewald7cf8c6d2014-05-28 18:37:38 +00003import java.io.*;
Nils Diewaldc7d08d92014-11-05 21:30:05 +00004import java.util.*;
Michael Hanl7edaa552014-05-23 18:48:50 +00005
Nils Diewald7cf8c6d2014-05-28 18:37:38 +00006import org.apache.lucene.search.spans.SpanQuery;
Nils Diewald92729ce2014-10-06 16:00:17 +00007import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
Nils Diewalda14ecd62015-02-26 21:00:20 +00008import de.ids_mannheim.korap.KrillIndex;
Nils Diewald884dbcf2015-02-27 17:02:28 +00009import de.ids_mannheim.korap.response.Result;
Nils Diewald7cf8c6d2014-05-28 18:37:38 +000010import de.ids_mannheim.korap.util.QueryException;
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000011import de.ids_mannheim.korap.response.Notifications;
Nils Diewald0881e242015-02-27 17:31:01 +000012import de.ids_mannheim.korap.response.Response;
Nils Diewald7cf8c6d2014-05-28 18:37:38 +000013
14import com.fasterxml.jackson.databind.ObjectMapper;
15import com.fasterxml.jackson.databind.JsonNode;
Nils Diewaldc925b492013-12-03 23:56:10 +000016
Nils Diewaldbbd39a52015-02-23 19:56:57 +000017import org.slf4j.Logger;
18import org.slf4j.LoggerFactory;
19
Nils Diewald3aa9e692015-02-20 22:20:11 +000020/**
Nils Diewaldbb33da22015-03-04 16:24:25 +000021 * <p>Krill is a corpus data retrieval index using Lucene for
22 * Look-Ups.</p>
23 *
Nils Diewald21914ff2015-02-28 02:09:47 +000024 * <p>
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000025 * It is the reference implementation for KoralQuery consumption,
Nils Diewald21914ff2015-02-28 02:09:47 +000026 * and this class acts as the central point for consuming and
27 * responding to KoralQuery requests.
28 * </p>
Nils Diewaldbb33da22015-03-04 16:24:25 +000029 *
Nils Diewald21914ff2015-02-28 02:09:47 +000030 * <p>
Nils Diewaldbb33da22015-03-04 16:24:25 +000031 * The processing of the collection section of the request is
32 * delegated
33 * to {@link KrillCollection}, the query section to {@link KrillQuery}
34 * ,
Nils Diewald21914ff2015-02-28 02:09:47 +000035 * and the meta section to {@link KrillMeta}.
36 * </p>
Nils Diewaldbb33da22015-03-04 16:24:25 +000037 *
Nils Diewald3aa9e692015-02-20 22:20:11 +000038 * <blockquote><pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000039 * // Create or receive a KoralQuery JSON string
40 * String koral = "{\"query\":{...}, \"collection\":{...}, ... }";
41 *
42 * // Create a new krill search object by passing the Query
43 * Krill krill = new Krill(koral);
44 *
45 * // Apply the query to an index and receive a search result
46 * // This may invoke different actions depending on the request
47 * Result result = krill.setIndex(new KrillIndex()).apply();
Nils Diewald3aa9e692015-02-20 22:20:11 +000048 * </pre></blockquote>
Nils Diewaldbb33da22015-03-04 16:24:25 +000049 *
Nils Diewald3aa9e692015-02-20 22:20:11 +000050 * @author diewald
51 * @author margaretha
Nils Diewaldbb33da22015-03-04 16:24:25 +000052 *
Nils Diewald2d5f8102015-02-26 21:07:54 +000053 * @see KrillCollection
Nils Diewald0339d462015-02-26 14:53:56 +000054 * @see KrillQuery
Nils Diewaldd37f7e42015-02-27 21:08:22 +000055 * @see KrillMeta
Nils Diewalda14ecd62015-02-26 21:00:20 +000056 * @see KrillIndex
Nils Diewald3aa9e692015-02-20 22:20:11 +000057 */
Akronb1166442015-06-27 00:34:19 +020058// TODO: Use a krill.properties configuration file
59// TODO: Reuse passed JSON object instead of creating a new response!
Nils Diewald0881e242015-02-27 17:31:01 +000060public class Krill extends Response {
Nils Diewalda14ecd62015-02-26 21:00:20 +000061 private KrillIndex index;
Nils Diewaldbbd39a52015-02-23 19:56:57 +000062 private SpanQuery spanQuery;
Nils Diewaldefb9c9a2014-02-20 15:05:18 +000063 private JsonNode request;
Nils Diewald1e5d5942014-05-20 13:29:53 +000064 private String spanContext;
Nils Diewald364eb642013-12-22 15:03:01 +000065
Akron98b78542015-08-06 21:43:08 +020066 private final ObjectMapper mapper = new ObjectMapper();
67
Nils Diewaldbbd39a52015-02-23 19:56:57 +000068 // Logger
69 private final static Logger log = LoggerFactory.getLogger(Krill.class);
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000070
Nils Diewaldbb33da22015-03-04 16:24:25 +000071
Nils Diewald3aa9e692015-02-20 22:20:11 +000072 /**
73 * Construct a new Krill object.
74 */
Nils Diewaldbbd39a52015-02-23 19:56:57 +000075 public Krill () {};
Nils Diewald3aa9e692015-02-20 22:20:11 +000076
77
78 /**
79 * Construct a new Krill object,
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000080 * consuming a KoralQuery json string.
Nils Diewaldbb33da22015-03-04 16:24:25 +000081 *
82 * @param query
83 * The KoralQuery json string.
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000084 */
85 public Krill (String query) {
86 this.fromJson(query);
87 };
88
89
90 /**
91 * Construct a new Krill object,
92 * consuming a KoralQuery {@link JsonNode} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +000093 *
94 * @param query
95 * The KoralQuery {@link JsonNode} object.
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000096 */
97 public Krill (JsonNode query) {
98 this.fromJson(query);
99 };
100
101
102 /**
103 * Construct a new Krill object,
Nils Diewald3aa9e692015-02-20 22:20:11 +0000104 * consuming a {@link SpanQueryWrapper} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000105 *
106 * @param query
107 * The {@link SpanQueryWrapper} object.
Nils Diewald3aa9e692015-02-20 22:20:11 +0000108 */
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000109 public Krill (SpanQueryWrapper query) {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000110 try {
Nils Diewald3aa9e692015-02-20 22:20:11 +0000111 this.spanQuery = query.toQuery();
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000112 }
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000113
114 // Add the error to the KoralQuery response
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000115 catch (QueryException q) {
116 this.addError(q.getErrorCode(), q.getMessage());
117 };
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000118 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000119
Nils Diewald3aa9e692015-02-20 22:20:11 +0000120
121 /**
122 * Construct a new Krill object,
123 * consuming a {@link SpanQuery} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000124 *
125 * @param query
126 * The {@link SpanQuery} object.
Nils Diewald3aa9e692015-02-20 22:20:11 +0000127 */
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000128 public Krill (SpanQuery query) {
Nils Diewald3aa9e692015-02-20 22:20:11 +0000129 this.spanQuery = query;
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000130 };
Nils Diewaldc925b492013-12-03 23:56:10 +0000131
Nils Diewald3aa9e692015-02-20 22:20:11 +0000132
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000133 /**
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000134 * Parse KoralQuery as a json string.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000135 *
136 * @param query
137 * The KoralQuery json string.
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000138 * @return The {@link Krill} object for chaining.
139 * @throws QueryException
140 */
Akron98b78542015-08-06 21:43:08 +0200141 public Krill fromJson (final String query) {
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000142 // Parse query string
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000143 try {
144 this.request = mapper.readTree(query);
145 this.fromJson(this.request);
146 }
147
148 // Unable to parse JSON
149 catch (IOException e) {
150 this.addError(621, "Unable to parse JSON");
151 };
152
153 return this;
154 };
155
156
157 /**
158 * Parse KoralQuery as a {@link JsonNode} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000159 *
160 * @param query
161 * The KoralQuery {@link JsonNode} object.
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000162 * @return The {@link Krill} object for chaining.
163 * @throws QueryException
164 */
165 public Krill fromJson (JsonNode json) {
166
Nils Diewald3aa9e692015-02-20 22:20:11 +0000167 // Parse "query" attribute
168 if (json.has("query")) {
169 try {
Akron98b78542015-08-06 21:43:08 +0200170 final KrillQuery kq = new KrillQuery("tokens");
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000171 this.setQuery(kq);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000172
Akron98b78542015-08-06 21:43:08 +0200173 final SpanQueryWrapper qw = kq.fromJson(json.get("query"));
Akron001dab32015-07-02 12:30:15 +0200174
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000175 // Throw an error, in case the query matches everywhere
Nils Diewald3aa9e692015-02-20 22:20:11 +0000176 if (qw.isEmpty())
177 this.addError(780, "This query matches everywhere");
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000178
Nils Diewald3aa9e692015-02-20 22:20:11 +0000179 else {
Nils Diewald21914ff2015-02-28 02:09:47 +0000180
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000181 // Serialize a Lucene SpanQuery based on the SpanQueryWrapper
Nils Diewald3aa9e692015-02-20 22:20:11 +0000182 this.spanQuery = qw.toQuery();
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000183
184 // Throw a warning in case the root object is optional
Nils Diewald3aa9e692015-02-20 22:20:11 +0000185 if (qw.isOptional())
186 this.addWarning(781, "Optionality of query is ignored");
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000187
188 // Throw a warning in case the root object is negative
Nils Diewald3aa9e692015-02-20 22:20:11 +0000189 if (qw.isNegative())
190 this.addWarning(782, "Exclusivity of query is ignored");
191 };
Nils Diewald3aa9e692015-02-20 22:20:11 +0000192 }
193 catch (QueryException q) {
194 this.addError(q.getErrorCode(), q.getMessage());
195 };
196 }
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000197 else
Nils Diewald3aa9e692015-02-20 22:20:11 +0000198 this.addError(700, "No query given");
199
200 // <legacycode>
Nils Diewaldbb33da22015-03-04 16:24:25 +0000201 if (json.has("warning") && json.get("warning").asText().length() > 0) {
Nils Diewald3aa9e692015-02-20 22:20:11 +0000202 this.addWarning(799, json.get("warning").asText());
203 };
204 // </legacycode>
205
Nils Diewald3aa9e692015-02-20 22:20:11 +0000206 // Copy notifications from request
207 this.copyNotificationsFrom(json);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000208
Nils Diewald21914ff2015-02-28 02:09:47 +0000209 // Parse "collection" or "collections" attribute
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000210 try {
211 if (json.has("collection")) {
Akron98b78542015-08-06 21:43:08 +0200212 final JsonNode collNode = json.get("collection");
Akronbb5d1732015-06-22 01:22:40 +0200213
Akronc63697c2015-06-17 22:32:02 +0200214 // TODO: Temporary
Akronbb5d1732015-06-22 01:22:40 +0200215 if (collNode.fieldNames().hasNext()) {
Akron176c9b12015-07-29 19:53:40 +0200216 this.setCollection(new KrillCollection().fromJson(collNode));
Akronbb5d1732015-06-22 01:22:40 +0200217 };
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000218 }
Nils Diewald3aa9e692015-02-20 22:20:11 +0000219
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000220 else if (json.has("collections")) {
Akron40550172015-08-04 03:06:12 +0200221 this.addError(899,
222 "Collections are not supported anymore in favour of a single collection");
Nils Diewald3aa9e692015-02-20 22:20:11 +0000223 };
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000224 }
225 catch (QueryException q) {
226 this.addError(q.getErrorCode(), q.getMessage());
Nils Diewald3aa9e692015-02-20 22:20:11 +0000227 };
Nils Diewald3aa9e692015-02-20 22:20:11 +0000228
Nils Diewald21914ff2015-02-28 02:09:47 +0000229 // Parse "meta" attribute
Akron001dab32015-07-02 12:30:15 +0200230 // !this.hasErrors() &&
231 if (json.has("meta"))
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000232 this.setMeta(new KrillMeta(json.get("meta")));
Nils Diewald3aa9e692015-02-20 22:20:11 +0000233
Nils Diewald3aa9e692015-02-20 22:20:11 +0000234 return this;
235 };
236
Nils Diewald3aa9e692015-02-20 22:20:11 +0000237
238 /**
Nils Diewalda14ecd62015-02-26 21:00:20 +0000239 * Get the associated {@link KrillIndex} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000240 *
Nils Diewalda14ecd62015-02-26 21:00:20 +0000241 * @return The associated {@link KrillIndex} object.
Nils Diewald3aa9e692015-02-20 22:20:11 +0000242 */
Nils Diewalda14ecd62015-02-26 21:00:20 +0000243 public KrillIndex getIndex () {
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000244 return this.index;
245 };
246
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000247
248 /**
Nils Diewald21914ff2015-02-28 02:09:47 +0000249 * Set the {@link KrillIndex} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000250 *
251 * @param index
252 * The associated {@link KrillIndex} object.
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000253 * @return The {@link Krill} object for chaining.
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000254 */
Nils Diewalda14ecd62015-02-26 21:00:20 +0000255 public Krill setIndex (KrillIndex index) {
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000256 this.index = index;
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000257 return this;
258 };
259
260
Nils Diewald3aa9e692015-02-20 22:20:11 +0000261 /**
262 * Apply the KoralQuery to an index.
Nils Diewald21914ff2015-02-28 02:09:47 +0000263 * This may invoke different actions depending
Nils Diewaldbb33da22015-03-04 16:24:25 +0000264 * on the meta information, like {@link KrillIndex#search} or
265 * {@link KrillIndex#collect}.
266 *
267 * @param index
268 * The {@link KrillIndex} the search should be applyied
269 * to.
Nils Diewald884dbcf2015-02-27 17:02:28 +0000270 * @return The result as a {@link Result} object.
Nils Diewald3aa9e692015-02-20 22:20:11 +0000271 */
Nils Diewald884dbcf2015-02-27 17:02:28 +0000272 public Result apply (KrillIndex index) {
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000273 return this.setIndex(index).apply();
274 };
Nils Diewaldea28b622014-10-01 16:01:31 +0000275
Nils Diewaldc925b492013-12-03 23:56:10 +0000276
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000277 /**
278 * Apply the KoralQuery to an index.
Nils Diewald21914ff2015-02-28 02:09:47 +0000279 * This may invoke different actions depending
Nils Diewaldbb33da22015-03-04 16:24:25 +0000280 * on the meta information, like {@link KrillIndex#search} or
281 * {@link KrillIndex#collect}.
282 *
Nils Diewald884dbcf2015-02-27 17:02:28 +0000283 * @return The result as a {@link Result} object.
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000284 */
Nils Diewald884dbcf2015-02-27 17:02:28 +0000285 public Result apply () {
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000286
Nils Diewald884dbcf2015-02-27 17:02:28 +0000287 // Create new Result object to return
288 Result kr = new Result();
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000289
290 // There were errors
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000291 if (this.hasErrors()) {
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000292 kr.copyNotificationsFrom(this);
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000293 }
294
295 // There was no index
296 else if (this.index == null) {
297 kr.addError(601, "Unable to find index");
298 }
299
300 // Apply search
301 else {
Akronbb5d1732015-06-22 01:22:40 +0200302
303 // This contains meta and matches
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000304 kr = this.index.search(this);
Akronbb5d1732015-06-22 01:22:40 +0200305 // this.getCollection().setIndex(this.index);
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000306 kr.copyNotificationsFrom(this);
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000307 };
Nils Diewaldc6b78752013-12-05 19:05:12 +0000308
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000309 kr.setQuery(this.getQuery());
Akronbb5d1732015-06-22 01:22:40 +0200310 kr.setCollection(this.getCollection());
Akronb1166442015-06-27 00:34:19 +0200311 kr.setMeta(this.getMeta());
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000312
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000313 return kr;
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000314 };
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000315
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000316
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000317 /**
318 * Get the associated {@link SpanQuery} deserialization
Nils Diewaldbb33da22015-03-04 16:24:25 +0000319 * (i.e. the internal correspandence to KoralQuery's query
320 * object).
321 *
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000322 * <strong>Warning</strong>: SpanQueries may be lazy deserialized
323 * in future versions of Krill, rendering this API obsolete.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000324 *
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000325 * @return The deserialized {@link SpanQuery} object.
326 */
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000327 @Deprecated
328 public SpanQuery getSpanQuery () {
329 return this.spanQuery;
330 };
331
332
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000333 /**
334 * Set the SpanQuery by means of a {@link SpanQueryWrapper} object
Nils Diewaldbb33da22015-03-04 16:24:25 +0000335 * (i.e. the internal correspandence to KoralQuery's query
336 * object).
337 *
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000338 * <strong>Warning</strong>: SpanQueries may be lazy deserialized
339 * in future versions of Krill, rendering this API obsolete.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000340 *
341 * @param query
342 * The {@link SpanQueryWrapper} to unwrap
343 * the {@link SpanQuery} object.
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000344 * @return The {@link Krill} object for chaining.
345 */
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000346 @Deprecated
347 public Krill setSpanQuery (SpanQueryWrapper sqwi) {
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000348 try {
349 this.spanQuery = sqwi.toQuery();
350 }
351 catch (QueryException q) {
352 this.addError(q.getErrorCode(), q.getMessage());
353 };
354 return this;
355 };
356
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000357
358 /**
359 * Set the {@link SpanQuery} object
Nils Diewaldbb33da22015-03-04 16:24:25 +0000360 * (i.e. the internal correspandence to KoralQuery's query
361 * object).
362 *
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000363 * <strong>Warning</strong>: SpanQueries may be lazy deserialized
364 * in future versions of Krill, rendering this API obsolete.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000365 *
366 * @param query
367 * The {@link SpanQuery} object.
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000368 * @return The {@link Krill} object for chaining.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000369 */
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000370 @Deprecated
371 public Krill setSpanQuery (SpanQuery sq) {
372 this.spanQuery = sq;
373 return this;
374 };
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000375
376
377 // Requests are out - queries will be mirrored completely
378 @Deprecated
379 public JsonNode getRequest () {
380 return this.request;
381 };
Nils Diewald9f310832013-12-06 22:38:55 +0000382};