blob: 4b1b6427f8cb8f4b55eba2934c1a472d2d76868e [file] [log] [blame]
Nils Diewaldc925b492013-12-03 23:56:10 +00001package de.ids_mannheim.korap;
2
margaretha85ee2ac2018-07-25 17:58:09 +02003import java.io.IOException;
margaretha3827d532022-01-31 14:41:55 +01004import java.util.List;
Michael Hanl7edaa552014-05-23 18:48:50 +00005
Nils Diewald7cf8c6d2014-05-28 18:37:38 +00006import org.apache.lucene.search.spans.SpanQuery;
Nils Diewaldbbd39a52015-02-23 19:56:57 +00007
margaretha85ee2ac2018-07-25 17:58:09 +02008import com.fasterxml.jackson.databind.JsonNode;
9import com.fasterxml.jackson.databind.ObjectMapper;
10
margaretha5a8abea2021-11-08 16:57:51 +010011import de.ids_mannheim.korap.cache.VirtualCorpusCache;
margaretha85ee2ac2018-07-25 17:58:09 +020012import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
13import de.ids_mannheim.korap.response.Response;
14import de.ids_mannheim.korap.response.Result;
margaretha3827d532022-01-31 14:41:55 +010015import de.ids_mannheim.korap.response.VirtualCorpusResponse;
margaretha85ee2ac2018-07-25 17:58:09 +020016import de.ids_mannheim.korap.util.QueryException;
17
Nils Diewald3aa9e692015-02-20 22:20:11 +000018/**
Nils Diewaldbb33da22015-03-04 16:24:25 +000019 * <p>Krill is a corpus data retrieval index using Lucene for
20 * Look-Ups.</p>
21 *
Nils Diewald21914ff2015-02-28 02:09:47 +000022 * <p>
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000023 * It is the reference implementation for KoralQuery consumption,
Nils Diewald21914ff2015-02-28 02:09:47 +000024 * and this class acts as the central point for consuming and
25 * responding to KoralQuery requests.
26 * </p>
Nils Diewaldbb33da22015-03-04 16:24:25 +000027 *
Nils Diewald21914ff2015-02-28 02:09:47 +000028 * <p>
Nils Diewaldbb33da22015-03-04 16:24:25 +000029 * The processing of the collection section of the request is
30 * delegated
31 * to {@link KrillCollection}, the query section to {@link KrillQuery}
32 * ,
Nils Diewald21914ff2015-02-28 02:09:47 +000033 * and the meta section to {@link KrillMeta}.
34 * </p>
Nils Diewaldbb33da22015-03-04 16:24:25 +000035 *
Nils Diewald3aa9e692015-02-20 22:20:11 +000036 * <blockquote><pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000037 * // Create or receive a KoralQuery JSON string
38 * String koral = "{\"query\":{...}, \"collection\":{...}, ... }";
39 *
40 * // Create a new krill search object by passing the Query
41 * Krill krill = new Krill(koral);
42 *
43 * // Apply the query to an index and receive a search result
44 * // This may invoke different actions depending on the request
45 * Result result = krill.setIndex(new KrillIndex()).apply();
Nils Diewald3aa9e692015-02-20 22:20:11 +000046 * </pre></blockquote>
Nils Diewaldbb33da22015-03-04 16:24:25 +000047 *
Nils Diewald3aa9e692015-02-20 22:20:11 +000048 * @author diewald
49 * @author margaretha
Nils Diewaldbb33da22015-03-04 16:24:25 +000050 *
Nils Diewald2d5f8102015-02-26 21:07:54 +000051 * @see KrillCollection
Nils Diewald0339d462015-02-26 14:53:56 +000052 * @see KrillQuery
Nils Diewaldd37f7e42015-02-27 21:08:22 +000053 * @see KrillMeta
Nils Diewalda14ecd62015-02-26 21:00:20 +000054 * @see KrillIndex
Nils Diewald3aa9e692015-02-20 22:20:11 +000055 */
Akronb1166442015-06-27 00:34:19 +020056// TODO: Use a krill.properties configuration file
57// TODO: Reuse passed JSON object instead of creating a new response!
Nils Diewald0881e242015-02-27 17:31:01 +000058public class Krill extends Response {
Nils Diewalda14ecd62015-02-26 21:00:20 +000059 private KrillIndex index;
Nils Diewaldbbd39a52015-02-23 19:56:57 +000060 private SpanQuery spanQuery;
Nils Diewaldefb9c9a2014-02-20 15:05:18 +000061 private JsonNode request;
Nils Diewald364eb642013-12-22 15:03:01 +000062
Akron98b78542015-08-06 21:43:08 +020063 private final ObjectMapper mapper = new ObjectMapper();
64
Nils Diewald3aa9e692015-02-20 22:20:11 +000065 /**
66 * Construct a new Krill object.
67 */
Nils Diewaldbbd39a52015-02-23 19:56:57 +000068 public Krill () {};
Nils Diewald3aa9e692015-02-20 22:20:11 +000069
70
71 /**
72 * Construct a new Krill object,
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000073 * consuming a KoralQuery json string.
Nils Diewaldbb33da22015-03-04 16:24:25 +000074 *
75 * @param query
76 * The KoralQuery json string.
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000077 */
78 public Krill (String query) {
Akron850b46e2016-06-08 10:08:55 +020079 this.fromKoral(query);
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000080 };
81
82
83 /**
84 * Construct a new Krill object,
85 * consuming a KoralQuery {@link JsonNode} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +000086 *
87 * @param query
88 * The KoralQuery {@link JsonNode} object.
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000089 */
90 public Krill (JsonNode query) {
Akron850b46e2016-06-08 10:08:55 +020091 this.fromKoral(query);
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000092 };
93
94
95 /**
96 * Construct a new Krill object,
Nils Diewald3aa9e692015-02-20 22:20:11 +000097 * consuming a {@link SpanQueryWrapper} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +000098 *
99 * @param query
100 * The {@link SpanQueryWrapper} object.
Nils Diewald3aa9e692015-02-20 22:20:11 +0000101 */
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000102 public Krill (SpanQueryWrapper query) {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000103 try {
Nils Diewald3aa9e692015-02-20 22:20:11 +0000104 this.spanQuery = query.toQuery();
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000105 }
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000106
107 // Add the error to the KoralQuery response
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000108 catch (QueryException q) {
109 this.addError(q.getErrorCode(), q.getMessage());
110 };
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000111 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000112
Nils Diewald3aa9e692015-02-20 22:20:11 +0000113
114 /**
115 * Construct a new Krill object,
116 * consuming a {@link SpanQuery} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000117 *
118 * @param query
119 * The {@link SpanQuery} object.
Nils Diewald3aa9e692015-02-20 22:20:11 +0000120 */
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000121 public Krill (SpanQuery query) {
Nils Diewald3aa9e692015-02-20 22:20:11 +0000122 this.spanQuery = query;
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000123 };
Nils Diewaldc925b492013-12-03 23:56:10 +0000124
Nils Diewald3aa9e692015-02-20 22:20:11 +0000125
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000126 /**
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000127 * Parse KoralQuery as a json string.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000128 *
129 * @param query
130 * The KoralQuery json string.
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000131 * @return The {@link Krill} object for chaining.
132 * @throws QueryException
133 */
Akron850b46e2016-06-08 10:08:55 +0200134 public Krill fromKoral (final String query) {
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000135 // Parse query string
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000136 try {
137 this.request = mapper.readTree(query);
Akron850b46e2016-06-08 10:08:55 +0200138 this.fromKoral(this.request);
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000139 }
140
141 // Unable to parse JSON
142 catch (IOException e) {
143 this.addError(621, "Unable to parse JSON");
144 };
145
146 return this;
147 };
148
149
150 /**
151 * Parse KoralQuery as a {@link JsonNode} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000152 *
153 * @param query
154 * The KoralQuery {@link JsonNode} object.
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000155 * @return The {@link Krill} object for chaining.
156 * @throws QueryException
157 */
Akron850b46e2016-06-08 10:08:55 +0200158 public Krill fromKoral (JsonNode json) {
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000159
Nils Diewald3aa9e692015-02-20 22:20:11 +0000160 // Parse "query" attribute
161 if (json.has("query")) {
162 try {
Akron98b78542015-08-06 21:43:08 +0200163 final KrillQuery kq = new KrillQuery("tokens");
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000164 this.setQuery(kq);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000165
Akron850b46e2016-06-08 10:08:55 +0200166 final SpanQueryWrapper qw = kq.fromKoral(json.get("query"));
Akron001dab32015-07-02 12:30:15 +0200167
Akron352dae82016-08-05 17:57:51 +0200168 // Koral messages are moved to the Krill object
169 this.moveNotificationsFrom(kq);
170
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000171 // Throw an error, in case the query matches everywhere
Akrondfc93572016-08-10 19:01:34 +0200172 if (qw.isEmpty()) {
Nils Diewald3aa9e692015-02-20 22:20:11 +0000173 this.addError(780, "This query matches everywhere");
Akronf9def5e2016-10-10 21:26:46 +0200174 }
175 else if (qw.isNull()) {
176 this.addError(783, "This query can't match anywhere");
177 }
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000178
Nils Diewald3aa9e692015-02-20 22:20:11 +0000179 else {
Nils Diewald21914ff2015-02-28 02:09:47 +0000180
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000181 // Serialize a Lucene SpanQuery based on the SpanQueryWrapper
Nils Diewald3aa9e692015-02-20 22:20:11 +0000182 this.spanQuery = qw.toQuery();
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000183
Akrona7b936d2016-03-04 13:40:54 +0100184 // TODO: Make these information query rewrites
Akron0f3607d2016-02-23 22:16:20 +0100185
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000186 // Throw a warning in case the root object is optional
Nils Diewald3aa9e692015-02-20 22:20:11 +0000187 if (qw.isOptional())
188 this.addWarning(781, "Optionality of query is ignored");
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000189
190 // Throw a warning in case the root object is negative
Nils Diewald3aa9e692015-02-20 22:20:11 +0000191 if (qw.isNegative())
192 this.addWarning(782, "Exclusivity of query is ignored");
193 };
Nils Diewald3aa9e692015-02-20 22:20:11 +0000194 }
195 catch (QueryException q) {
196 this.addError(q.getErrorCode(), q.getMessage());
197 };
198 }
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000199 else
Nils Diewald3aa9e692015-02-20 22:20:11 +0000200 this.addError(700, "No query given");
201
202 // <legacycode>
Nils Diewaldbb33da22015-03-04 16:24:25 +0000203 if (json.has("warning") && json.get("warning").asText().length() > 0) {
Nils Diewald3aa9e692015-02-20 22:20:11 +0000204 this.addWarning(799, json.get("warning").asText());
205 };
206 // </legacycode>
207
Nils Diewald3aa9e692015-02-20 22:20:11 +0000208 // Copy notifications from request
209 this.copyNotificationsFrom(json);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000210
Nils Diewald21914ff2015-02-28 02:09:47 +0000211 // Parse "collection" or "collections" attribute
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000212 try {
213 if (json.has("collection")) {
Akron98b78542015-08-06 21:43:08 +0200214 final JsonNode collNode = json.get("collection");
Akronbb5d1732015-06-22 01:22:40 +0200215
Akronc63697c2015-06-17 22:32:02 +0200216 // TODO: Temporary
Akronbb5d1732015-06-22 01:22:40 +0200217 if (collNode.fieldNames().hasNext()) {
Eliza Margaretha6f989202016-10-14 21:48:29 +0200218 this.setCollection(
219 new KrillCollection().fromKoral(collNode));
Akronbb5d1732015-06-22 01:22:40 +0200220 };
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000221 }
Nils Diewald3aa9e692015-02-20 22:20:11 +0000222
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000223 else if (json.has("collections")) {
Akron40550172015-08-04 03:06:12 +0200224 this.addError(899,
225 "Collections are not supported anymore in favour of a single collection");
Nils Diewald3aa9e692015-02-20 22:20:11 +0000226 };
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000227 }
228 catch (QueryException q) {
229 this.addError(q.getErrorCode(), q.getMessage());
Nils Diewald3aa9e692015-02-20 22:20:11 +0000230 };
Nils Diewald3aa9e692015-02-20 22:20:11 +0000231
Nils Diewald21914ff2015-02-28 02:09:47 +0000232 // Parse "meta" attribute
Akron001dab32015-07-02 12:30:15 +0200233 // !this.hasErrors() &&
234 if (json.has("meta"))
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000235 this.setMeta(new KrillMeta(json.get("meta")));
Nils Diewald3aa9e692015-02-20 22:20:11 +0000236
Nils Diewald3aa9e692015-02-20 22:20:11 +0000237 return this;
238 };
239
Nils Diewald3aa9e692015-02-20 22:20:11 +0000240
241 /**
Nils Diewalda14ecd62015-02-26 21:00:20 +0000242 * Get the associated {@link KrillIndex} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000243 *
Nils Diewalda14ecd62015-02-26 21:00:20 +0000244 * @return The associated {@link KrillIndex} object.
Nils Diewald3aa9e692015-02-20 22:20:11 +0000245 */
Nils Diewalda14ecd62015-02-26 21:00:20 +0000246 public KrillIndex getIndex () {
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000247 return this.index;
248 };
249
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000250
251 /**
Nils Diewald21914ff2015-02-28 02:09:47 +0000252 * Set the {@link KrillIndex} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000253 *
254 * @param index
255 * The associated {@link KrillIndex} object.
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000256 * @return The {@link Krill} object for chaining.
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000257 */
Nils Diewalda14ecd62015-02-26 21:00:20 +0000258 public Krill setIndex (KrillIndex index) {
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000259 this.index = index;
margaretha05a4bc12022-02-11 10:55:43 +0100260 VirtualCorpusCache.setIndexInfo(index);
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000261 return this;
262 };
263
264
Nils Diewald3aa9e692015-02-20 22:20:11 +0000265 /**
266 * Apply the KoralQuery to an index.
Nils Diewald21914ff2015-02-28 02:09:47 +0000267 * This may invoke different actions depending
Nils Diewaldbb33da22015-03-04 16:24:25 +0000268 * on the meta information, like {@link KrillIndex#search} or
269 * {@link KrillIndex#collect}.
270 *
271 * @param index
272 * The {@link KrillIndex} the search should be applyied
273 * to.
Nils Diewald884dbcf2015-02-27 17:02:28 +0000274 * @return The result as a {@link Result} object.
Nils Diewald3aa9e692015-02-20 22:20:11 +0000275 */
Nils Diewald884dbcf2015-02-27 17:02:28 +0000276 public Result apply (KrillIndex index) {
margaretha5a8abea2021-11-08 16:57:51 +0100277 VirtualCorpusCache.setIndexInfo(index);
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000278 return this.setIndex(index).apply();
279 };
Nils Diewaldea28b622014-10-01 16:01:31 +0000280
Nils Diewaldc925b492013-12-03 23:56:10 +0000281
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000282 /**
283 * Apply the KoralQuery to an index.
Nils Diewald21914ff2015-02-28 02:09:47 +0000284 * This may invoke different actions depending
Nils Diewaldbb33da22015-03-04 16:24:25 +0000285 * on the meta information, like {@link KrillIndex#search} or
286 * {@link KrillIndex#collect}.
287 *
Nils Diewald884dbcf2015-02-27 17:02:28 +0000288 * @return The result as a {@link Result} object.
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000289 */
Nils Diewald884dbcf2015-02-27 17:02:28 +0000290 public Result apply () {
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000291
Nils Diewald884dbcf2015-02-27 17:02:28 +0000292 // Create new Result object to return
293 Result kr = new Result();
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000294
295 // There were errors
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000296 if (this.hasErrors()) {
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000297 kr.copyNotificationsFrom(this);
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000298 }
299
300 // There was no index
301 else if (this.index == null) {
302 kr.addError(601, "Unable to find index");
303 }
304
305 // Apply search
306 else {
Akronbb5d1732015-06-22 01:22:40 +0200307 // This contains meta and matches
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000308 kr = this.index.search(this);
Akronbb5d1732015-06-22 01:22:40 +0200309 // this.getCollection().setIndex(this.index);
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000310 kr.copyNotificationsFrom(this);
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000311 };
Nils Diewaldc6b78752013-12-05 19:05:12 +0000312
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000313 kr.setQuery(this.getQuery());
Akron1a8bb762019-01-18 15:48:59 +0100314
Akronbb5d1732015-06-22 01:22:40 +0200315 kr.setCollection(this.getCollection());
Akronb1166442015-06-27 00:34:19 +0200316 kr.setMeta(this.getMeta());
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000317
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000318 return kr;
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000319 };
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000320
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000321
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000322 /**
323 * Get the associated {@link SpanQuery} deserialization
Nils Diewaldbb33da22015-03-04 16:24:25 +0000324 * (i.e. the internal correspandence to KoralQuery's query
325 * object).
326 *
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000327 * <strong>Warning</strong>: SpanQueries may be lazy deserialized
328 * in future versions of Krill, rendering this API obsolete.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000329 *
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000330 * @return The deserialized {@link SpanQuery} object.
331 */
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000332 @Deprecated
333 public SpanQuery getSpanQuery () {
334 return this.spanQuery;
margaretha953fd012017-09-04 16:33:39 +0200335 }
336
337 //EM
338 public void setSpanQuery (SpanQuery sq) {
339 this.spanQuery = sq;
340
341 }
margaretha3827d532022-01-31 14:41:55 +0100342
343 public JsonNode retrieveFieldValues (String corpusQuery, KrillIndex index,
344 String fieldName) {
345 KrillCollection kc = new KrillCollection(corpusQuery);
346 List<String> fieldValues = index.getFieldVector(fieldName, kc);
347 VirtualCorpusResponse r = new VirtualCorpusResponse();
348 return r.createKoralQueryForField(fieldName, fieldValues);
349 }
Nils Diewald9f310832013-12-06 22:38:55 +0000350};