blob: dd33028244c70765e44ff9683e36019dbc79db55 [file] [log] [blame]
Nils Diewaldc925b492013-12-03 23:56:10 +00001package de.ids_mannheim.korap;
2
margaretha85ee2ac2018-07-25 17:58:09 +02003import java.io.IOException;
margaretha3827d532022-01-31 14:41:55 +01004import java.util.List;
Michael Hanl7edaa552014-05-23 18:48:50 +00005
Nils Diewald7cf8c6d2014-05-28 18:37:38 +00006import org.apache.lucene.search.spans.SpanQuery;
Nils Diewaldbbd39a52015-02-23 19:56:57 +00007
margaretha85ee2ac2018-07-25 17:58:09 +02008import com.fasterxml.jackson.databind.JsonNode;
9import com.fasterxml.jackson.databind.ObjectMapper;
10
margaretha5a8abea2021-11-08 16:57:51 +010011import de.ids_mannheim.korap.cache.VirtualCorpusCache;
margaretha85ee2ac2018-07-25 17:58:09 +020012import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
13import de.ids_mannheim.korap.response.Response;
14import de.ids_mannheim.korap.response.Result;
margaretha3827d532022-01-31 14:41:55 +010015import de.ids_mannheim.korap.response.VirtualCorpusResponse;
margaretha85ee2ac2018-07-25 17:58:09 +020016import de.ids_mannheim.korap.util.QueryException;
17
Nils Diewald3aa9e692015-02-20 22:20:11 +000018/**
Nils Diewaldbb33da22015-03-04 16:24:25 +000019 * <p>Krill is a corpus data retrieval index using Lucene for
20 * Look-Ups.</p>
21 *
Nils Diewald21914ff2015-02-28 02:09:47 +000022 * <p>
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000023 * It is the reference implementation for KoralQuery consumption,
Nils Diewald21914ff2015-02-28 02:09:47 +000024 * and this class acts as the central point for consuming and
25 * responding to KoralQuery requests.
26 * </p>
Nils Diewaldbb33da22015-03-04 16:24:25 +000027 *
Nils Diewald21914ff2015-02-28 02:09:47 +000028 * <p>
Nils Diewaldbb33da22015-03-04 16:24:25 +000029 * The processing of the collection section of the request is
30 * delegated
31 * to {@link KrillCollection}, the query section to {@link KrillQuery}
32 * ,
Nils Diewald21914ff2015-02-28 02:09:47 +000033 * and the meta section to {@link KrillMeta}.
34 * </p>
Nils Diewaldbb33da22015-03-04 16:24:25 +000035 *
Nils Diewald3aa9e692015-02-20 22:20:11 +000036 * <blockquote><pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000037 * // Create or receive a KoralQuery JSON string
38 * String koral = "{\"query\":{...}, \"collection\":{...}, ... }";
39 *
40 * // Create a new krill search object by passing the Query
41 * Krill krill = new Krill(koral);
42 *
43 * // Apply the query to an index and receive a search result
44 * // This may invoke different actions depending on the request
45 * Result result = krill.setIndex(new KrillIndex()).apply();
Nils Diewald3aa9e692015-02-20 22:20:11 +000046 * </pre></blockquote>
Nils Diewaldbb33da22015-03-04 16:24:25 +000047 *
Nils Diewald3aa9e692015-02-20 22:20:11 +000048 * @author diewald
49 * @author margaretha
Nils Diewaldbb33da22015-03-04 16:24:25 +000050 *
Nils Diewald2d5f8102015-02-26 21:07:54 +000051 * @see KrillCollection
Nils Diewald0339d462015-02-26 14:53:56 +000052 * @see KrillQuery
Nils Diewaldd37f7e42015-02-27 21:08:22 +000053 * @see KrillMeta
Nils Diewalda14ecd62015-02-26 21:00:20 +000054 * @see KrillIndex
Nils Diewald3aa9e692015-02-20 22:20:11 +000055 */
Akronb1166442015-06-27 00:34:19 +020056// TODO: Use a krill.properties configuration file
57// TODO: Reuse passed JSON object instead of creating a new response!
Nils Diewald0881e242015-02-27 17:31:01 +000058public class Krill extends Response {
Nils Diewalda14ecd62015-02-26 21:00:20 +000059 private KrillIndex index;
Nils Diewaldbbd39a52015-02-23 19:56:57 +000060 private SpanQuery spanQuery;
Nils Diewaldefb9c9a2014-02-20 15:05:18 +000061 private JsonNode request;
Nils Diewald364eb642013-12-22 15:03:01 +000062
margaretha464ae452024-05-17 11:57:30 +020063 private int maxTokenMatchSize;
Akron98b78542015-08-06 21:43:08 +020064 private final ObjectMapper mapper = new ObjectMapper();
65
Nils Diewald3aa9e692015-02-20 22:20:11 +000066 /**
67 * Construct a new Krill object.
68 */
Nils Diewaldbbd39a52015-02-23 19:56:57 +000069 public Krill () {};
Nils Diewald3aa9e692015-02-20 22:20:11 +000070
71
72 /**
73 * Construct a new Krill object,
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000074 * consuming a KoralQuery json string.
Nils Diewaldbb33da22015-03-04 16:24:25 +000075 *
76 * @param query
77 * The KoralQuery json string.
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000078 */
79 public Krill (String query) {
Akron850b46e2016-06-08 10:08:55 +020080 this.fromKoral(query);
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000081 };
82
83
84 /**
85 * Construct a new Krill object,
86 * consuming a KoralQuery {@link JsonNode} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +000087 *
88 * @param query
89 * The KoralQuery {@link JsonNode} object.
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000090 */
91 public Krill (JsonNode query) {
Akron850b46e2016-06-08 10:08:55 +020092 this.fromKoral(query);
Nils Diewaldf5ab4b22015-02-25 20:55:16 +000093 };
94
95
96 /**
97 * Construct a new Krill object,
Nils Diewald3aa9e692015-02-20 22:20:11 +000098 * consuming a {@link SpanQueryWrapper} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +000099 *
100 * @param query
101 * The {@link SpanQueryWrapper} object.
Nils Diewald3aa9e692015-02-20 22:20:11 +0000102 */
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000103 public Krill (SpanQueryWrapper query) {
Nils Diewaldafab8f32015-01-26 19:11:32 +0000104 try {
Nils Diewald3aa9e692015-02-20 22:20:11 +0000105 this.spanQuery = query.toQuery();
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000106 }
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000107
108 // Add the error to the KoralQuery response
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000109 catch (QueryException q) {
110 this.addError(q.getErrorCode(), q.getMessage());
111 };
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000112 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000113
Nils Diewald3aa9e692015-02-20 22:20:11 +0000114
115 /**
116 * Construct a new Krill object,
117 * consuming a {@link SpanQuery} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000118 *
119 * @param query
120 * The {@link SpanQuery} object.
Nils Diewald3aa9e692015-02-20 22:20:11 +0000121 */
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000122 public Krill (SpanQuery query) {
Nils Diewald3aa9e692015-02-20 22:20:11 +0000123 this.spanQuery = query;
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000124 };
Nils Diewaldc925b492013-12-03 23:56:10 +0000125
Nils Diewald3aa9e692015-02-20 22:20:11 +0000126
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000127 /**
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000128 * Parse KoralQuery as a json string.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000129 *
130 * @param query
131 * The KoralQuery json string.
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000132 * @return The {@link Krill} object for chaining.
133 * @throws QueryException
134 */
Akron850b46e2016-06-08 10:08:55 +0200135 public Krill fromKoral (final String query) {
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000136 // Parse query string
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000137 try {
138 this.request = mapper.readTree(query);
Akron850b46e2016-06-08 10:08:55 +0200139 this.fromKoral(this.request);
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000140 }
141
142 // Unable to parse JSON
143 catch (IOException e) {
144 this.addError(621, "Unable to parse JSON");
145 };
146
147 return this;
148 };
149
150
151 /**
152 * Parse KoralQuery as a {@link JsonNode} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000153 *
154 * @param query
155 * The KoralQuery {@link JsonNode} object.
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000156 * @return The {@link Krill} object for chaining.
157 * @throws QueryException
158 */
Akron850b46e2016-06-08 10:08:55 +0200159 public Krill fromKoral (JsonNode json) {
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000160
Nils Diewald3aa9e692015-02-20 22:20:11 +0000161 // Parse "query" attribute
162 if (json.has("query")) {
163 try {
Akron98b78542015-08-06 21:43:08 +0200164 final KrillQuery kq = new KrillQuery("tokens");
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000165 this.setQuery(kq);
Nils Diewald3aa9e692015-02-20 22:20:11 +0000166
Akron850b46e2016-06-08 10:08:55 +0200167 final SpanQueryWrapper qw = kq.fromKoral(json.get("query"));
Akron001dab32015-07-02 12:30:15 +0200168
Akron352dae82016-08-05 17:57:51 +0200169 // Koral messages are moved to the Krill object
170 this.moveNotificationsFrom(kq);
171
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000172 // Throw an error, in case the query matches everywhere
Akrondfc93572016-08-10 19:01:34 +0200173 if (qw.isEmpty()) {
Nils Diewald3aa9e692015-02-20 22:20:11 +0000174 this.addError(780, "This query matches everywhere");
Akronf9def5e2016-10-10 21:26:46 +0200175 }
176 else if (qw.isNull()) {
177 this.addError(783, "This query can't match anywhere");
178 }
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000179
Nils Diewald3aa9e692015-02-20 22:20:11 +0000180 else {
Nils Diewald21914ff2015-02-28 02:09:47 +0000181
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000182 // Serialize a Lucene SpanQuery based on the SpanQueryWrapper
Nils Diewald3aa9e692015-02-20 22:20:11 +0000183 this.spanQuery = qw.toQuery();
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000184
Akrona7b936d2016-03-04 13:40:54 +0100185 // TODO: Make these information query rewrites
Akron0f3607d2016-02-23 22:16:20 +0100186
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000187 // Throw a warning in case the root object is optional
Nils Diewald3aa9e692015-02-20 22:20:11 +0000188 if (qw.isOptional())
189 this.addWarning(781, "Optionality of query is ignored");
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000190
191 // Throw a warning in case the root object is negative
Nils Diewald3aa9e692015-02-20 22:20:11 +0000192 if (qw.isNegative())
193 this.addWarning(782, "Exclusivity of query is ignored");
194 };
Nils Diewald3aa9e692015-02-20 22:20:11 +0000195 }
196 catch (QueryException q) {
197 this.addError(q.getErrorCode(), q.getMessage());
198 };
199 }
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000200 else
Nils Diewald3aa9e692015-02-20 22:20:11 +0000201 this.addError(700, "No query given");
202
203 // <legacycode>
Nils Diewaldbb33da22015-03-04 16:24:25 +0000204 if (json.has("warning") && json.get("warning").asText().length() > 0) {
Nils Diewald3aa9e692015-02-20 22:20:11 +0000205 this.addWarning(799, json.get("warning").asText());
206 };
207 // </legacycode>
208
Nils Diewald3aa9e692015-02-20 22:20:11 +0000209 // Copy notifications from request
210 this.copyNotificationsFrom(json);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000211
Nils Diewald21914ff2015-02-28 02:09:47 +0000212 // Parse "collection" or "collections" attribute
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000213 try {
margaretha24a8da62025-08-19 09:35:18 +0200214 if (json.has("corpus")) {
215 final JsonNode collNode = json.get("corpus");
216
217 // TODO: Temporary
218 if (collNode.fieldNames().hasNext()) {
margaretha7a6c4002025-10-13 10:24:13 +0200219 KrillCollection kc = new KrillCollection().fromKoral(collNode);
220 kc.isCorpus=true;
221 this.setCollection(kc);
margaretha24a8da62025-08-19 09:35:18 +0200222 };
223 }
224 // EM: legacy
225 else if (json.has("collection")) {
Akron98b78542015-08-06 21:43:08 +0200226 final JsonNode collNode = json.get("collection");
Akronbb5d1732015-06-22 01:22:40 +0200227
Akronc63697c2015-06-17 22:32:02 +0200228 // TODO: Temporary
Akronbb5d1732015-06-22 01:22:40 +0200229 if (collNode.fieldNames().hasNext()) {
margaretha7a6c4002025-10-13 10:24:13 +0200230 KrillCollection kc = new KrillCollection().fromKoral(collNode);
231 kc.isCorpus=false;
232 this.setCollection(kc);
Akronbb5d1732015-06-22 01:22:40 +0200233 };
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000234 }
Nils Diewald3aa9e692015-02-20 22:20:11 +0000235
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000236 else if (json.has("collections")) {
Akron40550172015-08-04 03:06:12 +0200237 this.addError(899,
238 "Collections are not supported anymore in favour of a single collection");
Nils Diewald3aa9e692015-02-20 22:20:11 +0000239 };
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000240 }
241 catch (QueryException q) {
242 this.addError(q.getErrorCode(), q.getMessage());
Nils Diewald3aa9e692015-02-20 22:20:11 +0000243 };
Nils Diewald3aa9e692015-02-20 22:20:11 +0000244
Nils Diewald21914ff2015-02-28 02:09:47 +0000245 // Parse "meta" attribute
Akron001dab32015-07-02 12:30:15 +0200246 // !this.hasErrors() &&
247 if (json.has("meta"))
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000248 this.setMeta(new KrillMeta(json.get("meta")));
Nils Diewald3aa9e692015-02-20 22:20:11 +0000249
Nils Diewald3aa9e692015-02-20 22:20:11 +0000250 return this;
251 };
252
Nils Diewald3aa9e692015-02-20 22:20:11 +0000253
254 /**
Nils Diewalda14ecd62015-02-26 21:00:20 +0000255 * Get the associated {@link KrillIndex} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000256 *
Nils Diewalda14ecd62015-02-26 21:00:20 +0000257 * @return The associated {@link KrillIndex} object.
Nils Diewald3aa9e692015-02-20 22:20:11 +0000258 */
Nils Diewalda14ecd62015-02-26 21:00:20 +0000259 public KrillIndex getIndex () {
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000260 return this.index;
261 };
262
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000263
264 /**
Nils Diewald21914ff2015-02-28 02:09:47 +0000265 * Set the {@link KrillIndex} object.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000266 *
267 * @param index
268 * The associated {@link KrillIndex} object.
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000269 * @return The {@link Krill} object for chaining.
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000270 */
Nils Diewalda14ecd62015-02-26 21:00:20 +0000271 public Krill setIndex (KrillIndex index) {
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000272 this.index = index;
margaretha05a4bc12022-02-11 10:55:43 +0100273 VirtualCorpusCache.setIndexInfo(index);
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000274 return this;
275 };
276
277
Nils Diewald3aa9e692015-02-20 22:20:11 +0000278 /**
279 * Apply the KoralQuery to an index.
Nils Diewald21914ff2015-02-28 02:09:47 +0000280 * This may invoke different actions depending
Nils Diewaldbb33da22015-03-04 16:24:25 +0000281 * on the meta information, like {@link KrillIndex#search} or
282 * {@link KrillIndex#collect}.
283 *
284 * @param index
285 * The {@link KrillIndex} the search should be applyied
286 * to.
Nils Diewald884dbcf2015-02-27 17:02:28 +0000287 * @return The result as a {@link Result} object.
Nils Diewald3aa9e692015-02-20 22:20:11 +0000288 */
Nils Diewald884dbcf2015-02-27 17:02:28 +0000289 public Result apply (KrillIndex index) {
margaretha5a8abea2021-11-08 16:57:51 +0100290 VirtualCorpusCache.setIndexInfo(index);
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000291 return this.setIndex(index).apply();
292 };
Nils Diewaldea28b622014-10-01 16:01:31 +0000293
Nils Diewaldc925b492013-12-03 23:56:10 +0000294
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000295 /**
296 * Apply the KoralQuery to an index.
Nils Diewald21914ff2015-02-28 02:09:47 +0000297 * This may invoke different actions depending
Nils Diewaldbb33da22015-03-04 16:24:25 +0000298 * on the meta information, like {@link KrillIndex#search} or
299 * {@link KrillIndex#collect}.
300 *
Nils Diewald884dbcf2015-02-27 17:02:28 +0000301 * @return The result as a {@link Result} object.
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000302 */
Nils Diewald884dbcf2015-02-27 17:02:28 +0000303 public Result apply () {
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000304
Nils Diewald884dbcf2015-02-27 17:02:28 +0000305 // Create new Result object to return
306 Result kr = new Result();
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000307
308 // There were errors
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000309 if (this.hasErrors()) {
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000310 kr.copyNotificationsFrom(this);
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000311 }
312
313 // There was no index
314 else if (this.index == null) {
315 kr.addError(601, "Unable to find index");
316 }
317
318 // Apply search
319 else {
Akronbb5d1732015-06-22 01:22:40 +0200320 // This contains meta and matches
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000321 kr = this.index.search(this);
Akronbb5d1732015-06-22 01:22:40 +0200322 // this.getCollection().setIndex(this.index);
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000323 kr.copyNotificationsFrom(this);
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000324 };
Nils Diewaldc6b78752013-12-05 19:05:12 +0000325
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000326 kr.setQuery(this.getQuery());
Akron1a8bb762019-01-18 15:48:59 +0100327
Akronbb5d1732015-06-22 01:22:40 +0200328 kr.setCollection(this.getCollection());
Akronb1166442015-06-27 00:34:19 +0200329 kr.setMeta(this.getMeta());
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000330
Nils Diewaldd75e6f62015-01-28 23:44:56 +0000331 return kr;
Nils Diewald7cf8c6d2014-05-28 18:37:38 +0000332 };
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000333
Nils Diewaldf5ab4b22015-02-25 20:55:16 +0000334
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000335 /**
336 * Get the associated {@link SpanQuery} deserialization
Nils Diewaldbb33da22015-03-04 16:24:25 +0000337 * (i.e. the internal correspandence to KoralQuery's query
338 * object).
339 *
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000340 * <strong>Warning</strong>: SpanQueries may be lazy deserialized
341 * in future versions of Krill, rendering this API obsolete.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000342 *
Nils Diewaldd37f7e42015-02-27 21:08:22 +0000343 * @return The deserialized {@link SpanQuery} object.
344 */
Nils Diewaldbbd39a52015-02-23 19:56:57 +0000345 @Deprecated
346 public SpanQuery getSpanQuery () {
347 return this.spanQuery;
margaretha953fd012017-09-04 16:33:39 +0200348 }
349
350 //EM
351 public void setSpanQuery (SpanQuery sq) {
352 this.spanQuery = sq;
353
354 }
margaretha3827d532022-01-31 14:41:55 +0100355
356 public JsonNode retrieveFieldValues (String corpusQuery, KrillIndex index,
357 String fieldName) {
358 KrillCollection kc = new KrillCollection(corpusQuery);
359 List<String> fieldValues = index.getFieldVector(fieldName, kc);
360 VirtualCorpusResponse r = new VirtualCorpusResponse();
361 return r.createKoralQueryForField(fieldName, fieldValues);
362 }
margaretha464ae452024-05-17 11:57:30 +0200363
364
365 public int getMaxTokenMatchSize () {
366 return maxTokenMatchSize;
367 }
368
369
370 public void setMaxTokenMatchSize (int maxMatchTokens) {
371 this.maxTokenMatchSize = maxMatchTokens;
372 }
Nils Diewald9f310832013-12-06 22:38:55 +0000373};