blob: c011d6e16e166a962f135fbd8d46b2e4b4810f58 [file] [log] [blame]
Nils Diewaldf399a672013-11-18 17:55:22 +00001package de.ids_mannheim.korap;
2
Nils Diewaldf399a672013-11-18 17:55:22 +00003import de.ids_mannheim.korap.query.wrap.*;
Nils Diewald6d50c1f2013-12-04 20:14:08 +00004import de.ids_mannheim.korap.util.QueryException;
5
6import org.apache.lucene.search.spans.SpanQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +00007import org.apache.lucene.util.automaton.RegExp;
8
Nils Diewald6d50c1f2013-12-04 20:14:08 +00009import com.fasterxml.jackson.databind.ObjectMapper;
Nils Diewaldc925b492013-12-03 23:56:10 +000010import com.fasterxml.jackson.databind.JsonNode;
11
Nils Diewaldf399a672013-11-18 17:55:22 +000012import java.util.*;
Nils Diewald6d50c1f2013-12-04 20:14:08 +000013import java.io.*;
Nils Diewaldf399a672013-11-18 17:55:22 +000014
15import org.slf4j.Logger;
16import org.slf4j.LoggerFactory;
17
/*
  TODO: Create a pre-filter while preparing a query.
  The pre-filter will contain a boolean query with all
  necessary terms, supporting boolean OR, ignoring
  negation terms (and negation subqueries), like
  [base=Der]([base=alte]|[base=junge])[base=Mann & p!=ADJA]![base=war | base=lag]
  Search for all documents containing "s:Der" and ("s:alte" or "s:junge") and "s:Mann"

 */
27
Nils Diewaldf399a672013-11-18 17:55:22 +000028/**
29 * @author Nils Diewald
30 *
31 * KorapQuery implements a simple API for wrapping
Nils Diewald26087ea2013-12-05 16:51:30 +000032 * KorAP Lucene Index specific query classes.
Nils Diewaldf399a672013-11-18 17:55:22 +000033 */
34public class KorapQuery {
35 private String field;
Nils Diewald6d50c1f2013-12-04 20:14:08 +000036 private ObjectMapper json;
Nils Diewaldf399a672013-11-18 17:55:22 +000037
Nils Diewaldc6b78752013-12-05 19:05:12 +000038 private String defaultFoundry = "mate/";
39
Nils Diewaldf399a672013-11-18 17:55:22 +000040 // Logger
41 private final static Logger log = LoggerFactory.getLogger(KorapQuery.class);
42
43 /**
44 * Constructs a new base object for query generation.
45 * @param field The specific index field for the query.
46 */
47 public KorapQuery (String field) {
48 this.field = field;
Nils Diewald6d50c1f2013-12-04 20:14:08 +000049 this.json = new ObjectMapper();
Nils Diewaldf399a672013-11-18 17:55:22 +000050 };
51
Nils Diewald6d50c1f2013-12-04 20:14:08 +000052 public SpanQueryWrapperInterface fromJSON (String jsonString) throws QueryException {
53 JsonNode json;
54 try {
55 json = this.json.readValue(jsonString, JsonNode.class);
56 }
57 catch (IOException e) {
58 throw new QueryException(e.getMessage());
59 };
60
61 if (!json.has("@type") && json.has("query"))
62 json = json.get("query");
63
64 return this.fromJSON(json);
Nils Diewaldc925b492013-12-03 23:56:10 +000065 };
66
67 // http://fasterxml.github.io/jackson-databind/javadoc/2.2.0/com/fasterxml/jackson/databind/JsonNode.html
Nils Diewald6d50c1f2013-12-04 20:14:08 +000068 // TODO: Exception messages are horrible!
Nils Diewald630811f2013-12-11 16:40:28 +000069 // TODO: Use the shortcuts implemented in this class instead of the wrapper constructors
70 // TODO: Check for isArray()
71 // TODO: Check for the number of operands before getting them
Nils Diewald6d50c1f2013-12-04 20:14:08 +000072 public SpanQueryWrapperInterface fromJSON (JsonNode json) throws QueryException {
73
74 if (!json.has("@type")) {
75 throw new QueryException("JSON-LD group has no @type attribute");
76 };
77
Nils Diewaldc925b492013-12-03 23:56:10 +000078 String type = json.get("@type").asText();
Nils Diewald6d50c1f2013-12-04 20:14:08 +000079
80 switch (type) {
81
82 case "korap:group":
83 SpanClassQueryWrapper classWrapper;
84
85 if (!json.has("relation")) {
86 if (json.has("class")) {
87 return new SpanClassQueryWrapper(
88 this.fromJSON(json.get("operands").get(0)),
89 json.get("class").asInt(0)
90 );
91 }
92 throw new QueryException("Group needs a relation or a class");
93 };
94
Nils Diewaldc925b492013-12-03 23:56:10 +000095 String relation = json.get("relation").asText();
96
Nils Diewald6d50c1f2013-12-04 20:14:08 +000097 if (!json.has("operands"))
98 throw new QueryException("Operation needs operands");
99
Nils Diewaldc925b492013-12-03 23:56:10 +0000100 // Alternation
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000101 switch (relation) {
102
103 case "or":
104
Nils Diewaldc925b492013-12-03 23:56:10 +0000105 SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
106 for (JsonNode operand : json.get("operands")) {
107 ssaq.or(this.fromJSON(operand));
108 };
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000109 if (json.has("class")) {
110 return new SpanClassQueryWrapper(ssaq, json.get("class").asInt(0));
111 };
Nils Diewaldc925b492013-12-03 23:56:10 +0000112 return ssaq;
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000113
114 case "position":
115 if (!json.has("position"))
116 throw new QueryException("Operation needs position specification");
117
Nils Diewald4d183ea2013-12-05 02:51:38 +0000118 String position = json.get("position").asText();
Nils Diewald26087ea2013-12-05 16:51:30 +0000119 short flag = 0;
Nils Diewald4d183ea2013-12-05 02:51:38 +0000120 switch (position) {
Nils Diewald4d183ea2013-12-05 02:51:38 +0000121 case "startswith":
Nils Diewald26087ea2013-12-05 16:51:30 +0000122 flag = (short) 1;
123 break;
Nils Diewald4d183ea2013-12-05 02:51:38 +0000124 case "endswith":
Nils Diewald26087ea2013-12-05 16:51:30 +0000125 flag = (short) 2;
126 break;
Nils Diewald4d183ea2013-12-05 02:51:38 +0000127 case "match":
Nils Diewald26087ea2013-12-05 16:51:30 +0000128 flag = (short) 3;
129 break;
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000130 };
Nils Diewald4d183ea2013-12-05 02:51:38 +0000131
Nils Diewald26087ea2013-12-05 16:51:30 +0000132 return new SpanWithinQueryWrapper(
133 this.fromJSON(json.get("operands").get(0)),
134 this.fromJSON(json.get("operands").get(1)),
135 flag
136 );
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000137
138 case "shrink":
139 int number = 0;
140 // temporary
141 if (json.has("shrink"))
142 number = json.get("shrink").asInt();
143
144 return new SpanMatchModifyQueryWrapper(this.fromJSON(json.get("operands").get(0)), number);
Nils Diewaldc925b492013-12-03 23:56:10 +0000145 };
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000146 throw new QueryException("Unknown group relation");
147
148 case "korap:token":
Nils Diewald4d183ea2013-12-05 02:51:38 +0000149 return this._segFromJSON(json.get("@value"));
Nils Diewaldc925b492013-12-03 23:56:10 +0000150
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000151 case "korap:sequence":
152 if (!json.has("operands"))
153 throw new QueryException("SpanSequenceQuery needs operands");
154
Nils Diewald630811f2013-12-11 16:40:28 +0000155 JsonNode operands = json.get("operands");
156 if (!operands.isArray() || operands.size() < 2)
157 throw new QueryException("SpanSequenceQuery needs operands");
158
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000159 SpanSequenceQueryWrapper sseqqw = new SpanSequenceQueryWrapper(this.field);
160 for (JsonNode operand : json.get("operands")) {
161 sseqqw.append(this.fromJSON(operand));
162 };
163 return sseqqw;
Nils Diewald4d183ea2013-12-05 02:51:38 +0000164
165 case "korap:element":
166 String value = json.get("@value").asText().replace('=',':');
167 return this.tag(value);
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000168 };
169 throw new QueryException("Unknown serialized query type: " + type);
Nils Diewaldc925b492013-12-03 23:56:10 +0000170 };
171
Nils Diewaldf399a672013-11-18 17:55:22 +0000172
Nils Diewald4d183ea2013-12-05 02:51:38 +0000173 private SpanQueryWrapperInterface _segFromJSON (JsonNode json) throws QueryException {
174 String type = json.get("@type").asText();
175 switch (type) {
Nils Diewald26087ea2013-12-05 16:51:30 +0000176
Nils Diewald4d183ea2013-12-05 02:51:38 +0000177 case "korap:term":
178 switch (json.get("relation").asText()) {
179 case "=":
Nils Diewald26087ea2013-12-05 16:51:30 +0000180 String value = json.get("@value").asText();
181
Nils Diewaldc6b78752013-12-05 19:05:12 +0000182 value = value.replaceFirst("base:", defaultFoundry +"l:").replaceFirst("orth:", "s:");
Nils Diewald26087ea2013-12-05 16:51:30 +0000183
184 if (json.has("@subtype") && json.get("@subtype").asText().equals("korap:regex")) {
185 if (value.charAt(0) == '\'' || value.charAt(0) == '"') {
186 value = "s:" + value;
187 };
188 value = value.replace("'", "").replace("\"", "");
189
Nils Diewald9cc86fe2013-12-07 17:45:59 +0000190 // Temporary
191 value = value.replace("_", "/");
192
Nils Diewald26087ea2013-12-05 16:51:30 +0000193 return this.seg(this.re(value));
194 };
195
196 if (!value.matches("[^:]+?:.+"))
197 value = "s:" + value;
198
Nils Diewald9cc86fe2013-12-07 17:45:59 +0000199 // Temporary
200 value = value.replace("_", "/");
201
Nils Diewald26087ea2013-12-05 16:51:30 +0000202 return this.seg(value);
203
Nils Diewald4d183ea2013-12-05 02:51:38 +0000204 case "!=":
205 throw new QueryException("Term relation != not yet supported");
206 };
207 throw new QueryException("Unknown term relation");
Nils Diewald26087ea2013-12-05 16:51:30 +0000208
Nils Diewald4d183ea2013-12-05 02:51:38 +0000209 case "korap:group":
210 SpanSegmentQueryWrapper ssegqw = new SpanSegmentQueryWrapper(this.field);
211 switch (json.get("relation").asText()) {
212 case "and":
213 for (JsonNode operand : json.get("operands")) {
214 SpanQueryWrapperInterface part = this._segFromJSON(operand);
215 if (part instanceof SpanAlterQueryWrapper) {
216 ssegqw.with((SpanAlterQueryWrapper) part);
217 }
218 else if (part instanceof SpanRegexQueryWrapper) {
219 ssegqw.with((SpanRegexQueryWrapper) part);
220 }
221 else if (part instanceof SpanSegmentQueryWrapper) {
222 ssegqw.with((SpanSegmentQueryWrapper) part);
223 }
224 else {
225 throw new QueryException("Object not supported in segment queries");
226 };
227 };
228 return ssegqw;
Nils Diewald26087ea2013-12-05 16:51:30 +0000229 case "or":
230 SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
231 for (JsonNode operand : json.get("operands")) {
232 ssaq.or(this._segFromJSON(operand));
233 };
234 return ssaq;
Nils Diewald4d183ea2013-12-05 02:51:38 +0000235 };
236 };
237 throw new QueryException("Unknown token type");
238};
239
240
241
Nils Diewaldb0dd9552013-12-20 02:28:34 +0000242 // SpanRegexQueryWrapper
Nils Diewaldf399a672013-11-18 17:55:22 +0000243 /**
244 * Create a query object based on a regular expression.
245 * @param re The regular expession as a string.
246 */
247 public SpanRegexQueryWrapper re (String re) {
248 return new SpanRegexQueryWrapper(this.field, re, RegExp.ALL, false);
249 };
250
251 /**
252 * Create a query object based on a regular expression.
253 * @param re The regular expession as a string.
254 * @param flas The regular expession flag as an integer.
255 */
256 public SpanRegexQueryWrapper re (String re, int flags) {
257 return new SpanRegexQueryWrapper(this.field, re, flags, false);
258 };
259
Nils Diewaldf399a672013-11-18 17:55:22 +0000260 /**
261 * Create a query object based on a regular expression.
262 * @param re The regular expession as a string.
263 * @param flag The regular expession flag.
264 * @param caseinsensitive A boolean value indicating case insensitivity.
265 */
266 public SpanRegexQueryWrapper re (String re, int flags, boolean caseinsensitive) {
267 return new SpanRegexQueryWrapper(this.field, re, flags, caseinsensitive);
268 };
269
Nils Diewaldf399a672013-11-18 17:55:22 +0000270 /**
271 * Create a query object based on a regular expression.
272 * @param re The regular expession as a string.
273 * @param caseinsensitive A boolean value indicating case insensitivity.
274 */
275 public SpanRegexQueryWrapper re (String re, boolean caseinsensitive) {
276 return new SpanRegexQueryWrapper(this.field, re, RegExp.ALL, caseinsensitive);
277 };
278
Nils Diewaldb0dd9552013-12-20 02:28:34 +0000279 // SpanWildcardQueryWrapper
280 /**
281 * Create a query object based on a wildcard term.
282 * @param wc The wildcard term as a string.
283 */
284 public SpanWildcardQueryWrapper wc (String wc) {
285 return new SpanWildcardQueryWrapper(this.field, wc, false);
286 };
287
288 /**
289 * Create a query object based on a wildcard term.
290 * @param wc The wildcard term as a string.
291 * @param caseinsensitive A boolean value indicating case insensitivity.
292 */
293 public SpanWildcardQueryWrapper wc (String wc, boolean caseinsensitive) {
294 return new SpanWildcardQueryWrapper(this.field, wc, caseinsensitive);
295 };
296
Nils Diewaldf399a672013-11-18 17:55:22 +0000297
298 // SpanSegmentQueries
299 /**
300 * Create a segment query object.
301 */
302 public SpanSegmentQueryWrapper seg () {
303 return new SpanSegmentQueryWrapper(this.field);
304 };
305
306
307 /**
308 * Create a segment query object.
309 * @param terms[] An array of terms, the segment consists of.
310 */
311 public SpanSegmentQueryWrapper seg (SpanRegexQueryWrapper ... terms) {
312 SpanSegmentQueryWrapper ssq = new SpanSegmentQueryWrapper(this.field);
313 for (SpanRegexQueryWrapper t : terms) {
314 ssq.with(t);
315 };
316 return ssq;
317 };
318
319 public SpanSegmentQueryWrapper seg (SpanAlterQueryWrapper ... terms) {
320 SpanSegmentQueryWrapper ssq = new SpanSegmentQueryWrapper(this.field);
321 for (SpanAlterQueryWrapper t : terms) {
322 ssq.with(t);
323 };
324 return ssq;
325 };
326
327 public SpanSegmentQueryWrapper seg (String ... terms) {
328 SpanSegmentQueryWrapper ssq = new SpanSegmentQueryWrapper(this.field);
329 for (String t : terms) {
330 ssq.with(t);
331 };
332 return ssq;
333 };
334
335 // SpanSegmentAlterQueries
336 /**
337 * Create a segment alternation query object.
338 * @param terms[] An array of alternative terms.
339 */
340 public SpanAlterQueryWrapper or (SpanQueryWrapperInterface ... terms) {
341 SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
342 for (SpanQueryWrapperInterface t : terms) {
343 ssaq.or(t);
344 };
345 return ssaq;
346 };
347
348 public SpanAlterQueryWrapper or (String ... terms) {
349 SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
350 for (String t : terms) {
351 ssaq.or(t);
352 };
353 return ssaq;
354 };
355
356
357 // SpanSegmentSequenceQueries
358 /**
359 * Create a sequence of segments query object.
360 */
361 public SpanSequenceQueryWrapper seq () {
362 return new SpanSequenceQueryWrapper(this.field);
363 };
364
365
366 /**
367 * Create a sequence of segments query object.
368 * @param terms[] An array of segment defining terms.
369 */
370 public SpanSequenceQueryWrapper seq (SpanQueryWrapperInterface ... terms) {
371 SpanSequenceQueryWrapper sssq = new SpanSequenceQueryWrapper(this.field);
372 for (SpanQueryWrapperInterface t : terms) {
373 sssq.append(t);
374 };
375 return sssq;
376 };
377
378
379 /**
380 * Create a sequence of segments query object.
381 * @param re A SpanSegmentRegexQuery, starting the sequence.
382 */
383 public SpanSequenceQueryWrapper seq (SpanRegexQueryWrapper re) {
384 return new SpanSequenceQueryWrapper(this.field, re);
385 };
386
387
388 public SpanSequenceQueryWrapper seq (Object ... terms) {
389 SpanSequenceQueryWrapper ssq = new SpanSequenceQueryWrapper(this.field);
390 for (Object t : terms) {
391 if (t instanceof SpanQueryWrapperInterface) {
392 ssq.append((SpanQueryWrapperInterface) t);
393 }
394 else if (t instanceof SpanRegexQueryWrapper) {
395 ssq.append((SpanRegexQueryWrapper) t);
396 }
397 else {
398 log.error("{} is not an acceptable parameter for seq()", t.getClass());
399 return ssq;
400 };
401 };
402 return ssq;
403 };
404
405 public SpanElementQueryWrapper tag (String element) {
406 return new SpanElementQueryWrapper(this.field, element);
407 };
408
409 /**
410 * Create a wrapping within query object.
411 * @param element A SpanQuery.
412 * @param embedded A SpanQuery that is wrapped in the element.
413 */
414 public SpanWithinQueryWrapper within (SpanQueryWrapperInterface element,
415 SpanQueryWrapperInterface embedded) {
416 return new SpanWithinQueryWrapper(element, embedded);
417 };
418
Nils Diewaldf399a672013-11-18 17:55:22 +0000419 // Class
420 public SpanClassQueryWrapper _ (byte number, SpanQueryWrapperInterface element) {
421 return new SpanClassQueryWrapper(element, number);
422 };
423
424 public SpanClassQueryWrapper _ (int number, SpanQueryWrapperInterface element) {
425 return new SpanClassQueryWrapper(element, number);
426 };
427
428 public SpanClassQueryWrapper _ (short number, SpanQueryWrapperInterface element) {
429 return new SpanClassQueryWrapper(element, number);
430 };
431
432 public SpanClassQueryWrapper _ (SpanQueryWrapperInterface element) {
433 return new SpanClassQueryWrapper(element);
434 };
435
436 // MatchModify
437 public SpanMatchModifyQueryWrapper shrink (byte number, SpanQueryWrapperInterface element) {
438 return new SpanMatchModifyQueryWrapper(element, number);
439 };
440
441 public SpanMatchModifyQueryWrapper shrink (int number, SpanQueryWrapperInterface element) {
442 return new SpanMatchModifyQueryWrapper(element, number);
443 };
444
445 public SpanMatchModifyQueryWrapper shrink (short number, SpanQueryWrapperInterface element) {
446 return new SpanMatchModifyQueryWrapper(element, number);
447 };
448
449 public SpanMatchModifyQueryWrapper shrink (SpanQueryWrapperInterface element) {
450 return new SpanMatchModifyQueryWrapper(element);
451 };
452
453 // split
454
455};