blob: 11616ea49866525adca067121aa0ecfbe3e4b9a3 [file] [log] [blame]
Nils Diewaldf399a672013-11-18 17:55:22 +00001package de.ids_mannheim.korap;
2
Nils Diewaldf399a672013-11-18 17:55:22 +00003import de.ids_mannheim.korap.query.wrap.*;
Nils Diewald6d50c1f2013-12-04 20:14:08 +00004import de.ids_mannheim.korap.util.QueryException;
5
6import org.apache.lucene.search.spans.SpanQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +00007import org.apache.lucene.util.automaton.RegExp;
8
Nils Diewald6d50c1f2013-12-04 20:14:08 +00009import com.fasterxml.jackson.databind.ObjectMapper;
Nils Diewaldc925b492013-12-03 23:56:10 +000010import com.fasterxml.jackson.databind.JsonNode;
11
Nils Diewald6802acd2014-03-18 18:29:30 +000012import de.ids_mannheim.korap.query.SpanWithinQuery;
13
Nils Diewaldf399a672013-11-18 17:55:22 +000014import java.util.*;
Nils Diewald6d50c1f2013-12-04 20:14:08 +000015import java.io.*;
Nils Diewaldf399a672013-11-18 17:55:22 +000016
17import org.slf4j.Logger;
18import org.slf4j.LoggerFactory;
19
/*
  TODO: Create Pre-filter while preparing a Query.
  The pre-filter will contain a boolean query with all
  necessary terms, supporting boolean OR, ignoring
  negation terms (and negation subqueries), like
  [base=Der]([base=alte]|[base=junge])[base=Mann & p!=ADJA]![base=war | base=lag]
  Search for all documents containing "s:Der" and ("s:alte" or "s:junge") and "s:Mann"

  TODO: korap:reference doesn't work as expected:
  - Check with
    - focus(2:{1:[orth=der]{3:{2:[orth=Baum]}}})
    - focus(3:startswith(<s>,{3:[tt/p=ART]{1:{2:[tt/p=ADJA]{3,4}}[tt/p=NN]}}))
    - focus(3:endswith(<s>,{3:[tt/p=ART]{1:{2:[tt/p=ADJA]{3,4}}[tt/p=NN]}}))

*/
Nils Diewald8c221782013-12-13 19:52:58 +000035
/**
 * KorapQuery implements a simple API for wrapping
 * KorAP Lucene Index specific query classes.
 *
 * @author Nils Diewald
 */
42public class KorapQuery {
43 private String field;
Nils Diewald6d50c1f2013-12-04 20:14:08 +000044 private ObjectMapper json;
Nils Diewaldf399a672013-11-18 17:55:22 +000045
Nils Diewaldc86aa482014-02-12 16:58:05 +000046 // The default foundry for lemmata and pos
Nils Diewald164f8be2014-02-13 02:43:16 +000047 private String defaultFoundry = "mate/";
Nils Diewaldc6b78752013-12-05 19:05:12 +000048
Nils Diewaldf399a672013-11-18 17:55:22 +000049 // Logger
50 private final static Logger log = LoggerFactory.getLogger(KorapQuery.class);
51
Nils Diewald8c543432014-02-27 18:25:38 +000052 // This advices the java compiler to ignore all loggings
53 public static final boolean DEBUG = false;
54
Nils Diewald6802acd2014-03-18 18:29:30 +000055 public static final byte
56 OVERLAP = SpanWithinQuery.OVERLAP,
57 REAL_OVERLAP = SpanWithinQuery.REAL_OVERLAP,
58 WITHIN = SpanWithinQuery.WITHIN,
59 REAL_WITHIN = SpanWithinQuery.REAL_WITHIN,
60 ENDSWITH = SpanWithinQuery.ENDSWITH,
61 STARTSWITH = SpanWithinQuery.STARTSWITH,
62 MATCH = SpanWithinQuery.MATCH;
63
Nils Diewald1455e1e2014-08-01 16:12:43 +000064 private static final int MAX_CLASS_NUM = 127;
Nils Diewald8c543432014-02-27 18:25:38 +000065
Nils Diewaldf399a672013-11-18 17:55:22 +000066 /**
67 * Constructs a new base object for query generation.
68 * @param field The specific index field for the query.
69 */
70 public KorapQuery (String field) {
71 this.field = field;
Nils Diewald6d50c1f2013-12-04 20:14:08 +000072 this.json = new ObjectMapper();
Nils Diewaldf399a672013-11-18 17:55:22 +000073 };
74
Nils Diewald47f62e22014-07-24 14:51:38 +000075 /**
76 * Private class for korap:boundary objects
77 */
78 private class Boundary {
79 public int min, max;
80
81 public Boundary (JsonNode json, int defaultMin, int defaultMax) throws QueryException {
Nils Diewald1455e1e2014-08-01 16:12:43 +000082
Nils Diewald47f62e22014-07-24 14:51:38 +000083 if (!json.has("@type") ||
84 !json.get("@type").asText().equals("korap:boundary")) {
85 throw new QueryException("Boundary definition is not valid");
86 };
87
88 min = json.get("min").asInt(defaultMin);
89 max = json.get("max").asInt(defaultMax);
Nils Diewald1455e1e2014-08-01 16:12:43 +000090
91 if (DEBUG)
92 log.trace("Found korap:boundary with {}:{}");
Nils Diewald47f62e22014-07-24 14:51:38 +000093 };
94 };
95
Nils Diewald6d50c1f2013-12-04 20:14:08 +000096 public SpanQueryWrapperInterface fromJSON (String jsonString) throws QueryException {
97 JsonNode json;
98 try {
99 json = this.json.readValue(jsonString, JsonNode.class);
100 }
101 catch (IOException e) {
102 throw new QueryException(e.getMessage());
103 };
104
105 if (!json.has("@type") && json.has("query"))
106 json = json.get("query");
107
108 return this.fromJSON(json);
Nils Diewaldc925b492013-12-03 23:56:10 +0000109 };
110
111 // http://fasterxml.github.io/jackson-databind/javadoc/2.2.0/com/fasterxml/jackson/databind/JsonNode.html
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000112 // TODO: Exception messages are horrible!
Nils Diewald630811f2013-12-11 16:40:28 +0000113 // TODO: Use the shortcuts implemented in this class instead of the wrapper constructors
114 // TODO: Check for isArray()
Nils Diewaldc86aa482014-02-12 16:58:05 +0000115 // TODO: Rename this span context!
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000116 public SpanQueryWrapperInterface fromJSON (JsonNode json) throws QueryException {
117
Nils Diewald1455e1e2014-08-01 16:12:43 +0000118 int number = 0;
119
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000120 if (!json.has("@type")) {
121 throw new QueryException("JSON-LD group has no @type attribute");
122 };
123
Nils Diewaldc925b492013-12-03 23:56:10 +0000124 String type = json.get("@type").asText();
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000125
126 switch (type) {
127
128 case "korap:group":
129 SpanClassQueryWrapper classWrapper;
130
Nils Diewaldc86aa482014-02-12 16:58:05 +0000131 if (!json.has("operation"))
132 throw new QueryException("Group expects operation");
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000133
Nils Diewaldc86aa482014-02-12 16:58:05 +0000134 String operation = json.get("operation").asText();
Nils Diewaldc925b492013-12-03 23:56:10 +0000135
Nils Diewald1455e1e2014-08-01 16:12:43 +0000136 if (DEBUG)
137 log.trace("Found {} group", operation);
138
Nils Diewaldc86aa482014-02-12 16:58:05 +0000139 // Get all operands
140 JsonNode operands = json.get("operands");
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000141
Nils Diewald1455e1e2014-08-01 16:12:43 +0000142 if (DEBUG)
143 log.trace("Operands are {}", operands);
144
Nils Diewaldc86aa482014-02-12 16:58:05 +0000145 if (!json.has("operands") || !operands.isArray())
146 throw new QueryException("Operation needs operand list");
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000147
Nils Diewaldc86aa482014-02-12 16:58:05 +0000148 switch (operation) {
149
150 case "operation:or":
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000151
Nils Diewaldc925b492013-12-03 23:56:10 +0000152 SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
Nils Diewaldc86aa482014-02-12 16:58:05 +0000153 for (JsonNode operand : operands) {
Nils Diewaldc925b492013-12-03 23:56:10 +0000154 ssaq.or(this.fromJSON(operand));
155 };
156 return ssaq;
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000157
Nils Diewaldc86aa482014-02-12 16:58:05 +0000158 case "operation:position":
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000159
Nils Diewaldc86aa482014-02-12 16:58:05 +0000160 if (operands.size() != 2)
161 throw new QueryException("Operation needs exactly two operands");
162
163 // TODO: Check for operands
164
Nils Diewald1455e1e2014-08-01 16:12:43 +0000165 String frame = json.has("frame") ?
166 json.get("frame").asText() :
167 "frame:contains";
168
169 if (DEBUG)
170 log.trace("Position frame is '{}'", frame);
171
Nils Diewald6802acd2014-03-18 18:29:30 +0000172 byte flag = WITHIN;
Nils Diewaldc86aa482014-02-12 16:58:05 +0000173 switch (frame) {
174 case "frame:contains":
175 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000176 case "frame:strictlyContains":
177 flag = REAL_WITHIN;
178 break;
Nils Diewaldc86aa482014-02-12 16:58:05 +0000179 case "frame:within":
180 break;
181 case "frame:startswith":
Nils Diewald6802acd2014-03-18 18:29:30 +0000182 flag = STARTSWITH;
Nils Diewald26087ea2013-12-05 16:51:30 +0000183 break;
Nils Diewaldc86aa482014-02-12 16:58:05 +0000184 case "frame:endswith":
Nils Diewald6802acd2014-03-18 18:29:30 +0000185 flag = ENDSWITH;
Nils Diewald26087ea2013-12-05 16:51:30 +0000186 break;
Nils Diewaldc86aa482014-02-12 16:58:05 +0000187 case "frame:matches":
Nils Diewald6802acd2014-03-18 18:29:30 +0000188 flag = MATCH;
Nils Diewald26087ea2013-12-05 16:51:30 +0000189 break;
Nils Diewaldc86aa482014-02-12 16:58:05 +0000190 case "frame:overlaps":
Nils Diewald6802acd2014-03-18 18:29:30 +0000191 flag = OVERLAP;
192 break;
193 case "frame:strictlyOverlaps":
194 flag = REAL_OVERLAP;
195 break;
Nils Diewaldc86aa482014-02-12 16:58:05 +0000196 default:
197 throw new QueryException("Frame type unknown");
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000198 };
Nils Diewald4d183ea2013-12-05 02:51:38 +0000199
Nils Diewaldc86aa482014-02-12 16:58:05 +0000200 // Check for exclusion modificator
201 Boolean exclude;
202 if (json.has("exclude") && json.get("exclude").asBoolean())
Nils Diewald1455e1e2014-08-01 16:12:43 +0000203 throw new QueryException(
204 "Exclusion is currently not supported in position operations"
205 );
Nils Diewaldc86aa482014-02-12 16:58:05 +0000206
Nils Diewald26087ea2013-12-05 16:51:30 +0000207 return new SpanWithinQueryWrapper(
Nils Diewaldc86aa482014-02-12 16:58:05 +0000208 this.fromJSON(operands.get(0)),
209 this.fromJSON(operands.get(1)),
Nils Diewald26087ea2013-12-05 16:51:30 +0000210 flag
Nils Diewald6802acd2014-03-18 18:29:30 +0000211 );
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000212
Nils Diewaldc8160bd2014-06-20 12:22:31 +0000213 // TODO: This is DEPRECATED and should be communicated that way
Nils Diewaldc86aa482014-02-12 16:58:05 +0000214 case "operation:submatch":
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000215
Nils Diewaldc86aa482014-02-12 16:58:05 +0000216 if (operands.size() != 1)
217 throw new QueryException("Operation needs exactly two operands");
218
219 if (json.has("classRef")) {
220 if (json.has("classRefOp"))
221 throw new QueryException("Class reference operators not supported yet");
222
223 number = json.get("classRef").get(0).asInt();
224 }
225 else if (json.has("spanRef")) {
226 throw new QueryException("Span references not supported yet");
227 };
228
229 return new SpanMatchModifyQueryWrapper(
230 this.fromJSON(operands.get(0)), number
231 );
232
233 case "operation:sequence":
Nils Diewalddc8dc342014-07-25 13:38:50 +0000234
Nils Diewaldc86aa482014-02-12 16:58:05 +0000235 if (operands.size() < 2)
236 throw new QueryException(
237 "SpanSequenceQuery needs at least two operands"
238 );
239
Nils Diewald164f8be2014-02-13 02:43:16 +0000240 SpanSequenceQueryWrapper sseqqw = this.seq();
Nils Diewaldc86aa482014-02-12 16:58:05 +0000241 for (JsonNode operand : operands) {
242 sseqqw.append(this.fromJSON(operand));
243 };
Nils Diewald164f8be2014-02-13 02:43:16 +0000244
245 // Say if the operand order is important
246 if (json.has("inOrder"))
247 sseqqw.setInOrder(json.get("inOrder").asBoolean());
248
249 // Introduce distance constraints
250 if (json.has("distances")) {
251
252 // TODO
253 if (json.has("exclude") && json.get("exclude").asBoolean())
254 throw new QueryException(
255 "Excluding distance constraints are not supported yet"
256 );
257
258 // TEMPORARY: Workaround for group distances
259 JsonNode firstDistance = json.get("distances").get(0);
260 if (!firstDistance.has("@type"))
261 throw new QueryException("Distances need a defined @type");
262
263 JsonNode distances;
264 if (firstDistance.get("@type").asText().equals("korap:group"))
265 distances = firstDistance.get("operands");
266 else if (firstDistance.get("@type").asText().equals("korap:distance"))
267 distances = json.get("distances");
268 else
269 throw new QueryException("No valid distances defined");
270
271 for (JsonNode constraint : distances) {
272 String unit = "w";
273 if (constraint.has("key"))
274 unit = constraint.get("key").asText();
275
Nils Diewaldb2e13902014-07-24 17:08:37 +0000276 if (unit.equals("t"))
Nils Diewald1455e1e2014-08-01 16:12:43 +0000277 throw new QueryException(
278 "Text based distances are not supported yet"
279 );
Nils Diewaldb2e13902014-07-24 17:08:37 +0000280
Nils Diewald9b1efea2014-07-24 15:33:47 +0000281 int min, max;
282 if (constraint.has("boundary")) {
283 Boundary b = new Boundary(constraint.get("boundary"), 1,1);
284 min = b.min;
285 max = b.max;
286 }
287 else {
288 min = constraint.get("min").asInt(1);
289 max = constraint.get("max").asInt(1);
290 };
291
292 sseqqw.withConstraint(min, max, unit);
Nils Diewald164f8be2014-02-13 02:43:16 +0000293 };
294 };
295
296 // inOrder was set without a distance constraint
297 if (!sseqqw.isInOrder() && !sseqqw.hasConstraints()) {
298 sseqqw.withConstraint(1,1,"w");
299 };
300
Nils Diewaldc86aa482014-02-12 16:58:05 +0000301 return sseqqw;
302
303 case "operation:class":
Nils Diewald1455e1e2014-08-01 16:12:43 +0000304
Nils Diewaldc86aa482014-02-12 16:58:05 +0000305 if (json.has("class")) {
306 if (operands.size() != 1)
307 throw new QueryException(
308 "Class group expects exactly one operand in list"
309 );
Nils Diewald1455e1e2014-08-01 16:12:43 +0000310
311 if (DEBUG)
312 log.trace("Found Class definition for {}", json.get("class").asInt(0));
313
314 number = json.get("class").asInt(0);
315
316 if (number > MAX_CLASS_NUM)
317 throw new QueryException("Class numbers limited to " + MAX_CLASS_NUM);
318 return new SpanClassQueryWrapper(
319 this.fromJSON(operands.get(0)), number
320 );
Nils Diewaldc86aa482014-02-12 16:58:05 +0000321 };
322
323 throw new QueryException("Class group expects class attribute");
324
325 case "operation:repetition":
Nils Diewaldc86aa482014-02-12 16:58:05 +0000326
Nils Diewald1455e1e2014-08-01 16:12:43 +0000327 int min = 0;
328 int max = 100;
Nils Diewald47f62e22014-07-24 14:51:38 +0000329 if (json.has("boundary")) {
330 Boundary b = new Boundary(json.get("boundary"), 0, 100);
331 min = b.min;
332 max = b.max;
333 }
334 else {
Nils Diewald1455e1e2014-08-01 16:12:43 +0000335 if (json.has("min"))
336 min = json.get("min").asInt(0);
337 if (json.has("max"))
338 max = json.get("max").asInt(100);
339
340 if (DEBUG)
341 log.trace(
342 "Boundary is set by deprecated {}-{}",
343 min,
344 max);
Nils Diewald47f62e22014-07-24 14:51:38 +0000345 };
Nils Diewaldee4a6b72014-06-30 18:23:12 +0000346
347 // Sanitize max
348 if (max < 0)
349 max = 100;
350 else if (max > 100)
351 max = 100;
352
353 // Sanitize min
354 if (min < 0)
355 min = 0;
356 else if (min > 100)
Nils Diewald47f62e22014-07-24 14:51:38 +0000357 min = 100;
358
Nils Diewaldee4a6b72014-06-30 18:23:12 +0000359 // Check relation between min and max
360 if (min > max)
Nils Diewald1455e1e2014-08-01 16:12:43 +0000361 max = max;
Nils Diewalddc8dc342014-07-25 13:38:50 +0000362
Nils Diewaldee4a6b72014-06-30 18:23:12 +0000363 return new SpanRepetitionQueryWrapper(
364 this.fromJSON(operands.get(0)), min, max
365 );
Nils Diewaldc925b492013-12-03 23:56:10 +0000366 };
Nils Diewaldc86aa482014-02-12 16:58:05 +0000367
368 throw new QueryException("Unknown group operation");
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000369
Nils Diewaldee4a6b72014-06-30 18:23:12 +0000370 case "korap:reference":
Nils Diewald1455e1e2014-08-01 16:12:43 +0000371 if (json.has("operation") &&
372 !json.get("operation").asText().equals("operation:focus"))
373 throw new QueryException("Reference operation " +
374 json.get("operation").asText() +
375 " not supported yet");
Nils Diewaldee4a6b72014-06-30 18:23:12 +0000376
377 operands = json.get("operands");
378
Nils Diewald1455e1e2014-08-01 16:12:43 +0000379 if (operands.size() == 0) {
Nils Diewaldee4a6b72014-06-30 18:23:12 +0000380 throw new QueryException("Focus with peripheral references is not supported yet");
Nils Diewald1455e1e2014-08-01 16:12:43 +0000381 };
Nils Diewaldee4a6b72014-06-30 18:23:12 +0000382
383 if (operands.size() != 1)
384 throw new QueryException("Operation needs exactly two operands");
385
386
387 if (json.has("classRef")) {
388 if (json.has("classRefOp"))
389 throw new QueryException("Class reference operators not supported yet");
390
391 number = json.get("classRef").get(0).asInt();
Nils Diewald1455e1e2014-08-01 16:12:43 +0000392
393 if (number > MAX_CLASS_NUM)
394 throw new QueryException("Class numbers limited to " + MAX_CLASS_NUM);
395
Nils Diewaldee4a6b72014-06-30 18:23:12 +0000396 }
397 else if (json.has("spanRef")) {
398 throw new QueryException("Span references not supported yet");
399 };
400
Nils Diewald1455e1e2014-08-01 16:12:43 +0000401 if (DEBUG)
402 log.trace("Wrap class reference {}", number);
403
Nils Diewaldee4a6b72014-06-30 18:23:12 +0000404 return new SpanMatchModifyQueryWrapper(
405 this.fromJSON(operands.get(0)), number
406 );
407
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000408 case "korap:token":
Nils Diewaldc86aa482014-02-12 16:58:05 +0000409 if (!json.has("wrap"))
Nils Diewald602c9222014-07-23 19:49:53 +0000410 throw new QueryException("Empty Tokens are not supported yet");
Nils Diewaldc925b492013-12-03 23:56:10 +0000411
Nils Diewaldc86aa482014-02-12 16:58:05 +0000412 return this._segFromJSON(json.get("wrap"));
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000413
Nils Diewaldc86aa482014-02-12 16:58:05 +0000414 case "korap:span":
415 if (!json.has("key"))
Nils Diewald602c9222014-07-23 19:49:53 +0000416 throw new QueryException("A span needs at least a key definition");
Nils Diewald4d183ea2013-12-05 02:51:38 +0000417
Nils Diewaldc86aa482014-02-12 16:58:05 +0000418 return this._termFromJSON(json);
Nils Diewald6d50c1f2013-12-04 20:14:08 +0000419 };
420 throw new QueryException("Unknown serialized query type: " + type);
Nils Diewaldc925b492013-12-03 23:56:10 +0000421 };
422
Nils Diewaldf399a672013-11-18 17:55:22 +0000423
Nils Diewaldc86aa482014-02-12 16:58:05 +0000424
Nils Diewald4d183ea2013-12-05 02:51:38 +0000425 private SpanQueryWrapperInterface _segFromJSON (JsonNode json) throws QueryException {
426 String type = json.get("@type").asText();
Nils Diewald1455e1e2014-08-01 16:12:43 +0000427
428 if (DEBUG)
429 log.trace("Wrap new token definition by {}", type);
430
Nils Diewald4d183ea2013-12-05 02:51:38 +0000431 switch (type) {
Nils Diewald26087ea2013-12-05 16:51:30 +0000432
Nils Diewald4d183ea2013-12-05 02:51:38 +0000433 case "korap:term":
Nils Diewaldc86aa482014-02-12 16:58:05 +0000434 String match = "match:eq";
435 if (json.has("match"))
436 match = json.get("match").asText();
Nils Diewald26087ea2013-12-05 16:51:30 +0000437
Nils Diewaldc86aa482014-02-12 16:58:05 +0000438 switch (match) {
439 case "match:ne":
Nils Diewald1455e1e2014-08-01 16:12:43 +0000440 if (DEBUG)
441 log.trace("Term is negated");
442 SpanSegmentQueryWrapper ssqw =
443 (SpanSegmentQueryWrapper) this._termFromJSON(json);
444 ssqw.makeNegative();
445 return this.seg().without(ssqw);
Nils Diewaldc86aa482014-02-12 16:58:05 +0000446 case "match:eq":
447 return this._termFromJSON(json);
Nils Diewald4d183ea2013-12-05 02:51:38 +0000448 };
Nils Diewald26087ea2013-12-05 16:51:30 +0000449
Nils Diewaldc86aa482014-02-12 16:58:05 +0000450 throw new QueryException("Match relation unknown");
451
452 case "korap:termGroup":
453
454 if (!json.has("operands"))
455 throw new QueryException("TermGroup expects operands");
456
457 SpanSegmentQueryWrapper ssegqw = this.seg();
458
459 if (!json.has("relation"))
460 throw new QueryException("termGroup expects a relation");
461
Nils Diewald4d183ea2013-12-05 02:51:38 +0000462 switch (json.get("relation").asText()) {
Nils Diewaldc86aa482014-02-12 16:58:05 +0000463 case "relation:and":
464
Nils Diewald4d183ea2013-12-05 02:51:38 +0000465 for (JsonNode operand : json.get("operands")) {
466 SpanQueryWrapperInterface part = this._segFromJSON(operand);
467 if (part instanceof SpanAlterQueryWrapper) {
468 ssegqw.with((SpanAlterQueryWrapper) part);
469 }
470 else if (part instanceof SpanRegexQueryWrapper) {
471 ssegqw.with((SpanRegexQueryWrapper) part);
472 }
473 else if (part instanceof SpanSegmentQueryWrapper) {
474 ssegqw.with((SpanSegmentQueryWrapper) part);
475 }
476 else {
477 throw new QueryException("Object not supported in segment queries");
478 };
479 };
480 return ssegqw;
Nils Diewaldc86aa482014-02-12 16:58:05 +0000481
482 case "relation:or":
Nils Diewald26087ea2013-12-05 16:51:30 +0000483 SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
484 for (JsonNode operand : json.get("operands")) {
485 ssaq.or(this._segFromJSON(operand));
486 };
487 return ssaq;
Nils Diewald4d183ea2013-12-05 02:51:38 +0000488 };
Nils Diewaldc86aa482014-02-12 16:58:05 +0000489 };
490 throw new QueryException("Unknown token type");
Nils Diewald4d183ea2013-12-05 02:51:38 +0000491 };
Nils Diewald4d183ea2013-12-05 02:51:38 +0000492
493
494
Nils Diewaldc86aa482014-02-12 16:58:05 +0000495 private SpanQueryWrapperInterface _termFromJSON (JsonNode json) throws QueryException {
496 if (!json.has("key") || json.get("key").asText().length() < 1)
497 throw new QueryException("Terms and spans have to provide key attributes");
498
499 Boolean isTerm = json.get("@type").asText().equals("korap:term") ? true : false;
500 Boolean isCaseInsensitive = false;
501
502 if (json.has("caseInsensitive") && json.get("caseInsensitive").asBoolean())
503 isCaseInsensitive = true;
504
Nils Diewald164f8be2014-02-13 02:43:16 +0000505 StringBuilder value = new StringBuilder();
Nils Diewaldc86aa482014-02-12 16:58:05 +0000506
507 // expect orth? expect lemma?
508 // s:den | i:den | cnx/l:die | mate/m:mood:ind | cnx/syn:@PREMOD |
509 // mate/m:number:sg | opennlp/p:ART
510
511 if (json.has("foundry") && json.get("foundry").asText().length() > 0)
512 value.append(json.get("foundry").asText()).append('/');
513
514 // value.append(defaultFoundry).append('/');
515
516 if (json.has("layer") && json.get("layer").asText().length() > 0) {
517 String layer = json.get("layer").asText();
518 switch (layer) {
Nils Diewald164f8be2014-02-13 02:43:16 +0000519
Nils Diewaldc86aa482014-02-12 16:58:05 +0000520 case "lemma":
521 layer = "l";
522 break;
Nils Diewald164f8be2014-02-13 02:43:16 +0000523
Nils Diewaldc86aa482014-02-12 16:58:05 +0000524 case "pos":
525 layer = "p";
526 break;
Nils Diewald164f8be2014-02-13 02:43:16 +0000527
Nils Diewaldc86aa482014-02-12 16:58:05 +0000528 case "orth":
529 layer = "s";
530 break;
531 };
532
533 if (isCaseInsensitive && isTerm && layer.equals("s"))
534 layer = "i";
535
Nils Diewald164f8be2014-02-13 02:43:16 +0000536
537 // TEMPORARY
538 if (value.length() == 0 && (layer.equals("l") || layer.equals("p")))
539 value.append(defaultFoundry);
540
Nils Diewaldc86aa482014-02-12 16:58:05 +0000541 value.append(layer).append(':');
542 };
543
544 if (json.has("key") && json.get("key").asText().length() > 0) {
545 String key = json.get("key").asText();
546 value.append(isCaseInsensitive ? key.toLowerCase() : key);
547 };
548
549 // Regular expression or wildcard
550 if (isTerm && json.has("type")) {
551 switch (json.get("type").asText()) {
552 case "type:regex":
553 return this.seg(this.re(value.toString(), isCaseInsensitive));
554 case "type:wildcard":
555 return this.seq(this.wc(value.toString(), isCaseInsensitive));
556 };
557 };
558
559 if (json.has("value") && json.get("value").asText().length() > 0)
560 value.append(':').append(json.get("value").asText());
561
562 if (isTerm)
563 return this.seg(value.toString());
564
565 if (json.has("attr"))
566 throw new QueryException("Attributes not yet supported in spans");
567
568 return this.tag(value.toString());
569 };
570
571
Nils Diewaldb0dd9552013-12-20 02:28:34 +0000572 // SpanRegexQueryWrapper
Nils Diewaldf399a672013-11-18 17:55:22 +0000573 /**
574 * Create a query object based on a regular expression.
575 * @param re The regular expession as a string.
576 */
577 public SpanRegexQueryWrapper re (String re) {
578 return new SpanRegexQueryWrapper(this.field, re, RegExp.ALL, false);
579 };
580
581 /**
582 * Create a query object based on a regular expression.
583 * @param re The regular expession as a string.
584 * @param flas The regular expession flag as an integer.
585 */
586 public SpanRegexQueryWrapper re (String re, int flags) {
587 return new SpanRegexQueryWrapper(this.field, re, flags, false);
588 };
589
Nils Diewaldf399a672013-11-18 17:55:22 +0000590 /**
591 * Create a query object based on a regular expression.
592 * @param re The regular expession as a string.
593 * @param flag The regular expession flag.
594 * @param caseinsensitive A boolean value indicating case insensitivity.
595 */
596 public SpanRegexQueryWrapper re (String re, int flags, boolean caseinsensitive) {
597 return new SpanRegexQueryWrapper(this.field, re, flags, caseinsensitive);
598 };
599
Nils Diewaldf399a672013-11-18 17:55:22 +0000600 /**
601 * Create a query object based on a regular expression.
602 * @param re The regular expession as a string.
603 * @param caseinsensitive A boolean value indicating case insensitivity.
604 */
605 public SpanRegexQueryWrapper re (String re, boolean caseinsensitive) {
606 return new SpanRegexQueryWrapper(this.field, re, RegExp.ALL, caseinsensitive);
607 };
608
Nils Diewaldb0dd9552013-12-20 02:28:34 +0000609 // SpanWildcardQueryWrapper
610 /**
611 * Create a query object based on a wildcard term.
612 * @param wc The wildcard term as a string.
613 */
614 public SpanWildcardQueryWrapper wc (String wc) {
615 return new SpanWildcardQueryWrapper(this.field, wc, false);
616 };
617
618 /**
619 * Create a query object based on a wildcard term.
620 * @param wc The wildcard term as a string.
621 * @param caseinsensitive A boolean value indicating case insensitivity.
622 */
623 public SpanWildcardQueryWrapper wc (String wc, boolean caseinsensitive) {
624 return new SpanWildcardQueryWrapper(this.field, wc, caseinsensitive);
625 };
626
Nils Diewaldf399a672013-11-18 17:55:22 +0000627
628 // SpanSegmentQueries
629 /**
630 * Create a segment query object.
631 */
632 public SpanSegmentQueryWrapper seg () {
633 return new SpanSegmentQueryWrapper(this.field);
634 };
635
636
637 /**
638 * Create a segment query object.
639 * @param terms[] An array of terms, the segment consists of.
640 */
641 public SpanSegmentQueryWrapper seg (SpanRegexQueryWrapper ... terms) {
642 SpanSegmentQueryWrapper ssq = new SpanSegmentQueryWrapper(this.field);
643 for (SpanRegexQueryWrapper t : terms) {
644 ssq.with(t);
645 };
646 return ssq;
647 };
648
649 public SpanSegmentQueryWrapper seg (SpanAlterQueryWrapper ... terms) {
650 SpanSegmentQueryWrapper ssq = new SpanSegmentQueryWrapper(this.field);
651 for (SpanAlterQueryWrapper t : terms) {
652 ssq.with(t);
653 };
654 return ssq;
655 };
656
657 public SpanSegmentQueryWrapper seg (String ... terms) {
658 SpanSegmentQueryWrapper ssq = new SpanSegmentQueryWrapper(this.field);
659 for (String t : terms) {
660 ssq.with(t);
661 };
662 return ssq;
663 };
664
665 // SpanSegmentAlterQueries
666 /**
667 * Create a segment alternation query object.
668 * @param terms[] An array of alternative terms.
669 */
670 public SpanAlterQueryWrapper or (SpanQueryWrapperInterface ... terms) {
671 SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
672 for (SpanQueryWrapperInterface t : terms) {
673 ssaq.or(t);
674 };
675 return ssaq;
676 };
677
678 public SpanAlterQueryWrapper or (String ... terms) {
679 SpanAlterQueryWrapper ssaq = new SpanAlterQueryWrapper(this.field);
680 for (String t : terms) {
681 ssaq.or(t);
682 };
683 return ssaq;
684 };
685
686
687 // SpanSegmentSequenceQueries
688 /**
689 * Create a sequence of segments query object.
690 */
691 public SpanSequenceQueryWrapper seq () {
692 return new SpanSequenceQueryWrapper(this.field);
693 };
694
695
696 /**
697 * Create a sequence of segments query object.
698 * @param terms[] An array of segment defining terms.
699 */
700 public SpanSequenceQueryWrapper seq (SpanQueryWrapperInterface ... terms) {
701 SpanSequenceQueryWrapper sssq = new SpanSequenceQueryWrapper(this.field);
702 for (SpanQueryWrapperInterface t : terms) {
703 sssq.append(t);
704 };
705 return sssq;
706 };
707
708
709 /**
710 * Create a sequence of segments query object.
711 * @param re A SpanSegmentRegexQuery, starting the sequence.
712 */
713 public SpanSequenceQueryWrapper seq (SpanRegexQueryWrapper re) {
714 return new SpanSequenceQueryWrapper(this.field, re);
715 };
716
717
718 public SpanSequenceQueryWrapper seq (Object ... terms) {
719 SpanSequenceQueryWrapper ssq = new SpanSequenceQueryWrapper(this.field);
720 for (Object t : terms) {
721 if (t instanceof SpanQueryWrapperInterface) {
722 ssq.append((SpanQueryWrapperInterface) t);
723 }
724 else if (t instanceof SpanRegexQueryWrapper) {
725 ssq.append((SpanRegexQueryWrapper) t);
726 }
727 else {
728 log.error("{} is not an acceptable parameter for seq()", t.getClass());
729 return ssq;
730 };
731 };
732 return ssq;
733 };
734
735 public SpanElementQueryWrapper tag (String element) {
736 return new SpanElementQueryWrapper(this.field, element);
737 };
738
739 /**
740 * Create a wrapping within query object.
741 * @param element A SpanQuery.
742 * @param embedded A SpanQuery that is wrapped in the element.
743 */
Nils Diewald6802acd2014-03-18 18:29:30 +0000744 @Deprecated
Nils Diewaldf399a672013-11-18 17:55:22 +0000745 public SpanWithinQueryWrapper within (SpanQueryWrapperInterface element,
746 SpanQueryWrapperInterface embedded) {
747 return new SpanWithinQueryWrapper(element, embedded);
748 };
749
Nils Diewald6802acd2014-03-18 18:29:30 +0000750 public SpanWithinQueryWrapper contains (SpanQueryWrapperInterface element,
751 SpanQueryWrapperInterface embedded) {
752 return new SpanWithinQueryWrapper(element, embedded, WITHIN);
753 };
754
755 public SpanWithinQueryWrapper startswith (SpanQueryWrapperInterface element,
756 SpanQueryWrapperInterface embedded) {
757 return new SpanWithinQueryWrapper(element, embedded, STARTSWITH);
758 };
759
760 public SpanWithinQueryWrapper endswith (SpanQueryWrapperInterface element,
761 SpanQueryWrapperInterface embedded) {
762 return new SpanWithinQueryWrapper(element, embedded, ENDSWITH);
763 };
764
765 public SpanWithinQueryWrapper overlaps (SpanQueryWrapperInterface element,
766 SpanQueryWrapperInterface embedded) {
767 return new SpanWithinQueryWrapper(element, embedded, OVERLAP);
768 };
769
770 public SpanWithinQueryWrapper matches (SpanQueryWrapperInterface element,
771 SpanQueryWrapperInterface embedded) {
772 return new SpanWithinQueryWrapper(element, embedded, MATCH);
773 };
774
Nils Diewaldf399a672013-11-18 17:55:22 +0000775 // Class
776 public SpanClassQueryWrapper _ (byte number, SpanQueryWrapperInterface element) {
777 return new SpanClassQueryWrapper(element, number);
778 };
779
780 public SpanClassQueryWrapper _ (int number, SpanQueryWrapperInterface element) {
781 return new SpanClassQueryWrapper(element, number);
782 };
783
784 public SpanClassQueryWrapper _ (short number, SpanQueryWrapperInterface element) {
785 return new SpanClassQueryWrapper(element, number);
786 };
787
788 public SpanClassQueryWrapper _ (SpanQueryWrapperInterface element) {
789 return new SpanClassQueryWrapper(element);
790 };
791
792 // MatchModify
793 public SpanMatchModifyQueryWrapper shrink (byte number, SpanQueryWrapperInterface element) {
794 return new SpanMatchModifyQueryWrapper(element, number);
795 };
796
797 public SpanMatchModifyQueryWrapper shrink (int number, SpanQueryWrapperInterface element) {
798 return new SpanMatchModifyQueryWrapper(element, number);
799 };
800
801 public SpanMatchModifyQueryWrapper shrink (short number, SpanQueryWrapperInterface element) {
802 return new SpanMatchModifyQueryWrapper(element, number);
803 };
804
805 public SpanMatchModifyQueryWrapper shrink (SpanQueryWrapperInterface element) {
806 return new SpanMatchModifyQueryWrapper(element);
807 };
808
Nils Diewaldee4a6b72014-06-30 18:23:12 +0000809 // Repetition
810 public SpanRepetitionQueryWrapper repeat (SpanQueryWrapperInterface element, int exact) {
811 return new SpanRepetitionQueryWrapper(element, exact);
812 };
813
814 public SpanRepetitionQueryWrapper repeat (SpanQueryWrapperInterface element, int min, int max) {
815 return new SpanRepetitionQueryWrapper(element, min, max);
816 };
817
818
Nils Diewaldf399a672013-11-18 17:55:22 +0000819 // split
Nils Diewaldf399a672013-11-18 17:55:22 +0000820};