blob: e0ac4eb6640eb00fba22819dea1c2f7dc3d05d13 [file] [log] [blame]
Joachim Bingel4b405f52013-11-15 15:29:30 +00001package de.ids_mannheim.korap.query.serialize;
2
Joachim Bingel6003b852014-12-18 14:20:55 +00003import de.ids_mannheim.korap.query.parse.poliqarpplus.PoliqarpPlusLexer;
4import de.ids_mannheim.korap.query.parse.poliqarpplus.PoliqarpPlusParser;
Joachim Bingel3fa584b2014-12-17 13:35:43 +00005import de.ids_mannheim.korap.query.serialize.util.Antlr4DescriptiveErrorListener;
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +00006import de.ids_mannheim.korap.query.serialize.util.KoralObjectGenerator;
Joachim Bingel3fa584b2014-12-17 13:35:43 +00007import de.ids_mannheim.korap.query.serialize.util.StatusCodes;
Joachim Bingel3fa584b2014-12-17 13:35:43 +00008
Michael Hanld8116e52014-04-25 20:31:29 +00009import org.antlr.v4.runtime.*;
10import org.antlr.v4.runtime.tree.ParseTree;
Michael Hanl27e50582013-12-07 18:04:13 +000011import org.slf4j.Logger;
12import org.slf4j.LoggerFactory;
13
Michael Hanld8116e52014-04-25 20:31:29 +000014import java.lang.reflect.Method;
15import java.util.*;
16
Joachim Bingel4b405f52013-11-15 15:29:30 +000017/**
18 * Map representation of Poliqarp syntax tree as returned by ANTLR
Joachim Bingel4b405f52013-11-15 15:29:30 +000019 *
Joachim Bingel3fa584b2014-12-17 13:35:43 +000020 * @author Joachim Bingel (bingel@ids-mannheim.de)
Joachim Bingel4b405f52013-11-15 15:29:30 +000021 */
Joachim Bingel1faf8a52015-01-09 13:17:34 +000022public class PoliqarpPlusQueryProcessor extends Antlr4AbstractQueryProcessor {
Michael Hanl27e50582013-12-07 18:04:13 +000023
Joachim Bingel20e06ac2015-01-15 10:31:33 +000024 private static Logger log = LoggerFactory
25 .getLogger(PoliqarpPlusQueryProcessor.class);
Joachim Bingel5fd09322015-01-29 14:01:30 +000026 private int classCounter = 1;
Joachim Bingel4b405f52013-11-15 15:29:30 +000027
/**
 * Creates a processor for the given PoliqarpPlus query and processes
 * the query right away.
 * <p>
 * The constructor registers this instance with
 * {@link KoralObjectGenerator}, hands the query to
 * {@link #process(String)} and finally logs the "query" entry of
 * {@code requestMap} at INFO level.
 *
 * @param query
 *            The PoliqarpPlus query string to be processed
 */
public PoliqarpPlusQueryProcessor (String query) {
    KoralObjectGenerator.setQueryProcessor(this);
    process(query);
    // Parameterized logging: the message is only assembled if INFO is on.
    log.info(">>> {} <<<", requestMap.get("query"));
}
Michael Hanld8116e52014-04-25 20:31:29 +000046
Joachim Bingel20e06ac2015-01-15 10:31:33 +000047 @Override
48 public void process(String query) {
49 ParseTree tree;
50 tree = parsePoliqarpQuery(query);
51 super.parser = this.parser;
52 log.info("Processing PoliqarpPlus query: " + query);
53 if (tree != null) {
54 log.debug("ANTLR parse tree: " + tree.toStringTree(parser));
55 processNode(tree);
56 }
57 else {
58 addError(StatusCodes.MALFORMED_QUERY, "Could not parse query >>> "
59 + query + " <<<.");
60 }
61 }
Joachim Bingel4b405f52013-11-15 15:29:30 +000062
Joachim Bingel20e06ac2015-01-15 10:31:33 +000063 /**
64 * Recursively calls itself with the children of the currently
65 * active node, traversing the tree nodes in a top-down,
66 * depth-first fashion. A list is maintained that contains all
67 * visited nodes which have been directly addressed by their
68 * (grand-/grand-grand-/...) parent nodes, such that some
69 * processing time is saved, as these node will not be processed.
70 * This method is effectively a list of if-statements that are
71 * responsible for treating the different node types correctly and
72 * filling the respective maps/lists.
73 *
74 * @param node
75 * The currently processed node. The process(String
76 * query) method calls this method with the root.
77 * @throws QueryException
78 */
79 private void processNode(ParseTree node) {
80 // Top-down processing
81 if (visited.contains(node))
82 return;
83 else
84 visited.add(node);
Michael Hanld8116e52014-04-25 20:31:29 +000085
Joachim Bingel20e06ac2015-01-15 10:31:33 +000086 String nodeCat = getNodeCat(node);
87 openNodeCats.push(nodeCat);
Michael Hanld8116e52014-04-25 20:31:29 +000088
Joachim Bingel20e06ac2015-01-15 10:31:33 +000089 stackedObjects = 0;
Michael Hanld8116e52014-04-25 20:31:29 +000090
Joachim Bingel20e06ac2015-01-15 10:31:33 +000091 if (verbose) {
92 System.err.println(" " + objectStack);
93 System.out.println(openNodeCats);
94 }
Michael Hanld8116e52014-04-25 20:31:29 +000095
Joachim Bingel20e06ac2015-01-15 10:31:33 +000096 /*
97 * ***************************************************************
98 * ***************************************************************
99 * *********** Processing individual node categories *************
100 * ***************************************************************
101 * ***************************************************************
102 */
Joachim Bingel832800e2014-10-17 14:46:39 +0000103
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000104 if (nodeCat.equals("segment")) {
105 processSegment(node);
106 }
Joachim Bingel832800e2014-10-17 14:46:39 +0000107
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000108 if (nodeCat.equals("sequence")) {
109 processSequence(node);
110 }
Joachim Bingel832800e2014-10-17 14:46:39 +0000111
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000112 if (nodeCat.equals("emptyTokenSequence")) {
113 processEmptyTokenSequence(node);
114 }
Joachim Bingel0900a892014-06-30 16:26:21 +0000115
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000116 if (nodeCat.equals("emptyTokenSequenceClass")) {
117 processEmptyTokenSequenceClass(node);
118 }
Joachim Bingel0900a892014-06-30 16:26:21 +0000119
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000120 if (nodeCat.equals("token")) {
121 processToken(node);
122 }
Joachim Bingel0900a892014-06-30 16:26:21 +0000123
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000124 if (nodeCat.equals("alignment")) {
125 processAlignment(node);
126 }
Joachim Bingel0900a892014-06-30 16:26:21 +0000127
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000128 if (nodeCat.equals("span")) {
129 processSpan(node);
130 }
Joachim Bingel0900a892014-06-30 16:26:21 +0000131
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000132 if (nodeCat.equals("disjunction")) {
133 processDisjunction(node);
134 }
Joachim Bingel832800e2014-10-17 14:46:39 +0000135
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000136 if (nodeCat.equals("position")) {
137 processPosition(node);
138 }
Michael Hanld8116e52014-04-25 20:31:29 +0000139
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000140 if (nodeCat.equals("relation")) {
141 processRelation(node);
142 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000143
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000144 if (nodeCat.equals("spanclass")) {
145 processSpanclass(node);
146 }
Joachim Bingel832800e2014-10-17 14:46:39 +0000147
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000148 if (nodeCat.equals("matching")) {
149 processMatching(node);
150 }
Michael Hanld8116e52014-04-25 20:31:29 +0000151
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000152 if (nodeCat.equals("submatch")) {
153 processSubmatch(node);
154 }
Michael Hanld8116e52014-04-25 20:31:29 +0000155
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000156 if (nodeCat.equals("meta")) {
157 processMeta(node);
158 }
Joachim Bingel1f8f3782015-01-19 17:58:41 +0000159
160// if (nodeCat.equals("term") || nodeCat.equals("termGroup")) {
161// if (inMeta ) putIntoSuperObject(parseTermOrTermGroup(node, false));
162// }
Michael Hanld8116e52014-04-25 20:31:29 +0000163
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000164 if (nodeCat.equals("within")
165 && !getNodeCat(node.getParent()).equals("position")) {
166 processWithin(node);
167 }
Michael Hanld8116e52014-04-25 20:31:29 +0000168
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000169 objectsToPop.push(stackedObjects);
Joachim Bingel0900a892014-06-30 16:26:21 +0000170
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000171 /*
172 * ***************************************************************
173 * ***************************************************************
174 * recursion until 'request' node (root of tree) is processed
175 * *
176 * ***********************************************************
177 * ****
178 * ********************************************************
179 * *******
180 */
181 for (int i = 0; i < node.getChildCount(); i++) {
182 ParseTree child = node.getChild(i);
183 processNode(child);
184 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000185
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000186 // Stuff that happens when leaving a node (taking items off
187 // the stacks)
188 for (int i = 0; i < objectsToPop.get(0); i++) {
189 objectStack.pop();
190 }
191 objectsToPop.pop();
192 openNodeCats.pop();
193 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000194
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000195 private void processSegment(ParseTree node) {
196 // Cover possible quantification (i.e. repetition) of segment
197 ParseTree quantification = getFirstChildWithCat(node, "repetition");
198 if (quantification != null) {
199 LinkedHashMap<String, Object> quantGroup =
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000200 KoralObjectGenerator.makeGroup("repetition");
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000201 Integer[] minmax = parseRepetition(quantification);
202 quantGroup.put("boundary",
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000203 KoralObjectGenerator.makeBoundary(minmax[0], minmax[1]));
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000204 putIntoSuperObject(quantGroup);
205 objectStack.push(quantGroup);
206 stackedObjects++;
207 }
208 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000209
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000210 private void processSequence(ParseTree node) {
Joachim Bingel07ef0422015-01-30 16:05:38 +0000211 // skipe in case of emptyTokenSequence or emptyTokenSequenceClass
212 if (node.getChildCount() == 1 &&
213 getNodeCat(node.getChild(0)).startsWith("emptyTokenSequence")) {
214 return;
215 }
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000216 LinkedHashMap<String, Object> sequence =
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000217 KoralObjectGenerator.makeGroup("sequence");
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000218 ParseTree distanceNode = getFirstChildWithCat(node, "distance");
Joachim Bingel84395b22014-12-18 10:46:18 +0000219
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000220 if (distanceNode != null) {
221 Integer[] minmax = parseDistance(distanceNode);
222 LinkedHashMap<String, Object> distance =
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000223 KoralObjectGenerator.makeDistance("w", minmax[0], minmax[1]);
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000224 sequence.put("inOrder", true);
225 ArrayList<Object> distances = new ArrayList<Object>();
226 distances.add(distance);
227 sequence.put("distances", distances);
228 // don't re-visit the emptyTokenSequence node
229 visited.add(distanceNode.getChild(0));
230 }
231 putIntoSuperObject(sequence);
232 objectStack.push(sequence);
233 stackedObjects++;
234 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000235
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000236 @SuppressWarnings("unchecked")
237 /**
238 * empty tokens at beginning/end of sequence
239 * @param node
240 */
241 private void processEmptyTokenSequence(ParseTree node) {
242 Integer[] minmax = parseEmptySegments(node);
243 // object will be either a repetition group or a single empty
244 // token
245 LinkedHashMap<String, Object> object;
246 LinkedHashMap<String, Object> emptyToken =
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000247 KoralObjectGenerator.makeToken();
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000248 if (minmax[0] != 1 || minmax[1] == null || minmax[1] != 1) {
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000249 object = KoralObjectGenerator.makeRepetition(minmax[0], minmax[1]);
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000250 ((ArrayList<Object>) object.get("operands")).add(emptyToken);
251 }
252 else {
253 object = emptyToken;
254 }
255 putIntoSuperObject(object);
256 objectStack.push(object);
257 stackedObjects++;
258 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000259
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000260 private void processEmptyTokenSequenceClass(ParseTree node) {
261 int classId = 1;
262 if (hasChild(node, "spanclass_id")) {
263 classId = Integer.parseInt(node.getChild(1).getChild(0)
264 .toStringTree(parser));
265 }
266 LinkedHashMap<String, Object> classGroup =
Joachim Bingel5fd09322015-01-29 14:01:30 +0000267 KoralObjectGenerator.makeSpanClass(classId);
268 addHighlightClass(classId);
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000269 putIntoSuperObject(classGroup);
270 objectStack.push(classGroup);
271 stackedObjects++;
272 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000273
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000274 private void processToken(ParseTree node) {
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000275 LinkedHashMap<String, Object> token = KoralObjectGenerator.makeToken();
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000276 // handle negation
277 List<ParseTree> negations = getChildrenWithCat(node, "!");
278 boolean negated = false;
279 boolean isRegex = false;
280 if (negations.size() % 2 == 1)
281 negated = true;
282 if (getNodeCat(node.getChild(0)).equals("key")) {
283 // no 'term' child, but direct key specification: process here
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000284 LinkedHashMap<String, Object> term = KoralObjectGenerator.makeTerm();
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000285 String key = node.getChild(0).getText();
286 if (getNodeCat(node.getChild(0).getChild(0)).equals("regex")) {
287 isRegex = true;
288 term.put("type", "type:regex");
289 key = key.substring(1, key.length() - 1);
290 }
291 term.put("layer", "orth");
292 term.put("key", key);
293 String matches = negated ? "ne" : "eq";
294 term.put("match", "match:" + matches);
295 ParseTree flagNode = getFirstChildWithCat(node, "flag");
296 if (flagNode != null) {
297 // substring removes leading slash '/'
298 String flag = getNodeCat(flagNode.getChild(0)).substring(1);
299 if (flag.contains("i"))
300 term.put("caseInsensitive", true);
301 else if (flag.contains("I"))
302 term.put("caseInsensitive", false);
303 if (flag.contains("x")) {
304 term.put("type", "type:regex");
305 if (!isRegex) {
306 key = QueryUtils.escapeRegexSpecialChars(key);
307 }
308 // overwrite key
309 term.put("key", ".*?" + key + ".*?");
310 }
311 }
312 token.put("wrap", term);
313 }
314 else {
315 // child is 'term' or 'termGroup' -> process in extra method
316 LinkedHashMap<String, Object> termOrTermGroup =
317 parseTermOrTermGroup(node.getChild(1), negated);
318 token.put("wrap", termOrTermGroup);
319 }
320 putIntoSuperObject(token);
321 visited.add(node.getChild(0));
322 visited.add(node.getChild(2));
323 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000324
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000325 @SuppressWarnings("unchecked")
326 private void processAlignment(ParseTree node) {
327 LinkedHashMap<String, Object> alignClass =
Joachim Bingel5fd09322015-01-29 14:01:30 +0000328 KoralObjectGenerator.makeSpanClass(classCounter);
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000329 LinkedHashMap<String, Object> metaMap =
330 (LinkedHashMap<String, Object>) requestMap.get("meta");
331 if (metaMap.containsKey("alignment")) {
332 ArrayList<Integer> alignedClasses = new ArrayList<Integer>();
333 try {
334 alignedClasses = (ArrayList<Integer>) metaMap.get("alignment");
335 }
336 catch (ClassCastException cce) {
337 alignedClasses.add((Integer) metaMap.get("alignment"));
338 }
339 alignedClasses.add(classCounter);
340 metaMap.put("alignment", alignedClasses);
341 }
342 else {
343 metaMap.put("alignment", classCounter);
344 }
Joachim Bingel5fd09322015-01-29 14:01:30 +0000345 classCounter++;
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000346 putIntoSuperObject(alignClass);
347 objectStack.push(alignClass);
348 stackedObjects++;
349 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000350
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000351 private void processSpan(ParseTree node) {
352 List<ParseTree> negations = getChildrenWithCat(node, "!");
353 boolean negated = false;
354 if (negations.size() % 2 == 1)
355 negated = true;
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000356 LinkedHashMap<String, Object> span = KoralObjectGenerator.makeSpan();
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000357 ParseTree keyNode = getFirstChildWithCat(node, "key");
358 ParseTree layerNode = getFirstChildWithCat(node, "layer");
359 ParseTree foundryNode = getFirstChildWithCat(node, "foundry");
360 ParseTree termOpNode = getFirstChildWithCat(node, "termOp");
361 ParseTree termNode = getFirstChildWithCat(node, "term");
362 ParseTree termGroupNode = getFirstChildWithCat(node, "termGroup");
363 if (foundryNode != null)
364 span.put("foundry", foundryNode.getText());
365 if (layerNode != null) {
366 String layer = layerNode.getText();
367 if (layer.equals("base"))
368 layer = "lemma";
369 span.put("layer", layer);
370 }
371 span.put("key", keyNode.getText());
372 if (termOpNode != null) {
373 String termOp = termOpNode.getText();
374 if (termOp.equals("=="))
375 span.put("match", "match:eq");
376 else if (termOp.equals("!="))
377 span.put("match", "match:ne");
378 }
379 if (termNode != null) {
380 LinkedHashMap<String, Object> termOrTermGroup =
381 parseTermOrTermGroup(termNode, negated, "span");
382 span.put("attr", termOrTermGroup);
383 }
384 if (termGroupNode != null) {
385 LinkedHashMap<String, Object> termOrTermGroup =
386 parseTermOrTermGroup(termGroupNode, negated, "span");
387 span.put("attr", termOrTermGroup);
388 }
389 putIntoSuperObject(span);
390 objectStack.push(span);
391 stackedObjects++;
392 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000393
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000394 private void processDisjunction(ParseTree node) {
395 LinkedHashMap<String, Object> disjunction =
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000396 KoralObjectGenerator.makeGroup("or");
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000397 putIntoSuperObject(disjunction);
398 objectStack.push(disjunction);
399 stackedObjects++;
400 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000401
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000402 private void processPosition(ParseTree node) {
403 LinkedHashMap<String, Object> position = parseFrame(node.getChild(0));
404 putIntoSuperObject(position);
405 objectStack.push(position);
406 stackedObjects++;
407 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000408
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000409 private void processRelation(ParseTree node) {
410 LinkedHashMap<String, Object> relationGroup =
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000411 KoralObjectGenerator.makeGroup("relation");
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000412 LinkedHashMap<String, Object> relation =
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000413 KoralObjectGenerator.makeRelation();
Joachim Bingela6bf8d82015-01-26 14:43:36 +0000414 LinkedHashMap<String, Object> term =
415 KoralObjectGenerator.makeTerm();
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000416 relationGroup.put("relation", relation);
Joachim Bingela6bf8d82015-01-26 14:43:36 +0000417 relation.put("wrap", term);
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000418 if (node.getChild(0).getText().equals("dominates")) {
Joachim Bingela6bf8d82015-01-26 14:43:36 +0000419 term.put("layer", "c");
Joachim Bingel5fd09322015-01-29 14:01:30 +0000420 } else if (node.getChild(0).getText().equals("dependency")) {
421 term.put("layer", "d");
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000422 }
423 ParseTree relSpec = getFirstChildWithCat(node, "relSpec");
424 ParseTree repetition = getFirstChildWithCat(node, "repetition");
425 if (relSpec != null) {
426 ParseTree foundry = getFirstChildWithCat(relSpec, "foundry");
427 ParseTree layer = getFirstChildWithCat(relSpec, "layer");
428 ParseTree key = getFirstChildWithCat(relSpec, "key");
429 if (foundry != null)
Joachim Bingela6bf8d82015-01-26 14:43:36 +0000430 term.put("foundry", foundry.getText());
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000431 if (layer != null)
Joachim Bingela6bf8d82015-01-26 14:43:36 +0000432 term.put("layer", layer.getText());
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000433 if (key != null)
Joachim Bingela6bf8d82015-01-26 14:43:36 +0000434 term.put("key", key.getText());
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000435 }
436 if (repetition != null) {
437 Integer[] minmax = parseRepetition(repetition);
438 relation.put("boundary",
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000439 KoralObjectGenerator.makeBoundary(minmax[0], minmax[1]));
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000440 }
441 putIntoSuperObject(relationGroup);
442 objectStack.push(relationGroup);
443 stackedObjects++;
444 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000445
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000446 private void processSpanclass(ParseTree node) {
447 // Step I: get info
448 int classId = 1;
449 if (getNodeCat(node.getChild(1)).equals("spanclass_id")) {
450 String ref = node.getChild(1).getChild(0).toStringTree(parser);
451 try {
452 classId = Integer.parseInt(ref);
453 }
454 catch (NumberFormatException e) {
455 String msg = "The specified class reference in the "
456 + "focus/split-Operator is not a number: " + ref;
457 log.error(msg);
458 addError(StatusCodes.UNDEFINED_CLASS_REFERENCE, msg);
459 }
460 // only allow class id up to 127
Joachim Bingel5fd09322015-01-29 14:01:30 +0000461 if (classId > 128) {
462 addWarning("Only class IDs up to 128 are allowed. Your class "
463 + classId + " has been set back to 128. "
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000464 + "Check for possible conflict with other classes.");
Joachim Bingel5fd09322015-01-29 14:01:30 +0000465 classId = 128;
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000466 }
467 }
468 LinkedHashMap<String, Object> classGroup =
Joachim Bingel5fd09322015-01-29 14:01:30 +0000469 KoralObjectGenerator.makeSpanClass(classId);
470 addHighlightClass(classId);
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000471 putIntoSuperObject(classGroup);
472 objectStack.push(classGroup);
473 stackedObjects++;
Joachim Bingel84395b22014-12-18 10:46:18 +0000474
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000475 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000476
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000477 private void processMatching(ParseTree node) {
478 // Step I: get info
479 ArrayList<Integer> classRefs = new ArrayList<Integer>();
480 String classRefOp = null;
481 if (getNodeCat(node.getChild(2)).equals("spanclass_id")) {
482 ParseTree spanNode = node.getChild(2);
483 for (int i = 0; i < spanNode.getChildCount() - 1; i++) {
484 String ref = spanNode.getChild(i).getText();
485 if (ref.equals("|") || ref.equals("&")) {
486 classRefOp = ref.equals("|") ? "intersection" : "union";
487 }
488 else {
489 try {
490 int classRef = Integer.parseInt(ref);
Joachim Bingel5fd09322015-01-29 14:01:30 +0000491 // only allow class id up to 128
492 if (classRef > 128) {
493 addWarning("Only class references up to 128 are "
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000494 + "allowed. Your reference to class "
Joachim Bingel5fd09322015-01-29 14:01:30 +0000495 + classRef + " has been set back to 128. "
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000496 + "Check for possible conflict with "
497 + "other classes.");
Joachim Bingel5fd09322015-01-29 14:01:30 +0000498 classRef = 128;
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000499 }
500 classRefs.add(classRef);
501 }
502 catch (NumberFormatException e) {
503 String err = "The specified class reference in the "
504 + "shrink/split-Operator is not a number.";
505 addError(StatusCodes.UNDEFINED_CLASS_REFERENCE, err);
506 }
507 }
508 }
509 }
510 else {
511 classRefs.add(1);
512 }
513 LinkedHashMap<String, Object> referenceGroup =
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000514 KoralObjectGenerator.makeReference(classRefs);
Joachim Bingel84395b22014-12-18 10:46:18 +0000515
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000516 String type = node.getChild(0).toStringTree(parser);
517 // Default is focus(), if deviating catch here
518 if (type.equals("split"))
519 referenceGroup.put("operation", "operation:split");
520 if (type.equals("submatch") || type.equals("shrink")) {
521 String warning = "Deprecated 2014-07-24: "
522 + type
523 + "() as a match reducer "
524 + "to a specific class is deprecated in favor of focus() and will "
525 + "only be supported for 3 months after deprecation date.";
526 addMessage(StatusCodes.DEPRECATED_QUERY_ELEMENT, warning);
527 }
528 if (classRefOp != null) {
529 referenceGroup.put("classRefOp", "classRefOp:" + classRefOp);
530 }
531 ArrayList<Object> referenceOperands = new ArrayList<Object>();
532 referenceGroup.put("operands", referenceOperands);
533 // Step II: decide where to put the group
534 putIntoSuperObject(referenceGroup);
535 objectStack.push(referenceGroup);
536 stackedObjects++;
537 visited.add(node.getChild(0));
538 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000539
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000540 private void processSubmatch(ParseTree node) {
541 LinkedHashMap<String, Object> submatch =
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000542 KoralObjectGenerator.makeReference(null);
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000543 submatch.put("operands", new ArrayList<Object>());
544 ParseTree startpos = getFirstChildWithCat(node, "startpos");
545 ParseTree length = getFirstChildWithCat(node, "length");
546 ArrayList<Integer> spanRef = new ArrayList<Integer>();
547 spanRef.add(Integer.parseInt(startpos.getText()));
548 if (length != null) {
549 spanRef.add(Integer.parseInt(length.getText()));
550 }
551 submatch.put("spanRef", spanRef);
552 putIntoSuperObject(submatch);
553 objectStack.push(submatch);
554 stackedObjects++;
555 visited.add(node.getChild(0));
556 }
Joachim Bingel84395b22014-12-18 10:46:18 +0000557
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000558 /**
559 * Creates meta field in requestMap, later filled by terms
560 *
561 * @param node
562 */
563 private void processMeta(ParseTree node) {
Joachim Bingel3c37eb22015-01-15 13:38:42 +0000564 addWarning("You used the 'meta' keyword in a PoliqarpPlus query. This"
565 + " feature is currently not supported. Please use virtual "
566 + "collections to restrict documents by metadata.");
Joachim Bingel1f8f3782015-01-19 17:58:41 +0000567 CollectionQueryProcessor cq = new CollectionQueryProcessor(node.getChild(1).getText());
568 requestMap.put("collection", cq.getRequestMap().get("collection"));
Joachim Bingel3c37eb22015-01-15 13:38:42 +0000569 for (ParseTree child : getChildren(node)) {
570 visited.add(child);
571 }
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000572 }
Joachim Bingel0900a892014-06-30 16:26:21 +0000573
Joachim Bingel1f8f3782015-01-19 17:58:41 +0000574 @SuppressWarnings("unchecked")
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000575 private void processWithin(ParseTree node) {
Joachim Bingel3c37eb22015-01-15 13:38:42 +0000576 ParseTree domainNode = node.getChild(1);
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000577 String domain = getNodeCat(domainNode);
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000578 LinkedHashMap<String, Object> span = KoralObjectGenerator.makeSpan(domain);
Joachim Bingel3c37eb22015-01-15 13:38:42 +0000579 LinkedHashMap<String, Object> queryObj = (LinkedHashMap<String, Object>) requestMap.get("query");
Joachim Bingel1d791042015-02-03 10:19:47 +0000580 LinkedHashMap<String, Object> contains = KoralObjectGenerator.makePosition(new String[]{"frames:isAround"});
Joachim Bingel3c37eb22015-01-15 13:38:42 +0000581 ArrayList<Object> operands = (ArrayList<Object>) contains.get("operands");
582 operands.add(span);
583 operands.add(queryObj);
584 requestMap.put("query", contains);
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000585 visited.add(node.getChild(0));
586 visited.add(node.getChild(1));
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000587 }
Joachim Bingel0900a892014-06-30 16:26:21 +0000588
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000589 /**
590 * Parses a repetition node
591 *
592 * @param node
593 * @return A two-element array, of which the first element is an
594 * int representing the minimal number of repetitions of
595 * the quantified element, and the second element
596 * representing the maximal number of repetitions
597 */
598 private Integer[] parseRepetition(ParseTree node) {
599 Integer min = 0, max = 0;
600 boolean maxInfinite = false;
601 // (repetition) node can be of two types: 'kleene' or 'range'
602 ParseTree repetitionTypeNode = node.getChild(0);
603 String repetitionType = getNodeCat(repetitionTypeNode);
604 if (repetitionType.equals("kleene")) {
605 // kleene operators (+ and *) as well as optionality (?)
606 String kleeneOp = repetitionTypeNode.getText();
607 if (kleeneOp.equals("*")) {
608 maxInfinite = true;
609 }
610 else if (kleeneOp.equals("+")) {
611 min = 1;
612 maxInfinite = true;
613 }
614 if (kleeneOp.equals("?")) {
615 max = 1;
616 }
617 }
618 else {
619 // Range node of form "{ min , max }" or "{ max }" or
620 // "{ , max }" or "{ min , }"
621 ParseTree minNode = getFirstChildWithCat(repetitionTypeNode, "min");
622 ParseTree maxNode = getFirstChildWithCat(repetitionTypeNode, "max");
623 if (maxNode != null)
624 max = Integer.parseInt(maxNode.getText());
625 else
626 maxInfinite = true;
627 // min is optional: if not specified, min = max
628 if (minNode != null)
629 min = Integer.parseInt(minNode.getText());
630 else if (hasChild(repetitionTypeNode, ","))
631 min = 0;
632 else {
633 min = max;
634// addWarning("Your query contains a segment of the form {n}, where n is some number. This expression is ambiguous. "
635// + "It could mean a repetition (\"Repeat the previous element n times!\") or a word form that equals the number, "
636// + "enclosed by a \"class\" (which is denoted by braces like '{x}', see the documentation on classes)."
637// + "KorAP has by default interpreted the segment as a repetition statement. If you want to express the"
638// + "number as a word form inside a class, use the non-shorthand form {[orth=n]}.");
639 }
640 }
641 if (maxInfinite) {
642 max = null;
643 }
644 return new Integer[] { min, max };
645 }
Joachim Bingel3a41a442014-07-22 12:16:16 +0000646
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000647 private LinkedHashMap<String, Object> parseFrame(ParseTree node) {
648 String operator = node.toStringTree(parser).toLowerCase();
649 String[] frames = new String[] { "" };
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000650 switch (operator) {
651 case "contains":
Joachim Bingel1d791042015-02-03 10:19:47 +0000652 frames = new String[] { "frames:isAround" };
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000653 break;
654 case "matches":
655 frames = new String[] { "frames:matches" };
656 break;
657 case "startswith":
Joachim Bingel1d791042015-02-03 10:19:47 +0000658 frames = new String[] { "frames:startsWith", "frames:matches" };
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000659 break;
660 case "endswith":
Joachim Bingel1d791042015-02-03 10:19:47 +0000661 frames = new String[] { "frames:endsWith", "frames:matches" };
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000662 break;
663 case "overlaps":
664 frames = new String[] { "frames:overlapsLeft",
665 "frames:overlapsRight" };
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000666 break;
667 }
Joachim Bingel1d791042015-02-03 10:19:47 +0000668 return KoralObjectGenerator.makePosition(frames);
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000669 }
Joachim Bingel832800e2014-10-17 14:46:39 +0000670
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000671 private LinkedHashMap<String, Object> parseTermOrTermGroup(ParseTree node,
672 boolean negated) {
673 return parseTermOrTermGroup(node, negated, "token");
674 }
Joachim Bingel0900a892014-06-30 16:26:21 +0000675
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000676 /**
677 * Parses a (term) or (termGroup) node
678 *
679 * @param node
680 * @param negatedGlobal
681 * Indicates whether the term/termGroup is globally
682 * negated, e.g. through a negation operator preceding
683 * the related token like "![base=foo]". Global
684 * negation affects the term's "match" parameter.
Joachim Bingel5fd09322015-01-29 14:01:30 +0000685 * @param mode 'token' or 'span' (tokens and spans are treated
686 * differently).
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000687 * @return A term or termGroup object, depending on input
688 */
689 @SuppressWarnings("unchecked")
690 private LinkedHashMap<String, Object> parseTermOrTermGroup(ParseTree node,
691 boolean negatedGlobal, String mode) {
692 String nodeCat = getNodeCat(node);
693 if (nodeCat.equals("term")) {
694 String key = null;
695 LinkedHashMap<String, Object> term =
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000696 KoralObjectGenerator.makeTerm();
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000697 // handle negation
698 boolean negated = negatedGlobal;
699 boolean isRegex = false;
700 List<ParseTree> negations = getChildrenWithCat(node, "!");
701 if (negations.size() % 2 == 1)
702 negated = !negated;
703 // retrieve possible nodes
704 ParseTree keyNode = getFirstChildWithCat(node, "key");
705 ParseTree valueNode = getFirstChildWithCat(node, "value");
706 ParseTree layerNode = getFirstChildWithCat(node, "layer");
707 ParseTree foundryNode = getFirstChildWithCat(node, "foundry");
708 ParseTree termOpNode = getFirstChildWithCat(node, "termOp");
709 ParseTree flagNode = getFirstChildWithCat(node, "flag");
710 // process foundry
711 if (foundryNode != null)
712 term.put("foundry", foundryNode.getText());
713 // process layer: map "base" -> "lemma"
714 if (layerNode != null) {
715 String layer = layerNode.getText();
Joachim Bingel5fd09322015-01-29 14:01:30 +0000716 if (mode.equals("span")) {
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000717 term.put("key", layer);
Joachim Bingel5fd09322015-01-29 14:01:30 +0000718 } else if (mode.equals("token")) {
719 if (layer.equals("base")) {
720 layer = "lemma"; }
721 else if (layer.equals("punct")) {
722 layer = "orth";
723 term.put("type", "type:punct");
724 }
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000725 term.put("layer", layer);
Joachim Bingel5fd09322015-01-29 14:01:30 +0000726 }
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000727 }
728 // process key: 'normal' or regex?
729 key = keyNode.getText();
730 if (getNodeCat(keyNode.getChild(0)).equals("regex")) {
731 isRegex = true;
732 term.put("type", "type:regex");
733 // remove leading and trailing quotes
734 key = key.substring(1, key.length() - 1);
735 }
736 if (mode.equals("span"))
737 term.put("value", key);
738 else
739 term.put("key", key);
740 // process value
741 if (valueNode != null)
742 term.put("value", valueNode.getText());
743 // process operator ("match" property)
744 if (termOpNode != null) {
745 String termOp = termOpNode.getText();
746 negated = termOp.contains("!") ? !negated : negated;
747 if (!negated)
748 term.put("match", "match:eq");
749 else
750 term.put("match", "match:ne");
751 }
752 // process possible flags
753 if (flagNode != null) {
754 // substring removes leading slash
755 String flag = getNodeCat(flagNode.getChild(0)).substring(1);
756 if (flag.contains("i"))
757 term.put("caseInsensitive", true);
758 else if (flag.contains("I"))
759 term.put("caseInsensitive", false);
760 if (flag.contains("x")) {
761 if (!isRegex) {
762 key = QueryUtils.escapeRegexSpecialChars(key);
763 }
764 // flag 'x' allows submatches:
765 // overwrite key with appended .*?
766 term.put("key", ".*?" + key + ".*?"); //
767 term.put("type", "type:regex");
768 }
769 }
770 return term;
771 }
772 else if (nodeCat.equals("termGroup")) {
773 // For termGroups, establish a boolean relation between
774 // operands and recursively call this function with
775 // the term or termGroup operands
776 LinkedHashMap<String, Object> termGroup = null;
777 ParseTree leftOp = null;
778 ParseTree rightOp = null;
779 // check for leading/trailing parantheses
780 if (!getNodeCat(node.getChild(0)).equals("("))
781 leftOp = node.getChild(0);
782 else
783 leftOp = node.getChild(1);
784 if (!getNodeCat(node.getChild(node.getChildCount() - 1))
785 .equals(")"))
786 rightOp = node.getChild(node.getChildCount() - 1);
787 else
788 rightOp = node.getChild(node.getChildCount() - 2);
789 // establish boolean relation
790 ParseTree boolOp = getFirstChildWithCat(node, "boolOp");
791 String operator = boolOp.getText().equals("&") ? "and" : "or";
Joachim Bingelaa4ab2f2015-01-16 14:26:51 +0000792 termGroup = KoralObjectGenerator.makeTermGroup(operator);
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000793 ArrayList<Object> operands = (ArrayList<Object>) termGroup
794 .get("operands");
795 // recursion with left/right operands
796 operands.add(parseTermOrTermGroup(leftOp, negatedGlobal, mode));
797 operands.add(parseTermOrTermGroup(rightOp, negatedGlobal, mode));
798 return termGroup;
799 }
800 return null;
801 }
Joachim Bingel0900a892014-06-30 16:26:21 +0000802
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000803 /**
804 * Puts an object into the operands list of its governing (or
805 * "super") object which had been placed on the
806 * {@link #objectStack} before and is still on top of the stack.
807 * If this is the top object of the tree, it is put there instead
808 * of into some (non-existent) operand stack.
809 *
810 * @param object
811 * The object to be inserted
812 */
813 private void putIntoSuperObject(LinkedHashMap<String, Object> object) {
814 putIntoSuperObject(object, 0);
815 }
Joachim Bingel33bd45f2014-06-25 15:00:54 +0000816
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000817 /**
818 * Puts an object into the operands list of its governing (or
819 * "super") object which had been placed on the
820 * {@link #objectStack} before. If this is the top object of the
821 * tree, it is put there instead of into some (non-existent)
822 * operand stack.
823 *
824 * @param object
825 * The object to be inserted
826 * @param objStackPosition
827 * Indicated the position of the super object on the
828 * {@link #objectStack} (in case not the top element of
829 * the stack is the super object.
830 */
831 @SuppressWarnings({ "unchecked" })
832 private void putIntoSuperObject(LinkedHashMap<String, Object> object,
833 int objStackPosition) {
834 if (objectStack.size() > objStackPosition) {
835 ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack
836 .get(objStackPosition).get("operands");
837 topObjectOperands.add(object);
838 }
839 else {
840 requestMap.put("query", object);
841 }
842 }
Joachim Bingel832800e2014-10-17 14:46:39 +0000843
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000844 /**
845 * Basically only increases the min and max counters as required
846 * by Poliqarp
847 *
848 * @param distanceNode
849 * @return
850 */
851 private Integer[] parseDistance(ParseTree distanceNode) {
852 int emptyTokenSeqIndex = getNodeCat(distanceNode).equals("distance") ? 0
853 : 2;
854 Integer[] minmax = parseEmptySegments(distanceNode
855 .getChild(emptyTokenSeqIndex));
856 Integer min = minmax[0];
857 Integer max = minmax[1];
858 min++;
859 if (max != null)
860 max++;
861 // min = cropToMaxValue(min);
862 // max = cropToMaxValue(max);
863 return new Integer[] { min, max };
864 }
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000865
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000866 private Integer[] parseEmptySegments(ParseTree emptySegments) {
867 Integer min = 0;
868 Integer max = 0;
869 ParseTree child;
870 for (int i = 0; i < emptySegments.getChildCount(); i++) {
871 child = emptySegments.getChild(i);
872 ParseTree nextSibling = emptySegments.getChild(i + 1);
873 if (child.toStringTree(parser).equals("(emptyToken [ ])")) {
874 if (nextSibling != null
875 && getNodeCat(nextSibling).equals("repetition")) {
876 Integer[] minmax = parseRepetition(nextSibling);
877 min += minmax[0];
878 if (minmax[1] != null) {
879 max += minmax[1];
880 }
881 else {
882 max = null;
883 }
884 }
885 else {
886 min++;
887 max++;
888 }
889 }
890 }
891 // min = cropToMaxValue(min);
892 // max = cropToMaxValue(max);
893 return new Integer[] { min, max };
894 }
Joachim Bingel14239d82014-07-22 09:55:04 +0000895
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000896 private ParserRuleContext parsePoliqarpQuery(String query) {
897 Lexer lexer = new PoliqarpPlusLexer((CharStream) null);
898 ParserRuleContext tree = null;
899 Antlr4DescriptiveErrorListener errorListener =
900 new Antlr4DescriptiveErrorListener(query);
901 // Like p. 111
902 try {
903 // Tokenize input data
904 ANTLRInputStream input = new ANTLRInputStream(query);
905 lexer.setInputStream(input);
906 CommonTokenStream tokens = new CommonTokenStream(lexer);
907 parser = new PoliqarpPlusParser(tokens);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000908
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000909 // Don't throw out erroneous stuff
910 parser.setErrorHandler(new BailErrorStrategy());
911 lexer.removeErrorListeners();
Joachim Bingel3fa584b2014-12-17 13:35:43 +0000912 lexer.addErrorListener(errorListener);
913 parser.removeErrorListeners();
914 parser.addErrorListener(errorListener);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000915
Joachim Bingel20e06ac2015-01-15 10:31:33 +0000916 // Get starting rule from parser
917 Method startRule = PoliqarpPlusParser.class.getMethod("request");
918 tree =
919 (ParserRuleContext) startRule.invoke(parser, (Object[]) null);
920 }
921 // Some things went wrong ...
922 catch (Exception e) {
923 log.error("Could not parse query. "
924 + "Please make sure it is well-formed.");
925 log.error(errorListener.generateFullErrorMsg().toString());
926 addError(errorListener.generateFullErrorMsg());
927 }
928 return tree;
929 }
Nils Diewald4128a922014-07-18 14:39:24 +0000930}