blob: 5d8452659c27486818239c8fded24f6d25862bca [file] [log] [blame]
Joachim Bingel4b405f52013-11-15 15:29:30 +00001package de.ids_mannheim.korap.query.serialize;
2
Joachim Bingel0900a892014-06-30 16:26:21 +00003import de.ids_mannheim.korap.query.poliqarp.PoliqarpPlusLexer;
4import de.ids_mannheim.korap.query.poliqarp.PoliqarpPlusParser;
Joachim Bingel16da4e12013-12-17 09:48:12 +00005import de.ids_mannheim.korap.util.QueryException;
Michael Hanld8116e52014-04-25 20:31:29 +00006import org.antlr.v4.runtime.*;
7import org.antlr.v4.runtime.tree.ParseTree;
Michael Hanl27e50582013-12-07 18:04:13 +00008import org.slf4j.Logger;
9import org.slf4j.LoggerFactory;
10
Michael Hanld8116e52014-04-25 20:31:29 +000011import java.lang.reflect.Method;
12import java.util.*;
Joachim Bingel998954a2014-07-14 15:58:34 +000013import java.util.regex.Pattern;
Michael Hanld8116e52014-04-25 20:31:29 +000014
Joachim Bingel4b405f52013-11-15 15:29:30 +000015/**
16 * Map representation of Poliqarp syntax tree as returned by ANTLR
Joachim Bingel4b405f52013-11-15 15:29:30 +000017 *
Michael Hanld8116e52014-04-25 20:31:29 +000018 * @author joachim
Joachim Bingel4b405f52013-11-15 15:29:30 +000019 */
Joachim Bingelc8a28e42014-04-24 15:06:42 +000020public class PoliqarpPlusTree extends Antlr4AbstractSyntaxTree {
Michael Hanl27e50582013-12-07 18:04:13 +000021
Joachim Bingel687e4d42014-07-30 09:34:18 +000022 private static Logger log = LoggerFactory.getLogger(PoliqarpPlusTree.class);
Joachim Bingel4b405f52013-11-15 15:29:30 +000023
/**
 * Creates the tree for a PoliqarpPlus query by handing the query string to
 * {@link #process(String)}.
 * <br/>
 * Most centrally, this class maintains a set of nested maps and lists which represent the
 * JSON tree, which is built by the JSON serialiser on basis of the {@link #requestMap} at
 * the root of the tree.
 * <br/>
 * The class further maintains a set of stacks which effectively keep track of which objects
 * to embed in which containing objects.
 *
 * @param query The PoliqarpPlus query string to be parsed and processed
 * @throws QueryException if {@link #process(String)} rejects the query
 */
public PoliqarpPlusTree(String query) throws QueryException {
    process(query);
    // Diagnostic output goes through the logger rather than stdout, so that library users
    // can control it; the full request map is only of interest when debugging.
    log.debug(">>> {} <<<", requestMap);
    log.info(">>> {} <<<", requestMap.get("query"));
}
Michael Hanld8116e52014-04-25 20:31:29 +000038
Joachim Bingel0900a892014-06-30 16:26:21 +000039 @Override
Joachim Bingel0900a892014-06-30 16:26:21 +000040 public void process(String query) throws QueryException {
41 ParseTree tree;
42 tree = parsePoliqarpQuery(query);
43 super.parser = this.parser;
44 log.info("Processing PoliqarpPlus");
Joachim Bingel0900a892014-06-30 16:26:21 +000045 processNode(tree);
46 }
Joachim Bingel4b405f52013-11-15 15:29:30 +000047
Joachim Bingel0900a892014-06-30 16:26:21 +000048 /**
49 * Recursively calls itself with the children of the currently active node, traversing the tree nodes in a top-down, depth-first fashion.
50 * A list is maintained that contains all visited nodes
Joachim Bingelbea1ec62014-07-11 15:00:14 +000051 * which have been directly addressed by their (grand-/grand-grand-/...) parent nodes, such that some processing time is saved, as these node will
Joachim Bingel0900a892014-06-30 16:26:21 +000052 * not be processed. This method is effectively a list of if-statements that are responsible for treating the different node types correctly and filling the
53 * respective maps/lists.
54 *
55 * @param node The currently processed node. The process(String query) method calls this method with the root.
56 * @throws QueryException
57 */
Joachim Bingel14239d82014-07-22 09:55:04 +000058 @SuppressWarnings("unchecked")
Joachim Bingel0900a892014-06-30 16:26:21 +000059 private void processNode(ParseTree node) throws QueryException {
60 // Top-down processing
61 if (visited.contains(node)) return;
62 else visited.add(node);
Michael Hanld8116e52014-04-25 20:31:29 +000063
Joachim Bingel0900a892014-06-30 16:26:21 +000064 currentNode = node;
Michael Hanld8116e52014-04-25 20:31:29 +000065
Joachim Bingel0900a892014-06-30 16:26:21 +000066 String nodeCat = getNodeCat(node);
67 openNodeCats.push(nodeCat);
Michael Hanld8116e52014-04-25 20:31:29 +000068
Joachim Bingel0900a892014-06-30 16:26:21 +000069 stackedObjects = 0;
Michael Hanld8116e52014-04-25 20:31:29 +000070
Joachim Bingel0900a892014-06-30 16:26:21 +000071 if (verbose) {
72 System.err.println(" " + objectStack);
73 System.out.println(openNodeCats);
74 }
Michael Hanld8116e52014-04-25 20:31:29 +000075
Joachim Bingel4b405f52013-11-15 15:29:30 +000076 /*
Joachim Bingel0900a892014-06-30 16:26:21 +000077 ****************************************************************
Joachim Bingel4b405f52013-11-15 15:29:30 +000078 ****************************************************************
79 * Processing individual node categories *
80 ****************************************************************
81 ****************************************************************
82 */
Joachim Bingel4b405f52013-11-15 15:29:30 +000083
Joachim Bingel0900a892014-06-30 16:26:21 +000084 if (nodeCat.equals("segment")) {
85 // Cover possible quantification (i.e. repetition) of segment
86 ParseTree quantification = getFirstChildWithCat(node, "repetition");
87 if (quantification != null) {
88 LinkedHashMap<String,Object> quantGroup = makeGroup("repetition");
Joachim Bingel61631562014-07-24 14:26:02 +000089 Integer[] minmax = parseRepetition(quantification);
90 quantGroup.put("boundary", makeBoundary(minmax[0], minmax[1]));
91 if (minmax[0] != null) quantGroup.put("min", minmax[0]);
92 if (minmax[1] != null) quantGroup.put("max", minmax[1]);
Joachim Bingelaaabb722014-09-24 14:29:10 +000093 announcements.add("Deprecated 2014-07-24: 'min' and 'max' to be " +
94 "supported until 3 months from deprecation date.");
Joachim Bingel0900a892014-06-30 16:26:21 +000095 putIntoSuperObject(quantGroup);
96 objectStack.push(quantGroup);
97 stackedObjects++;
98 }
99 }
100
101 if (nodeCat.equals("sequence")) {
102 LinkedHashMap<String,Object> sequence = makeGroup("sequence");
Joachim Bingel14239d82014-07-22 09:55:04 +0000103 ParseTree distanceNode = getFirstChildWithCat(node, "distance");
104 if (distanceNode!=null) {
Joachim Bingel687e4d42014-07-30 09:34:18 +0000105 Integer[] minmax = parseDistance(distanceNode);
Joachim Bingel0900a892014-06-30 16:26:21 +0000106 LinkedHashMap<String,Object> distance = makeDistance("w", minmax[0], minmax[1]);
107 sequence.put("inOrder", true);
108 ArrayList<Object> distances = new ArrayList<Object>();
109 distances.add(distance);
110 sequence.put("distances", distances);
Joachim Bingelaaabb722014-09-24 14:29:10 +0000111 visited.add(distanceNode.getChild(0)); // don't re-visit the emptyTokenSequence node
Joachim Bingel0900a892014-06-30 16:26:21 +0000112 }
113 putIntoSuperObject(sequence);
114 objectStack.push(sequence);
115 stackedObjects++;
116 }
Joachim Bingel14239d82014-07-22 09:55:04 +0000117
Joachim Bingelaaabb722014-09-24 14:29:10 +0000118 /*
119 * empty tokens at beginning/end of sequence
120 */
Joachim Bingel14239d82014-07-22 09:55:04 +0000121 if (nodeCat.equals("emptyTokenSequence")) {
Joachim Bingel687e4d42014-07-30 09:34:18 +0000122 Integer[] minmax = parseEmptySegments(node);
Joachim Bingelaaabb722014-09-24 14:29:10 +0000123 // object will be either a repetition group or a single empty token
124 LinkedHashMap<String,Object> object;
Joachim Bingel14239d82014-07-22 09:55:04 +0000125 LinkedHashMap<String,Object> emptyToken = makeToken();
Joachim Bingel687e4d42014-07-30 09:34:18 +0000126 if (minmax[0] != 1 || minmax[1] == null || minmax[1] != 1) {
Joachim Bingel14239d82014-07-22 09:55:04 +0000127 object = makeRepetition(minmax[0], minmax[1]);
128 ((ArrayList<Object>) object.get("operands")).add(emptyToken);
129 } else {
130 object = emptyToken;
131 }
132 putIntoSuperObject(object);
133 objectStack.push(object);
134 stackedObjects++;
135 }
136
Joachim Bingel0900a892014-06-30 16:26:21 +0000137
138 if (nodeCat.equals("token")) {
139 LinkedHashMap<String,Object> token = makeToken();
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000140 // handle negation
Joachim Bingel0900a892014-06-30 16:26:21 +0000141 List<ParseTree> negations = getChildrenWithCat(node, "!");
142 boolean negated = false;
Joachim Bingel723ced02014-07-14 16:17:22 +0000143 boolean isRegex = false;
Joachim Bingel0900a892014-06-30 16:26:21 +0000144 if (negations.size() % 2 == 1) negated = true;
145 if (getNodeCat(node.getChild(0)).equals("key")) {
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000146 // no 'term' child, but direct key specification: process here
Joachim Bingel0900a892014-06-30 16:26:21 +0000147 LinkedHashMap<String,Object> term = makeTerm();
148
149 String key = node.getChild(0).getText();
150 if (getNodeCat(node.getChild(0).getChild(0)).equals("regex")) {
Joachim Bingel723ced02014-07-14 16:17:22 +0000151 isRegex = true;
Joachim Bingel0900a892014-06-30 16:26:21 +0000152 term.put("type", "type:regex");
153 key = key.substring(1,key.length()-1);
154 }
155 term.put("layer", "orth");
156 term.put("key", key);
157 String matches = negated ? "ne" : "eq";
158 term.put("match", "match:"+matches);
159 ParseTree flagNode = getFirstChildWithCat(node, "flag");
160 if (flagNode != null) {
Joachim Bingelaaabb722014-09-24 14:29:10 +0000161 // substring removes leading slash '/'
162 String flag = getNodeCat(flagNode.getChild(0)).substring(1);
Joachim Bingel0900a892014-06-30 16:26:21 +0000163 if (flag.contains("i")) term.put("caseInsensitive", true);
164 else if (flag.contains("I")) term.put("caseInsensitive", false);
165 if (flag.contains("x")) {
Joachim Bingel899bdcf2014-07-14 14:55:06 +0000166 term.put("type", "type:regex");
Joachim Bingel723ced02014-07-14 16:17:22 +0000167 if (!isRegex) {
Joachim Bingela83f8cc2014-08-05 14:12:59 +0000168 key = QueryUtils.escapeRegexSpecialChars(key);
Joachim Bingel723ced02014-07-14 16:17:22 +0000169 }
170 term.put("key", ".*?"+key+".*?"); // overwrite key
Joachim Bingel0900a892014-06-30 16:26:21 +0000171 }
172 }
173 token.put("wrap", term);
174 } else {
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000175 // child is 'term' or 'termGroup' -> process in extra method
Joachim Bingelaaabb722014-09-24 14:29:10 +0000176 LinkedHashMap<String,Object> termOrTermGroup =
177 parseTermOrTermGroup(node.getChild(1), negated);
Joachim Bingel0900a892014-06-30 16:26:21 +0000178 token.put("wrap", termOrTermGroup);
179 }
180 putIntoSuperObject(token);
181 visited.add(node.getChild(0));
182 visited.add(node.getChild(2));
183 }
184
185 if (nodeCat.equals("alignment")) {
186 LinkedHashMap<String,Object> aligned = makeGroup("alignment");
187 aligned.put("align", "align:left");
188 putIntoSuperObject(aligned);
189 objectStack.push(aligned);
190 stackedObjects++;
191 }
192
193 if (nodeCat.equals("span")) {
Joachim Bingel3a41a442014-07-22 12:16:16 +0000194 List<ParseTree> negations = getChildrenWithCat(node, "!");
195 boolean negated = false;
196 boolean isRegex = false;
197 if (negations.size() % 2 == 1) negated = true;
Joachim Bingel0900a892014-06-30 16:26:21 +0000198 LinkedHashMap<String,Object> span = makeSpan();
199 ParseTree keyNode = getFirstChildWithCat(node, "key");
200 ParseTree layerNode = getFirstChildWithCat(node, "layer");
201 ParseTree foundryNode = getFirstChildWithCat(node, "foundry");
202 ParseTree termOpNode = getFirstChildWithCat(node, "termOp");
Joachim Bingel3a41a442014-07-22 12:16:16 +0000203 ParseTree termNode = getFirstChildWithCat(node, "term");
204 ParseTree termGroupNode = getFirstChildWithCat(node, "termGroup");
Joachim Bingel0900a892014-06-30 16:26:21 +0000205 if (foundryNode != null) span.put("foundry", foundryNode.getText());
206 if (layerNode != null) {
207 String layer = layerNode.getText();
208 if (layer.equals("base")) layer="lemma";
209 span.put("layer", layer);
210 }
211 span.put("key", keyNode.getText());
212 if (termOpNode != null) {
213 String termOp = termOpNode.getText();
Joachim Bingel3a41a442014-07-22 12:16:16 +0000214 if (termOp.equals("==")) span.put("match", "match:eq");
215 else if (termOp.equals("!=")) span.put("match", "match:ne");
216 }
217 if (termNode != null) {
Joachim Bingelaaabb722014-09-24 14:29:10 +0000218 LinkedHashMap<String,Object> termOrTermGroup =
219 parseTermOrTermGroup(termNode, negated, "span");
Joachim Bingel3a41a442014-07-22 12:16:16 +0000220 span.put("attr", termOrTermGroup);
221 }
222 if (termGroupNode != null) {
Joachim Bingelaaabb722014-09-24 14:29:10 +0000223 LinkedHashMap<String,Object> termOrTermGroup =
224 parseTermOrTermGroup(termGroupNode, negated, "span");
Joachim Bingel3a41a442014-07-22 12:16:16 +0000225 span.put("attr", termOrTermGroup);
Joachim Bingel0900a892014-06-30 16:26:21 +0000226 }
227 putIntoSuperObject(span);
228 objectStack.push(span);
229 stackedObjects++;
230 }
231
232 if (nodeCat.equals("disjunction")) {
233 LinkedHashMap<String,Object> disjunction = makeGroup("or");
234 putIntoSuperObject(disjunction);
235 objectStack.push(disjunction);
236 stackedObjects++;
237 }
238
239 if (nodeCat.equals("position")) {
Joachim Bingelceb79062014-09-22 11:50:37 +0000240 LinkedHashMap<String,Object> position = parseFrame(node.getChild(0));
Joachim Bingel0900a892014-06-30 16:26:21 +0000241 putIntoSuperObject(position);
242 objectStack.push(position);
243 stackedObjects++;
Joachim Bingel0900a892014-06-30 16:26:21 +0000244 }
245
Joachim Bingel23c31ad2014-08-11 09:44:46 +0000246 if (nodeCat.equals("relation")) {
247 LinkedHashMap<String, Object> relationGroup = makeGroup("relation");
248 LinkedHashMap<String, Object> relation = makeRelation();
249 relationGroup.put("relation", relation);
250 if (node.getChild(0).getText().equals("dominates")) {
251 relation.put("layer", "c");
252 }
253 ParseTree relSpec = getFirstChildWithCat(node, "relSpec");
254 ParseTree repetition = getFirstChildWithCat(node, "repetition");
255 if (relSpec != null) {
256 ParseTree foundry = getFirstChildWithCat(relSpec, "foundry");
257 ParseTree layer = getFirstChildWithCat(relSpec, "layer");
258 ParseTree key = getFirstChildWithCat(relSpec, "key");
259 if (foundry != null) relation.put("foundry", foundry.getText());
260 if (layer != null) relation.put("layer", layer.getText());
261 if (key != null) relation.put("key", key.getText());
262 }
263 if (repetition != null) {
264 Integer[] minmax = parseRepetition(repetition);
265 relation.put("boundary", makeBoundary(minmax[0], minmax[1]));
266 }
267 putIntoSuperObject(relationGroup);
268 objectStack.push(relationGroup);
269 stackedObjects++;
270 }
271
Joachim Bingel0900a892014-06-30 16:26:21 +0000272 if (nodeCat.equals("spanclass")) {
273 // Step I: get info
274 int classId = 0;
275 if (getNodeCat(node.getChild(1)).equals("spanclass_id")) {
276 String ref = node.getChild(1).getChild(0).toStringTree(parser);
277 try {
278 classId = Integer.parseInt(ref);
279 } catch (NumberFormatException e) {
Joachim Bingelaaabb722014-09-24 14:29:10 +0000280 String msg = "The specified class reference in the " +
281 "focus/split-Operator is not a number: " + ref;
282 log.error(msg);
283 throw new QueryException(msg);
Joachim Bingel0900a892014-06-30 16:26:21 +0000284 }
285 // only allow class id up to 255
286 if (classId > 255) {
287 classId = 0;
288 }
289 }
290 LinkedHashMap<String, Object> classGroup = makeSpanClass(classId);
291 putIntoSuperObject(classGroup);
292 objectStack.push(classGroup);
293 stackedObjects++;
294 }
Michael Hanld8116e52014-04-25 20:31:29 +0000295
Joachim Bingel0900a892014-06-30 16:26:21 +0000296 if (nodeCat.equals("matching")) {
297 // Step I: get info
298 ArrayList<Integer> classRefs = new ArrayList<Integer>();
299 String classRefOp = null;
300 if (getNodeCat(node.getChild(2)).equals("spanclass_id")) {
301 ParseTree spanNode = node.getChild(2);
302 for (int i = 0; i < spanNode.getChildCount() - 1; i++) {
303 String ref = spanNode.getChild(i).getText();
304 if (ref.equals("|") || ref.equals("&")) {
305 classRefOp = ref.equals("|") ? "intersection" : "union";
306 } else {
307 try {
308 int classRef = Integer.parseInt(ref);
309 classRefs.add(classRef);
310 } catch (NumberFormatException e) {
Joachim Bingelaaabb722014-09-24 14:29:10 +0000311 String err = "The specified class reference in the " +
312 "shrink/split-Operator is not a number.";
Joachim Bingel23c31ad2014-08-11 09:44:46 +0000313 errorMsgs.add(err);
314 throw new QueryException(err);
Joachim Bingel0900a892014-06-30 16:26:21 +0000315 }
316 }
317 }
318 } else {
319 classRefs.add(0);
320 }
321 LinkedHashMap<String, Object> referenceGroup = makeReference(classRefs);
Michael Hanld8116e52014-04-25 20:31:29 +0000322
Joachim Bingel0900a892014-06-30 16:26:21 +0000323 String type = node.getChild(0).toStringTree(parser);
Joachim Bingel23c31ad2014-08-11 09:44:46 +0000324 // Default is focus(), if deviating catch here
Joachim Bingel0900a892014-06-30 16:26:21 +0000325 if (type.equals("split")) referenceGroup.put("operation", "operation:split");
Joachim Bingel899bdcf2014-07-14 14:55:06 +0000326 if (type.equals("submatch") || type.equals("shrink")) {
Joachim Bingelaaabb722014-09-24 14:29:10 +0000327 String warning = "Deprecated 2014-07-24: "+type + "() as a match reducer " +
328 "to a specific class is deprecated in favor of focus() and will " +
329 "only be supported for 3 months after deprecation date.";
Joachim Bingel899bdcf2014-07-14 14:55:06 +0000330 log.warn(warning);
331 requestMap.put("warning", warning);
332 }
Joachim Bingel0900a892014-06-30 16:26:21 +0000333 if (classRefOp != null) {
334 referenceGroup.put("classRefOp", "classRefOp:" + classRefOp);
335 }
336 ArrayList<Object> referenceOperands = new ArrayList<Object>();
337 referenceGroup.put("operands", referenceOperands);
338 // Step II: decide where to put the group
339 putIntoSuperObject(referenceGroup);
340 objectStack.push(referenceGroup);
341 stackedObjects++;
342 visited.add(node.getChild(0));
343 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000344
Joachim Bingel9bbd4fc2014-08-11 14:56:48 +0000345 if (nodeCat.equals("submatch")) {
346 LinkedHashMap<String,Object> submatch = makeReference(null);
347 submatch.put("operands", new ArrayList<Object>());
Joachim Bingel23c31ad2014-08-11 09:44:46 +0000348 ParseTree startpos = getFirstChildWithCat(node,"startpos");
349 ParseTree length = getFirstChildWithCat(node,"length");
350 ArrayList<Integer> spanRef = new ArrayList<Integer>();
351 spanRef.add(Integer.parseInt(startpos.getText()));
352 if (length != null) {
353 spanRef.add(Integer.parseInt(length.getText()));
354 }
Joachim Bingel9bbd4fc2014-08-11 14:56:48 +0000355 submatch.put("spanRef", spanRef);
356 putIntoSuperObject(submatch);
357 objectStack.push(submatch);
Joachim Bingel23c31ad2014-08-11 09:44:46 +0000358 stackedObjects++;
359 visited.add(node.getChild(0));
360 }
361
Joachim Bingel0900a892014-06-30 16:26:21 +0000362 if (nodeCat.equals("meta")) {
363 LinkedHashMap<String, Object> metaFilter = new LinkedHashMap<String, Object>();
364 requestMap.put("meta", metaFilter);
365 metaFilter.put("@type", "korap:meta");
366 }
Michael Hanld8116e52014-04-25 20:31:29 +0000367
Joachim Bingel0900a892014-06-30 16:26:21 +0000368 if (nodeCat.equals("within") && !getNodeCat(node.getParent()).equals("position")) {
369 ParseTree domainNode = node.getChild(2);
370 String domain = getNodeCat(domainNode);
Joachim Bingelaaabb722014-09-24 14:29:10 +0000371 LinkedHashMap<String, Object> curObject =
372 (LinkedHashMap<String, Object>) objectStack.getFirst();
Joachim Bingel0900a892014-06-30 16:26:21 +0000373 curObject.put("within", domain);
374 visited.add(node.getChild(0));
375 visited.add(node.getChild(1));
376 visited.add(domainNode);
377 }
Michael Hanld8116e52014-04-25 20:31:29 +0000378
Joachim Bingel0900a892014-06-30 16:26:21 +0000379 objectsToPop.push(stackedObjects);
Michael Hanld8116e52014-04-25 20:31:29 +0000380
Joachim Bingel4b405f52013-11-15 15:29:30 +0000381 /*
382 ****************************************************************
383 ****************************************************************
384 * recursion until 'request' node (root of tree) is processed *
Joachim Bingel7fd4b1b2013-12-04 09:04:40 +0000385 ****************************************************************
Joachim Bingel4b405f52013-11-15 15:29:30 +0000386 ****************************************************************
387 */
Joachim Bingel0900a892014-06-30 16:26:21 +0000388 for (int i = 0; i < node.getChildCount(); i++) {
389 ParseTree child = node.getChild(i);
390 processNode(child);
391 }
Michael Hanld8116e52014-04-25 20:31:29 +0000392
Joachim Bingel0900a892014-06-30 16:26:21 +0000393 // Stuff that happens when leaving a node (taking items off the stacks)
394 for (int i = 0; i < objectsToPop.get(0); i++) {
395 objectStack.pop();
396 }
397 objectsToPop.pop();
Joachim Bingel0900a892014-06-30 16:26:21 +0000398 openNodeCats.pop();
399 }
400
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000401 /**
402 * Parses a repetition node
403 * @param node
404 * @return A two-element array, of which the first element is an int representing
405 * the minimal number of repetitions of the quantified element, and the second
406 * element representing the maximal number of repetitions
407 */
Joachim Bingel61631562014-07-24 14:26:02 +0000408 private Integer[] parseRepetition(ParseTree node) {
Joachim Bingel687e4d42014-07-30 09:34:18 +0000409 Integer min = 0, max = 0;
410 boolean maxInfinite = false;
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000411 // (repetition) node can be of two types: 'kleene' or 'range'
412 ParseTree repetitionTypeNode = node.getChild(0);
413 String repetitionType = getNodeCat(repetitionTypeNode);
414 if (repetitionType.equals("kleene")) {
415 // kleene operators (+ and *) as well as optionality (?)
416 String kleeneOp = repetitionTypeNode.getText();
Joachim Bingel0900a892014-06-30 16:26:21 +0000417 if (kleeneOp.equals("*")) {
Joachim Bingel687e4d42014-07-30 09:34:18 +0000418 maxInfinite = true;
Joachim Bingel0900a892014-06-30 16:26:21 +0000419 } else if (kleeneOp.equals("+")) {
420 min = 1;
Joachim Bingel687e4d42014-07-30 09:34:18 +0000421 maxInfinite = true;
Joachim Bingel0900a892014-06-30 16:26:21 +0000422 } if (kleeneOp.equals("?")) {
423 max = 1;
424 }
425 } else {
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000426 // Range node of form "{ min , max }" or "{ max }" or "{ , max }" or "{ min , }"
427 ParseTree minNode = getFirstChildWithCat(repetitionTypeNode, "min");
428 ParseTree maxNode = getFirstChildWithCat(repetitionTypeNode, "max");
429 if (maxNode!=null) max = Integer.parseInt(maxNode.getText());
Joachim Bingel687e4d42014-07-30 09:34:18 +0000430 else maxInfinite = true;
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000431 // min is optional: if not specified, min = max
Joachim Bingel0900a892014-06-30 16:26:21 +0000432 if (minNode!=null) min = Integer.parseInt(minNode.getText());
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000433 else if (hasChild(repetitionTypeNode, ",")) min = 0;
Joachim Bingel0900a892014-06-30 16:26:21 +0000434 else min = max;
435 }
Joachim Bingel687e4d42014-07-30 09:34:18 +0000436 if (maxInfinite) {
437 max = null;
438 }
Joachim Bingel61631562014-07-24 14:26:02 +0000439 return new Integer[]{min,max};
Joachim Bingel0900a892014-06-30 16:26:21 +0000440 }
441
Joachim Bingelceb79062014-09-22 11:50:37 +0000442 private LinkedHashMap<String,Object> parseFrame(ParseTree node) {
Joachim Bingelaaabb722014-09-24 14:29:10 +0000443 String operator = node.toStringTree(parser).toLowerCase();
Joachim Bingelceb79062014-09-22 11:50:37 +0000444 String[] frames = new String[]{""};
445 String[] sharedClasses = new String[]{"includes"};
446 switch (operator) {
447 case "contains":
448 frames = new String[]{};
449 break;
450 case "matches":
451 frames = new String[]{"matches"};
452 break;
453 case "startswith":
454 frames = new String[]{"startswith"};
455 break;
456 case "endswith":
457 frames = new String[]{"endswith"};
458 break;
459 case "overlaps":
460 frames = new String[]{"overlapsLeft","overlapsRight"};
461 sharedClasses = new String[]{"intersects"};
462 break;
463 }
464 return makePosition(frames,sharedClasses);
Joachim Bingel0900a892014-06-30 16:26:21 +0000465 }
466
Joachim Bingel3a41a442014-07-22 12:16:16 +0000467
468 private LinkedHashMap<String, Object> parseTermOrTermGroup(
469 ParseTree node, boolean negated) {
470 return parseTermOrTermGroup(node, negated, "token");
471 }
472
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000473 /**
474 * Parses a (term) or (termGroup) node
475 * @param node
476 * @param negatedGlobal Indicates whether the term/termGroup is globally negated, e.g. through a negation
477 * operator preceding the related token like "![base=foo]". Global negation affects the term's "match" parameter.
478 * @return A term or termGroup object, depending on input
479 */
Joachim Bingel0900a892014-06-30 16:26:21 +0000480 @SuppressWarnings("unchecked")
Joachim Bingel3a41a442014-07-22 12:16:16 +0000481 private LinkedHashMap<String, Object> parseTermOrTermGroup(ParseTree node, boolean negatedGlobal, String mode) {
Joachim Bingel0900a892014-06-30 16:26:21 +0000482 if (getNodeCat(node).equals("term")) {
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000483 String key = null;
Joachim Bingel0900a892014-06-30 16:26:21 +0000484 LinkedHashMap<String,Object> term = makeTerm();
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000485 // handle negation
Joachim Bingel0900a892014-06-30 16:26:21 +0000486 boolean negated = negatedGlobal;
Joachim Bingel723ced02014-07-14 16:17:22 +0000487 boolean isRegex = false;
Joachim Bingel0900a892014-06-30 16:26:21 +0000488 List<ParseTree> negations = getChildrenWithCat(node, "!");
489 if (negations.size() % 2 == 1) negated = !negated;
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000490 // retrieve possible nodes
Joachim Bingel0900a892014-06-30 16:26:21 +0000491 ParseTree keyNode = getFirstChildWithCat(node, "key");
Joachim Bingel998954a2014-07-14 15:58:34 +0000492 ParseTree valueNode = getFirstChildWithCat(node, "value");
Joachim Bingel0900a892014-06-30 16:26:21 +0000493 ParseTree layerNode = getFirstChildWithCat(node, "layer");
494 ParseTree foundryNode = getFirstChildWithCat(node, "foundry");
495 ParseTree termOpNode = getFirstChildWithCat(node, "termOp");
496 ParseTree flagNode = getFirstChildWithCat(node, "flag");
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000497 // process foundry
Joachim Bingel0900a892014-06-30 16:26:21 +0000498 if (foundryNode != null) term.put("foundry", foundryNode.getText());
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000499 // process layer: map "base" -> "lemma"
Joachim Bingel0900a892014-06-30 16:26:21 +0000500 if (layerNode != null) {
501 String layer = layerNode.getText();
502 if (layer.equals("base")) layer="lemma";
Joachim Bingel3a41a442014-07-22 12:16:16 +0000503 if (mode.equals("span")) term.put("key", layer);
504 else term.put("layer", layer);
Joachim Bingel0900a892014-06-30 16:26:21 +0000505 }
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000506 // process key: 'normal' or regex?
507 key = keyNode.getText();
Joachim Bingel0900a892014-06-30 16:26:21 +0000508 if (getNodeCat(keyNode.getChild(0)).equals("regex")) {
Joachim Bingel723ced02014-07-14 16:17:22 +0000509 isRegex = true;
Joachim Bingel0900a892014-06-30 16:26:21 +0000510 term.put("type", "type:regex");
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000511 key = key.substring(1, key.length()-1); // remove leading and trailing quotes
Joachim Bingel0900a892014-06-30 16:26:21 +0000512 }
Joachim Bingel3a41a442014-07-22 12:16:16 +0000513 if (mode.equals("span")) term.put("value", key);
514 else term.put("key", key);
Joachim Bingel998954a2014-07-14 15:58:34 +0000515 // process value
516 if (valueNode != null) term.put("value", valueNode.getText());
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000517 // process operator ("match" property)
Joachim Bingel0900a892014-06-30 16:26:21 +0000518 if (termOpNode != null) {
519 String termOp = termOpNode.getText();
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000520 negated = termOp.contains("!") ? !negated : negated;
Joachim Bingel0900a892014-06-30 16:26:21 +0000521 if (!negated) term.put("match", "match:eq");
522 else term.put("match", "match:ne");
523 }
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000524 // process possible flags
Joachim Bingel0900a892014-06-30 16:26:21 +0000525 if (flagNode != null) {
526 String flag = getNodeCat(flagNode.getChild(0)).substring(1); //substring removes leading slash '/'
Joachim Bingel0900a892014-06-30 16:26:21 +0000527 if (flag.contains("i")) term.put("caseInsensitive", true);
528 else if (flag.contains("I")) term.put("caseInsensitive", false);
529 if (flag.contains("x")) {
Joachim Bingel723ced02014-07-14 16:17:22 +0000530 if (!isRegex) {
Joachim Bingela83f8cc2014-08-05 14:12:59 +0000531 key = QueryUtils.escapeRegexSpecialChars(key);
Joachim Bingel723ced02014-07-14 16:17:22 +0000532 }
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000533 term.put("key", ".*?"+key+".*?"); // flag 'x' allows submatches: overwrite key with appended .*?
Joachim Bingel723ced02014-07-14 16:17:22 +0000534 term.put("type", "type:regex");
Joachim Bingel0900a892014-06-30 16:26:21 +0000535 }
536 }
537 return term;
538 } else {
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000539 // For termGroups, establish a boolean relation between operands and recursively call this function with
540 // the term or termGroup operands
Joachim Bingel0900a892014-06-30 16:26:21 +0000541 LinkedHashMap<String,Object> termGroup = null;
542 ParseTree leftOp = null;
543 ParseTree rightOp = null;
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000544 // check for leading/trailing parantheses
Joachim Bingel0900a892014-06-30 16:26:21 +0000545 if (!getNodeCat(node.getChild(0)).equals("(")) leftOp = node.getChild(0);
546 else leftOp = node.getChild(1);
547 if (!getNodeCat(node.getChild(node.getChildCount()-1)).equals(")")) rightOp = node.getChild(node.getChildCount()-1);
548 else rightOp = node.getChild(node.getChildCount()-2);
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000549 // establish boolean relation
Joachim Bingel23c31ad2014-08-11 09:44:46 +0000550 ParseTree boolOp = getFirstChildWithCat(node, "boolOp");
Joachim Bingel0900a892014-06-30 16:26:21 +0000551 String operator = boolOp.getText().equals("&") ? "and" : "or";
552 termGroup = makeTermGroup(operator);
553 ArrayList<Object> operands = (ArrayList<Object>) termGroup.get("operands");
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000554 // recursion with left/right operands
Joachim Bingel3a41a442014-07-22 12:16:16 +0000555 operands.add(parseTermOrTermGroup(leftOp, negatedGlobal, mode));
556 operands.add(parseTermOrTermGroup(rightOp, negatedGlobal, mode));
Joachim Bingel0900a892014-06-30 16:26:21 +0000557 return termGroup;
558 }
559 }
560
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000561 /**
562 * Puts an object into the operands list of its governing (or "super") object which had been placed on the
563 * {@link #objectStack} before and is still on top of the stack. If this is the top object of the tree, it is put there
564 * instead of into some (non-existent) operand stack.
565 * @param object The object to be inserted
566 */
Joachim Bingel0900a892014-06-30 16:26:21 +0000567 private void putIntoSuperObject(LinkedHashMap<String, Object> object) {
Joachim Bingel33bd45f2014-06-25 15:00:54 +0000568 putIntoSuperObject(object, 0);
569 }
Joachim Bingel0900a892014-06-30 16:26:21 +0000570
Joachim Bingelb2aa1c72014-07-01 12:48:46 +0000571 /**
572 * Puts an object into the operands list of its governing (or "super") object which had been placed on the
573 * {@link #objectStack} before. If this is the top object of the tree, it is put there
574 * instead of into some (non-existent) operand stack.
575 * @param object The object to be inserted
576 * @param objStackPosition Indicated the position of the super object on the {@link #objectStack} (in case not the top
577 * element of the stack is the super object.
578 */
Joachim Bingel33bd45f2014-06-25 15:00:54 +0000579 @SuppressWarnings({ "unchecked" })
580 private void putIntoSuperObject(LinkedHashMap<String, Object> object, int objStackPosition) {
Joachim Bingel0900a892014-06-30 16:26:21 +0000581 if (objectStack.size()>objStackPosition) {
Joachim Bingel33bd45f2014-06-25 15:00:54 +0000582 ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(objStackPosition).get("operands");
583 topObjectOperands.add(object);
Joachim Bingel33bd45f2014-06-25 15:00:54 +0000584 } else {
Joachim Bingel0900a892014-06-30 16:26:21 +0000585 requestMap.put("query", object);
Joachim Bingel33bd45f2014-06-25 15:00:54 +0000586 }
587 }
588
Joachim Bingel14239d82014-07-22 09:55:04 +0000589 /**
590 * Basically only increases the min and max counters as required by Poliqarp
591 * @param distanceNode
592 * @return
593 */
Joachim Bingel687e4d42014-07-30 09:34:18 +0000594 private Integer[] parseDistance(ParseTree distanceNode) {
595 Integer[] minmax = parseEmptySegments(distanceNode.getChild(0));
Joachim Bingel14239d82014-07-22 09:55:04 +0000596 Integer min = minmax[0];
597 Integer max = minmax[1];
Joachim Bingel687e4d42014-07-30 09:34:18 +0000598 min++;
599 if (max != null) max++;
600// min = cropToMaxValue(min);
601// max = cropToMaxValue(max);
602 return new Integer[]{min, max};
Joachim Bingel14239d82014-07-22 09:55:04 +0000603 }
604
Joachim Bingel687e4d42014-07-30 09:34:18 +0000605 private Integer[] parseEmptySegments(ParseTree emptySegments) {
Joachim Bingel14239d82014-07-22 09:55:04 +0000606 Integer min = 0;
607 Integer max = 0;
Joachim Bingel0900a892014-06-30 16:26:21 +0000608 ParseTree child;
609 for (int i = 0; i < emptySegments.getChildCount(); i++) {
610 child = emptySegments.getChild(i);
611 ParseTree nextSibling = emptySegments.getChild(i + 1);
612 if (child.toStringTree(parser).equals("(emptyToken [ ])")) {
613 if (nextSibling != null && getNodeCat(nextSibling).equals("repetition")) {
Joachim Bingel61631562014-07-24 14:26:02 +0000614 Integer[] minmax = parseRepetition(nextSibling);
Joachim Bingel0900a892014-06-30 16:26:21 +0000615 min += minmax[0];
Joachim Bingel687e4d42014-07-30 09:34:18 +0000616 if (minmax[1] != null) {
617 max += minmax[1];
618 } else {
619 max = null;
620 }
Joachim Bingel0900a892014-06-30 16:26:21 +0000621 } else {
622 min++;
623 max++;
624 }
Joachim Bingelbef23622014-04-29 15:04:02 +0000625 }
Joachim Bingelbef23622014-04-29 15:04:02 +0000626 }
Joachim Bingel687e4d42014-07-30 09:34:18 +0000627// min = cropToMaxValue(min);
628// max = cropToMaxValue(max);
629 return new Integer[]{min, max};
Joachim Bingelbef23622014-04-29 15:04:02 +0000630 }
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000631
Joachim Bingel14239d82014-07-22 09:55:04 +0000632
Joachim Bingel0900a892014-06-30 16:26:21 +0000633 private ParserRuleContext parsePoliqarpQuery(String p) throws QueryException {
634 checkUnbalancedPars(p);
Joachim Bingel0900a892014-06-30 16:26:21 +0000635 Lexer poliqarpLexer = new PoliqarpPlusLexer((CharStream) null);
636 ParserRuleContext tree = null;
Joachim Bingel0900a892014-06-30 16:26:21 +0000637 // Like p. 111
638 try {
Joachim Bingel0900a892014-06-30 16:26:21 +0000639 // Tokenize input data
640 ANTLRInputStream input = new ANTLRInputStream(p);
641 poliqarpLexer.setInputStream(input);
642 CommonTokenStream tokens = new CommonTokenStream(poliqarpLexer);
643 parser = new PoliqarpPlusParser(tokens);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000644
Joachim Bingel0900a892014-06-30 16:26:21 +0000645 // Don't throw out erroneous stuff
646 parser.setErrorHandler(new BailErrorStrategy());
647 parser.removeErrorListeners();
Joachim Bingel4b405f52013-11-15 15:29:30 +0000648
Joachim Bingel0900a892014-06-30 16:26:21 +0000649 // Get starting rule from parser
650 Method startRule = PoliqarpPlusParser.class.getMethod("request");
651 tree = (ParserRuleContext) startRule.invoke(parser, (Object[]) null);
652 log.debug(tree.toStringTree(parser));
653 }
654 // Some things went wrong ...
655 catch (Exception e) {
656 log.error("Could not parse query. Please make sure it is well-formed.");;
657 log.error("Underlying error is: "+e.getMessage());
658 System.err.println(e.getMessage());
659 }
Michael Hanld8116e52014-04-25 20:31:29 +0000660
Joachim Bingel0900a892014-06-30 16:26:21 +0000661 if (tree == null) {
662 throw new QueryException("The query you specified could not be processed. Please make sure it is well-formed.");
663 }
664 // Return the generated tree
665 return tree;
666 }
Nils Diewald4128a922014-07-18 14:39:24 +0000667}