blob: b4f22a19e2c626cf10c65b1fdc9421f9b30f3aed [file] [log] [blame]
Joachim Bingel4b405f52013-11-15 15:29:30 +00001package de.ids_mannheim.korap.query.serialize;
2
3import java.lang.reflect.Method;
4import java.util.ArrayList;
5import java.util.HashMap;
6import java.util.LinkedHashMap;
7import java.util.LinkedList;
8import java.util.List;
9import java.util.Map;
Joachim Bingel4b405f52013-11-15 15:29:30 +000010
11import org.antlr.v4.runtime.ANTLRInputStream;
12import org.antlr.v4.runtime.BailErrorStrategy;
13import org.antlr.v4.runtime.CharStream;
14import org.antlr.v4.runtime.CommonTokenStream;
15import org.antlr.v4.runtime.Lexer;
16import org.antlr.v4.runtime.Parser;
17import org.antlr.v4.runtime.ParserRuleContext;
18import org.antlr.v4.runtime.tree.ParseTree;
19
Joachim Bingelb5ada902013-11-19 14:46:04 +000020import de.ids_mannheim.korap.query.PoliqarpPlusLexer;
21import de.ids_mannheim.korap.query.PoliqarpPlusParser;
22import de.ids_mannheim.korap.query.serialize.AbstractSyntaxTree;
Joachim Bingel16da4e12013-12-17 09:48:12 +000023import de.ids_mannheim.korap.util.QueryException;
Joachim Bingel4b405f52013-11-15 15:29:30 +000024
Michael Hanl27e50582013-12-07 18:04:13 +000025import org.slf4j.Logger;
26import org.slf4j.LoggerFactory;
27
Joachim Bingel4b405f52013-11-15 15:29:30 +000028/**
29 * Map representation of Poliqarp syntax tree as returned by ANTLR
30 * @author joachim
31 *
32 */
33public class PoliqarpPlusTree extends AbstractSyntaxTree {
Michael Hanl27e50582013-12-07 18:04:13 +000034
35 Logger log = LoggerFactory.getLogger(PoliqarpPlusTree.class);
Joachim Bingel4b405f52013-11-15 15:29:30 +000036 /**
37 * Top-level map representing the whole request.
38 */
39 LinkedHashMap<String,Object> requestMap = new LinkedHashMap<String,Object>();
40 /**
41 * Keeps track of open node categories
42 */
43 LinkedList<String> openNodeCats = new LinkedList<String>();
44 /**
45 * Flag that indicates whether token fields or meta fields are currently being processed
46 */
47 boolean inMeta = false;
48 /**
49 * Flag that indicates whether a cq_segment is to be ignored (e.g. when it is empty, is followed directly by only a spanclass and has no other children etc...).
50 */
51 boolean ignoreCq_segment = false;
52 /**
Joachim Bingela14e13a2013-12-04 15:59:07 +000053 * Flag that indicates whether a cq_segments element is quantified by an occ element.
54 */
Joachim Bingel94a1ccd2013-12-10 10:37:29 +000055 boolean cqHasOccSibling = false;
56 /**
57 * Flag that indicates whether a cq_segments' children are quantified by an occ element.
58 */
59 boolean cqHasOccChild = false;
Joachim Bingela14e13a2013-12-04 15:59:07 +000060 /**
Joachim Bingelf143ac92013-12-04 18:52:54 +000061 * Flag for negation of complete field
62 */
63 boolean negField = false;
64 /**
Joachim Bingela67e6a32014-01-02 18:35:24 +000065 * Flag that indicates whether subsequent element is to be aligned.
66 */
67 boolean alignNext = false;
68 /**
69 * Flag that indicates whether current element has been aligned.
70 */
71 boolean isAligned = false;
72 /**
73 * Indicates a sequence which has an align operator as its child. Needed for deciding
74 * when to close the align group object.
75 */
76// ParseTree alignedSequence = null;
77 /**
Joachim Bingel4b405f52013-11-15 15:29:30 +000078 * Parser object deriving the ANTLR parse tree.
79 */
80 static Parser poliqarpParser;
81 /**
82 * Keeps track of all visited nodes in a tree
83 */
84 List<ParseTree> visited = new ArrayList<ParseTree>();
85
86 /**
87 * Keeps track of active fields (like 'base=foo').
88 */
89 LinkedList<ArrayList<Object>> fieldStack = new LinkedList<ArrayList<Object>>();
90 /**
Joachim Bingel4b405f52013-11-15 15:29:30 +000091 * Keeps track of active tokens.
92 */
93 LinkedList<LinkedHashMap<String,Object>> tokenStack = new LinkedList<LinkedHashMap<String,Object>>();
94 /**
Joachim Bingel4b405f52013-11-15 15:29:30 +000095 * Marks the currently active token in order to know where to add flags (might already have been taken away from token stack).
96 */
97 LinkedHashMap<String,Object> curToken = new LinkedHashMap<String,Object>();
Joachim Bingelf921b212013-11-20 16:54:38 +000098 /**
99 * Keeps track of active object.
100 */
Joachim Bingela14e13a2013-12-04 15:59:07 +0000101 LinkedList<LinkedHashMap<String,Object>> objectStack = new LinkedList<LinkedHashMap<String,Object>>();
102 /**
103 * Marks the object to which following occurrence information is to be added.
104 */
105 LinkedHashMap<String,Object> curOccGroup = new LinkedHashMap<String,Object>();
Joachim Bingel1417e192013-12-04 16:33:07 +0000106 /**
107 * Keeps track of how many objects there are to pop after every recursion of {@link #processNode(ParseTree)}
108 */
109 LinkedList<Integer> objectsToPop = new LinkedList<Integer>();
110 /**
111 * Keeps track of how many objects there are to pop after every recursion of {@link #processNode(ParseTree)}
112 */
113 LinkedList<Integer> tokensToPop = new LinkedList<Integer>();
114 /**
115 * Keeps track of how many objects there are to pop after every recursion of {@link #processNode(ParseTree)}
116 */
117 LinkedList<Integer> fieldsToPop = new LinkedList<Integer>();
Joachim Bingelf84bd622013-12-13 10:34:04 +0000118 /**
119 * If true, print debug statements
120 */
121 public static boolean debug = false;
122 /**
123 * Index of the current child of its parent (needed for relating occ elements to their operands).
124 */
125 int curChildIndex = 0;
126 /**
127 *
128 */
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000129 Integer stackedObjects = 0;
130 Integer stackedTokens= 0;
131 Integer stackedFields = 0;
Joachim Bingel1417e192013-12-04 16:33:07 +0000132
Joachim Bingel4b405f52013-11-15 15:29:30 +0000133
134 /**
Joachim Bingela14e13a2013-12-04 15:59:07 +0000135 * Most centrally, this class maintains a set of nested maps and lists which represent the JSON tree, which is built by the JSON serialiser
136 * on basis of the {@link #requestMap} at the root of the tree.
137 * <br/>
138 * The class further maintains a set of stacks which effectively keep track of which objects to embed in which containing objects.
Joachim Bingel4b405f52013-11-15 15:29:30 +0000139 *
Michael Hanl27e50582013-12-07 18:04:13 +0000140 * @param query The syntax tree as returned by ANTLR
Joachim Bingel16da4e12013-12-17 09:48:12 +0000141 * @throws QueryException
Joachim Bingel4b405f52013-11-15 15:29:30 +0000142 */
Joachim Bingel8c640e42014-02-07 16:20:47 +0000143 public PoliqarpPlusTree(String query) throws QueryException {
Joachim Bingelf84bd622013-12-13 10:34:04 +0000144 try {
Joachim Bingelb4da7022013-12-09 23:17:24 +0000145 process(query);
Joachim Bingelf84bd622013-12-13 10:34:04 +0000146 } catch (NullPointerException e) {
147 if (query.contains(" ")) {
148 System.err.println("Warning: It seems like your query contains illegal whitespace characters. Trying again with whitespaces removed...");
149 query = query.replaceAll(" ", "");
150 process(query);
151 } else {
Joachim Bingel16da4e12013-12-17 09:48:12 +0000152 try {
153 throw new QueryException("Error handling query.");
154 } catch (QueryException e1) {
155 e1.printStackTrace();
156 System.exit(1);
157 }
Joachim Bingelf84bd622013-12-13 10:34:04 +0000158 }
159 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000160 System.out.println(">>> "+requestMap.get("query")+" <<<");
Michael Hanl27e50582013-12-07 18:04:13 +0000161 log.info(">>> " + requestMap.get("query") + " <<<");
Joachim Bingel4b405f52013-11-15 15:29:30 +0000162 }
163
Joachim Bingel4b405f52013-11-15 15:29:30 +0000164 @Override
165 public Map<String, Object> getRequestMap() {
Joachim Bingel593964f2013-11-29 16:45:47 +0000166 return requestMap;
Joachim Bingel4b405f52013-11-15 15:29:30 +0000167 }
168
169 @Override
Joachim Bingel8c640e42014-02-07 16:20:47 +0000170 public void process(String query) throws QueryException {
Joachim Bingel16da4e12013-12-17 09:48:12 +0000171 ParseTree tree = null;
Joachim Bingel2f7e0d82013-12-08 23:10:41 +0000172 try {
173 tree = parsePoliqarpQuery(query);
Joachim Bingel16da4e12013-12-17 09:48:12 +0000174 } catch (QueryException e) {
175 try {
176 tree = parsePoliqarpQuery(query.replaceAll(" ", ""));
177 } catch (QueryException e1) {
178 System.exit(1);
179 }
Joachim Bingel2f7e0d82013-12-08 23:10:41 +0000180 }
Joachim Bingelf143ac92013-12-04 18:52:54 +0000181 System.out.println("Processing PoliqarpPlus");
Joachim Bingel2daf9862014-02-12 10:18:54 +0000182 requestMap.put("context", "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld");
183// QueryUtils.prepareContext(requestMap);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000184 processNode(tree);
185 }
186
Joachim Bingela14e13a2013-12-04 15:59:07 +0000187 /**
188 * Recursively calls itself with the children of the currently active node, traversing the tree nodes in a top-down, depth-first fashion.
189 * A list is maintained that contains all visited nodes
190 * in case they have been directly addressed by their (grand-/grand-grand-/...) parent node, such that some processing time is saved, as these nodes will
191 * not be processed. This method is effectively a list of if-statements that are responsible for treating the different node types correctly and filling the
192 * respective maps/lists.
193 *
194 * @param node The currently processed node. The process(String query) method calls this method with the root.
Joachim Bingel8c640e42014-02-07 16:20:47 +0000195 * @throws QueryException
Joachim Bingela14e13a2013-12-04 15:59:07 +0000196 */
Joachim Bingel4b405f52013-11-15 15:29:30 +0000197 @SuppressWarnings("unchecked")
Joachim Bingel8c640e42014-02-07 16:20:47 +0000198 private void processNode(ParseTree node) throws QueryException {
Joachim Bingel4b405f52013-11-15 15:29:30 +0000199 // Top-down processing
200 if (visited.contains(node)) return;
201 else visited.add(node);
202
Joachim Bingela67e6a32014-01-02 18:35:24 +0000203 if (alignNext) {
204 alignNext=false;
205 isAligned=true;
206 }
207
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000208 String nodeCat = QueryUtils.getNodeCat(node);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000209 openNodeCats.push(nodeCat);
210
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000211 stackedObjects = 0;
212 stackedTokens= 0;
213 stackedFields = 0;
Joachim Bingel2f7e0d82013-12-08 23:10:41 +0000214
Joachim Bingel03882702013-12-31 19:53:05 +0000215 if (debug) {
Joachim Bingel2f7e0d82013-12-08 23:10:41 +0000216 System.err.println(" "+objectStack);
217 System.err.println(" "+tokenStack);
218 System.out.println(openNodeCats);
219 }
Joachim Bingelf921b212013-11-20 16:54:38 +0000220
Joachim Bingel4b405f52013-11-15 15:29:30 +0000221
222 /*
223 ****************************************************************
224 ****************************************************************
225 * Processing individual node categories *
226 ****************************************************************
227 ****************************************************************
228 */
Joachim Bingel4b405f52013-11-15 15:29:30 +0000229
230 // cq_segments/sq_segments: token group
231 if (nodeCat.equals("cq_segments") || nodeCat.equals("sq_segments")) {
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000232 cqHasOccSibling = false;
233 cqHasOccChild = false;
Joachim Bingel4b405f52013-11-15 15:29:30 +0000234 // disregard empty segments in simple queries (parsed by ANTLR as empty cq_segments)
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000235 ignoreCq_segment = (node.getChildCount() == 1 && (node.getChild(0).toStringTree(poliqarpParser).equals(" ") || QueryUtils.getNodeCat(node.getChild(0)).equals("spanclass") || QueryUtils.getNodeCat(node.getChild(0)).equals("position")));
Joachim Bingela67e6a32014-01-02 18:35:24 +0000236 // ignore this node if it only serves as an aligned sequence container
237 if (node.getChildCount()>1) {
Joachim Bingel84e33df2014-01-31 14:02:46 +0000238 if (QueryUtils.getNodeCat(node.getChild(1)).equals("cq_segments") && QueryUtils.hasChild(node.getChild(1), "alignment")) {
239// if (QueryUtils.getNodeCat(node.getChild(0)).equals("align")) {
Joachim Bingela67e6a32014-01-02 18:35:24 +0000240 ignoreCq_segment = true;
241 }
242 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000243 if (!ignoreCq_segment) {
244 LinkedHashMap<String,Object> sequence = new LinkedHashMap<String,Object>();
Joachim Bingela14e13a2013-12-04 15:59:07 +0000245 // Step 0: cq_segments has 'occ' child -> introduce group as super group to the sequence/token/group
246 // this requires creating a group and inserting it at a suitable place
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000247 if (node.getParent().getChildCount()>curChildIndex+2 && QueryUtils.getNodeCat(node.getParent().getChild(curChildIndex+2)).equals("occ")) {
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000248 cqHasOccSibling = true;
249 createOccGroup(node);
250 }
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000251 if (QueryUtils.getNodeCat(node.getChild(node.getChildCount()-1)).equals("occ")) {
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000252 cqHasOccChild = true;
253 }
254 // Step I: decide type of element (one or more elements? -> token or sequence)
255 // take into account a possible 'occ' child with accompanying parantheses, therefore 3 extra children
256 int occExtraChildren = cqHasOccChild ? 3:0;
257 if (node.getChildCount()>1 + occExtraChildren) {
Joachim Bingelffd65e32014-01-22 14:22:57 +0000258 ParseTree emptySegments = QueryUtils.getFirstChildWithCat(node, "empty_segments");
259 if (emptySegments != null && emptySegments != node.getChild(0)) {
260 String[] minmax = parseEmptySegments(emptySegments);
261 Integer min = Integer.parseInt(minmax[0]);
262 Integer max = Integer.parseInt(minmax[1]);
Joachim Bingel2daf9862014-02-12 10:18:54 +0000263 sequence.put("@type", "korap:group");
264 sequence.put("operation", "operation:"+"sequence");
265// sequence.put("operation", "operation:"+"distance");
266 sequence.put("inOrder", true);
Joachim Bingelffd65e32014-01-22 14:22:57 +0000267 ArrayList<Object> constraint = new ArrayList<Object>();
Joachim Bingel11d5b152014-02-11 21:33:47 +0000268 sequence.put("distances", constraint);
Joachim Bingelffd65e32014-01-22 14:22:57 +0000269 ArrayList<Object> sequenceOperands = new ArrayList<Object>();
Joachim Bingel11d5b152014-02-11 21:33:47 +0000270 sequence.put("operands", sequenceOperands);
Joachim Bingelffd65e32014-01-22 14:22:57 +0000271 objectStack.push(sequence);
272 stackedObjects++;
273 LinkedHashMap<String, Object> distMap = new LinkedHashMap<String, Object>();
274 constraint.add(distMap);
Joachim Bingel2daf9862014-02-12 10:18:54 +0000275 distMap.put("@type", "korap:distance");
276 distMap.put("key", "w");
Joachim Bingelffd65e32014-01-22 14:22:57 +0000277 distMap.put("min", min);
278 distMap.put("max", max);
279 } else {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000280 sequence.put("@type", "korap:group");
281 sequence.put("operation", "operation:"+"sequence");
Joachim Bingelffd65e32014-01-22 14:22:57 +0000282 ArrayList<Object> sequenceOperands = new ArrayList<Object>();
283 if (emptySegments != null) {
284 String[] minmax = parseEmptySegments(emptySegments);
285 Integer min = Integer.parseInt(minmax[0]);
286 Integer max = Integer.parseInt(minmax[1]);
287 sequence.put("offset-min", min);
288 sequence.put("offset-max", max);
289 }
Joachim Bingel11d5b152014-02-11 21:33:47 +0000290 sequence.put("operands", sequenceOperands);
Joachim Bingelffd65e32014-01-22 14:22:57 +0000291 objectStack.push(sequence);
292 stackedObjects++;
293 }
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000294 } else {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000295 // if only child, make the sequence a mere token...
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000296 // ... but only if it has a real token/element beneath it
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000297 if (QueryUtils.getNodeCat(node.getChild(0)).equals("cq_segment")
298 || QueryUtils.getNodeCat(node.getChild(0)).equals("sq_segment")
299 || QueryUtils.getNodeCat(node.getChild(0)).equals("element") ) {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000300 sequence.put("@type", "korap:token");
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000301 tokenStack.push(sequence);
302 stackedTokens++;
303 objectStack.push(sequence);
304 stackedObjects++;
Joachim Bingel16da4e12013-12-17 09:48:12 +0000305 // else, it's a group (with shrink()/spanclass/align... as child)
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000306 } else {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000307 sequence.put("@type", "korap:group");
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000308 }
309 }
310 // Step II: decide where to put this element
311 // check if this is an argument for a containing occurrence group (see step 0)
312 if (cqHasOccSibling) {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000313 ArrayList<Object> topGroupOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000314 topGroupOperands.add(sequence);
315 // ...if not modified by occurrence, put into suitable super structure
316 } else {
Joachim Bingela14e13a2013-12-04 15:59:07 +0000317 if (openNodeCats.get(1).equals("query")) {
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000318 // cq_segment is top query node
319 if (node.getParent().getChildCount()==1) {
Joachim Bingel1417e192013-12-04 16:33:07 +0000320 // only child
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000321 requestMap.put("query", sequence);
Joachim Bingela14e13a2013-12-04 15:59:07 +0000322 } else {
Joachim Bingel1417e192013-12-04 16:33:07 +0000323 // not an only child, need to create containing sequence
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000324 if (node.getParent().getChild(0).equals(node)) {
Joachim Bingel1417e192013-12-04 16:33:07 +0000325 // if first child, create containing sequence and embed there
326 LinkedHashMap<String,Object> superSequence = new LinkedHashMap<String,Object>();
Joachim Bingel2daf9862014-02-12 10:18:54 +0000327 superSequence.put("@type", "korap:group");
328 superSequence.put("operation", "operation:"+"sequence");
Joachim Bingel1417e192013-12-04 16:33:07 +0000329 ArrayList<Object> operands = new ArrayList<Object>();
Joachim Bingel11d5b152014-02-11 21:33:47 +0000330 superSequence.put("operands", operands);
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000331 operands.add(sequence);
Joachim Bingel1417e192013-12-04 16:33:07 +0000332 requestMap.put("query", superSequence);
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000333 objectStack.push(superSequence); // add at 2nd position to keep current cq_segment accessible
334 stackedObjects++;
Joachim Bingel1417e192013-12-04 16:33:07 +0000335 } else {
336 // if not first child, add to previously created parent sequence
Joachim Bingel11d5b152014-02-11 21:33:47 +0000337 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000338 topSequenceOperands.add(sequence);
Joachim Bingel1417e192013-12-04 16:33:07 +0000339 }
Joachim Bingela14e13a2013-12-04 15:59:07 +0000340 }
341 } else if (!objectStack.isEmpty()){
342 // embed in super sequence
Joachim Bingel11d5b152014-02-11 21:33:47 +0000343 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000344 topSequenceOperands.add(sequence);
Joachim Bingela14e13a2013-12-04 15:59:07 +0000345 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000346 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000347 }
348 }
349
350 // cq_segment
351 if (nodeCat.equals("cq_segment")) {
Joachim Bingel666c2652013-12-08 15:07:29 +0000352 int onTopOfObjectStack = 0;
Joachim Bingel4b405f52013-11-15 15:29:30 +0000353 // Step I: determine whether to create new token or get token from the stack (if added by cq_segments)
354 LinkedHashMap<String, Object> token;
355 if (tokenStack.isEmpty()) {
356 token = new LinkedHashMap<String, Object>();
357 tokenStack.push(token);
Joachim Bingel1417e192013-12-04 16:33:07 +0000358 stackedTokens++;
Joachim Bingelf143ac92013-12-04 18:52:54 +0000359 // do this only if token is newly created, otherwise it'll be in objectStack twice
360 objectStack.push(token);
Joachim Bingel666c2652013-12-08 15:07:29 +0000361 onTopOfObjectStack = 1;
Joachim Bingelf143ac92013-12-04 18:52:54 +0000362 stackedObjects++;
Joachim Bingel4b405f52013-11-15 15:29:30 +0000363 } else {
364 // in case cq_segments has already added the token
365 token = tokenStack.getFirst();
366 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000367 curToken = token;
Joachim Bingel4b405f52013-11-15 15:29:30 +0000368 // Step II: start filling object and add to containing sequence
Joachim Bingel2daf9862014-02-12 10:18:54 +0000369 token.put("@type", "korap:token");
Joachim Bingel4b405f52013-11-15 15:29:30 +0000370 // add token to sequence only if it is not an only child (in that case, cq_segments has already added the info and is just waiting for the values from "field")
Joachim Bingela14e13a2013-12-04 15:59:07 +0000371 // take into account a possible 'occ' child
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000372 if (node.getParent().getChildCount()>1) {
Joachim Bingel87480d02014-01-17 14:07:46 +0000373 if (node.getText().equals("[]")) {
Joachim Bingelffd65e32014-01-22 14:22:57 +0000374// LinkedHashMap<String, Object> sequence = objectStack.get(onTopOfObjectStack);
375// String offsetStr = (String) sequence.get("offset");
376// if (offsetStr == null) {
377// sequence.put("offset", "1");
378// } else {
379// Integer offset = Integer.parseInt(offsetStr);
380// sequence.put("offset", offset+1);
381// }
382//
Joachim Bingel87480d02014-01-17 14:07:46 +0000383 } else {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000384 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(onTopOfObjectStack).get("operands");
Joachim Bingel87480d02014-01-17 14:07:46 +0000385 topSequenceOperands.add(token);
386 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000387 }
388 }
Joachim Bingelb4da7022013-12-09 23:17:24 +0000389
390 // cq_segment modified by occurrence
391 if (nodeCat.equals("cq_seg_occ")) {
392 LinkedHashMap<String,Object> group = new LinkedHashMap<String,Object>();
393 curOccGroup = group;
Joachim Bingel2daf9862014-02-12 10:18:54 +0000394 group.put("@type", "korap:group");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000395 group.put("operands", new ArrayList<Object>());
Joachim Bingelb4da7022013-12-09 23:17:24 +0000396 objectStack.push(group);
397 stackedObjects++;
398 // add group to sequence only if it is not an only child (in that case, cq_segments has already added the info and is just waiting for the values from "field")
399 // take into account a possible 'occ' child
400 if (node.getParent().getChildCount()>1) {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000401 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
Joachim Bingelb4da7022013-12-09 23:17:24 +0000402 topSequenceOperands.add(group);
403 } else {
404 requestMap.put("query", group);
405 }
406 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000407
408 // disjoint cq_segments, like ([base=foo][base=bar])|[base=foobar]
409 if (nodeCat.equals("cq_disj_segments")) {
410 LinkedHashMap<String,Object> disjunction = new LinkedHashMap<String,Object>();
Joachim Bingelf921b212013-11-20 16:54:38 +0000411 objectStack.push(disjunction);
Joachim Bingel1417e192013-12-04 16:33:07 +0000412 stackedObjects++;
Joachim Bingel4b405f52013-11-15 15:29:30 +0000413 ArrayList<Object> disjOperands = new ArrayList<Object>();
Joachim Bingel2daf9862014-02-12 10:18:54 +0000414 disjunction.put("@type", "korap:group");
415 disjunction.put("operation", "operation:"+"or");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000416 disjunction.put("operands", disjOperands);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000417 // decide where to put the disjunction
418 if (openNodeCats.get(1).equals("query")) {
419 requestMap.put("query", disjunction);
420 } else if (openNodeCats.get(1).equals("cq_segments")) {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000421 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
Joachim Bingel4b405f52013-11-15 15:29:30 +0000422 topSequenceOperands.add(disjunction);
423 }
424 }
425
426 // field element (outside meta)
427 if (nodeCat.equals("field")) {
428 LinkedHashMap<String,Object> fieldMap = new LinkedHashMap<String,Object>();
Joachim Bingel4b405f52013-11-15 15:29:30 +0000429 // Step I: extract info
Joachim Bingel81dd65b2013-12-07 17:55:04 +0000430 String fieldName = "";
431 ParseTree fieldNameNode = node.getChild(0);
432 if (fieldNameNode.getChildCount() == 1) {
433 fieldName = fieldNameNode.getChild(0).toStringTree(poliqarpParser); //e.g. (field_name base) (field_op !=) (re_query "bar*")
434 } else if (fieldNameNode.getChildCount() == 3) {
435 // layer is indicated, merge layer and field name (0th and 2nd children, 1st is "/")
436 String layer = fieldNameNode.getChild(0).toStringTree(poliqarpParser);
Joachim Bingel11d5b152014-02-11 21:33:47 +0000437 if (layer.equals("base")) layer="lemma";
Joachim Bingel81dd65b2013-12-07 17:55:04 +0000438 String layeredFieldName = fieldNameNode.getChild(2).toStringTree(poliqarpParser);
439 fieldName = layer+"/"+layeredFieldName;
440 }
441
Joachim Bingel4b405f52013-11-15 15:29:30 +0000442 String relation = node.getChild(1).getChild(0).toStringTree(poliqarpParser);
Joachim Bingelf143ac92013-12-04 18:52:54 +0000443 if (negField) {
444 if (relation.startsWith("!")) {
445 relation = relation.substring(1);
446 } else {
447 relation = "!"+relation;
448 }
449 }
Joachim Bingel11d5b152014-02-11 21:33:47 +0000450 if (relation.equals("=")) {
451 relation="eq";
452 }
453 if (relation.equals("!=")) {
454 relation="ne";
455 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000456 String value = "";
457 ParseTree valNode = node.getChild(2);
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000458 String valType = QueryUtils.getNodeCat(valNode);
Joachim Bingel2daf9862014-02-12 10:18:54 +0000459 fieldMap.put("@type", "korap:term");
Joachim Bingel4b405f52013-11-15 15:29:30 +0000460 if (valType.equals("simple_query")) {
461 value = valNode.getChild(0).getChild(0).toStringTree(poliqarpParser); //e.g. (simple_query (sq_segment foo))
462 } else if (valType.equals("re_query")) {
463 value = valNode.getChild(0).toStringTree(poliqarpParser); //e.g. (re_query "bar*")
Joachim Bingelbc537b62014-02-12 12:47:45 +0000464 fieldMap.put("type", "type:regex");
Joachim Bingel41e112e2014-02-12 10:46:18 +0000465 value = value.substring(1,value.length()-1); //remove trailing quotes
Joachim Bingel4b405f52013-11-15 15:29:30 +0000466 }
Joachim Bingel11d5b152014-02-11 21:33:47 +0000467 fieldMap.put("key", value);
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000468 if (fieldName.contains("/")) {
469 String[] splitted = fieldName.split("/");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000470 fieldMap.put("layer", splitted[1]);
471 fieldMap.put("foundry", splitted[0]);
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000472 } else {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000473 if (fieldName.equals("base")) fieldName = "lemma";
474 fieldMap.put("layer", fieldName);
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000475 }
Joachim Bingelee3b21d2014-02-12 12:34:59 +0000476 fieldMap.put("match", "match:"+relation);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000477 // Step II: decide where to put the field map (as the only value of a token or the meta filter or as a part of a group in case of coordinated fields)
478 if (fieldStack.isEmpty()) {
479 if (!inMeta) {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000480 tokenStack.getFirst().put("wrap", fieldMap);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000481 } else {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000482 ((HashMap<String, Object>) requestMap.get("meta")).put("key", fieldMap);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000483 }
484 } else {
485 fieldStack.getFirst().add(fieldMap);
486 }
487 visited.add(node.getChild(0));
488 visited.add(node.getChild(1));
489 visited.add(node.getChild(2));
490 }
491
Joachim Bingelf143ac92013-12-04 18:52:54 +0000492 if (nodeCat.equals("neg_field") || nodeCat.equals("neg_field_group")) {
493 negField=!negField;
494 }
495
Joachim Bingel4b405f52013-11-15 15:29:30 +0000496 // conj_field serves for both conjunctions and disjunctions
497 if (nodeCat.equals("conj_field")) {
498 LinkedHashMap<String,Object> group = new LinkedHashMap<String,Object>();
Joachim Bingelee3b21d2014-02-12 12:34:59 +0000499
500 group.put("@type", "korap:termGroup");
501
Joachim Bingel4b405f52013-11-15 15:29:30 +0000502 // Step I: get operator (& or |)
503 ParseTree operatorNode = node.getChild(1).getChild(0);
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000504 String operator = QueryUtils.getNodeCat(operatorNode);
Joachim Bingelf143ac92013-12-04 18:52:54 +0000505 String relation = operator.equals("&") ? "and" : "or";
506 if (negField) {
507 relation = relation.equals("or") ? "and": "or";
Joachim Bingel4b405f52013-11-15 15:29:30 +0000508 }
Joachim Bingelee3b21d2014-02-12 12:34:59 +0000509 group.put("relation", relation);
510 ArrayList<Object> groupOperands = new ArrayList<Object>();
511 group.put("operands", groupOperands);
512 fieldStack.push(groupOperands);
513 stackedFields++;
Joachim Bingel4b405f52013-11-15 15:29:30 +0000514 // Step II: decide where to put the group (directly under token or in top meta filter section or embed in super group)
515 if (openNodeCats.get(1).equals("cq_segment")) {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000516 tokenStack.getFirst().put("wrap", group);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000517 } else if (openNodeCats.get(1).equals("meta_field_group")) {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000518 ((HashMap<String, Object>) requestMap.get("meta")).put("key", group);
Joachim Bingelf143ac92013-12-04 18:52:54 +0000519 } else if (openNodeCats.get(2).equals("conj_field")) {
Joachim Bingel4b405f52013-11-15 15:29:30 +0000520 fieldStack.get(1).add(group);
Joachim Bingelf143ac92013-12-04 18:52:54 +0000521 } else {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000522 tokenStack.getFirst().put("wrap", group);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000523 }
524 // skip the operator
525 visited.add(node.getChild(1));
526 }
527
528
529 if (nodeCat.equals("sq_segment")) {
530 // Step I: determine whether to create new token or get token from the stack (if added by cq_segments)
531 LinkedHashMap<String, Object> token;
532 if (tokenStack.isEmpty()) {
533 token = new LinkedHashMap<String, Object>();
534 tokenStack.push(token);
Joachim Bingel1417e192013-12-04 16:33:07 +0000535 stackedTokens++;
Joachim Bingel4b405f52013-11-15 15:29:30 +0000536 } else {
537 // in case sq_segments has already added the token
538 token = tokenStack.getFirst();
539 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000540 curToken = token;
Joachim Bingelf921b212013-11-20 16:54:38 +0000541 objectStack.push(token);
Joachim Bingel1417e192013-12-04 16:33:07 +0000542 stackedObjects++;
Joachim Bingel4b405f52013-11-15 15:29:30 +0000543 // Step II: fill object (token values) and put into containing sequence
Joachim Bingel87480d02014-01-17 14:07:46 +0000544 if (node.getText().equals("[]")) {
545
546 } else {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000547 token.put("@type", "korap:token");
Joachim Bingel87480d02014-01-17 14:07:46 +0000548 String word = node.getChild(0).toStringTree(poliqarpParser);
549 LinkedHashMap<String,Object> tokenValues = new LinkedHashMap<String,Object>();
Joachim Bingel2daf9862014-02-12 10:18:54 +0000550 token.put("wrap", tokenValues);
551 tokenValues.put("@type", "korap:term");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000552 tokenValues.put("key", word);
553 tokenValues.put("layer", "orth");
Joachim Bingelee3b21d2014-02-12 12:34:59 +0000554 tokenValues.put("match", "match:"+"eq");
Joachim Bingel87480d02014-01-17 14:07:46 +0000555 // add token to sequence only if it is not an only child (in that case, sq_segments has already added the info and is just waiting for the values from "field")
556 if (node.getParent().getChildCount()>1) {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000557 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
Joachim Bingel87480d02014-01-17 14:07:46 +0000558 topSequenceOperands.add(token);
559 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000560 }
Joachim Bingela67e6a32014-01-02 18:35:24 +0000561 visited.add(node.getChild(0));
Joachim Bingel4b405f52013-11-15 15:29:30 +0000562 }
563
Joachim Bingelf143ac92013-12-04 18:52:54 +0000564 if (nodeCat.equals("re_query")) {
565 LinkedHashMap<String,Object> reQuery = new LinkedHashMap<String,Object>();
Joachim Bingelbc537b62014-02-12 12:47:45 +0000566 reQuery.put("type", "type:regex");
Joachim Bingelf143ac92013-12-04 18:52:54 +0000567 String regex = node.getChild(0).toStringTree(poliqarpParser);
Joachim Bingel11d5b152014-02-11 21:33:47 +0000568 reQuery.put("key", regex);
Joachim Bingelee3b21d2014-02-12 12:34:59 +0000569 reQuery.put("match", "match:"+"eq");
Joachim Bingelf143ac92013-12-04 18:52:54 +0000570
Joachim Bingele140cad2013-12-06 12:15:11 +0000571 // if in field, regex was already added there
Joachim Bingelf143ac92013-12-04 18:52:54 +0000572 if (!openNodeCats.get(1).equals("field")) {
573 LinkedHashMap<String,Object> token = new LinkedHashMap<String,Object>();
Joachim Bingel2daf9862014-02-12 10:18:54 +0000574 token.put("@type", "korap:token");
575 token.put("wrap", reQuery);
576 reQuery.put("@type", "korap:term");
Joachim Bingelf143ac92013-12-04 18:52:54 +0000577
578 if (openNodeCats.get(1).equals("query")) {
579 requestMap.put("query", token);
580 } else {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000581 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
Joachim Bingelf143ac92013-12-04 18:52:54 +0000582 topSequenceOperands.add(token);
583 }
584 }
Joachim Bingelf143ac92013-12-04 18:52:54 +0000585 }
586
Joachim Bingel84e33df2014-01-31 14:02:46 +0000587 if (nodeCat.equals("alignment")) {
Joachim Bingela67e6a32014-01-02 18:35:24 +0000588 alignNext = true;
Joachim Bingel16da4e12013-12-17 09:48:12 +0000589 LinkedHashMap<String,Object> alignGroup = new LinkedHashMap<String,Object>();
Joachim Bingela67e6a32014-01-02 18:35:24 +0000590 // push but don't increase the stackedObjects counter in order to keep this
591 // group open until the mother cq_segments node will be closed, since the
592 // operands are siblings of this align node rather than children, i.e. the group
593 // would be removed from the stack before seeing its operands.
Joachim Bingel16da4e12013-12-17 09:48:12 +0000594 objectStack.push(alignGroup);
Joachim Bingel84e33df2014-01-31 14:02:46 +0000595 stackedObjects++;
Joachim Bingel16da4e12013-12-17 09:48:12 +0000596 // Step I: get info
Joachim Bingel16da4e12013-12-17 09:48:12 +0000597 // fill group
Joachim Bingel2daf9862014-02-12 10:18:54 +0000598 alignGroup.put("@type", "korap:group");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000599 alignGroup.put("alignment", "left");
600 alignGroup.put("operands", new ArrayList<Object>());
Joachim Bingel16da4e12013-12-17 09:48:12 +0000601 // Step II: decide where to put the group
602 // add group to sequence only if it is not an only child (in that case, sq_segments has already added the info and is just waiting for the relevant info)
603 if (node.getParent().getChildCount()>1) {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000604 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
Joachim Bingel84e33df2014-01-31 14:02:46 +0000605 topSequenceOperands.add(alignGroup);
Joachim Bingel16da4e12013-12-17 09:48:12 +0000606 } else if (openNodeCats.get(2).equals("query")) {
607 requestMap.put("query", alignGroup);
608 } else {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000609 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
Joachim Bingel16da4e12013-12-17 09:48:12 +0000610 topSequenceOperands.add(alignGroup);
611 }
Joachim Bingela67e6a32014-01-02 18:35:24 +0000612 visited.add(node.getChild(0));
Joachim Bingel16da4e12013-12-17 09:48:12 +0000613 }
614
Joachim Bingel4b405f52013-11-15 15:29:30 +0000615 if (nodeCat.equals("element")) {
616 // Step I: determine whether to create new token or get token from the stack (if added by cq_segments)
617 LinkedHashMap<String, Object> elem;
618 if (tokenStack.isEmpty()) {
619 elem = new LinkedHashMap<String, Object>();
620 } else {
621 // in case sq_segments has already added the token
622 elem = tokenStack.getFirst();
623 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000624 curToken = elem;
Joachim Bingelf921b212013-11-20 16:54:38 +0000625 objectStack.push(elem);
Joachim Bingel1417e192013-12-04 16:33:07 +0000626 stackedObjects++;
Joachim Bingel4b405f52013-11-15 15:29:30 +0000627 // Step II: fill object (token values) and put into containing sequence
Joachim Bingel2daf9862014-02-12 10:18:54 +0000628 elem.put("@type", "korap:span");
Joachim Bingel4b405f52013-11-15 15:29:30 +0000629 String value = node.getChild(1).toStringTree(poliqarpParser);
Joachim Bingel11d5b152014-02-11 21:33:47 +0000630 elem.put("key", value);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000631 // add token to sequence only if it is not an only child (in that case, cq_segments has already added the info and is just waiting for the values from "field")
Joachim Bingelf921b212013-11-20 16:54:38 +0000632 if (node.getParent().getChildCount()>1) {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000633 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
Joachim Bingel4b405f52013-11-15 15:29:30 +0000634 topSequenceOperands.add(elem);
Joachim Bingelf921b212013-11-20 16:54:38 +0000635 }
Joachim Bingel2f7e0d82013-12-08 23:10:41 +0000636 visited.add(node.getChild(0));
637 visited.add(node.getChild(1));
638 visited.add(node.getChild(2));
Joachim Bingel4b405f52013-11-15 15:29:30 +0000639 }
640
641 if (nodeCat.equals("spanclass")) {
642 LinkedHashMap<String,Object> span = new LinkedHashMap<String,Object>();
Joachim Bingel2daf9862014-02-12 10:18:54 +0000643 span.put("@type", "korap:group");
644 span.put("operation", "operation:"+"class");
Joachim Bingelf921b212013-11-20 16:54:38 +0000645 objectStack.push(span);
Joachim Bingel1417e192013-12-04 16:33:07 +0000646 stackedObjects++;
Joachim Bingel4b405f52013-11-15 15:29:30 +0000647 ArrayList<Object> spanOperands = new ArrayList<Object>();
Joachim Bingel4b405f52013-11-15 15:29:30 +0000648 // Step I: get info
Joachim Bingel8c640e42014-02-07 16:20:47 +0000649 int classId = 0;
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000650 if (QueryUtils.getNodeCat(node.getChild(1)).equals("spanclass_id")) {
Joachim Bingel8c640e42014-02-07 16:20:47 +0000651 String ref = node.getChild(1).getChild(0).toStringTree(poliqarpParser);
652 try {
653 classId = Integer.parseInt(ref);
654 } catch (NumberFormatException e) {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000655 throw new QueryException("The specified class reference in the shrink/split-Operator is not a number: "+ref);
Joachim Bingel8c640e42014-02-07 16:20:47 +0000656 }
Joachim Bingel593964f2013-11-29 16:45:47 +0000657 // only allow class id up to 255
Joachim Bingel8c640e42014-02-07 16:20:47 +0000658 if (classId>255) {
659 classId = 0;
Joachim Bingel4b405f52013-11-15 15:29:30 +0000660 }
661 }
Joachim Bingel8c640e42014-02-07 16:20:47 +0000662 span.put("class", classId);
Joachim Bingel11d5b152014-02-11 21:33:47 +0000663 span.put("operands", spanOperands);
Joachim Bingelf921b212013-11-20 16:54:38 +0000664 // Step II: decide where to put the span
Joachim Bingel593964f2013-11-29 16:45:47 +0000665 // add span to sequence only if it is not an only child (in that case, cq_segments has already added the info and is just waiting for the relevant info)
Joachim Bingel666c2652013-12-08 15:07:29 +0000666 if (openNodeCats.get(2).equals("query") && node.getParent().getChildCount() == 1) {
Joachim Bingel4b405f52013-11-15 15:29:30 +0000667 requestMap.put("query", span);
Joachim Bingela14e13a2013-12-04 15:59:07 +0000668 } else if (objectStack.size()>1) {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000669 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
Joachim Bingela14e13a2013-12-04 15:59:07 +0000670 topSequenceOperands.add(span);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000671 }
Joachim Bingel2f7e0d82013-12-08 23:10:41 +0000672 // ignore leading and trailing braces
673 visited.add(node.getChild(0));
674 visited.add(node.getChild(node.getChildCount()-1));
Joachim Bingel8c640e42014-02-07 16:20:47 +0000675 if (QueryUtils.getNodeCat(node.getChild(1)).equals("spanclass_id")) {
Joachim Bingel2f7e0d82013-12-08 23:10:41 +0000676 visited.add(node.getChild(1));
677 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000678 }
679
680 if (nodeCat.equals("position")) {
Joachim Bingelf921b212013-11-20 16:54:38 +0000681 LinkedHashMap<String,Object> positionGroup = new LinkedHashMap<String,Object>();
Joachim Bingelf921b212013-11-20 16:54:38 +0000682 objectStack.push(positionGroup);
Joachim Bingel1417e192013-12-04 16:33:07 +0000683 stackedObjects++;
Joachim Bingelf921b212013-11-20 16:54:38 +0000684 ArrayList<Object> posOperands = new ArrayList<Object>();
685 // Step I: get info
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000686 String relation = QueryUtils.getNodeCat(node.getChild(0));
Joachim Bingel2daf9862014-02-12 10:18:54 +0000687 positionGroup.put("@type", "korap:group");
688 positionGroup.put("operation", "operation:"+"position");
689 positionGroup.put("frame", "frame:"+relation.toLowerCase());
Joachim Bingelffd65e32014-01-22 14:22:57 +0000690// positionGroup.put("@subtype", "incl");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000691 positionGroup.put("operands", posOperands);
Joachim Bingelf921b212013-11-20 16:54:38 +0000692 // Step II: decide where to put the group
693 // add group to sequence only if it is not an only child (in that case, sq_segments has already added the info and is just waiting for the relevant info)
694 if (node.getParent().getChildCount()>1) {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000695 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
Joachim Bingela14e13a2013-12-04 15:59:07 +0000696 topSequenceOperands.add(positionGroup);
Joachim Bingelfc984f62013-12-09 18:57:16 +0000697 } else if (openNodeCats.get(2).equals("query")) {
698 requestMap.put("query", positionGroup);
Joachim Bingelcd9ed332013-12-09 21:01:35 +0000699 } else {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000700 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
Joachim Bingelcd9ed332013-12-09 21:01:35 +0000701 topSequenceOperands.add(positionGroup);
702 }
Joachim Bingel4b405f52013-11-15 15:29:30 +0000703 }
704
Joachim Bingel593964f2013-11-29 16:45:47 +0000705 if (nodeCat.equals("shrink")) {
706 LinkedHashMap<String,Object> shrinkGroup = new LinkedHashMap<String,Object>();
707 objectStack.push(shrinkGroup);
Joachim Bingel1417e192013-12-04 16:33:07 +0000708 stackedObjects++;
Joachim Bingel593964f2013-11-29 16:45:47 +0000709 ArrayList<Object> shrinkOperands = new ArrayList<Object>();
710 // Step I: get info
Joachim Bingel8c640e42014-02-07 16:20:47 +0000711 ArrayList<Integer> classRefs = new ArrayList<Integer>();
712 String classRefOp = null;
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000713 if (QueryUtils.getNodeCat(node.getChild(2)).equals("spanclass_id")) {
Joachim Bingel8c640e42014-02-07 16:20:47 +0000714 ParseTree spanNode = node.getChild(2);
715 for (int i=0; i<spanNode.getChildCount()-1; i++) {
716 String ref = spanNode.getChild(i).getText();
717 System.err.println(" "+ref);
718 if (ref.equals("|") || ref.equals("&")) {
719 classRefOp = ref.equals("|") ? "intersection" : "union";
720 } else {
721 try {
722 int classRef = Integer.parseInt(ref);
723 // only allow class id up to 255
724 if (classRef>255) {
725 classRef = 0;
726 }
727 classRefs.add(classRef);
728 } catch (NumberFormatException e) {
729 throw new QueryException("The specified class reference in the shrink/split-Operator is not a number.");
730 }
731 }
Joachim Bingel593964f2013-11-29 16:45:47 +0000732 }
Joachim Bingel8c640e42014-02-07 16:20:47 +0000733 } else {
734 classRefs.add(0);
Joachim Bingel593964f2013-11-29 16:45:47 +0000735 }
Joachim Bingel2daf9862014-02-12 10:18:54 +0000736 shrinkGroup.put("@type", "korap:group");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000737 String type = node.getChild(0).toStringTree(poliqarpParser);
738 String operation = type.equals("shrink") ? "submatch" : "split";
Joachim Bingel2daf9862014-02-12 10:18:54 +0000739 shrinkGroup.put("operation", "operation:"+operation);
Joachim Bingel8c640e42014-02-07 16:20:47 +0000740 shrinkGroup.put("classRef", classRefs);
741 if (classRefOp != null) {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000742 shrinkGroup.put("classRefOp", "classRefOp:"+classRefOp);
Joachim Bingel8c640e42014-02-07 16:20:47 +0000743 }
Joachim Bingel11d5b152014-02-11 21:33:47 +0000744 shrinkGroup.put("operands", shrinkOperands);
Joachim Bingela14e13a2013-12-04 15:59:07 +0000745 int i=1;
Joachim Bingel593964f2013-11-29 16:45:47 +0000746 // Step II: decide where to put the group
747 // add group to sequence only if it is not an only child (in that case, sq_segments has already added the info and is just waiting for the relevant info)
748 if (node.getParent().getChildCount()>1) {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000749 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(i).get("operands"); // this shrinkGroup is on top
Joachim Bingel593964f2013-11-29 16:45:47 +0000750 topSequenceOperands.add(shrinkGroup);
751 } else if (openNodeCats.get(2).equals("query")) {
752 requestMap.put("query", shrinkGroup);
Joachim Bingela14e13a2013-12-04 15:59:07 +0000753 } else if (objectStack.size()>1) {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000754 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(i).get("operands");
Joachim Bingela14e13a2013-12-04 15:59:07 +0000755 topSequenceOperands.add(shrinkGroup);
Joachim Bingel593964f2013-11-29 16:45:47 +0000756 }
Joachim Bingel593964f2013-11-29 16:45:47 +0000757 visited.add(node.getChild(0));
Joachim Bingel4b405f52013-11-15 15:29:30 +0000758 }
759
760 // repetition of token group
761 if (nodeCat.equals("occ")) {
762 ParseTree occChild = node.getChild(0);
763 String repetition = occChild.toStringTree(poliqarpParser);
Joachim Bingel2daf9862014-02-12 10:18:54 +0000764 int[] minmax = parseRepetition(repetition);
765 curOccGroup.put("operation", "operation:"+"repetition");
766 curOccGroup.put("min", minmax[0]);
767 curOccGroup.put("max", minmax[1]);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000768 visited.add(occChild);
769 }
770
771 // flags for case sensitivity and whole-word-matching
772 if (nodeCat.equals("flag")) {
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000773 String flag = QueryUtils.getNodeCat(node.getChild(0)).substring(1); //substring removes leading slash '/'
Joachim Bingel4b405f52013-11-15 15:29:30 +0000774 // add to current token's value
Joachim Bingelee3b21d2014-02-12 12:34:59 +0000775 if (flag.contains("i")) ((HashMap<String, Object>) curToken.get("wrap")).put("caseInsensitive", true);
776 else if (flag.contains("I")) ((HashMap<String, Object>) curToken.get("wrap")).put("caseInsensitive", false);
777 else ((HashMap<String, Object>) curToken.get("wrap")).put("flag", flag);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000778 }
779
780 if (nodeCat.equals("meta")) {
781 inMeta=true;
782 LinkedHashMap<String,Object> metaFilter = new LinkedHashMap<String,Object>();
783 requestMap.put("meta", metaFilter);
Joachim Bingel2daf9862014-02-12 10:18:54 +0000784 metaFilter.put("@type", "korap:meta");
Joachim Bingel4b405f52013-11-15 15:29:30 +0000785 }
786
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000787 if (nodeCat.equals("within") && !QueryUtils.getNodeCat(node.getParent()).equals("position")) {
Joachim Bingel4b405f52013-11-15 15:29:30 +0000788 ParseTree domainNode = node.getChild(2);
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000789 String domain = QueryUtils.getNodeCat(domainNode);
Joachim Bingelf921b212013-11-20 16:54:38 +0000790 LinkedHashMap<String,Object> curObject = (LinkedHashMap<String, Object>) objectStack.getFirst();
Joachim Bingel4b405f52013-11-15 15:29:30 +0000791 curObject.put("within", domain);
792 visited.add(node.getChild(0));
793 visited.add(node.getChild(1));
794 visited.add(domainNode);
795 }
796
Joachim Bingel1417e192013-12-04 16:33:07 +0000797 objectsToPop.push(stackedObjects);
798 tokensToPop.push(stackedTokens);
799 fieldsToPop.push(stackedFields);
800
Joachim Bingel4b405f52013-11-15 15:29:30 +0000801 /*
802 ****************************************************************
803 ****************************************************************
804 * recursion until 'request' node (root of tree) is processed *
Joachim Bingel7fd4b1b2013-12-04 09:04:40 +0000805 ****************************************************************
Joachim Bingel4b405f52013-11-15 15:29:30 +0000806 ****************************************************************
807 */
808 for (int i=0; i<node.getChildCount(); i++) {
809 ParseTree child = node.getChild(i);
Joachim Bingelf84bd622013-12-13 10:34:04 +0000810 curChildIndex = i;
Joachim Bingel4b405f52013-11-15 15:29:30 +0000811 processNode(child);
812 }
813
Joachim Bingelf143ac92013-12-04 18:52:54 +0000814 // set negField back
815 if (nodeCat.equals("neg_field") || nodeCat.equals("neg_field_group")) {
816 negField = !negField;
817 }
818
Joachim Bingela67e6a32014-01-02 18:35:24 +0000819 // pop the align group that was introduced by previous 'align' but never closed
Joachim Bingel84e33df2014-01-31 14:02:46 +0000820// if (isAligned) {
821// isAligned=false;
822// objectStack.pop();
823// }
Joachim Bingela67e6a32014-01-02 18:35:24 +0000824
Joachim Bingel1417e192013-12-04 16:33:07 +0000825 // Stuff that happens when leaving a node (taking items off the stacks)
826 for (int i=0; i<objectsToPop.get(0); i++) {
Joachim Bingelf921b212013-11-20 16:54:38 +0000827 objectStack.pop();
Joachim Bingel4b405f52013-11-15 15:29:30 +0000828 }
Joachim Bingel1417e192013-12-04 16:33:07 +0000829 objectsToPop.pop();
830 for (int i=0; i<tokensToPop.get(0); i++) {
Joachim Bingel4b405f52013-11-15 15:29:30 +0000831 tokenStack.pop();
832 }
Joachim Bingel1417e192013-12-04 16:33:07 +0000833 tokensToPop.pop();
834 for (int i=0; i<fieldsToPop.get(0); i++) {
Joachim Bingel4b405f52013-11-15 15:29:30 +0000835 fieldStack.pop();
836 }
Joachim Bingel1417e192013-12-04 16:33:07 +0000837 fieldsToPop.pop();
Joachim Bingel4b405f52013-11-15 15:29:30 +0000838 openNodeCats.pop();
Joachim Bingel4b405f52013-11-15 15:29:30 +0000839 }
840
Joachim Bingel2daf9862014-02-12 10:18:54 +0000841 private int[] parseRepetition(String repetition) {
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000842 if (repetition.equals("*")) {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000843 return new int[] {0, 100};
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000844 } else if (repetition.equals("+")) {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000845 return new int[] {1, 100};
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000846 } else if (repetition.equals("?")) {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000847 return new int[] {0, 1};
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000848 } else {
849 repetition = repetition.substring(1, repetition.length()-1); // remove braces
Joachim Bingel2daf9862014-02-12 10:18:54 +0000850 String[] splitted = repetition.split(",");
851 return new int[] {Integer.parseInt(splitted[0]), Integer.parseInt(splitted[1])};
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000852 }
853 }
854
Joachim Bingelffd65e32014-01-22 14:22:57 +0000855 private String[] parseEmptySegments(ParseTree emptySegments) {
856 String[] minmax = new String[2];
857 Integer min = 0;
858 Integer max = 0;
859 ParseTree child;
860 for (int i=0; i<emptySegments.getChildCount()-1; i++) {
861 child = emptySegments.getChild(i);
862 ParseTree nextSibling = emptySegments.getChild(i+1);
863 String nextSiblingString = nextSibling.toStringTree();
Joachim Bingelffd65e32014-01-22 14:22:57 +0000864 if (child.toStringTree().equals("[]")) {
865 if (nextSiblingString.equals("?")) {
866 max++;
867 } else if (nextSiblingString.startsWith("{")) {
868 String occ = nextSiblingString.substring(1,nextSiblingString.length()-1);
869 System.out.println(occ);
870 if (occ.contains(",")) {
871 String[] minmaxOcc = occ.split(",");
872 min += Integer.parseInt(minmaxOcc[0]);
873 max += Integer.parseInt(minmaxOcc[1]);
874 } else {
875 min += Integer.parseInt(occ);
876 max += Integer.parseInt(occ);
877 }
878 } else {
879 min++;
880 max++;
881 }
882 }
883 }
884 child = emptySegments.getChild(emptySegments.getChildCount()-1);
885 if (child.toStringTree().equals("[]")) {
886 min++;
887 max++;
888 }
889 minmax[0] = min.toString();
890 minmax[1] = max.toString();
891 return minmax;
892 }
893
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000894 @SuppressWarnings("unchecked")
895 private void createOccGroup(ParseTree node) {
896 LinkedHashMap<String,Object> occGroup = new LinkedHashMap<String,Object>();
Joachim Bingel2daf9862014-02-12 10:18:54 +0000897 occGroup.put("@type", "korap:group");
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000898 ArrayList<Object> groupOperands = new ArrayList<Object>();
Joachim Bingel11d5b152014-02-11 21:33:47 +0000899 occGroup.put("operands", groupOperands);
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000900 curOccGroup = occGroup;
901 objectStack.push(occGroup);
902 stackedObjects++;
903 // if only this group is on the object stack, add as top query element
904 if (objectStack.size()==1) {
905 requestMap.put("query", occGroup);
906 // embed in super sequence
907 } else {
Joachim Bingel11d5b152014-02-11 21:33:47 +0000908 ArrayList<Object> topSequenceOperands = (ArrayList<Object>) objectStack.get(1).get("operands");
Joachim Bingel94a1ccd2013-12-10 10:37:29 +0000909 topSequenceOperands.add(occGroup);
910 }
911 }
912
Joachim Bingel4b405f52013-11-15 15:29:30 +0000913
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000914
Joachim Bingelfc984f62013-12-09 18:57:16 +0000915
Joachim Bingel16da4e12013-12-17 09:48:12 +0000916 private static ParserRuleContext parsePoliqarpQuery (String p) throws QueryException {
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000917 QueryUtils.checkUnbalancedPars(p);
Joachim Bingelfc984f62013-12-09 18:57:16 +0000918
Joachim Bingel4b405f52013-11-15 15:29:30 +0000919 Lexer poliqarpLexer = new PoliqarpPlusLexer((CharStream)null);
920 ParserRuleContext tree = null;
921 // Like p. 111
922 try {
923
924 // Tokenize input data
925 ANTLRInputStream input = new ANTLRInputStream(p);
926 poliqarpLexer.setInputStream(input);
927 CommonTokenStream tokens = new CommonTokenStream(poliqarpLexer);
928 poliqarpParser = new PoliqarpPlusParser(tokens);
929
930 // Don't throw out erroneous stuff
931 poliqarpParser.setErrorHandler(new BailErrorStrategy());
932 poliqarpParser.removeErrorListeners();
933
934 // Get starting rule from parser
935 Method startRule = PoliqarpPlusParser.class.getMethod("request");
936 tree = (ParserRuleContext) startRule.invoke(poliqarpParser, (Object[])null);
937 }
938
939 // Some things went wrong ...
940 catch (Exception e) {
941 System.err.println( e.getMessage() );
942 }
Joachim Bingelfc984f62013-12-09 18:57:16 +0000943
Joachim Bingel16da4e12013-12-17 09:48:12 +0000944 if (tree==null) throw new QueryException(
Joachim Bingel2f7e0d82013-12-08 23:10:41 +0000945 "The query you specified could not be processed. Please make sure it is well-formed.");
946
Joachim Bingel4b405f52013-11-15 15:29:30 +0000947 // Return the generated tree
948 return tree;
949 }
950
951 public static void main(String[] args) {
952 /*
953 * For testing
954 */
Joachim Bingel84e33df2014-01-31 14:02:46 +0000955
Joachim Bingel84e33df2014-01-31 14:02:46 +0000956
Joachim Bingel4b405f52013-11-15 15:29:30 +0000957 String[] queries = new String[] {
Joachim Bingel8c640e42014-02-07 16:20:47 +0000958 "shrink(1|2:{1:[base=der]}{2:[base=Mann]})",
959// "[base=foo] meta (author=name&year=2000)",
960// "[base=foo] meta year=2000",
Joachim Bingel11d5b152014-02-11 21:33:47 +0000961 "{[base=Mann]}",
Joachim Bingelee3b21d2014-02-12 12:34:59 +0000962 "shrink(1:[orth=Der]{1:[orth=Mann][orth=geht]})",
963 "[base=Mann/i]"
Joachim Bingelfc984f62013-12-09 18:57:16 +0000964 };
Joachim Bingel16da4e12013-12-17 09:48:12 +0000965 PoliqarpPlusTree.debug=true;
Joachim Bingel4b405f52013-11-15 15:29:30 +0000966 for (String q : queries) {
967 try {
968 System.out.println(q);
Joachim Bingel16da4e12013-12-17 09:48:12 +0000969 System.out.println(PoliqarpPlusTree.parsePoliqarpQuery(q).toStringTree(PoliqarpPlusTree.poliqarpParser));
Joachim Bingel4b405f52013-11-15 15:29:30 +0000970 @SuppressWarnings("unused")
971 PoliqarpPlusTree pt = new PoliqarpPlusTree(q);
Joachim Bingel2f7e0d82013-12-08 23:10:41 +0000972 System.out.println(q);
Joachim Bingel4b405f52013-11-15 15:29:30 +0000973 System.out.println();
974
Joachim Bingel2f7e0d82013-12-08 23:10:41 +0000975 } catch (Exception npe) {
Joachim Bingel4b405f52013-11-15 15:29:30 +0000976 npe.printStackTrace();
977 System.out.println("null\n");
978 }
979 }
980 }
981
982}