blob: 3aa9322c111b31da92cb7a11506ae8936f0a9af9 [file] [log] [blame]
Joachim Bingel5c93f902013-11-19 14:49:04 +00001package de.ids_mannheim.korap.query.serialize;
2
Michael Hanla60a2f22014-05-15 19:50:38 +00003import de.ids_mannheim.korap.query.cosmas2.c2psLexer;
4import de.ids_mannheim.korap.query.cosmas2.c2psParser;
5import de.ids_mannheim.korap.query.serialize.util.CosmasCondition;
6import de.ids_mannheim.korap.query.serialize.util.ResourceMapper;
7import de.ids_mannheim.korap.util.QueryException;
Joachim Bingel5c93f902013-11-19 14:49:04 +00008import org.antlr.runtime.ANTLRStringStream;
9import org.antlr.runtime.RecognitionException;
10import org.antlr.runtime.tree.Tree;
Joachim Bingelb5f7bf02014-01-07 16:36:54 +000011import org.antlr.v4.runtime.tree.ParseTree;
12import org.slf4j.Logger;
13import org.slf4j.LoggerFactory;
Joachim Bingel5c93f902013-11-19 14:49:04 +000014
Michael Hanla60a2f22014-05-15 19:50:38 +000015import java.util.*;
16import java.util.regex.Matcher;
17import java.util.regex.Pattern;
Joachim Bingel5c93f902013-11-19 14:49:04 +000018
19/**
20 * Map representation of CosmasII syntax tree as returned by ANTLR
Joachim Bingel5c93f902013-11-19 14:49:04 +000021 *
Michael Hanla60a2f22014-05-15 19:50:38 +000022 * @author joachim
Joachim Bingel5c93f902013-11-19 14:49:04 +000023 */
Joachim Bingelc8a28e42014-04-24 15:06:42 +000024public class CosmasTree extends Antlr3AbstractSyntaxTree {
Joachim Bingelb5f7bf02014-01-07 16:36:54 +000025
Michael Hanla60a2f22014-05-15 19:50:38 +000026 private static Logger log = LoggerFactory.getLogger(CosmasTree.class);
Joachim Bingeleecc7652014-01-11 17:21:07 +000027
Michael Hanla60a2f22014-05-15 19:50:38 +000028 /*
29 * Following collections have the following functions:
30 * - the request is a map with two keys (meta/query): {meta=[], query=[]}
31 * - the query is a list of token group maps: {meta=[], query=[tg1=[], tg2=[]]}
32 * - each token group is a list of tokens: {meta=[], query=[tg1=[t1_1, t1_2], tg2=[t2_1, t2_2, t2_3]]}
33 * - each token corresponds to a single 'fields' linked list {meta=[], query=[tg1=[t1_1=[], t1_2=[]], ... ]}
34 * - each fields list contains a logical operator and 'field maps' defining attributes and values
35 * {meta=[], query=[tg1=[t1_1=[[disj, {base=foo}, {base=bar}]], t1_2=[]], ... ]}
36 */
37 String query;
38 LinkedHashMap<String, Object> requestMap = new LinkedHashMap<String, Object>();
39 /**
40 * Keeps track of active object.
41 */
42 LinkedList<LinkedHashMap<String, Object>> objectStack = new LinkedList<LinkedHashMap<String, Object>>();
43 /**
44 * Makes it possible to store several distantTokenGroups
45 */
46 LinkedList<ArrayList<List<Object>>> distantTokensStack = new LinkedList<ArrayList<List<Object>>>();
47 /**
48 * Field for repetition query (Kleene + or * operations, or min/max queries: {2,4}
49 */
50 String repetition = "";
51 /**
52 * Keeps track of open node categories
53 */
54 LinkedList<String> openNodeCats = new LinkedList<String>();
55 /**
56 * Global control structure for fieldGroups, keeps track of open fieldGroups.
57 */
58 LinkedList<ArrayList<Object>> openFieldGroups = new LinkedList<ArrayList<Object>>();
59 /**
60 * Keeps track of how many objects there are to pop after every recursion of {@link #processNode(ParseTree)}
61 */
62 LinkedList<Integer> objectsToPop = new LinkedList<Integer>();
63 /**
64 * Flag that indicates whether token fields or meta fields are currently being processed
65 */
66 boolean inMeta = false;
67 /**
68 *
69 */
70 int classRefCounter = 1;
71 boolean negate = false;
Joachim Bingeleecc7652014-01-11 17:21:07 +000072
Michael Hanla60a2f22014-05-15 19:50:38 +000073 Tree cosmasTree;
74
75 LinkedHashMap<String, Object> treeMap = new LinkedHashMap<String, Object>();
76 /**
77 * Keeps track of all visited nodes in a tree
78 */
79 List<Tree> visited = new ArrayList<Tree>();
80
81 Integer stackedObjects = 0;
82
83 private static boolean debug = false;
84 /**
85 * A list of node categories that can be sequenced (i.e. which can be in a sequence with any number of other nodes in this list)
86 */
87 private final List<String> sequentiableCats = Arrays.asList(new String[]{"OPWF", "OPLEM", "OPMORPH", "OPBEG", "OPEND", "OPIN", "OPBED"});
88 /**
89 * Keeps track of sequenced nodes, i.e. nodes that implicitly govern a sequence, as in (C2PQ (OPWF der) (OPWF Mann)).
90 * This is necessary in order to know when to take the sequence off the object stack, as the sequence is introduced by the
91 * first child but cannot be closed after this first child in order not to lose its siblings
92 */
93 private LinkedList<Tree> sequencedNodes = new LinkedList<Tree>();
94
95 private boolean hasSequentiableSiblings;
96
97 /**
98 * Keeps track of operands lists that are to be serialised in an inverted
99 * order (e.g. the IN() operator) compared to their AST representation.
100 */
101 private LinkedList<ArrayList<Object>> invertedOperandsLists = new LinkedList<ArrayList<Object>>();
102
103 private LinkedList<ArrayList<ArrayList<Object>>> distributedOperandsLists = new LinkedList<ArrayList<ArrayList<Object>>>();
104
/**
 * Parses the given Cosmas II query and serialises it into the request map, which is
 * afterwards available through {@link #getRequestMap()}.
 *
 * @param query the Cosmas II query string
 * @throws QueryException if the query cannot be processed
 */
public CosmasTree(String query) throws QueryException {
    this.query = query;
    process(query);
    // The serialised query is reported through the logger only; no direct console output.
    log.info(">>> {} <<<", requestMap.get("query"));
}
116
117 @Override
118 public Map<String, Object> getRequestMap() {
119 return this.requestMap;
120 }
121
122
123 @Override
124 public void process(String query) throws QueryException {
125 Tree tree = null;
126 try {
127 tree = parseCosmasQuery(query);
128 } catch (RecognitionException e) {
129 throw new QueryException("Your query could not be processed. Please make sure it is well-formed.");
130 } catch (NullPointerException e) {
131 throw new QueryException("Your query could not be processed. Please make sure it is well-formed.");
132 }
133 log.info("Processing CosmasII query");
134 System.out.println("Processing Cosmas");
135 requestMap.put("@context", "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld");
Joachim Bingelc8a28e42014-04-24 15:06:42 +0000136// prepareContext(requestMap);
Michael Hanla60a2f22014-05-15 19:50:38 +0000137 processNode(tree);
138 log.info(requestMap.toString());
139 }
140
141 @SuppressWarnings("unchecked")
142 private void processNode(Tree node) {
143
144 // Top-down processing
145 if (visited.contains(node)) return;
146 else visited.add(node);
147
148
149 String nodeCat = getNodeCat(node);
150 openNodeCats.push(nodeCat);
151
152 stackedObjects = 0;
153
154 if (debug) {
155 System.err.println(" " + objectStack);
156 System.out.println(openNodeCats);
157 }
158
Joachim Bingel5c93f902013-11-19 14:49:04 +0000159
160 /* ***************************************
Michael Hanla60a2f22014-05-15 19:50:38 +0000161 * Processing individual node categories *
Joachim Bingel5c93f902013-11-19 14:49:04 +0000162 *****************************************/
Joachim Bingeld5161a12014-01-08 11:15:49 +0000163
Michael Hanla60a2f22014-05-15 19:50:38 +0000164
165 // Check for potential implicit sequences as in (C2PQ (OPWF der) (OPWF Mann)). The sequence is introduced
166 // by the first child if it (and its siblings) is sequentiable.
167 if (sequentiableCats.contains(nodeCat)) {
168 // for each node, check if parent has more than one child (-> could be implicit sequence)
169 Tree parent = node.getParent();
170 if (parent.getChildCount() > 1) {
171 // if node is first child of parent...
172 if (node == parent.getChild(0)) {
173 hasSequentiableSiblings = false;
174 for (int i = 1; i < parent.getChildCount(); i++) {
175 if (sequentiableCats.contains(getNodeCat(parent.getChild(i)))) {
176 hasSequentiableSiblings = true;
177 continue;
178 }
179 }
180 if (hasSequentiableSiblings) {
181 // Step I: create sequence
182 LinkedHashMap<String, Object> sequence = new LinkedHashMap<String, Object>();
183 sequence.put("@type", "korap:group");
184 sequence.put("operation", "operation:sequence");
185 sequence.put("operands", new ArrayList<Object>());
186 // push sequence on object stack but don't increment stackedObjects counter since
187 // we've got to wait until the parent node is processed - therefore, add the parent
188 // to the sequencedNodes list and remove the sequence from the stack when the parent
189 // has been processed
190 objectStack.push(sequence);
191 sequencedNodes.push(parent);
192 // Step II: decide where to put sequence
193 putIntoSuperObject(sequence, 1);
194 }
195 }
196 }
197 }
198
199 // Nodes introducing tokens. Process all in the same manner, except for the fieldMap entry
200 if (nodeCat.equals("OPWF") || nodeCat.equals("OPLEM")) {
201
202 //Step I: get info
203 LinkedHashMap<String, Object> token = new LinkedHashMap<String, Object>();
204 token.put("@type", "korap:token");
205 objectStack.push(token);
206 stackedObjects++;
207 LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>();
208 token.put("wrap", fieldMap);
209
210 fieldMap.put("@type", "korap:term");
211 // make category-specific fieldMap entry
212 String attr = nodeCat.equals("OPWF") ? "orth" : "lemma";
213 String value = node.getChild(0).toStringTree().replaceAll("\"", "");
214 if (value.startsWith("$")) {
215 value = value.substring(1);
216 fieldMap.put("caseInsensitive", true);
217 }
218
219 attr = ResourceMapper.descriptor2policy(attr);
220 fieldMap.put("key", value);
221 fieldMap.put("layer", attr);
222
223 // negate field (see above)
224 if (negate) {
225 fieldMap.put("match", "match:ne");
226 } else {
227 fieldMap.put("match", "match:eq");
228 }
229 //Step II: decide where to put
230 if (!hasChild(node, "TPOS")) {
231 putIntoSuperObject(token, 1);
232 } else {
233
234 }
235
236 }
237
238 if (nodeCat.equals("OPMORPH")) {
239 //Step I: get info
240 LinkedHashMap<String, Object> token = new LinkedHashMap<String, Object>();
241 token.put("@type", "korap:token");
242 LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>();
243 token.put("wrap", fieldMap);
244
245 fieldMap.put("@type", "korap:term");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000246// fieldMap.put("key", "morph:"+node.getChild(0).toString().replace(" ", "_"));
Michael Hanla60a2f22014-05-15 19:50:38 +0000247 String[] morphValues = node.getChild(0).toString().split(" ");
248 String pos = morphValues[0];
249
250 fieldMap.put("key", pos);
251
252 fieldMap.put("layer", ResourceMapper.descriptor2policy("pos"));
253 // make category-specific fieldMap entry
254 // negate field (see above)
255 if (negate) {
256 fieldMap.put("match", "match:ne");
257 } else {
258 fieldMap.put("match", "match:eq");
259 }
Joachim Bingelc8a28e42014-04-24 15:06:42 +0000260// List<String> morphValues = parseMorph(node.getChild(0).toStringTree());
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000261// System.err.println(morphValues);
262// if (morphValues.size() == 1) {
263// LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>();
Joachim Bingel11d5b152014-02-11 21:33:47 +0000264// token.put("key", fieldMap);
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000265//
Joachim Bingel2daf9862014-02-12 10:18:54 +0000266// fieldMap.put("@type", "korap:term");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000267// fieldMap.put("key", morphValues.get(0));
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000268// // make category-specific fieldMap entry
269// // negate field (see above)
270// if (negate) {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000271// fieldMap.put("operation", "operation:"+ "!=");
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000272// } else {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000273// fieldMap.put("operation", "operation:"+ "=");
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000274// }
275// } else {
276// LinkedHashMap<String, Object> conjGroup = new LinkedHashMap<String, Object>();
Joachim Bingel11d5b152014-02-11 21:33:47 +0000277// token.put("key", conjGroup);
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000278// ArrayList<Object> conjOperands = new ArrayList<Object>();
Joachim Bingel2daf9862014-02-12 10:18:54 +0000279// conjGroup.put("@type", "korap:group");
280// conjGroup.put("operation", "operation:"+ "and");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000281// conjGroup.put("operands", conjOperands);
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000282// for (String value : morphValues) {
283// LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>();
Joachim Bingel11d5b152014-02-11 21:33:47 +0000284// token.put("key", fieldMap);
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000285//
Joachim Bingel2daf9862014-02-12 10:18:54 +0000286// fieldMap.put("@type", "korap:term");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000287// fieldMap.put("key", value);
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000288// // make category-specific fieldMap entry
289// // negate field (see above)
290// if (negate) {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000291// fieldMap.put("operation", "operation:"+ "!=");
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000292// } else {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000293// fieldMap.put("operation", "operation:"+ "=");
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000294// }
295// }
296// }
Michael Hanla60a2f22014-05-15 19:50:38 +0000297
298
299 //Step II: decide where to put
300 putIntoSuperObject(token, 0);
301 }
302
303 if (nodeCat.equals("OPELEM")) {
304 // Step I: create element
305 LinkedHashMap<String, Object> elem = new LinkedHashMap<String, Object>();
306 elem.put("@type", "korap:span");
307 if (node.getChild(0).toStringTree().equals("EMPTY")) {
308
309 } else {
310 int elname = 0;
311 Tree elnameNode = getFirstChildWithCat(node, "ELNAME");
312 if (elnameNode != null) {
313 elem.put("key", elnameNode.getChild(0).toStringTree().toLowerCase());
314 elname = 1;
315 }
316 if (node.getChildCount() > elname) {
317 /*
Joachim Bingelb674cca2014-05-09 14:00:58 +0000318 * Attributes can carry several values, like #ELEM(W ANA != 'N V'),
319 * denoting a word whose POS is neither N nor V.
320 * When seeing this, create a sub-termGroup and put it into the top-level
321 * term group, but only if there are other attributes in that group. If
322 * not, put the several values as distinct attr-val-pairs into the
323 * top-level group (in order to avoid a top-level group that only
324 * contains a sub-group).
325 */
Michael Hanla60a2f22014-05-15 19:50:38 +0000326 LinkedHashMap<String, Object> termGroup = makeTermGroup("and");
327 ArrayList<Object> termGroupOperands = (ArrayList<Object>) termGroup.get("operands");
328 for (int i = elname; i < node.getChildCount(); i++) {
329 Tree attrNode = node.getChild(i);
330 if (attrNode.getChildCount() == 2) {
331 LinkedHashMap<String, Object> term = makeTerm();
332 termGroupOperands.add(term);
333 String layer = attrNode.getChild(0).toStringTree();
334 String[] splitted = layer.split("/");
335 if (splitted.length > 1) {
336 term.put("foundry", splitted[0]);
337 layer = splitted[1];
338 }
339 term.put("layer", translateMorph(layer));
340 term.put("key", attrNode.getChild(1).toStringTree());
341 String match = getNodeCat(attrNode).equals("EQ") ? "eq" : "ne";
342 term.put("match", "match:" + match);
343 } else {
344 LinkedHashMap<String, Object> subTermGroup = makeTermGroup("and");
345 ArrayList<Object> subTermGroupOperands = (ArrayList<Object>) subTermGroup.get("operands");
346 int j;
347 for (j = 1; j < attrNode.getChildCount(); j++) {
348 LinkedHashMap<String, Object> term = makeTerm();
349 String layer = attrNode.getChild(0).toStringTree();
350 String[] splitted = layer.split("/");
351 if (splitted.length > 1) {
352 term.put("foundry", splitted[0]);
353 layer = splitted[1];
354 }
355 term.put("layer", translateMorph(layer));
356 term.put("key", attrNode.getChild(j).toStringTree());
357 String match = getNodeCat(attrNode).equals("EQ") ? "eq" : "ne";
358 term.put("match", "match:" + match);
359 if (node.getChildCount() == elname + 1) {
360 termGroupOperands.add(term);
361 System.err.println("taga");
362 } else {
363 subTermGroupOperands.add(term);
364 System.err.println(layer);
365 }
366 }
367 if (node.getChildCount() > elname + 1) {
368 System.err.println(termGroupOperands);
369 termGroupOperands.add(subTermGroup);
370 System.err.println(termGroupOperands);
371 }
372 }
373 if (getNodeCat(attrNode).equals("NOTEQ")) negate = true;
374 }
375 elem.put("attr", termGroup);
376 }
377 }
Joachim Bingelcd7b7252014-02-13 08:49:14 +0000378
Michael Hanla60a2f22014-05-15 19:50:38 +0000379 //Step II: decide where to put
380 putIntoSuperObject(elem);
381 }
382
383 if (nodeCat.equals("OPLABEL")) {
384 // Step I: create element
385 LinkedHashMap<String, Object> elem = new LinkedHashMap<String, Object>();
386 elem.put("@type", "korap:span");
387 elem.put("key", node.getChild(0).toStringTree().replaceAll("<|>", ""));
388 //Step II: decide where to put
389 putIntoSuperObject(elem);
390 }
391
392 if (nodeCat.equals("OPAND") || nodeCat.equals("OPNOT")) {
393 // Step I: create group
394 LinkedHashMap<String, Object> distgroup = new LinkedHashMap<String, Object>();
395 distgroup.put("@type", "korap:group");
396 distgroup.put("operation", "operation:sequence");
397 ArrayList<Object> distances = new ArrayList<Object>();
398 LinkedHashMap<String, Object> zerodistance = new LinkedHashMap<String, Object>();
399 zerodistance.put("@type", "korap:distance");
400 zerodistance.put("key", "t");
401 zerodistance.put("min", 0);
402 zerodistance.put("max", 0);
403 if (nodeCat.equals("OPNOT")) zerodistance.put("exclude", true);
404 distances.add(zerodistance);
405 distgroup.put("distances", distances);
406 distgroup.put("operands", new ArrayList<Object>());
407 objectStack.push(distgroup);
408 stackedObjects++;
409 // Step II: decide where to put
410 putIntoSuperObject(distgroup, 1);
411 }
412
413 if (nodeCat.equals("OPOR")) {
414 // Step I: create group
415 LinkedHashMap<String, Object> disjunction = new LinkedHashMap<String, Object>();
416 disjunction.put("@type", "korap:group");
417 disjunction.put("operation", "operation:or");
418 disjunction.put("operands", new ArrayList<Object>());
419 objectStack.push(disjunction);
420 stackedObjects++;
421 // Step II: decide where to put
422 putIntoSuperObject(disjunction, 1);
423 }
424
425 if (nodeCat.equals("OPPROX")) {
426 //TODO direction "both": wrap in "or" group with operands once flipped, once not
427 // collect info
428 Tree prox_opts = node.getChild(0);
429 Tree typ = prox_opts.getChild(0);
430 Tree dist_list = prox_opts.getChild(1);
431 // Step I: create group
432 LinkedHashMap<String, Object> proxSequence = new LinkedHashMap<String, Object>();
433 proxSequence.put("@type", "korap:group");
434 proxSequence.put("operation", "operation:" + "sequence");
435 objectStack.push(proxSequence);
436 stackedObjects++;
437 ArrayList<Object> constraints = new ArrayList<Object>();
438 boolean exclusion = !typ.getChild(0).toStringTree().equals("PROX");
439
440 boolean inOrder = false;
441 proxSequence.put("inOrder", inOrder);
442 proxSequence.put("distances", constraints);
443
444 ArrayList<Object> operands = new ArrayList<Object>();
445 proxSequence.put("operands", operands);
446
447 // possibly several distance constraints
448 for (int i = 0; i < dist_list.getChildCount(); i++) {
449 String direction = dist_list.getChild(i).getChild(0).getChild(0).toStringTree().toLowerCase();
450 String min = dist_list.getChild(i).getChild(1).getChild(0).toStringTree();
451 String max = dist_list.getChild(i).getChild(1).getChild(1).toStringTree();
452 String meas = dist_list.getChild(i).getChild(2).getChild(0).toStringTree();
453 if (min.equals("VAL0")) {
454 min = "0";
455 }
456 LinkedHashMap<String, Object> distance = new LinkedHashMap<String, Object>();
457 distance.put("@type", "korap:distance");
458 distance.put("key", meas);
459 distance.put("min", Integer.parseInt(min));
460 distance.put("max", Integer.parseInt(max));
461 if (exclusion) {
462 distance.put("exclude", exclusion);
463 }
464 constraints.add(distance);
465 if (direction.equals("plus")) {
466 inOrder = true;
467 } else if (direction.equals("minus")) {
468 inOrder = true;
469 invertedOperandsLists.add(operands);
470 }
471 }
472 proxSequence.put("inOrder", inOrder);
473 // Step II: decide where to put
474 putIntoSuperObject(proxSequence, 1);
475 }
476
477 // inlcusion or overlap
478 if (nodeCat.equals("OPIN") || nodeCat.equals("OPOV")) {
479 // Step I: create group
480 LinkedHashMap<String, Object> submatchgroup = new LinkedHashMap<String, Object>();
481 submatchgroup.put("@type", "korap:group");
482 submatchgroup.put("operation", "operation:" + "submatch");
483 ArrayList<Integer> classRef = new ArrayList<Integer>();
484 classRef.add(classRefCounter);
485 submatchgroup.put("classRef", classRef);
486
487 ArrayList<Object> submatchoperands = new ArrayList<Object>();
488 LinkedHashMap<String, Object> posgroup = new LinkedHashMap<String, Object>();
489 submatchgroup.put("operands", submatchoperands);
490 submatchoperands.add(posgroup);
491 posgroup.put("@type", "korap:group");
492// String relation = nodeCat.equals("OPIN") ? "position" : "overlaps";
493 posgroup.put("operation", "operation:" + "position");
494 if (nodeCat.equals("OPIN")) {
495 parseOPINOptions(node, posgroup);
496 } else {
497 parseOPOVOptions(node, posgroup);
498 }
499 ArrayList<Object> posoperands = new ArrayList<Object>();
500 posgroup.put("operands", posoperands);
501 objectStack.push(posgroup);
502 // mark this an inverted list
503 invertedOperandsLists.push(posoperands);
504 stackedObjects++;
505 // Step II: decide where to put
506 putIntoSuperObject(submatchgroup, 1);
507 }
508
509
510 // Wrap the first argument of an #IN operator in a class group
511 if (nodeCat.equals("ARG1") && (openNodeCats.get(1).equals("OPIN") || openNodeCats.get(1).equals("OPOV") || openNodeCats.get(2).equals("OPNHIT"))) {
512 // Step I: create group
513 LinkedHashMap<String, Object> classGroup = new LinkedHashMap<String, Object>();
514 classGroup.put("@type", "korap:group");
515 classGroup.put("operation", "operation:" + "class");
516 classGroup.put("class", classRefCounter);
517 classRefCounter++;
518 classGroup.put("operands", new ArrayList<Object>());
519 objectStack.push(classGroup);
520 stackedObjects++;
521 // Step II: decide where to put
522 putIntoSuperObject(classGroup, 1);
523 }
524
525 // Wrap the 2nd argument of an #IN operator embedded in NHIT in a class group
526 if (nodeCat.equals("ARG2") && openNodeCats.get(2).equals("OPNHIT")) {
527 // Step I: create group
528 LinkedHashMap<String, Object> classGroup = new LinkedHashMap<String, Object>();
529 classGroup.put("@type", "korap:group");
530 classGroup.put("operation", "operation:" + "class");
531 classGroup.put("class", classRefCounter);
532 classRefCounter++;
533 classGroup.put("operands", new ArrayList<Object>());
534 objectStack.push(classGroup);
535 stackedObjects++;
536 // Step II: decide where to put
537 putIntoSuperObject(classGroup, 1);
538 }
539
540
541 if (nodeCat.equals("OPNHIT")) {
542 LinkedHashMap<String, Object> exclGroup = new LinkedHashMap<String, Object>();
543 exclGroup.put("@type", "korap:group");
544 exclGroup.put("operation", "operation:" + "submatch");
545 ArrayList<Integer> classRef = new ArrayList<Integer>();
546
547 classRef.add(classRefCounter);
548// classRefCounter++;
549 // yes, do this twice!
550 classRef.add(classRefCounter + 1);
551// classRefCounter++;
552 exclGroup.put("classRef", classRef);
553 exclGroup.put("classRefOp", "classRefOp:" + "intersection");
554 ArrayList<Object> operands = new ArrayList<Object>();
555 exclGroup.put("operands", operands);
556 objectStack.push(exclGroup);
557 stackedObjects++;
558 putIntoSuperObject(exclGroup, 1);
559 }
560
561 if (nodeCat.equals("OPEND") || nodeCat.equals("OPBEG")) {
562 // Step I: create group
563 LinkedHashMap<String, Object> beggroup = new LinkedHashMap<String, Object>();
564 beggroup.put("@type", "korap:group");
565 beggroup.put("operation", "operation:" + "submatch");
566 ArrayList<Integer> spanRef = new ArrayList<Integer>();
567 if (nodeCat.equals("OPBEG")) {
568 spanRef.add(0);
569 spanRef.add(1);
570 } else {
571 spanRef.add(-1);
572 spanRef.add(1);
573 }
574 beggroup.put("spanRef", spanRef);
575 beggroup.put("operands", new ArrayList<Object>());
576 objectStack.push(beggroup);
577 stackedObjects++;
578
579 // Step II: decide where to put
580 putIntoSuperObject(beggroup, 1);
581 }
582
583 if (nodeCat.equals("OPBED")) {
584 // Step I: create group
585 int optsChild = node.getChildCount() - 1;
586 Tree conditions = node.getChild(optsChild).getChild(0);
587
588 // create a containing group expressing the submatch constraint on the first argument
589 LinkedHashMap<String, Object> submatchgroup = new LinkedHashMap<String, Object>();
590 submatchgroup.put("@type", "korap:group");
591 submatchgroup.put("operation", "operation:" + "submatch");
592 ArrayList<Integer> spanRef = new ArrayList<Integer>();
593 spanRef.add(1);
594 submatchgroup.put("classRef", spanRef);
595 ArrayList<Object> submatchoperands = new ArrayList<Object>();
596 submatchgroup.put("operands", submatchoperands);
597 putIntoSuperObject(submatchgroup, 0);
598
599 // Distinguish two cases. Normal case: query has just one condition, like #BED(X, sa) ...
600 if (conditions.getChildCount() == 1) {
601 CosmasCondition c = new CosmasCondition(conditions.getChild(0));
602
603 // create the group expressing the position constraint
604 LinkedHashMap<String, Object> posgroup = new LinkedHashMap<String, Object>();
605 posgroup.put("@type", "korap:group");
606 posgroup.put("operation", "operation:" + "position");
607
608 posgroup.put("frame", "frame:" + c.position);
609 if (c.negated) posgroup.put("exclude", true);
610 ArrayList<Object> operands = new ArrayList<Object>();
611 posgroup.put("operands", operands);
612
613 // create span representing the element expressed in the condition
614 LinkedHashMap<String, Object> bedElem = new LinkedHashMap<String, Object>();
615 bedElem.put("@type", "korap:span");
616 bedElem.put("key", c.elem);
617
618 // create a class group containing the argument, in order to submatch the arg.
619 LinkedHashMap<String, Object> classGroup = new LinkedHashMap<String, Object>();
620 classGroup.put("@type", "korap:group");
621 classGroup.put("operation", "operation:class");
622 classGroup.put("class", classRefCounter);
623 classRefCounter++;
624 classGroup.put("operands", new ArrayList<Object>());
625 objectStack.push(classGroup);
626 stackedObjects++;
627 operands.add(bedElem);
628 operands.add(classGroup);
629 // Step II: decide where to put
630 submatchoperands.add(posgroup);
631
632 // ... or the query has several conditions specified, like #BED(XY, sa,-pa). In that case,
633 // create an 'and' group and embed the position groups in its operands
634 } else {
635 // node has several conditions (like 'sa, -pa')
636 // -> create zero-distance sequence group and embed all position groups there
637 LinkedHashMap<String, Object> conjunct = new LinkedHashMap<String, Object>();
638 conjunct.put("@type", "korap:group");
639 conjunct.put("operation", "operation:" + "sequence");
640 ArrayList<Object> distances = new ArrayList<Object>();
641 conjunct.put("distances", distances);
642 LinkedHashMap<String, Object> zerodistance = new LinkedHashMap<String, Object>();
643 zerodistance.put("@type", "korap:distance");
644 zerodistance.put("key", "w");
645 zerodistance.put("min", 0);
646 zerodistance.put("max", 0);
647 distances.add(zerodistance);
648 ArrayList<Object> operands = new ArrayList<Object>();
649 conjunct.put("operands", operands);
650 ArrayList<ArrayList<Object>> distributedOperands = new ArrayList<ArrayList<Object>>();
651
652 for (int i = 0; i < conditions.getChildCount(); i++) {
653 // for each condition, create a position group containing a class group. problem: how to get argument into every operands list?
654 // -> use distributedOperandsLists
655 LinkedHashMap<String, Object> posGroup = new LinkedHashMap<String, Object>();
656 operands.add(posGroup);
657
658 // make position group
659 CosmasCondition c = new CosmasCondition(conditions.getChild(i));
660 posGroup.put("@type", "korap:group");
661 posGroup.put("operation", "operation:" + "position");
662 posGroup.put("frame", "frame:" + c.position);
663 if (c.negated) posGroup.put("exclude", "true");
664 ArrayList<Object> posOperands = new ArrayList<Object>();
665
666 // make class group
667 LinkedHashMap<String, Object> classGroup = new LinkedHashMap<String, Object>();
668 classGroup.put("@type", "korap:group");
669 classGroup.put("operation", "operation:class");
670 classGroup.put("class", classRefCounter);
671 classRefCounter++;
672 ArrayList<Object> classOperands = new ArrayList<Object>();
673 classGroup.put("operands", classOperands);
674 distributedOperands.add(classOperands); // subtree to be put into every class group -> distribute
675
676 // put the span and the class group into the position group
677 posGroup.put("operands", posOperands);
678 LinkedHashMap<String, Object> span = new LinkedHashMap<String, Object>();
679 posOperands.add(span);
680 posOperands.add(classGroup);
681 span.put("@type", "korap:span");
682 span.put("key", c.elem);
683 }
684 submatchoperands.add(conjunct);
685 distributedOperandsLists.push(distributedOperands);
686 }
687
688 }
689 objectsToPop.push(stackedObjects);
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000690
Joachim Bingel87480d02014-01-17 14:07:46 +0000691 /*
692 ****************************************************************
693 ****************************************************************
694 * recursion until 'request' node (root of tree) is processed *
695 ****************************************************************
696 ****************************************************************
697 */
Michael Hanla60a2f22014-05-15 19:50:38 +0000698 for (int i = 0; i < node.getChildCount(); i++) {
699 Tree child = node.getChild(i);
700 processNode(child);
701 }
Joachim Bingel5c93f902013-11-19 14:49:04 +0000702
Joachim Bingel87480d02014-01-17 14:07:46 +0000703 /*
704 **************************************************************
705 * Stuff that happens after processing the children of a node *
706 **************************************************************
707 */
Joachim Bingel5c93f902013-11-19 14:49:04 +0000708
Michael Hanla60a2f22014-05-15 19:50:38 +0000709 // remove sequence from object stack if node is implicitly sequenced
710 if (sequencedNodes.size() > 0) {
711 if (node == sequencedNodes.getFirst()) {
712 objectStack.pop();
713 sequencedNodes.pop();
714 }
715 }
Joachim Bingel5c93f902013-11-19 14:49:04 +0000716
Michael Hanla60a2f22014-05-15 19:50:38 +0000717 for (int i = 0; i < objectsToPop.get(0); i++) {
718 objectStack.pop();
719 }
720 objectsToPop.pop();
Joachim Bingelffd65e32014-01-22 14:22:57 +0000721
Michael Hanla60a2f22014-05-15 19:50:38 +0000722 if (nodeCat.equals("ARG2") && openNodeCats.get(1).equals("OPNOT")) {
723 negate = false;
724 }
725 openNodeCats.pop();
726 }
Joachim Bingel402c6e12014-05-08 17:09:06 +0000727
Michael Hanla60a2f22014-05-15 19:50:38 +0000728 private Object translateMorph(String layer) {
729 // todo: not very nicely solved! Does this require extension somehow? if not, why not use simple string comparison?!
730// LinkedHashMap<String, String> map = new LinkedHashMap<String, String>();
731// map.put("ANA", "pos");
732// if (map.containsKey(layer))
733// return map.get(layer);
734// else
735// return layer;
736 if (layer.equals("ANA"))
737 return ResourceMapper.descriptor2policy(layer);
738 else
739 return layer;
740
741 }
742
743 private void parseOPINOptions(Tree node, LinkedHashMap<String, Object> posgroup) {
744 Tree posnode = getFirstChildWithCat(node, "POS");
745 Tree rangenode = getFirstChildWithCat(node, "RANGE");
746 Tree exclnode = getFirstChildWithCat(node, "EXCL");
747 Tree groupnode = getFirstChildWithCat(node, "GROUP");
748 boolean negatePosition = false;
749
750 String position = "";
751 if (posnode != null) {
752 String value = posnode.getChild(0).toStringTree();
753 position = translateTextAreaArgument(value, "in");
754 if (value.equals("N")) {
755 negatePosition = !negatePosition;
756 }
757 } else {
758 position = "contains";
759 }
760 posgroup.put("frame", "frame:" + position);
761 position = openNodeCats.get(1).equals("OPIN") ? "contains" : "full";
762
763 if (rangenode != null) {
764 String range = rangenode.getChild(0).toStringTree();
765 posgroup.put("range", range.toLowerCase());
766 }
767
768 if (exclnode != null) {
769 if (exclnode.getChild(0).toStringTree().equals("YES")) {
770 negatePosition = !negatePosition;
771 }
772 }
773 System.err.println(negatePosition);
774 if (negatePosition) {
775 posgroup.put("exclude", "true");
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000776// negate = !negate;
Michael Hanla60a2f22014-05-15 19:50:38 +0000777 }
Joachim Bingel87480d02014-01-17 14:07:46 +0000778
Michael Hanla60a2f22014-05-15 19:50:38 +0000779 if (groupnode != null) {
780 String grouping = groupnode.getChild(0).toStringTree().equals("max") ? "true" : "false";
781 posgroup.put("grouping", grouping);
782 }
783 }
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000784
Michael Hanla60a2f22014-05-15 19:50:38 +0000785 private void parseOPOVOptions(Tree node, LinkedHashMap<String, Object> posgroup) {
786 Tree posnode = getFirstChildWithCat(node, "POS");
787 Tree exclnode = getFirstChildWithCat(node, "EXCL");
788 Tree groupnode = getFirstChildWithCat(node, "GROUP");
789
790 String position = "";
791 if (posnode != null) {
792 String value = posnode.getChild(0).toStringTree();
793 position = "-" + translateTextAreaArgument(value, "ov");
794 }
795 posgroup.put("frame", "frame:" + "overlaps" + position);
796
797 if (exclnode != null) {
798 if (exclnode.getChild(0).toStringTree().equals("YES")) {
799 posgroup.put("match", "match:" + "ne");
800 }
801 }
802 if (groupnode != null) {
803 String grouping = groupnode.getChild(0).toStringTree().equals("@max") ? "true" : "false";
804 posgroup.put("grouping", grouping);
805 }
806
807 }
808
809 /**
810 * Translates the text area specifications (position option arguments) to terms used in serialisation.
811 * For the allowed argument types and their values for OPIN and OPOV, see
812 * http://www.ids-mannheim.de/cosmas2/win-app/hilfe/suchanfrage/eingabe-grafisch/syntax/ARGUMENT_I.html or
813 * http://www.ids-mannheim.de/cosmas2/win-app/hilfe/suchanfrage/eingabe-grafisch/syntax/ARGUMENT_O.html, respectively.
814 *
815 * @param argument
816 * @param mode
817 * @return
818 */
819 private String translateTextAreaArgument(String argument, String mode) {
820 String position = "";
821 switch (argument) {
822 case "L":
823 position = mode.equals("in") ? "startswith" : "left";
824 break;
825 case "R":
826 position = mode.equals("in") ? "endswith" : "right";
827 break;
828 case "F":
829 position = "leftrightmatch";
830 break;
831 case "FE":
832 position = "matches";
833 break;
834 case "FI":
835 position = "leftrightmatch-noident";
836 break;
837 case "N": // for OPIN only - exclusion constraint formulated in parseOPINOptions
838 position = "leftrightmatch";
839 break;
840 case "X": // for OPOV only
841 position = "residual";
842 break;
843 }
844 return position;
845 }
846
847 @SuppressWarnings("unchecked")
848 private void putIntoSuperObject(LinkedHashMap<String, Object> object, int objStackPosition) {
849 if (distributedOperandsLists.size() > 0) {
850 ArrayList<ArrayList<Object>> distributedOperands = distributedOperandsLists.pop();
851 for (ArrayList<Object> operands : distributedOperands) {
852 operands.add(object);
853 }
854 } else if (objectStack.size() > objStackPosition) {
855 ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(objStackPosition).get("operands");
856 if (!invertedOperandsLists.contains(topObjectOperands)) {
857 topObjectOperands.add(object);
858 } else {
859 topObjectOperands.add(0, object);
860 }
861
862 } else {
863 requestMap.put("query", object);
864 }
865 }
866
867 private void putIntoSuperObject(LinkedHashMap<String, Object> object) {
868 putIntoSuperObject(object, 0);
869 }
870
871
872 private Tree parseCosmasQuery(String q) throws RecognitionException {
873 Pattern p = Pattern.compile("(\\w+):((\\+|-)?(sa|se|pa|pe|ta|te),?)+");
874 Matcher m = p.matcher(q);
875
876 String rewrittenQuery = q;
877 while (m.find()) {
878 String match = m.group();
879 String conditionsString = match.split(":")[1];
880 Pattern conditionPattern = Pattern.compile("(\\+|-)?(sa|se|pa|pe|ta|te)");
881 Matcher conditionMatcher = conditionPattern.matcher(conditionsString);
882 String replacement = "#BED(" + m.group(1) + " , ";
883 while (conditionMatcher.find()) {
884 replacement = replacement + conditionMatcher.group() + ",";
885 }
886 replacement = replacement.substring(0, replacement.length() - 1) + ")"; //remove trailing comma and close parenthesis
887 System.out.println(replacement);
888 rewrittenQuery = rewrittenQuery.replace(match, replacement);
889 }
890 q = rewrittenQuery;
891 Tree tree = null;
892 ANTLRStringStream ss = new ANTLRStringStream(q);
893 c2psLexer lex = new c2psLexer(ss);
894 org.antlr.runtime.CommonTokenStream tokens = new org.antlr.runtime.CommonTokenStream(lex); //v3
895 parser = new c2psParser(tokens);
896 c2psParser.c2ps_query_return c2Return = ((c2psParser) parser).c2ps_query(); // statt t().
897 // AST Tree anzeigen:
898 tree = (Tree) c2Return.getTree();
899
900 String treestring = tree.toStringTree();
901 if (treestring.contains("<mismatched token") || treestring.contains("<error") || treestring.contains("<unexpected")) {
902 log.error("Invalid tree. Could not parse Cosmas query. Make sure it is well-formed.");
903 throw new RecognitionException();
904 }
905
906 return tree;
907 }
908
909 /**
910 * @param args
911 */
912 public static void main(String[] args) {
Joachim Bingel5c93f902013-11-19 14:49:04 +0000913 /*
Joachim Bingel87480d02014-01-17 14:07:46 +0000914 * For debugging
Joachim Bingel5c93f902013-11-19 14:49:04 +0000915 */
Michael Hanla60a2f22014-05-15 19:50:38 +0000916 String[] queries = new String[]{
Joachim Bingel5c93f902013-11-19 14:49:04 +0000917 /* COSMAS 2 */
Joachim Bingel8c640e42014-02-07 16:20:47 +0000918// "MORPH(V)",
919// "MORPH(V PRES)",
920// "wegen #IN(%, L) <s>",
921// "wegen #IN(%) <s>",
922// "(Mann oder Frau) #IN <s>",
923// "#BEG(der /w3:5 Mann) /+w10 kommt",
924// "&würde /w0 MORPH(V)",
Joachim Bingel81812632014-02-18 08:55:22 +0000925// "#NHIT(gehen /w1:10 voran)",
926// "#BED(der Mann , sa,-pa)",
927// "Mann /t0 Frau",
Michael Hanla60a2f22014-05-15 19:50:38 +0000928 "sagt der:sa Bundeskanzler",
Joachim Bingel81812632014-02-18 08:55:22 +0000929// "Der:sa,-pe,+te ",
Michael Hanla60a2f22014-05-15 19:50:38 +0000930 "#ELEM(W POS!='N V' title=tada)",
931 "#ELEM(W ANA != 'N V')",
932 "#ELEM(W ANA != 'N V' Genre = Sport)",
933 "(&Baum #IN #ELEM(xip/c=np)) #IN(L) #ELEM(s)"
Joachim Bingel402c6e12014-05-08 17:09:06 +0000934// "(&Baum #IN #ELEM(NP)) #IN(L) #ELEM(S)"
Michael Hanla60a2f22014-05-15 19:50:38 +0000935 };
Joachim Bingel5dd91682014-02-14 13:10:29 +0000936// CosmasTree.debug=true;
Michael Hanla60a2f22014-05-15 19:50:38 +0000937 for (String q : queries) {
938 try {
939 System.out.println(q);
940 try {
941 CosmasTree act = new CosmasTree(q);
942 System.out.println(act.parseCosmasQuery(q).toStringTree());
943 } catch (QueryException e) {
944 e.printStackTrace();
945 } catch (RecognitionException e) {
946 e.printStackTrace();
947 }
948 System.out.println();
949
950 } catch (NullPointerException npe) {
951 npe.printStackTrace();
952 System.out.println("null\n");
953 }
954 }
955 }
Joachim Bingel5c93f902013-11-19 14:49:04 +0000956}