blob: 4b514bde8b29e1dc20eeaad80b2930765c35ca51 [file] [log] [blame]
Joachim Bingel5c93f902013-11-19 14:49:04 +00001package de.ids_mannheim.korap.query.serialize;
2
Michael Hanla60a2f22014-05-15 19:50:38 +00003import de.ids_mannheim.korap.query.cosmas2.c2psLexer;
4import de.ids_mannheim.korap.query.cosmas2.c2psParser;
5import de.ids_mannheim.korap.query.serialize.util.CosmasCondition;
6import de.ids_mannheim.korap.query.serialize.util.ResourceMapper;
7import de.ids_mannheim.korap.util.QueryException;
Joachim Bingel5c93f902013-11-19 14:49:04 +00008import org.antlr.runtime.ANTLRStringStream;
9import org.antlr.runtime.RecognitionException;
10import org.antlr.runtime.tree.Tree;
Joachim Bingelb5f7bf02014-01-07 16:36:54 +000011import org.antlr.v4.runtime.tree.ParseTree;
12import org.slf4j.Logger;
13import org.slf4j.LoggerFactory;
Joachim Bingel5c93f902013-11-19 14:49:04 +000014
Michael Hanla60a2f22014-05-15 19:50:38 +000015import java.util.*;
16import java.util.regex.Matcher;
17import java.util.regex.Pattern;
Joachim Bingel5c93f902013-11-19 14:49:04 +000018
19/**
20 * Map representation of CosmasII syntax tree as returned by ANTLR
Joachim Bingel5c93f902013-11-19 14:49:04 +000021 *
Michael Hanla60a2f22014-05-15 19:50:38 +000022 * @author joachim
Joachim Bingel5c93f902013-11-19 14:49:04 +000023 */
Joachim Bingelc8a28e42014-04-24 15:06:42 +000024public class CosmasTree extends Antlr3AbstractSyntaxTree {
Joachim Bingelb5f7bf02014-01-07 16:36:54 +000025
Michael Hanla60a2f22014-05-15 19:50:38 +000026 private static Logger log = LoggerFactory.getLogger(CosmasTree.class);
Joachim Bingeleecc7652014-01-11 17:21:07 +000027
Michael Hanla60a2f22014-05-15 19:50:38 +000028 String query;
29 LinkedHashMap<String, Object> requestMap = new LinkedHashMap<String, Object>();
30 /**
31 * Keeps track of active object.
32 */
33 LinkedList<LinkedHashMap<String, Object>> objectStack = new LinkedList<LinkedHashMap<String, Object>>();
34 /**
35 * Makes it possible to store several distantTokenGroups
36 */
37 LinkedList<ArrayList<List<Object>>> distantTokensStack = new LinkedList<ArrayList<List<Object>>>();
38 /**
     * Field for repetition queries (Kleene + or * operations, or min/max queries such as {2,4})
40 */
41 String repetition = "";
42 /**
43 * Keeps track of open node categories
44 */
45 LinkedList<String> openNodeCats = new LinkedList<String>();
46 /**
47 * Global control structure for fieldGroups, keeps track of open fieldGroups.
48 */
49 LinkedList<ArrayList<Object>> openFieldGroups = new LinkedList<ArrayList<Object>>();
50 /**
51 * Keeps track of how many objects there are to pop after every recursion of {@link #processNode(ParseTree)}
52 */
53 LinkedList<Integer> objectsToPop = new LinkedList<Integer>();
54 /**
55 * Flag that indicates whether token fields or meta fields are currently being processed
56 */
57 boolean inMeta = false;
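    /**
     * Counter providing the class number used for the next class group or class reference;
     * it is incremented each time a class group is created while processing nodes.
     */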
61 int classRefCounter = 1;
62 boolean negate = false;
Joachim Bingeleecc7652014-01-11 17:21:07 +000063
Michael Hanla60a2f22014-05-15 19:50:38 +000064 Tree cosmasTree;
65
66 LinkedHashMap<String, Object> treeMap = new LinkedHashMap<String, Object>();
67 /**
68 * Keeps track of all visited nodes in a tree
69 */
70 List<Tree> visited = new ArrayList<Tree>();
71
72 Integer stackedObjects = 0;
73
74 private static boolean debug = false;
75 /**
76 * A list of node categories that can be sequenced (i.e. which can be in a sequence with any number of other nodes in this list)
77 */
78 private final List<String> sequentiableCats = Arrays.asList(new String[]{"OPWF", "OPLEM", "OPMORPH", "OPBEG", "OPEND", "OPIN", "OPBED"});
79 /**
80 * Keeps track of sequenced nodes, i.e. nodes that implicitly govern a sequence, as in (C2PQ (OPWF der) (OPWF Mann)).
81 * This is necessary in order to know when to take the sequence off the object stack, as the sequence is introduced by the
82 * first child but cannot be closed after this first child in order not to lose its siblings
83 */
84 private LinkedList<Tree> sequencedNodes = new LinkedList<Tree>();
85
86 private boolean hasSequentiableSiblings;
87
88 /**
89 * Keeps track of operands lists that are to be serialised in an inverted
90 * order (e.g. the IN() operator) compared to their AST representation.
91 */
92 private LinkedList<ArrayList<Object>> invertedOperandsLists = new LinkedList<ArrayList<Object>>();
93
94 private LinkedList<ArrayList<ArrayList<Object>>> distributedOperandsLists = new LinkedList<ArrayList<ArrayList<Object>>>();
95
/**
 * Creates a serializer for the given Cosmas II query and processes the query immediately,
 * so that the result is available through {@link #getRequestMap()} once construction succeeds.
 *
 * @param query the Cosmas II query string to process
 * @throws QueryException if {@link #process(String)} rejects the query
 */
public CosmasTree(String query) throws QueryException {
    this.query = query;
    process(query);
    // Report the serialised query through the logger only; library code should not write to stdout.
    log.info(">>> {} <<<", requestMap.get("query"));
}
107
108 @Override
109 public Map<String, Object> getRequestMap() {
110 return this.requestMap;
111 }
112
113
114 @Override
115 public void process(String query) throws QueryException {
116 Tree tree = null;
117 try {
118 tree = parseCosmasQuery(query);
119 } catch (RecognitionException e) {
120 throw new QueryException("Your query could not be processed. Please make sure it is well-formed.");
121 } catch (NullPointerException e) {
122 throw new QueryException("Your query could not be processed. Please make sure it is well-formed.");
123 }
124 log.info("Processing CosmasII query");
125 System.out.println("Processing Cosmas");
126 requestMap.put("@context", "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld");
Joachim Bingelc8a28e42014-04-24 15:06:42 +0000127// prepareContext(requestMap);
Michael Hanla60a2f22014-05-15 19:50:38 +0000128 processNode(tree);
129 log.info(requestMap.toString());
130 }
131
132 @SuppressWarnings("unchecked")
133 private void processNode(Tree node) {
134
135 // Top-down processing
136 if (visited.contains(node)) return;
137 else visited.add(node);
138
139
140 String nodeCat = getNodeCat(node);
141 openNodeCats.push(nodeCat);
142
143 stackedObjects = 0;
144
145 if (debug) {
146 System.err.println(" " + objectStack);
147 System.out.println(openNodeCats);
148 }
149
Joachim Bingel5c93f902013-11-19 14:49:04 +0000150
151 /* ***************************************
Michael Hanla60a2f22014-05-15 19:50:38 +0000152 * Processing individual node categories *
Joachim Bingel5c93f902013-11-19 14:49:04 +0000153 *****************************************/
Joachim Bingeld5161a12014-01-08 11:15:49 +0000154
Michael Hanla60a2f22014-05-15 19:50:38 +0000155
156 // Check for potential implicit sequences as in (C2PQ (OPWF der) (OPWF Mann)). The sequence is introduced
157 // by the first child if it (and its siblings) is sequentiable.
158 if (sequentiableCats.contains(nodeCat)) {
159 // for each node, check if parent has more than one child (-> could be implicit sequence)
160 Tree parent = node.getParent();
161 if (parent.getChildCount() > 1) {
162 // if node is first child of parent...
163 if (node == parent.getChild(0)) {
164 hasSequentiableSiblings = false;
165 for (int i = 1; i < parent.getChildCount(); i++) {
166 if (sequentiableCats.contains(getNodeCat(parent.getChild(i)))) {
167 hasSequentiableSiblings = true;
168 continue;
169 }
170 }
171 if (hasSequentiableSiblings) {
172 // Step I: create sequence
173 LinkedHashMap<String, Object> sequence = new LinkedHashMap<String, Object>();
174 sequence.put("@type", "korap:group");
175 sequence.put("operation", "operation:sequence");
176 sequence.put("operands", new ArrayList<Object>());
177 // push sequence on object stack but don't increment stackedObjects counter since
178 // we've got to wait until the parent node is processed - therefore, add the parent
179 // to the sequencedNodes list and remove the sequence from the stack when the parent
180 // has been processed
181 objectStack.push(sequence);
182 sequencedNodes.push(parent);
183 // Step II: decide where to put sequence
184 putIntoSuperObject(sequence, 1);
185 }
186 }
187 }
188 }
189
190 // Nodes introducing tokens. Process all in the same manner, except for the fieldMap entry
191 if (nodeCat.equals("OPWF") || nodeCat.equals("OPLEM")) {
192
193 //Step I: get info
194 LinkedHashMap<String, Object> token = new LinkedHashMap<String, Object>();
195 token.put("@type", "korap:token");
196 objectStack.push(token);
197 stackedObjects++;
198 LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>();
199 token.put("wrap", fieldMap);
200
201 fieldMap.put("@type", "korap:term");
202 // make category-specific fieldMap entry
203 String attr = nodeCat.equals("OPWF") ? "orth" : "lemma";
204 String value = node.getChild(0).toStringTree().replaceAll("\"", "");
205 if (value.startsWith("$")) {
206 value = value.substring(1);
207 fieldMap.put("caseInsensitive", true);
208 }
209
Michael Hanla60a2f22014-05-15 19:50:38 +0000210 fieldMap.put("key", value);
211 fieldMap.put("layer", attr);
212
213 // negate field (see above)
214 if (negate) {
215 fieldMap.put("match", "match:ne");
216 } else {
217 fieldMap.put("match", "match:eq");
218 }
219 //Step II: decide where to put
220 if (!hasChild(node, "TPOS")) {
221 putIntoSuperObject(token, 1);
222 } else {
223
224 }
225
226 }
227
228 if (nodeCat.equals("OPMORPH")) {
229 //Step I: get info
230 LinkedHashMap<String, Object> token = new LinkedHashMap<String, Object>();
231 token.put("@type", "korap:token");
232 LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>();
233 token.put("wrap", fieldMap);
234
235 fieldMap.put("@type", "korap:term");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000236// fieldMap.put("key", "morph:"+node.getChild(0).toString().replace(" ", "_"));
Michael Hanla60a2f22014-05-15 19:50:38 +0000237 String[] morphValues = node.getChild(0).toString().split(" ");
238 String pos = morphValues[0];
Michael Hanla60a2f22014-05-15 19:50:38 +0000239 fieldMap.put("key", pos);
Michael Hanlc22e79d2014-06-04 21:11:07 +0000240 fieldMap.put("layer", "pos");
Michael Hanla60a2f22014-05-15 19:50:38 +0000241 // make category-specific fieldMap entry
242 // negate field (see above)
243 if (negate) {
244 fieldMap.put("match", "match:ne");
245 } else {
246 fieldMap.put("match", "match:eq");
247 }
Joachim Bingelc8a28e42014-04-24 15:06:42 +0000248// List<String> morphValues = parseMorph(node.getChild(0).toStringTree());
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000249// System.err.println(morphValues);
250// if (morphValues.size() == 1) {
251// LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>();
Joachim Bingel11d5b152014-02-11 21:33:47 +0000252// token.put("key", fieldMap);
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000253//
Joachim Bingel2daf9862014-02-12 10:18:54 +0000254// fieldMap.put("@type", "korap:term");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000255// fieldMap.put("key", morphValues.get(0));
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000256// // make category-specific fieldMap entry
257// // negate field (see above)
258// if (negate) {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000259// fieldMap.put("operation", "operation:"+ "!=");
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000260// } else {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000261// fieldMap.put("operation", "operation:"+ "=");
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000262// }
263// } else {
264// LinkedHashMap<String, Object> conjGroup = new LinkedHashMap<String, Object>();
Joachim Bingel11d5b152014-02-11 21:33:47 +0000265// token.put("key", conjGroup);
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000266// ArrayList<Object> conjOperands = new ArrayList<Object>();
Joachim Bingel2daf9862014-02-12 10:18:54 +0000267// conjGroup.put("@type", "korap:group");
268// conjGroup.put("operation", "operation:"+ "and");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000269// conjGroup.put("operands", conjOperands);
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000270// for (String value : morphValues) {
271// LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>();
Joachim Bingel11d5b152014-02-11 21:33:47 +0000272// token.put("key", fieldMap);
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000273//
Joachim Bingel2daf9862014-02-12 10:18:54 +0000274// fieldMap.put("@type", "korap:term");
Joachim Bingel11d5b152014-02-11 21:33:47 +0000275// fieldMap.put("key", value);
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000276// // make category-specific fieldMap entry
277// // negate field (see above)
278// if (negate) {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000279// fieldMap.put("operation", "operation:"+ "!=");
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000280// } else {
Joachim Bingel2daf9862014-02-12 10:18:54 +0000281// fieldMap.put("operation", "operation:"+ "=");
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000282// }
283// }
284// }
Michael Hanla60a2f22014-05-15 19:50:38 +0000285
286
287 //Step II: decide where to put
288 putIntoSuperObject(token, 0);
289 }
290
291 if (nodeCat.equals("OPELEM")) {
292 // Step I: create element
Joachim Bingel4edf2982014-06-25 14:23:02 +0000293 LinkedHashMap<String, Object> span = makeSpan();
Michael Hanla60a2f22014-05-15 19:50:38 +0000294 if (node.getChild(0).toStringTree().equals("EMPTY")) {
295
296 } else {
297 int elname = 0;
298 Tree elnameNode = getFirstChildWithCat(node, "ELNAME");
299 if (elnameNode != null) {
Joachim Bingel4edf2982014-06-25 14:23:02 +0000300 span.put("key", elnameNode.getChild(0).toStringTree().toLowerCase());
Michael Hanla60a2f22014-05-15 19:50:38 +0000301 elname = 1;
302 }
303 if (node.getChildCount() > elname) {
304 /*
Joachim Bingelb674cca2014-05-09 14:00:58 +0000305 * Attributes can carry several values, like #ELEM(W ANA != 'N V'),
306 * denoting a word whose POS is neither N nor V.
307 * When seeing this, create a sub-termGroup and put it into the top-level
308 * term group, but only if there are other attributes in that group. If
309 * not, put the several values as distinct attr-val-pairs into the
310 * top-level group (in order to avoid a top-level group that only
311 * contains a sub-group).
312 */
Michael Hanla60a2f22014-05-15 19:50:38 +0000313 LinkedHashMap<String, Object> termGroup = makeTermGroup("and");
314 ArrayList<Object> termGroupOperands = (ArrayList<Object>) termGroup.get("operands");
315 for (int i = elname; i < node.getChildCount(); i++) {
316 Tree attrNode = node.getChild(i);
317 if (attrNode.getChildCount() == 2) {
318 LinkedHashMap<String, Object> term = makeTerm();
319 termGroupOperands.add(term);
320 String layer = attrNode.getChild(0).toStringTree();
321 String[] splitted = layer.split("/");
322 if (splitted.length > 1) {
323 term.put("foundry", splitted[0]);
324 layer = splitted[1];
325 }
326 term.put("layer", translateMorph(layer));
327 term.put("key", attrNode.getChild(1).toStringTree());
328 String match = getNodeCat(attrNode).equals("EQ") ? "eq" : "ne";
329 term.put("match", "match:" + match);
330 } else {
331 LinkedHashMap<String, Object> subTermGroup = makeTermGroup("and");
332 ArrayList<Object> subTermGroupOperands = (ArrayList<Object>) subTermGroup.get("operands");
333 int j;
334 for (j = 1; j < attrNode.getChildCount(); j++) {
335 LinkedHashMap<String, Object> term = makeTerm();
336 String layer = attrNode.getChild(0).toStringTree();
337 String[] splitted = layer.split("/");
338 if (splitted.length > 1) {
339 term.put("foundry", splitted[0]);
340 layer = splitted[1];
341 }
342 term.put("layer", translateMorph(layer));
343 term.put("key", attrNode.getChild(j).toStringTree());
344 String match = getNodeCat(attrNode).equals("EQ") ? "eq" : "ne";
345 term.put("match", "match:" + match);
346 if (node.getChildCount() == elname + 1) {
347 termGroupOperands.add(term);
348 System.err.println("taga");
349 } else {
350 subTermGroupOperands.add(term);
351 System.err.println(layer);
352 }
353 }
354 if (node.getChildCount() > elname + 1) {
355 System.err.println(termGroupOperands);
356 termGroupOperands.add(subTermGroup);
357 System.err.println(termGroupOperands);
358 }
359 }
360 if (getNodeCat(attrNode).equals("NOTEQ")) negate = true;
361 }
Joachim Bingel4edf2982014-06-25 14:23:02 +0000362 span.put("attr", termGroup);
Michael Hanla60a2f22014-05-15 19:50:38 +0000363 }
364 }
Joachim Bingelcd7b7252014-02-13 08:49:14 +0000365
Michael Hanla60a2f22014-05-15 19:50:38 +0000366 //Step II: decide where to put
Joachim Bingel4edf2982014-06-25 14:23:02 +0000367 putIntoSuperObject(span);
Michael Hanla60a2f22014-05-15 19:50:38 +0000368 }
369
370 if (nodeCat.equals("OPLABEL")) {
371 // Step I: create element
372 LinkedHashMap<String, Object> elem = new LinkedHashMap<String, Object>();
373 elem.put("@type", "korap:span");
374 elem.put("key", node.getChild(0).toStringTree().replaceAll("<|>", ""));
375 //Step II: decide where to put
376 putIntoSuperObject(elem);
377 }
378
379 if (nodeCat.equals("OPAND") || nodeCat.equals("OPNOT")) {
380 // Step I: create group
381 LinkedHashMap<String, Object> distgroup = new LinkedHashMap<String, Object>();
382 distgroup.put("@type", "korap:group");
383 distgroup.put("operation", "operation:sequence");
384 ArrayList<Object> distances = new ArrayList<Object>();
385 LinkedHashMap<String, Object> zerodistance = new LinkedHashMap<String, Object>();
386 zerodistance.put("@type", "korap:distance");
387 zerodistance.put("key", "t");
388 zerodistance.put("min", 0);
389 zerodistance.put("max", 0);
390 if (nodeCat.equals("OPNOT")) zerodistance.put("exclude", true);
391 distances.add(zerodistance);
392 distgroup.put("distances", distances);
393 distgroup.put("operands", new ArrayList<Object>());
394 objectStack.push(distgroup);
395 stackedObjects++;
396 // Step II: decide where to put
397 putIntoSuperObject(distgroup, 1);
398 }
399
400 if (nodeCat.equals("OPOR")) {
401 // Step I: create group
402 LinkedHashMap<String, Object> disjunction = new LinkedHashMap<String, Object>();
403 disjunction.put("@type", "korap:group");
404 disjunction.put("operation", "operation:or");
405 disjunction.put("operands", new ArrayList<Object>());
406 objectStack.push(disjunction);
407 stackedObjects++;
408 // Step II: decide where to put
409 putIntoSuperObject(disjunction, 1);
410 }
411
412 if (nodeCat.equals("OPPROX")) {
Michael Hanla60a2f22014-05-15 19:50:38 +0000413 // collect info
414 Tree prox_opts = node.getChild(0);
415 Tree typ = prox_opts.getChild(0);
416 Tree dist_list = prox_opts.getChild(1);
417 // Step I: create group
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000418 LinkedHashMap<String, Object> proxSequence = makeGroup("sequence");
419
Michael Hanla60a2f22014-05-15 19:50:38 +0000420 ArrayList<Object> constraints = new ArrayList<Object>();
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000421 boolean exclusion = typ.getChild(0).toStringTree().equals("EXCL");
Michael Hanla60a2f22014-05-15 19:50:38 +0000422
423 boolean inOrder = false;
424 proxSequence.put("inOrder", inOrder);
425 proxSequence.put("distances", constraints);
426
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000427 ArrayList<Object> operands = (ArrayList<Object>) proxSequence.get("operands");
Michael Hanla60a2f22014-05-15 19:50:38 +0000428
429 // possibly several distance constraints
430 for (int i = 0; i < dist_list.getChildCount(); i++) {
431 String direction = dist_list.getChild(i).getChild(0).getChild(0).toStringTree().toLowerCase();
432 String min = dist_list.getChild(i).getChild(1).getChild(0).toStringTree();
433 String max = dist_list.getChild(i).getChild(1).getChild(1).toStringTree();
434 String meas = dist_list.getChild(i).getChild(2).getChild(0).toStringTree();
435 if (min.equals("VAL0")) {
436 min = "0";
437 }
438 LinkedHashMap<String, Object> distance = new LinkedHashMap<String, Object>();
439 distance.put("@type", "korap:distance");
440 distance.put("key", meas);
441 distance.put("min", Integer.parseInt(min));
442 distance.put("max", Integer.parseInt(max));
443 if (exclusion) {
444 distance.put("exclude", exclusion);
445 }
446 constraints.add(distance);
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000447 if (i==0) {
448 if (direction.equals("plus")) {
449 inOrder = true;
450 } else if (direction.equals("minus")) {
451 inOrder = true;
452 invertedOperandsLists.add(operands);
453 } else if (direction.equals("both")) {
454 inOrder = false;
455 }
Michael Hanla60a2f22014-05-15 19:50:38 +0000456 }
457 }
458 proxSequence.put("inOrder", inOrder);
459 // Step II: decide where to put
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000460 objectStack.push(proxSequence);
461 stackedObjects++;
Michael Hanla60a2f22014-05-15 19:50:38 +0000462 putIntoSuperObject(proxSequence, 1);
463 }
464
465 // inlcusion or overlap
466 if (nodeCat.equals("OPIN") || nodeCat.equals("OPOV")) {
467 // Step I: create group
Joachim Bingel0de1b412014-07-02 14:51:48 +0000468 LinkedHashMap<String, Object> submatchgroup = makeReference(classRefCounter);
Michael Hanla60a2f22014-05-15 19:50:38 +0000469
470 ArrayList<Object> submatchoperands = new ArrayList<Object>();
Joachim Bingel0de1b412014-07-02 14:51:48 +0000471 LinkedHashMap<String, Object> posgroup = makePosition(null);
Michael Hanla60a2f22014-05-15 19:50:38 +0000472 submatchgroup.put("operands", submatchoperands);
473 submatchoperands.add(posgroup);
Michael Hanla60a2f22014-05-15 19:50:38 +0000474 if (nodeCat.equals("OPIN")) {
475 parseOPINOptions(node, posgroup);
476 } else {
477 parseOPOVOptions(node, posgroup);
478 }
Michael Hanla60a2f22014-05-15 19:50:38 +0000479 objectStack.push(posgroup);
480 // mark this an inverted list
Joachim Bingel0de1b412014-07-02 14:51:48 +0000481 invertedOperandsLists.push((ArrayList<Object>) posgroup.get("operands"));
Michael Hanla60a2f22014-05-15 19:50:38 +0000482 stackedObjects++;
483 // Step II: decide where to put
484 putIntoSuperObject(submatchgroup, 1);
485 }
486
487
488 // Wrap the first argument of an #IN operator in a class group
489 if (nodeCat.equals("ARG1") && (openNodeCats.get(1).equals("OPIN") || openNodeCats.get(1).equals("OPOV") || openNodeCats.get(2).equals("OPNHIT"))) {
490 // Step I: create group
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000491 LinkedHashMap<String, Object> classGroup = makeSpanClass(classRefCounter);
Michael Hanla60a2f22014-05-15 19:50:38 +0000492 classRefCounter++;
Michael Hanla60a2f22014-05-15 19:50:38 +0000493 objectStack.push(classGroup);
494 stackedObjects++;
495 // Step II: decide where to put
496 putIntoSuperObject(classGroup, 1);
497 }
498
499 // Wrap the 2nd argument of an #IN operator embedded in NHIT in a class group
500 if (nodeCat.equals("ARG2") && openNodeCats.get(2).equals("OPNHIT")) {
501 // Step I: create group
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000502 LinkedHashMap<String, Object> classGroup = makeSpanClass(classRefCounter);
Michael Hanla60a2f22014-05-15 19:50:38 +0000503 classRefCounter++;
Michael Hanla60a2f22014-05-15 19:50:38 +0000504 objectStack.push(classGroup);
505 stackedObjects++;
506 // Step II: decide where to put
507 putIntoSuperObject(classGroup, 1);
508 }
509
510
511 if (nodeCat.equals("OPNHIT")) {
Michael Hanla60a2f22014-05-15 19:50:38 +0000512 ArrayList<Integer> classRef = new ArrayList<Integer>();
Michael Hanla60a2f22014-05-15 19:50:38 +0000513 classRef.add(classRefCounter);
Michael Hanla60a2f22014-05-15 19:50:38 +0000514 // yes, do this twice!
515 classRef.add(classRefCounter + 1);
Joachim Bingel0de1b412014-07-02 14:51:48 +0000516 LinkedHashMap<String, Object> exclGroup = makeReference(classRef);
Michael Hanla60a2f22014-05-15 19:50:38 +0000517 exclGroup.put("classRefOp", "classRefOp:" + "intersection");
518 ArrayList<Object> operands = new ArrayList<Object>();
519 exclGroup.put("operands", operands);
520 objectStack.push(exclGroup);
521 stackedObjects++;
522 putIntoSuperObject(exclGroup, 1);
523 }
524
525 if (nodeCat.equals("OPEND") || nodeCat.equals("OPBEG")) {
526 // Step I: create group
527 LinkedHashMap<String, Object> beggroup = new LinkedHashMap<String, Object>();
Joachim Bingel0de1b412014-07-02 14:51:48 +0000528 beggroup.put("@type", "korap:reference");
529 beggroup.put("operation", "operation:focus");
Michael Hanla60a2f22014-05-15 19:50:38 +0000530 ArrayList<Integer> spanRef = new ArrayList<Integer>();
531 if (nodeCat.equals("OPBEG")) {
532 spanRef.add(0);
533 spanRef.add(1);
534 } else {
535 spanRef.add(-1);
536 spanRef.add(1);
537 }
538 beggroup.put("spanRef", spanRef);
539 beggroup.put("operands", new ArrayList<Object>());
540 objectStack.push(beggroup);
541 stackedObjects++;
542
543 // Step II: decide where to put
544 putIntoSuperObject(beggroup, 1);
545 }
546
547 if (nodeCat.equals("OPBED")) {
548 // Step I: create group
549 int optsChild = node.getChildCount() - 1;
550 Tree conditions = node.getChild(optsChild).getChild(0);
551
552 // create a containing group expressing the submatch constraint on the first argument
Michael Hanla60a2f22014-05-15 19:50:38 +0000553 ArrayList<Integer> spanRef = new ArrayList<Integer>();
554 spanRef.add(1);
Joachim Bingel0de1b412014-07-02 14:51:48 +0000555 LinkedHashMap<String, Object> submatchgroup = makeReference(spanRef);
Michael Hanla60a2f22014-05-15 19:50:38 +0000556 ArrayList<Object> submatchoperands = new ArrayList<Object>();
557 submatchgroup.put("operands", submatchoperands);
Joachim Bingel0de1b412014-07-02 14:51:48 +0000558 putIntoSuperObject(submatchgroup);
Michael Hanla60a2f22014-05-15 19:50:38 +0000559
560 // Distinguish two cases. Normal case: query has just one condition, like #BED(X, sa) ...
561 if (conditions.getChildCount() == 1) {
562 CosmasCondition c = new CosmasCondition(conditions.getChild(0));
563
564 // create the group expressing the position constraint
Joachim Bingel0de1b412014-07-02 14:51:48 +0000565 LinkedHashMap<String, Object> posgroup = makePosition(c.position);
566 ArrayList<Object> operands = (ArrayList<Object>) posgroup.get("operands");
Michael Hanla60a2f22014-05-15 19:50:38 +0000567 if (c.negated) posgroup.put("exclude", true);
Michael Hanla60a2f22014-05-15 19:50:38 +0000568
569 // create span representing the element expressed in the condition
570 LinkedHashMap<String, Object> bedElem = new LinkedHashMap<String, Object>();
571 bedElem.put("@type", "korap:span");
572 bedElem.put("key", c.elem);
573
574 // create a class group containing the argument, in order to submatch the arg.
Joachim Bingel0de1b412014-07-02 14:51:48 +0000575 LinkedHashMap<String, Object> classGroup = makeSpanClass(classRefCounter);
Michael Hanla60a2f22014-05-15 19:50:38 +0000576 classRefCounter++;
577 classGroup.put("operands", new ArrayList<Object>());
578 objectStack.push(classGroup);
579 stackedObjects++;
580 operands.add(bedElem);
581 operands.add(classGroup);
582 // Step II: decide where to put
583 submatchoperands.add(posgroup);
584
585 // ... or the query has several conditions specified, like #BED(XY, sa,-pa). In that case,
586 // create an 'and' group and embed the position groups in its operands
587 } else {
588 // node has several conditions (like 'sa, -pa')
589 // -> create zero-distance sequence group and embed all position groups there
590 LinkedHashMap<String, Object> conjunct = new LinkedHashMap<String, Object>();
591 conjunct.put("@type", "korap:group");
592 conjunct.put("operation", "operation:" + "sequence");
593 ArrayList<Object> distances = new ArrayList<Object>();
594 conjunct.put("distances", distances);
595 LinkedHashMap<String, Object> zerodistance = new LinkedHashMap<String, Object>();
596 zerodistance.put("@type", "korap:distance");
597 zerodistance.put("key", "w");
598 zerodistance.put("min", 0);
599 zerodistance.put("max", 0);
600 distances.add(zerodistance);
601 ArrayList<Object> operands = new ArrayList<Object>();
602 conjunct.put("operands", operands);
603 ArrayList<ArrayList<Object>> distributedOperands = new ArrayList<ArrayList<Object>>();
604
605 for (int i = 0; i < conditions.getChildCount(); i++) {
606 // for each condition, create a position group containing a class group. problem: how to get argument into every operands list?
607 // -> use distributedOperandsLists
608 LinkedHashMap<String, Object> posGroup = new LinkedHashMap<String, Object>();
609 operands.add(posGroup);
610
611 // make position group
612 CosmasCondition c = new CosmasCondition(conditions.getChild(i));
613 posGroup.put("@type", "korap:group");
614 posGroup.put("operation", "operation:" + "position");
615 posGroup.put("frame", "frame:" + c.position);
616 if (c.negated) posGroup.put("exclude", "true");
617 ArrayList<Object> posOperands = new ArrayList<Object>();
618
619 // make class group
620 LinkedHashMap<String, Object> classGroup = new LinkedHashMap<String, Object>();
621 classGroup.put("@type", "korap:group");
622 classGroup.put("operation", "operation:class");
623 classGroup.put("class", classRefCounter);
624 classRefCounter++;
625 ArrayList<Object> classOperands = new ArrayList<Object>();
626 classGroup.put("operands", classOperands);
627 distributedOperands.add(classOperands); // subtree to be put into every class group -> distribute
628
629 // put the span and the class group into the position group
630 posGroup.put("operands", posOperands);
631 LinkedHashMap<String, Object> span = new LinkedHashMap<String, Object>();
632 posOperands.add(span);
633 posOperands.add(classGroup);
634 span.put("@type", "korap:span");
635 span.put("key", c.elem);
636 }
637 submatchoperands.add(conjunct);
638 distributedOperandsLists.push(distributedOperands);
639 }
640
641 }
642 objectsToPop.push(stackedObjects);
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000643
Joachim Bingel87480d02014-01-17 14:07:46 +0000644 /*
645 ****************************************************************
646 ****************************************************************
647 * recursion until 'request' node (root of tree) is processed *
648 ****************************************************************
649 ****************************************************************
650 */
Michael Hanla60a2f22014-05-15 19:50:38 +0000651 for (int i = 0; i < node.getChildCount(); i++) {
652 Tree child = node.getChild(i);
653 processNode(child);
654 }
Joachim Bingel5c93f902013-11-19 14:49:04 +0000655
Joachim Bingel87480d02014-01-17 14:07:46 +0000656 /*
657 **************************************************************
658 * Stuff that happens after processing the children of a node *
659 **************************************************************
660 */
Joachim Bingel5c93f902013-11-19 14:49:04 +0000661
Michael Hanla60a2f22014-05-15 19:50:38 +0000662 // remove sequence from object stack if node is implicitly sequenced
663 if (sequencedNodes.size() > 0) {
664 if (node == sequencedNodes.getFirst()) {
665 objectStack.pop();
666 sequencedNodes.pop();
667 }
668 }
Joachim Bingel5c93f902013-11-19 14:49:04 +0000669
Michael Hanla60a2f22014-05-15 19:50:38 +0000670 for (int i = 0; i < objectsToPop.get(0); i++) {
671 objectStack.pop();
672 }
673 objectsToPop.pop();
Joachim Bingelffd65e32014-01-22 14:22:57 +0000674
Michael Hanla60a2f22014-05-15 19:50:38 +0000675 if (nodeCat.equals("ARG2") && openNodeCats.get(1).equals("OPNOT")) {
676 negate = false;
677 }
678 openNodeCats.pop();
679 }
Joachim Bingel402c6e12014-05-08 17:09:06 +0000680
Michael Hanla60a2f22014-05-15 19:50:38 +0000681 private Object translateMorph(String layer) {
682 // todo: not very nicely solved! Does this require extension somehow? if not, why not use simple string comparison?!
683// LinkedHashMap<String, String> map = new LinkedHashMap<String, String>();
684// map.put("ANA", "pos");
685// if (map.containsKey(layer))
686// return map.get(layer);
687// else
688// return layer;
689 if (layer.equals("ANA"))
Joachim Bingel4edf2982014-06-25 14:23:02 +0000690 return ResourceMapper.descriptor2policy("ANA");
Michael Hanla60a2f22014-05-15 19:50:38 +0000691 else
692 return layer;
693
694 }
695
696 private void parseOPINOptions(Tree node, LinkedHashMap<String, Object> posgroup) {
697 Tree posnode = getFirstChildWithCat(node, "POS");
698 Tree rangenode = getFirstChildWithCat(node, "RANGE");
699 Tree exclnode = getFirstChildWithCat(node, "EXCL");
700 Tree groupnode = getFirstChildWithCat(node, "GROUP");
701 boolean negatePosition = false;
702
703 String position = "";
704 if (posnode != null) {
705 String value = posnode.getChild(0).toStringTree();
706 position = translateTextAreaArgument(value, "in");
707 if (value.equals("N")) {
708 negatePosition = !negatePosition;
709 }
710 } else {
711 position = "contains";
712 }
713 posgroup.put("frame", "frame:" + position);
714 position = openNodeCats.get(1).equals("OPIN") ? "contains" : "full";
715
716 if (rangenode != null) {
717 String range = rangenode.getChild(0).toStringTree();
718 posgroup.put("range", range.toLowerCase());
719 }
720
721 if (exclnode != null) {
722 if (exclnode.getChild(0).toStringTree().equals("YES")) {
723 negatePosition = !negatePosition;
724 }
725 }
726 System.err.println(negatePosition);
727 if (negatePosition) {
728 posgroup.put("exclude", "true");
Joachim Bingelba9a0ab2014-01-29 10:12:25 +0000729// negate = !negate;
Michael Hanla60a2f22014-05-15 19:50:38 +0000730 }
Joachim Bingel87480d02014-01-17 14:07:46 +0000731
Michael Hanla60a2f22014-05-15 19:50:38 +0000732 if (groupnode != null) {
733 String grouping = groupnode.getChild(0).toStringTree().equals("max") ? "true" : "false";
734 posgroup.put("grouping", grouping);
735 }
736 }
Joachim Bingelb5f7bf02014-01-07 16:36:54 +0000737
Michael Hanla60a2f22014-05-15 19:50:38 +0000738 private void parseOPOVOptions(Tree node, LinkedHashMap<String, Object> posgroup) {
739 Tree posnode = getFirstChildWithCat(node, "POS");
740 Tree exclnode = getFirstChildWithCat(node, "EXCL");
741 Tree groupnode = getFirstChildWithCat(node, "GROUP");
742
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000743
744
745 String position = "overlaps";
Michael Hanla60a2f22014-05-15 19:50:38 +0000746 if (posnode != null) {
747 String value = posnode.getChild(0).toStringTree();
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000748 position = translateTextAreaArgument(value, "ov");
Michael Hanla60a2f22014-05-15 19:50:38 +0000749 }
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000750 posgroup.put("frame", "frame:" + position);
Michael Hanla60a2f22014-05-15 19:50:38 +0000751
752 if (exclnode != null) {
753 if (exclnode.getChild(0).toStringTree().equals("YES")) {
754 posgroup.put("match", "match:" + "ne");
755 }
756 }
757 if (groupnode != null) {
758 String grouping = groupnode.getChild(0).toStringTree().equals("@max") ? "true" : "false";
759 posgroup.put("grouping", grouping);
760 }
761
762 }
763
764 /**
765 * Translates the text area specifications (position option arguments) to terms used in serialisation.
766 * For the allowed argument types and their values for OPIN and OPOV, see
767 * http://www.ids-mannheim.de/cosmas2/win-app/hilfe/suchanfrage/eingabe-grafisch/syntax/ARGUMENT_I.html or
768 * http://www.ids-mannheim.de/cosmas2/win-app/hilfe/suchanfrage/eingabe-grafisch/syntax/ARGUMENT_O.html, respectively.
769 *
770 * @param argument
771 * @param mode
772 * @return
773 */
774 private String translateTextAreaArgument(String argument, String mode) {
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000775 String position = "overlaps";
776 // POSTYP : 'L'|'l'|'R'|'r'|'F'|'f'|'FE'|'fe'|'FI'|'fi'|'N'|'n'|'X'|'x' ;
777 argument = argument.toUpperCase();
Michael Hanla60a2f22014-05-15 19:50:38 +0000778 switch (argument) {
779 case "L":
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000780 position = mode.equals("in") ? "startswith" : "overlapsLeft";
Michael Hanla60a2f22014-05-15 19:50:38 +0000781 break;
782 case "R":
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000783 position = mode.equals("in") ? "endswith" : "overlapsRight";
Michael Hanla60a2f22014-05-15 19:50:38 +0000784 break;
785 case "F":
786 position = "leftrightmatch";
787 break;
788 case "FE":
789 position = "matches";
790 break;
791 case "FI":
792 position = "leftrightmatch-noident";
793 break;
794 case "N": // for OPIN only - exclusion constraint formulated in parseOPINOptions
795 position = "leftrightmatch";
796 break;
797 case "X": // for OPOV only
798 position = "residual";
799 break;
800 }
801 return position;
802 }
803
804 @SuppressWarnings("unchecked")
805 private void putIntoSuperObject(LinkedHashMap<String, Object> object, int objStackPosition) {
806 if (distributedOperandsLists.size() > 0) {
807 ArrayList<ArrayList<Object>> distributedOperands = distributedOperandsLists.pop();
808 for (ArrayList<Object> operands : distributedOperands) {
809 operands.add(object);
810 }
811 } else if (objectStack.size() > objStackPosition) {
812 ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(objStackPosition).get("operands");
813 if (!invertedOperandsLists.contains(topObjectOperands)) {
814 topObjectOperands.add(object);
815 } else {
816 topObjectOperands.add(0, object);
817 }
818
819 } else {
820 requestMap.put("query", object);
821 }
822 }
823
824 private void putIntoSuperObject(LinkedHashMap<String, Object> object) {
825 putIntoSuperObject(object, 0);
826 }
827
828
829 private Tree parseCosmasQuery(String q) throws RecognitionException {
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000830 q = rewritePositionQuery(q);
831
Michael Hanla60a2f22014-05-15 19:50:38 +0000832 Tree tree = null;
833 ANTLRStringStream ss = new ANTLRStringStream(q);
834 c2psLexer lex = new c2psLexer(ss);
835 org.antlr.runtime.CommonTokenStream tokens = new org.antlr.runtime.CommonTokenStream(lex); //v3
836 parser = new c2psParser(tokens);
837 c2psParser.c2ps_query_return c2Return = ((c2psParser) parser).c2ps_query(); // statt t().
838 // AST Tree anzeigen:
839 tree = (Tree) c2Return.getTree();
840
841 String treestring = tree.toStringTree();
842 if (treestring.contains("<mismatched token") || treestring.contains("<error") || treestring.contains("<unexpected")) {
843 log.error("Invalid tree. Could not parse Cosmas query. Make sure it is well-formed.");
844 throw new RecognitionException();
845 }
846
847 return tree;
848 }
849
850 /**
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000851 * Normalises position operators to equivalents using #BED
852 */
853 private String rewritePositionQuery(String q) {
854 Pattern p = Pattern.compile("(\\w+):((\\+|-)?(sa|se|pa|pe|ta|te),?)+");
855 Matcher m = p.matcher(q);
856
857 String rewrittenQuery = q;
858 while (m.find()) {
859 String match = m.group();
860 String conditionsString = match.split(":")[1];
861 Pattern conditionPattern = Pattern.compile("(\\+|-)?(sa|se|pa|pe|ta|te)");
862 Matcher conditionMatcher = conditionPattern.matcher(conditionsString);
863 String replacement = "#BED(" + m.group(1) + " , ";
864 while (conditionMatcher.find()) {
865 replacement = replacement + conditionMatcher.group() + ",";
866 }
867 replacement = replacement.substring(0, replacement.length() - 1) + ")"; //remove trailing comma and close parenthesis
868 rewrittenQuery = rewrittenQuery.replace(match, replacement);
869 }
870 return rewrittenQuery;
871 }
872
873 /**
Michael Hanla60a2f22014-05-15 19:50:38 +0000874 * @param args
875 */
876 public static void main(String[] args) {
Joachim Bingel5c93f902013-11-19 14:49:04 +0000877 /*
Joachim Bingel87480d02014-01-17 14:07:46 +0000878 * For debugging
Joachim Bingel5c93f902013-11-19 14:49:04 +0000879 */
Michael Hanla60a2f22014-05-15 19:50:38 +0000880 String[] queries = new String[]{
Joachim Bingel5c93f902013-11-19 14:49:04 +0000881 /* COSMAS 2 */
Joachim Bingel24aa5b12014-07-04 13:35:51 +0000882 "wegen #OV(x) <s>",
883 "wegen #OV(L) <s>",
884 "wegen #OV <s>",
885 "Der:pa Mann:se ",
886 "Der /+w1:1 Mann"
887
Michael Hanla60a2f22014-05-15 19:50:38 +0000888 };
Joachim Bingel5dd91682014-02-14 13:10:29 +0000889// CosmasTree.debug=true;
Michael Hanla60a2f22014-05-15 19:50:38 +0000890 for (String q : queries) {
891 try {
892 System.out.println(q);
893 try {
894 CosmasTree act = new CosmasTree(q);
895 System.out.println(act.parseCosmasQuery(q).toStringTree());
896 } catch (QueryException e) {
897 e.printStackTrace();
898 } catch (RecognitionException e) {
899 e.printStackTrace();
900 }
901 System.out.println();
902
903 } catch (NullPointerException npe) {
904 npe.printStackTrace();
905 System.out.println("null\n");
906 }
907 }
908 }
Joachim Bingel5c93f902013-11-19 14:49:04 +0000909}