| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.serialize; |
| 2 | |
| Joachim Bingel | 6003b85 | 2014-12-18 14:20:55 +0000 | [diff] [blame] | 3 | import de.ids_mannheim.korap.query.parse.poliqarpplus.PoliqarpPlusLexer; |
| 4 | import de.ids_mannheim.korap.query.parse.poliqarpplus.PoliqarpPlusParser; |
| Joachim Bingel | 3fa584b | 2014-12-17 13:35:43 +0000 | [diff] [blame] | 5 | import de.ids_mannheim.korap.query.serialize.util.Antlr4DescriptiveErrorListener; |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 6 | import de.ids_mannheim.korap.query.serialize.util.KoralObjectGenerator; |
| Joachim Bingel | 3fa584b | 2014-12-17 13:35:43 +0000 | [diff] [blame] | 7 | import de.ids_mannheim.korap.query.serialize.util.StatusCodes; |
| Joachim Bingel | 3fa584b | 2014-12-17 13:35:43 +0000 | [diff] [blame] | 8 | |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 9 | import org.antlr.v4.runtime.*; |
| 10 | import org.antlr.v4.runtime.tree.ParseTree; |
| Michael Hanl | 27e5058 | 2013-12-07 18:04:13 +0000 | [diff] [blame] | 11 | import org.slf4j.Logger; |
| 12 | import org.slf4j.LoggerFactory; |
| 13 | |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 14 | import java.lang.reflect.Method; |
| 15 | import java.util.*; |
| 16 | |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 17 | /** |
| 18 | * Map representation of Poliqarp syntax tree as returned by ANTLR |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 19 | * |
| Joachim Bingel | 3fa584b | 2014-12-17 13:35:43 +0000 | [diff] [blame] | 20 | * @author Joachim Bingel (bingel@ids-mannheim.de) |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 21 | */ |
| Joachim Bingel | 1faf8a5 | 2015-01-09 13:17:34 +0000 | [diff] [blame] | 22 | public class PoliqarpPlusQueryProcessor extends Antlr4AbstractQueryProcessor { |
| Michael Hanl | 27e5058 | 2013-12-07 18:04:13 +0000 | [diff] [blame] | 23 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 24 | private static Logger log = LoggerFactory |
| 25 | .getLogger(PoliqarpPlusQueryProcessor.class); |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 26 | private int classCounter = 1; |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 27 | |
/**
 * Creates a processor for the given PoliqarpPlus query string and
 * processes it right away. The instance registers itself with the
 * {@link KoralObjectGenerator} before processing, and the
 * serialised query found in {@link #requestMap} under the key
 * "query" is logged afterwards.
 * <p>
 * Parse failures do not raise an exception here; they are
 * recorded by {@link #process(String)} as an error message.
 *
 * @param query
 *            The PoliqarpPlus query string to be parsed and
 *            serialised
 */
public PoliqarpPlusQueryProcessor (String query) {
    KoralObjectGenerator.setQueryProcessor(this);
    process(query);
    // parameterised logging: the message is only assembled if the
    // INFO level is enabled
    log.info(">>> {} <<<", requestMap.get("query"));
}
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 46 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 47 | @Override |
| 48 | public void process(String query) { |
| 49 | ParseTree tree; |
| 50 | tree = parsePoliqarpQuery(query); |
| 51 | super.parser = this.parser; |
| 52 | log.info("Processing PoliqarpPlus query: " + query); |
| 53 | if (tree != null) { |
| 54 | log.debug("ANTLR parse tree: " + tree.toStringTree(parser)); |
| 55 | processNode(tree); |
| 56 | } |
| 57 | else { |
| 58 | addError(StatusCodes.MALFORMED_QUERY, "Could not parse query >>> " |
| 59 | + query + " <<<."); |
| 60 | } |
| 61 | } |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 62 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 63 | /** |
| 64 | * Recursively calls itself with the children of the currently |
| 65 | * active node, traversing the tree nodes in a top-down, |
| 66 | * depth-first fashion. A list is maintained that contains all |
| 67 | * visited nodes which have been directly addressed by their |
| 68 | * (grand-/grand-grand-/...) parent nodes, such that some |
| 69 | * processing time is saved, as these node will not be processed. |
| 70 | * This method is effectively a list of if-statements that are |
| 71 | * responsible for treating the different node types correctly and |
| 72 | * filling the respective maps/lists. |
| 73 | * |
| 74 | * @param node |
| 75 | * The currently processed node. The process(String |
| 76 | * query) method calls this method with the root. |
| 77 | * @throws QueryException |
| 78 | */ |
| 79 | private void processNode(ParseTree node) { |
| 80 | // Top-down processing |
| 81 | if (visited.contains(node)) |
| 82 | return; |
| 83 | else |
| 84 | visited.add(node); |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 85 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 86 | String nodeCat = getNodeCat(node); |
| 87 | openNodeCats.push(nodeCat); |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 88 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 89 | stackedObjects = 0; |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 90 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 91 | if (verbose) { |
| 92 | System.err.println(" " + objectStack); |
| 93 | System.out.println(openNodeCats); |
| 94 | } |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 95 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 96 | /* |
| 97 | * *************************************************************** |
| 98 | * *************************************************************** |
| 99 | * *********** Processing individual node categories ************* |
| 100 | * *************************************************************** |
| 101 | * *************************************************************** |
| 102 | */ |
| Joachim Bingel | 832800e | 2014-10-17 14:46:39 +0000 | [diff] [blame] | 103 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 104 | if (nodeCat.equals("segment")) { |
| 105 | processSegment(node); |
| 106 | } |
| Joachim Bingel | 832800e | 2014-10-17 14:46:39 +0000 | [diff] [blame] | 107 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 108 | if (nodeCat.equals("sequence")) { |
| 109 | processSequence(node); |
| 110 | } |
| Joachim Bingel | 832800e | 2014-10-17 14:46:39 +0000 | [diff] [blame] | 111 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 112 | if (nodeCat.equals("emptyTokenSequence")) { |
| 113 | processEmptyTokenSequence(node); |
| 114 | } |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 115 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 116 | if (nodeCat.equals("emptyTokenSequenceClass")) { |
| 117 | processEmptyTokenSequenceClass(node); |
| 118 | } |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 119 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 120 | if (nodeCat.equals("token")) { |
| 121 | processToken(node); |
| 122 | } |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 123 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 124 | if (nodeCat.equals("alignment")) { |
| 125 | processAlignment(node); |
| 126 | } |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 127 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 128 | if (nodeCat.equals("span")) { |
| 129 | processSpan(node); |
| 130 | } |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 131 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 132 | if (nodeCat.equals("disjunction")) { |
| 133 | processDisjunction(node); |
| 134 | } |
| Joachim Bingel | 832800e | 2014-10-17 14:46:39 +0000 | [diff] [blame] | 135 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 136 | if (nodeCat.equals("position")) { |
| 137 | processPosition(node); |
| 138 | } |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 139 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 140 | if (nodeCat.equals("relation")) { |
| 141 | processRelation(node); |
| 142 | } |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 143 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 144 | if (nodeCat.equals("spanclass")) { |
| 145 | processSpanclass(node); |
| 146 | } |
| Joachim Bingel | 832800e | 2014-10-17 14:46:39 +0000 | [diff] [blame] | 147 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 148 | if (nodeCat.equals("matching")) { |
| 149 | processMatching(node); |
| 150 | } |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 151 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 152 | if (nodeCat.equals("submatch")) { |
| 153 | processSubmatch(node); |
| 154 | } |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 155 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 156 | if (nodeCat.equals("meta")) { |
| 157 | processMeta(node); |
| 158 | } |
| Joachim Bingel | 1f8f378 | 2015-01-19 17:58:41 +0000 | [diff] [blame] | 159 | |
| 160 | // if (nodeCat.equals("term") || nodeCat.equals("termGroup")) { |
| 161 | // if (inMeta ) putIntoSuperObject(parseTermOrTermGroup(node, false)); |
| 162 | // } |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 163 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 164 | if (nodeCat.equals("within") |
| 165 | && !getNodeCat(node.getParent()).equals("position")) { |
| 166 | processWithin(node); |
| 167 | } |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 168 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 169 | objectsToPop.push(stackedObjects); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 170 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 171 | /* |
| 172 | * *************************************************************** |
| 173 | * *************************************************************** |
| 174 | * recursion until 'request' node (root of tree) is processed |
| 175 | * * |
| 176 | * *********************************************************** |
| 177 | * **** |
| 178 | * ******************************************************** |
| 179 | * ******* |
| 180 | */ |
| 181 | for (int i = 0; i < node.getChildCount(); i++) { |
| 182 | ParseTree child = node.getChild(i); |
| 183 | processNode(child); |
| 184 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 185 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 186 | // Stuff that happens when leaving a node (taking items off |
| 187 | // the stacks) |
| 188 | for (int i = 0; i < objectsToPop.get(0); i++) { |
| 189 | objectStack.pop(); |
| 190 | } |
| 191 | objectsToPop.pop(); |
| 192 | openNodeCats.pop(); |
| 193 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 194 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 195 | private void processSegment(ParseTree node) { |
| 196 | // Cover possible quantification (i.e. repetition) of segment |
| 197 | ParseTree quantification = getFirstChildWithCat(node, "repetition"); |
| 198 | if (quantification != null) { |
| 199 | LinkedHashMap<String, Object> quantGroup = |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 200 | KoralObjectGenerator.makeGroup("repetition"); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 201 | Integer[] minmax = parseRepetition(quantification); |
| 202 | quantGroup.put("boundary", |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 203 | KoralObjectGenerator.makeBoundary(minmax[0], minmax[1])); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 204 | putIntoSuperObject(quantGroup); |
| 205 | objectStack.push(quantGroup); |
| 206 | stackedObjects++; |
| 207 | } |
| 208 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 209 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 210 | private void processSequence(ParseTree node) { |
| Joachim Bingel | 07ef042 | 2015-01-30 16:05:38 +0000 | [diff] [blame] | 211 | // skipe in case of emptyTokenSequence or emptyTokenSequenceClass |
| 212 | if (node.getChildCount() == 1 && |
| 213 | getNodeCat(node.getChild(0)).startsWith("emptyTokenSequence")) { |
| 214 | return; |
| 215 | } |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 216 | LinkedHashMap<String, Object> sequence = |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 217 | KoralObjectGenerator.makeGroup("sequence"); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 218 | ParseTree distanceNode = getFirstChildWithCat(node, "distance"); |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 219 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 220 | if (distanceNode != null) { |
| 221 | Integer[] minmax = parseDistance(distanceNode); |
| 222 | LinkedHashMap<String, Object> distance = |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 223 | KoralObjectGenerator.makeDistance("w", minmax[0], minmax[1]); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 224 | sequence.put("inOrder", true); |
| 225 | ArrayList<Object> distances = new ArrayList<Object>(); |
| 226 | distances.add(distance); |
| 227 | sequence.put("distances", distances); |
| 228 | // don't re-visit the emptyTokenSequence node |
| 229 | visited.add(distanceNode.getChild(0)); |
| 230 | } |
| 231 | putIntoSuperObject(sequence); |
| 232 | objectStack.push(sequence); |
| 233 | stackedObjects++; |
| 234 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 235 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 236 | @SuppressWarnings("unchecked") |
| 237 | /** |
| 238 | * empty tokens at beginning/end of sequence |
| 239 | * @param node |
| 240 | */ |
| 241 | private void processEmptyTokenSequence(ParseTree node) { |
| 242 | Integer[] minmax = parseEmptySegments(node); |
| 243 | // object will be either a repetition group or a single empty |
| 244 | // token |
| 245 | LinkedHashMap<String, Object> object; |
| 246 | LinkedHashMap<String, Object> emptyToken = |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 247 | KoralObjectGenerator.makeToken(); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 248 | if (minmax[0] != 1 || minmax[1] == null || minmax[1] != 1) { |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 249 | object = KoralObjectGenerator.makeRepetition(minmax[0], minmax[1]); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 250 | ((ArrayList<Object>) object.get("operands")).add(emptyToken); |
| 251 | } |
| 252 | else { |
| 253 | object = emptyToken; |
| 254 | } |
| 255 | putIntoSuperObject(object); |
| 256 | objectStack.push(object); |
| 257 | stackedObjects++; |
| 258 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 259 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 260 | private void processEmptyTokenSequenceClass(ParseTree node) { |
| 261 | int classId = 1; |
| 262 | if (hasChild(node, "spanclass_id")) { |
| 263 | classId = Integer.parseInt(node.getChild(1).getChild(0) |
| 264 | .toStringTree(parser)); |
| 265 | } |
| 266 | LinkedHashMap<String, Object> classGroup = |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 267 | KoralObjectGenerator.makeSpanClass(classId); |
| 268 | addHighlightClass(classId); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 269 | putIntoSuperObject(classGroup); |
| 270 | objectStack.push(classGroup); |
| 271 | stackedObjects++; |
| 272 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 273 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 274 | private void processToken(ParseTree node) { |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 275 | LinkedHashMap<String, Object> token = KoralObjectGenerator.makeToken(); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 276 | // handle negation |
| 277 | List<ParseTree> negations = getChildrenWithCat(node, "!"); |
| 278 | boolean negated = false; |
| 279 | boolean isRegex = false; |
| 280 | if (negations.size() % 2 == 1) |
| 281 | negated = true; |
| 282 | if (getNodeCat(node.getChild(0)).equals("key")) { |
| 283 | // no 'term' child, but direct key specification: process here |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 284 | LinkedHashMap<String, Object> term = KoralObjectGenerator.makeTerm(); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 285 | String key = node.getChild(0).getText(); |
| 286 | if (getNodeCat(node.getChild(0).getChild(0)).equals("regex")) { |
| 287 | isRegex = true; |
| 288 | term.put("type", "type:regex"); |
| 289 | key = key.substring(1, key.length() - 1); |
| 290 | } |
| 291 | term.put("layer", "orth"); |
| 292 | term.put("key", key); |
| 293 | String matches = negated ? "ne" : "eq"; |
| 294 | term.put("match", "match:" + matches); |
| 295 | ParseTree flagNode = getFirstChildWithCat(node, "flag"); |
| 296 | if (flagNode != null) { |
| 297 | // substring removes leading slash '/' |
| 298 | String flag = getNodeCat(flagNode.getChild(0)).substring(1); |
| 299 | if (flag.contains("i")) |
| 300 | term.put("caseInsensitive", true); |
| 301 | else if (flag.contains("I")) |
| 302 | term.put("caseInsensitive", false); |
| 303 | if (flag.contains("x")) { |
| 304 | term.put("type", "type:regex"); |
| 305 | if (!isRegex) { |
| 306 | key = QueryUtils.escapeRegexSpecialChars(key); |
| 307 | } |
| 308 | // overwrite key |
| 309 | term.put("key", ".*?" + key + ".*?"); |
| 310 | } |
| 311 | } |
| 312 | token.put("wrap", term); |
| 313 | } |
| 314 | else { |
| 315 | // child is 'term' or 'termGroup' -> process in extra method |
| 316 | LinkedHashMap<String, Object> termOrTermGroup = |
| 317 | parseTermOrTermGroup(node.getChild(1), negated); |
| 318 | token.put("wrap", termOrTermGroup); |
| 319 | } |
| 320 | putIntoSuperObject(token); |
| 321 | visited.add(node.getChild(0)); |
| 322 | visited.add(node.getChild(2)); |
| 323 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 324 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 325 | @SuppressWarnings("unchecked") |
| 326 | private void processAlignment(ParseTree node) { |
| 327 | LinkedHashMap<String, Object> alignClass = |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 328 | KoralObjectGenerator.makeSpanClass(classCounter); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 329 | LinkedHashMap<String, Object> metaMap = |
| 330 | (LinkedHashMap<String, Object>) requestMap.get("meta"); |
| 331 | if (metaMap.containsKey("alignment")) { |
| 332 | ArrayList<Integer> alignedClasses = new ArrayList<Integer>(); |
| 333 | try { |
| 334 | alignedClasses = (ArrayList<Integer>) metaMap.get("alignment"); |
| 335 | } |
| 336 | catch (ClassCastException cce) { |
| 337 | alignedClasses.add((Integer) metaMap.get("alignment")); |
| 338 | } |
| 339 | alignedClasses.add(classCounter); |
| 340 | metaMap.put("alignment", alignedClasses); |
| 341 | } |
| 342 | else { |
| 343 | metaMap.put("alignment", classCounter); |
| 344 | } |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 345 | classCounter++; |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 346 | putIntoSuperObject(alignClass); |
| 347 | objectStack.push(alignClass); |
| 348 | stackedObjects++; |
| 349 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 350 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 351 | private void processSpan(ParseTree node) { |
| 352 | List<ParseTree> negations = getChildrenWithCat(node, "!"); |
| 353 | boolean negated = false; |
| 354 | if (negations.size() % 2 == 1) |
| 355 | negated = true; |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 356 | LinkedHashMap<String, Object> span = KoralObjectGenerator.makeSpan(); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 357 | ParseTree keyNode = getFirstChildWithCat(node, "key"); |
| 358 | ParseTree layerNode = getFirstChildWithCat(node, "layer"); |
| 359 | ParseTree foundryNode = getFirstChildWithCat(node, "foundry"); |
| 360 | ParseTree termOpNode = getFirstChildWithCat(node, "termOp"); |
| 361 | ParseTree termNode = getFirstChildWithCat(node, "term"); |
| 362 | ParseTree termGroupNode = getFirstChildWithCat(node, "termGroup"); |
| 363 | if (foundryNode != null) |
| 364 | span.put("foundry", foundryNode.getText()); |
| 365 | if (layerNode != null) { |
| 366 | String layer = layerNode.getText(); |
| 367 | if (layer.equals("base")) |
| 368 | layer = "lemma"; |
| 369 | span.put("layer", layer); |
| 370 | } |
| 371 | span.put("key", keyNode.getText()); |
| 372 | if (termOpNode != null) { |
| 373 | String termOp = termOpNode.getText(); |
| 374 | if (termOp.equals("==")) |
| 375 | span.put("match", "match:eq"); |
| 376 | else if (termOp.equals("!=")) |
| 377 | span.put("match", "match:ne"); |
| 378 | } |
| 379 | if (termNode != null) { |
| 380 | LinkedHashMap<String, Object> termOrTermGroup = |
| 381 | parseTermOrTermGroup(termNode, negated, "span"); |
| 382 | span.put("attr", termOrTermGroup); |
| 383 | } |
| 384 | if (termGroupNode != null) { |
| 385 | LinkedHashMap<String, Object> termOrTermGroup = |
| 386 | parseTermOrTermGroup(termGroupNode, negated, "span"); |
| 387 | span.put("attr", termOrTermGroup); |
| 388 | } |
| 389 | putIntoSuperObject(span); |
| 390 | objectStack.push(span); |
| 391 | stackedObjects++; |
| 392 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 393 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 394 | private void processDisjunction(ParseTree node) { |
| 395 | LinkedHashMap<String, Object> disjunction = |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 396 | KoralObjectGenerator.makeGroup("or"); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 397 | putIntoSuperObject(disjunction); |
| 398 | objectStack.push(disjunction); |
| 399 | stackedObjects++; |
| 400 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 401 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 402 | private void processPosition(ParseTree node) { |
| 403 | LinkedHashMap<String, Object> position = parseFrame(node.getChild(0)); |
| 404 | putIntoSuperObject(position); |
| 405 | objectStack.push(position); |
| 406 | stackedObjects++; |
| 407 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 408 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 409 | private void processRelation(ParseTree node) { |
| 410 | LinkedHashMap<String, Object> relationGroup = |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 411 | KoralObjectGenerator.makeGroup("relation"); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 412 | LinkedHashMap<String, Object> relation = |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 413 | KoralObjectGenerator.makeRelation(); |
| Joachim Bingel | a6bf8d8 | 2015-01-26 14:43:36 +0000 | [diff] [blame] | 414 | LinkedHashMap<String, Object> term = |
| 415 | KoralObjectGenerator.makeTerm(); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 416 | relationGroup.put("relation", relation); |
| Joachim Bingel | a6bf8d8 | 2015-01-26 14:43:36 +0000 | [diff] [blame] | 417 | relation.put("wrap", term); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 418 | if (node.getChild(0).getText().equals("dominates")) { |
| Joachim Bingel | a6bf8d8 | 2015-01-26 14:43:36 +0000 | [diff] [blame] | 419 | term.put("layer", "c"); |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 420 | } else if (node.getChild(0).getText().equals("dependency")) { |
| 421 | term.put("layer", "d"); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 422 | } |
| 423 | ParseTree relSpec = getFirstChildWithCat(node, "relSpec"); |
| 424 | ParseTree repetition = getFirstChildWithCat(node, "repetition"); |
| 425 | if (relSpec != null) { |
| 426 | ParseTree foundry = getFirstChildWithCat(relSpec, "foundry"); |
| 427 | ParseTree layer = getFirstChildWithCat(relSpec, "layer"); |
| 428 | ParseTree key = getFirstChildWithCat(relSpec, "key"); |
| 429 | if (foundry != null) |
| Joachim Bingel | a6bf8d8 | 2015-01-26 14:43:36 +0000 | [diff] [blame] | 430 | term.put("foundry", foundry.getText()); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 431 | if (layer != null) |
| Joachim Bingel | a6bf8d8 | 2015-01-26 14:43:36 +0000 | [diff] [blame] | 432 | term.put("layer", layer.getText()); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 433 | if (key != null) |
| Joachim Bingel | a6bf8d8 | 2015-01-26 14:43:36 +0000 | [diff] [blame] | 434 | term.put("key", key.getText()); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 435 | } |
| 436 | if (repetition != null) { |
| 437 | Integer[] minmax = parseRepetition(repetition); |
| 438 | relation.put("boundary", |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 439 | KoralObjectGenerator.makeBoundary(minmax[0], minmax[1])); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 440 | } |
| 441 | putIntoSuperObject(relationGroup); |
| 442 | objectStack.push(relationGroup); |
| 443 | stackedObjects++; |
| 444 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 445 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 446 | private void processSpanclass(ParseTree node) { |
| 447 | // Step I: get info |
| 448 | int classId = 1; |
| 449 | if (getNodeCat(node.getChild(1)).equals("spanclass_id")) { |
| 450 | String ref = node.getChild(1).getChild(0).toStringTree(parser); |
| 451 | try { |
| 452 | classId = Integer.parseInt(ref); |
| 453 | } |
| 454 | catch (NumberFormatException e) { |
| 455 | String msg = "The specified class reference in the " |
| 456 | + "focus/split-Operator is not a number: " + ref; |
| 457 | log.error(msg); |
| 458 | addError(StatusCodes.UNDEFINED_CLASS_REFERENCE, msg); |
| 459 | } |
| 460 | // only allow class id up to 127 |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 461 | if (classId > 128) { |
| 462 | addWarning("Only class IDs up to 128 are allowed. Your class " |
| 463 | + classId + " has been set back to 128. " |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 464 | + "Check for possible conflict with other classes."); |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 465 | classId = 128; |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 466 | } |
| 467 | } |
| 468 | LinkedHashMap<String, Object> classGroup = |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 469 | KoralObjectGenerator.makeSpanClass(classId); |
| 470 | addHighlightClass(classId); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 471 | putIntoSuperObject(classGroup); |
| 472 | objectStack.push(classGroup); |
| 473 | stackedObjects++; |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 474 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 475 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 476 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 477 | private void processMatching(ParseTree node) { |
| 478 | // Step I: get info |
| 479 | ArrayList<Integer> classRefs = new ArrayList<Integer>(); |
| 480 | String classRefOp = null; |
| 481 | if (getNodeCat(node.getChild(2)).equals("spanclass_id")) { |
| 482 | ParseTree spanNode = node.getChild(2); |
| 483 | for (int i = 0; i < spanNode.getChildCount() - 1; i++) { |
| 484 | String ref = spanNode.getChild(i).getText(); |
| 485 | if (ref.equals("|") || ref.equals("&")) { |
| 486 | classRefOp = ref.equals("|") ? "intersection" : "union"; |
| 487 | } |
| 488 | else { |
| 489 | try { |
| 490 | int classRef = Integer.parseInt(ref); |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 491 | // only allow class id up to 128 |
| 492 | if (classRef > 128) { |
| 493 | addWarning("Only class references up to 128 are " |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 494 | + "allowed. Your reference to class " |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 495 | + classRef + " has been set back to 128. " |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 496 | + "Check for possible conflict with " |
| 497 | + "other classes."); |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 498 | classRef = 128; |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 499 | } |
| 500 | classRefs.add(classRef); |
| 501 | } |
| 502 | catch (NumberFormatException e) { |
| 503 | String err = "The specified class reference in the " |
| 504 | + "shrink/split-Operator is not a number."; |
| 505 | addError(StatusCodes.UNDEFINED_CLASS_REFERENCE, err); |
| 506 | } |
| 507 | } |
| 508 | } |
| 509 | } |
| 510 | else { |
| 511 | classRefs.add(1); |
| 512 | } |
| 513 | LinkedHashMap<String, Object> referenceGroup = |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 514 | KoralObjectGenerator.makeReference(classRefs); |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 515 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 516 | String type = node.getChild(0).toStringTree(parser); |
| 517 | // Default is focus(), if deviating catch here |
| 518 | if (type.equals("split")) |
| 519 | referenceGroup.put("operation", "operation:split"); |
| 520 | if (type.equals("submatch") || type.equals("shrink")) { |
| 521 | String warning = "Deprecated 2014-07-24: " |
| 522 | + type |
| 523 | + "() as a match reducer " |
| 524 | + "to a specific class is deprecated in favor of focus() and will " |
| 525 | + "only be supported for 3 months after deprecation date."; |
| 526 | addMessage(StatusCodes.DEPRECATED_QUERY_ELEMENT, warning); |
| 527 | } |
| 528 | if (classRefOp != null) { |
| 529 | referenceGroup.put("classRefOp", "classRefOp:" + classRefOp); |
| 530 | } |
| 531 | ArrayList<Object> referenceOperands = new ArrayList<Object>(); |
| 532 | referenceGroup.put("operands", referenceOperands); |
| 533 | // Step II: decide where to put the group |
| 534 | putIntoSuperObject(referenceGroup); |
| 535 | objectStack.push(referenceGroup); |
| 536 | stackedObjects++; |
| 537 | visited.add(node.getChild(0)); |
| 538 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 539 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 540 | private void processSubmatch(ParseTree node) { |
| 541 | LinkedHashMap<String, Object> submatch = |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 542 | KoralObjectGenerator.makeReference(null); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 543 | submatch.put("operands", new ArrayList<Object>()); |
| 544 | ParseTree startpos = getFirstChildWithCat(node, "startpos"); |
| 545 | ParseTree length = getFirstChildWithCat(node, "length"); |
| 546 | ArrayList<Integer> spanRef = new ArrayList<Integer>(); |
| 547 | spanRef.add(Integer.parseInt(startpos.getText())); |
| 548 | if (length != null) { |
| 549 | spanRef.add(Integer.parseInt(length.getText())); |
| 550 | } |
| 551 | submatch.put("spanRef", spanRef); |
| 552 | putIntoSuperObject(submatch); |
| 553 | objectStack.push(submatch); |
| 554 | stackedObjects++; |
| 555 | visited.add(node.getChild(0)); |
| 556 | } |
| Joachim Bingel | 84395b2 | 2014-12-18 10:46:18 +0000 | [diff] [blame] | 557 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 558 | /** |
| 559 | * Creates meta field in requestMap, later filled by terms |
| 560 | * |
| 561 | * @param node |
| 562 | */ |
| 563 | private void processMeta(ParseTree node) { |
| Joachim Bingel | 3c37eb2 | 2015-01-15 13:38:42 +0000 | [diff] [blame] | 564 | addWarning("You used the 'meta' keyword in a PoliqarpPlus query. This" |
| 565 | + " feature is currently not supported. Please use virtual " |
| 566 | + "collections to restrict documents by metadata."); |
| Joachim Bingel | 1f8f378 | 2015-01-19 17:58:41 +0000 | [diff] [blame] | 567 | CollectionQueryProcessor cq = new CollectionQueryProcessor(node.getChild(1).getText()); |
| 568 | requestMap.put("collection", cq.getRequestMap().get("collection")); |
| Joachim Bingel | 3c37eb2 | 2015-01-15 13:38:42 +0000 | [diff] [blame] | 569 | for (ParseTree child : getChildren(node)) { |
| 570 | visited.add(child); |
| 571 | } |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 572 | } |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 573 | |
| Joachim Bingel | 1f8f378 | 2015-01-19 17:58:41 +0000 | [diff] [blame] | 574 | @SuppressWarnings("unchecked") |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 575 | private void processWithin(ParseTree node) { |
| Joachim Bingel | 3c37eb2 | 2015-01-15 13:38:42 +0000 | [diff] [blame] | 576 | ParseTree domainNode = node.getChild(1); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 577 | String domain = getNodeCat(domainNode); |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 578 | LinkedHashMap<String, Object> span = KoralObjectGenerator.makeSpan(domain); |
| Joachim Bingel | 3c37eb2 | 2015-01-15 13:38:42 +0000 | [diff] [blame] | 579 | LinkedHashMap<String, Object> queryObj = (LinkedHashMap<String, Object>) requestMap.get("query"); |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 580 | LinkedHashMap<String, Object> contains = KoralObjectGenerator.makePosition(new String[]{"frames:contains"}, null); |
| Joachim Bingel | 3c37eb2 | 2015-01-15 13:38:42 +0000 | [diff] [blame] | 581 | ArrayList<Object> operands = (ArrayList<Object>) contains.get("operands"); |
| 582 | operands.add(span); |
| 583 | operands.add(queryObj); |
| 584 | requestMap.put("query", contains); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 585 | visited.add(node.getChild(0)); |
| 586 | visited.add(node.getChild(1)); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 587 | } |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 588 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 589 | /** |
| 590 | * Parses a repetition node |
| 591 | * |
| 592 | * @param node |
| 593 | * @return A two-element array, of which the first element is an |
| 594 | * int representing the minimal number of repetitions of |
| 595 | * the quantified element, and the second element |
| 596 | * representing the maximal number of repetitions |
| 597 | */ |
| 598 | private Integer[] parseRepetition(ParseTree node) { |
| 599 | Integer min = 0, max = 0; |
| 600 | boolean maxInfinite = false; |
| 601 | // (repetition) node can be of two types: 'kleene' or 'range' |
| 602 | ParseTree repetitionTypeNode = node.getChild(0); |
| 603 | String repetitionType = getNodeCat(repetitionTypeNode); |
| 604 | if (repetitionType.equals("kleene")) { |
| 605 | // kleene operators (+ and *) as well as optionality (?) |
| 606 | String kleeneOp = repetitionTypeNode.getText(); |
| 607 | if (kleeneOp.equals("*")) { |
| 608 | maxInfinite = true; |
| 609 | } |
| 610 | else if (kleeneOp.equals("+")) { |
| 611 | min = 1; |
| 612 | maxInfinite = true; |
| 613 | } |
| 614 | if (kleeneOp.equals("?")) { |
| 615 | max = 1; |
| 616 | } |
| 617 | } |
| 618 | else { |
| 619 | // Range node of form "{ min , max }" or "{ max }" or |
| 620 | // "{ , max }" or "{ min , }" |
| 621 | ParseTree minNode = getFirstChildWithCat(repetitionTypeNode, "min"); |
| 622 | ParseTree maxNode = getFirstChildWithCat(repetitionTypeNode, "max"); |
| 623 | if (maxNode != null) |
| 624 | max = Integer.parseInt(maxNode.getText()); |
| 625 | else |
| 626 | maxInfinite = true; |
| 627 | // min is optional: if not specified, min = max |
| 628 | if (minNode != null) |
| 629 | min = Integer.parseInt(minNode.getText()); |
| 630 | else if (hasChild(repetitionTypeNode, ",")) |
| 631 | min = 0; |
| 632 | else { |
| 633 | min = max; |
| 634 | // addWarning("Your query contains a segment of the form {n}, where n is some number. This expression is ambiguous. " |
| 635 | // + "It could mean a repetition (\"Repeat the previous element n times!\") or a word form that equals the number, " |
| 636 | // + "enclosed by a \"class\" (which is denoted by braces like '{x}', see the documentation on classes)." |
| 637 | // + "KorAP has by default interpreted the segment as a repetition statement. If you want to express the" |
| 638 | // + "number as a word form inside a class, use the non-shorthand form {[orth=n]}."); |
| 639 | } |
| 640 | } |
| 641 | if (maxInfinite) { |
| 642 | max = null; |
| 643 | } |
| 644 | return new Integer[] { min, max }; |
| 645 | } |
| Joachim Bingel | 3a41a44 | 2014-07-22 12:16:16 +0000 | [diff] [blame] | 646 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 647 | private LinkedHashMap<String, Object> parseFrame(ParseTree node) { |
| 648 | String operator = node.toStringTree(parser).toLowerCase(); |
| 649 | String[] frames = new String[] { "" }; |
| 650 | String[] classRefCheck = new String[] { "classRefCheck:includes" }; |
| 651 | switch (operator) { |
| 652 | case "contains": |
| 653 | frames = new String[] { "frames:contains" }; |
| 654 | break; |
| 655 | case "matches": |
| 656 | frames = new String[] { "frames:matches" }; |
| 657 | break; |
| 658 | case "startswith": |
| 659 | frames = new String[] { "frames:startswith" }; |
| 660 | break; |
| 661 | case "endswith": |
| 662 | frames = new String[] { "frames:endswith" }; |
| 663 | break; |
| 664 | case "overlaps": |
| 665 | frames = new String[] { "frames:overlapsLeft", |
| 666 | "frames:overlapsRight" }; |
| 667 | classRefCheck = new String[] { "classRefCheck:intersects" }; |
| 668 | break; |
| 669 | } |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 670 | return KoralObjectGenerator.makePosition(frames, classRefCheck); |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 671 | } |
| Joachim Bingel | 832800e | 2014-10-17 14:46:39 +0000 | [diff] [blame] | 672 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 673 | private LinkedHashMap<String, Object> parseTermOrTermGroup(ParseTree node, |
| 674 | boolean negated) { |
| 675 | return parseTermOrTermGroup(node, negated, "token"); |
| 676 | } |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 677 | |
/**
 * Parses a (term) or (termGroup) node.
 *
 * For a term, the foundry, layer, key, value, match operator and flags
 * found among the node's children are written into a term object. For a
 * termGroup, a boolean group ("and" for "&amp;", otherwise "or") is created
 * and this method is called recursively on the left and right operands.
 *
 * @param node
 *            the term or termGroup node
 * @param negatedGlobal
 *            Indicates whether the term/termGroup is globally
 *            negated, e.g. through a negation operator preceding
 *            the related token like "![base=foo]". Global
 *            negation affects the term's "match" parameter.
 * @param mode
 *            'token' or 'span' (tokens and spans are treated
 *            differently): in span mode the layer is stored under
 *            "key" and the key text under "value"; in token mode the
 *            layer is stored under "layer" and the key text under "key".
 * @return A term or termGroup object, depending on input, or
 *         {@code null} if the node is neither a term nor a termGroup
 */
@SuppressWarnings("unchecked")
private LinkedHashMap<String, Object> parseTermOrTermGroup(ParseTree node,
        boolean negatedGlobal, String mode) {
    String nodeCat = getNodeCat(node);
    if (nodeCat.equals("term")) {
        String key = null;
        LinkedHashMap<String, Object> term =
                KoralObjectGenerator.makeTerm();
        // handle negation: an odd number of "!" children flips the
        // global negation, an even number cancels out
        boolean negated = negatedGlobal;
        boolean isRegex = false;
        List<ParseTree> negations = getChildrenWithCat(node, "!");
        if (negations.size() % 2 == 1)
            negated = !negated;
        // retrieve possible nodes (each may be null if absent)
        ParseTree keyNode = getFirstChildWithCat(node, "key");
        ParseTree valueNode = getFirstChildWithCat(node, "value");
        ParseTree layerNode = getFirstChildWithCat(node, "layer");
        ParseTree foundryNode = getFirstChildWithCat(node, "foundry");
        ParseTree termOpNode = getFirstChildWithCat(node, "termOp");
        ParseTree flagNode = getFirstChildWithCat(node, "flag");
        // process foundry
        if (foundryNode != null)
            term.put("foundry", foundryNode.getText());
        // process layer: in token mode map "base" -> "lemma" and
        // "punct" -> "orth" (the latter also sets type:punct);
        // in span mode the layer text is stored as the term's "key"
        if (layerNode != null) {
            String layer = layerNode.getText();
            if (mode.equals("span")) {
                term.put("key", layer);
            } else if (mode.equals("token")) {
                if (layer.equals("base")) {
                    layer = "lemma"; }
                else if (layer.equals("punct")) {
                    layer = "orth";
                    term.put("type", "type:punct");
                }
                term.put("layer", layer);
            }
        }
        // process key: 'normal' or regex?
        // NOTE(review): keyNode is dereferenced without a null check;
        // presumably the grammar guarantees a "key" child -- confirm.
        key = keyNode.getText();
        if (getNodeCat(keyNode.getChild(0)).equals("regex")) {
            isRegex = true;
            // overwrites a "type:punct" set above, if any
            term.put("type", "type:regex");
            // remove leading and trailing quotes
            key = key.substring(1, key.length() - 1);
        }
        if (mode.equals("span"))
            term.put("value", key);
        else
            term.put("key", key);
        // process value (replaces the "value" written in span mode above)
        if (valueNode != null)
            term.put("value", valueNode.getText());
        // process operator ("match" property); a "!" inside the
        // operator flips the negation once more. "match" is only
        // written here when a termOp child is present.
        if (termOpNode != null) {
            String termOp = termOpNode.getText();
            negated = termOp.contains("!") ? !negated : negated;
            if (!negated)
                term.put("match", "match:eq");
            else
                term.put("match", "match:ne");
        }
        // process possible flags
        if (flagNode != null) {
            // substring removes leading slash
            String flag = getNodeCat(flagNode.getChild(0)).substring(1);
            // lower-case 'i' takes precedence over upper-case 'I'
            if (flag.contains("i"))
                term.put("caseInsensitive", true);
            else if (flag.contains("I"))
                term.put("caseInsensitive", false);
            if (flag.contains("x")) {
                // a literal key must be escaped before it is embedded
                // in the regular expression built below
                if (!isRegex) {
                    key = QueryUtils.escapeRegexSpecialChars(key);
                }
                // flag 'x' allows submatches:
                // overwrite key with appended .*?
                // NOTE(review): this always writes to "key", which in
                // span mode holds the layer rather than the key text
                // -- confirm this is intended.
                term.put("key", ".*?" + key + ".*?"); //
                term.put("type", "type:regex");
            }
        }
        return term;
    }
    else if (nodeCat.equals("termGroup")) {
        // For termGroups, establish a boolean relation between
        // operands and recursively call this function with
        // the term or termGroup operands
        LinkedHashMap<String, Object> termGroup = null;
        ParseTree leftOp = null;
        ParseTree rightOp = null;
        // check for leading/trailing parentheses and skip them when
        // picking the operands
        if (!getNodeCat(node.getChild(0)).equals("("))
            leftOp = node.getChild(0);
        else
            leftOp = node.getChild(1);
        if (!getNodeCat(node.getChild(node.getChildCount() - 1))
                .equals(")"))
            rightOp = node.getChild(node.getChildCount() - 1);
        else
            rightOp = node.getChild(node.getChildCount() - 2);
        // establish boolean relation
        ParseTree boolOp = getFirstChildWithCat(node, "boolOp");
        String operator = boolOp.getText().equals("&") ? "and" : "or";
        termGroup = KoralObjectGenerator.makeTermGroup(operator);
        ArrayList<Object> operands = (ArrayList<Object>) termGroup
                .get("operands");
        // recursion with left/right operands
        // NOTE(review): the global negation is handed to both operands
        // while the boolean operator is left unchanged -- confirm that
        // this is the intended semantics for a negated group.
        operands.add(parseTermOrTermGroup(leftOp, negatedGlobal, mode));
        operands.add(parseTermOrTermGroup(rightOp, negatedGlobal, mode));
        return termGroup;
    }
    // neither a term nor a termGroup
    return null;
}
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 804 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 805 | /** |
| 806 | * Puts an object into the operands list of its governing (or |
| 807 | * "super") object which had been placed on the |
| 808 | * {@link #objectStack} before and is still on top of the stack. |
| 809 | * If this is the top object of the tree, it is put there instead |
| 810 | * of into some (non-existent) operand stack. |
| 811 | * |
| 812 | * @param object |
| 813 | * The object to be inserted |
| 814 | */ |
| 815 | private void putIntoSuperObject(LinkedHashMap<String, Object> object) { |
| 816 | putIntoSuperObject(object, 0); |
| 817 | } |
| Joachim Bingel | 33bd45f | 2014-06-25 15:00:54 +0000 | [diff] [blame] | 818 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 819 | /** |
| 820 | * Puts an object into the operands list of its governing (or |
| 821 | * "super") object which had been placed on the |
| 822 | * {@link #objectStack} before. If this is the top object of the |
| 823 | * tree, it is put there instead of into some (non-existent) |
| 824 | * operand stack. |
| 825 | * |
| 826 | * @param object |
| 827 | * The object to be inserted |
| 828 | * @param objStackPosition |
| 829 | * Indicated the position of the super object on the |
| 830 | * {@link #objectStack} (in case not the top element of |
| 831 | * the stack is the super object. |
| 832 | */ |
| 833 | @SuppressWarnings({ "unchecked" }) |
| 834 | private void putIntoSuperObject(LinkedHashMap<String, Object> object, |
| 835 | int objStackPosition) { |
| 836 | if (objectStack.size() > objStackPosition) { |
| 837 | ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack |
| 838 | .get(objStackPosition).get("operands"); |
| 839 | topObjectOperands.add(object); |
| 840 | } |
| 841 | else { |
| 842 | requestMap.put("query", object); |
| 843 | } |
| 844 | } |
| Joachim Bingel | 832800e | 2014-10-17 14:46:39 +0000 | [diff] [blame] | 845 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 846 | /** |
| 847 | * Basically only increases the min and max counters as required |
| 848 | * by Poliqarp |
| 849 | * |
| 850 | * @param distanceNode |
| 851 | * @return |
| 852 | */ |
| 853 | private Integer[] parseDistance(ParseTree distanceNode) { |
| 854 | int emptyTokenSeqIndex = getNodeCat(distanceNode).equals("distance") ? 0 |
| 855 | : 2; |
| 856 | Integer[] minmax = parseEmptySegments(distanceNode |
| 857 | .getChild(emptyTokenSeqIndex)); |
| 858 | Integer min = minmax[0]; |
| 859 | Integer max = minmax[1]; |
| 860 | min++; |
| 861 | if (max != null) |
| 862 | max++; |
| 863 | // min = cropToMaxValue(min); |
| 864 | // max = cropToMaxValue(max); |
| 865 | return new Integer[] { min, max }; |
| 866 | } |
| Joachim Bingel | 94a1ccd | 2013-12-10 10:37:29 +0000 | [diff] [blame] | 867 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 868 | private Integer[] parseEmptySegments(ParseTree emptySegments) { |
| 869 | Integer min = 0; |
| 870 | Integer max = 0; |
| 871 | ParseTree child; |
| 872 | for (int i = 0; i < emptySegments.getChildCount(); i++) { |
| 873 | child = emptySegments.getChild(i); |
| 874 | ParseTree nextSibling = emptySegments.getChild(i + 1); |
| 875 | if (child.toStringTree(parser).equals("(emptyToken [ ])")) { |
| 876 | if (nextSibling != null |
| 877 | && getNodeCat(nextSibling).equals("repetition")) { |
| 878 | Integer[] minmax = parseRepetition(nextSibling); |
| 879 | min += minmax[0]; |
| 880 | if (minmax[1] != null) { |
| 881 | max += minmax[1]; |
| 882 | } |
| 883 | else { |
| 884 | max = null; |
| 885 | } |
| 886 | } |
| 887 | else { |
| 888 | min++; |
| 889 | max++; |
| 890 | } |
| 891 | } |
| 892 | } |
| 893 | // min = cropToMaxValue(min); |
| 894 | // max = cropToMaxValue(max); |
| 895 | return new Integer[] { min, max }; |
| 896 | } |
| Joachim Bingel | 14239d8 | 2014-07-22 09:55:04 +0000 | [diff] [blame] | 897 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 898 | private ParserRuleContext parsePoliqarpQuery(String query) { |
| 899 | Lexer lexer = new PoliqarpPlusLexer((CharStream) null); |
| 900 | ParserRuleContext tree = null; |
| 901 | Antlr4DescriptiveErrorListener errorListener = |
| 902 | new Antlr4DescriptiveErrorListener(query); |
| 903 | // Like p. 111 |
| 904 | try { |
| 905 | // Tokenize input data |
| 906 | ANTLRInputStream input = new ANTLRInputStream(query); |
| 907 | lexer.setInputStream(input); |
| 908 | CommonTokenStream tokens = new CommonTokenStream(lexer); |
| 909 | parser = new PoliqarpPlusParser(tokens); |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 910 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 911 | // Don't throw out erroneous stuff |
| 912 | parser.setErrorHandler(new BailErrorStrategy()); |
| 913 | lexer.removeErrorListeners(); |
| Joachim Bingel | 3fa584b | 2014-12-17 13:35:43 +0000 | [diff] [blame] | 914 | lexer.addErrorListener(errorListener); |
| 915 | parser.removeErrorListeners(); |
| 916 | parser.addErrorListener(errorListener); |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 917 | |
| Joachim Bingel | 20e06ac | 2015-01-15 10:31:33 +0000 | [diff] [blame] | 918 | // Get starting rule from parser |
| 919 | Method startRule = PoliqarpPlusParser.class.getMethod("request"); |
| 920 | tree = |
| 921 | (ParserRuleContext) startRule.invoke(parser, (Object[]) null); |
| 922 | } |
| 923 | // Some things went wrong ... |
| 924 | catch (Exception e) { |
| 925 | log.error("Could not parse query. " |
| 926 | + "Please make sure it is well-formed."); |
| 927 | log.error(errorListener.generateFullErrorMsg().toString()); |
| 928 | addError(errorListener.generateFullErrorMsg()); |
| 929 | } |
| 930 | return tree; |
| 931 | } |
| Nils Diewald | 4128a92 | 2014-07-18 14:39:24 +0000 | [diff] [blame] | 932 | } |