| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.serialize; |
| 2 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 3 | import de.ids_mannheim.korap.query.poliqarp.PoliqarpPlusLexer; |
| 4 | import de.ids_mannheim.korap.query.poliqarp.PoliqarpPlusParser; |
| Joachim Bingel | 16da4e1 | 2013-12-17 09:48:12 +0000 | [diff] [blame] | 5 | import de.ids_mannheim.korap.util.QueryException; |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 6 | import org.antlr.v4.runtime.*; |
| 7 | import org.antlr.v4.runtime.tree.ParseTree; |
| Michael Hanl | 27e5058 | 2013-12-07 18:04:13 +0000 | [diff] [blame] | 8 | import org.slf4j.Logger; |
| 9 | import org.slf4j.LoggerFactory; |
| 10 | |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 11 | import java.lang.reflect.Method; |
| 12 | import java.util.*; |
| Joachim Bingel | 998954a | 2014-07-14 15:58:34 +0000 | [diff] [blame] | 13 | import java.util.regex.Pattern; |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 14 | |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 15 | /** |
| 16 | * Map representation of Poliqarp syntax tree as returned by ANTLR |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 17 | * |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 18 | * @author joachim |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 19 | */ |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 20 | public class PoliqarpPlusTree extends Antlr4AbstractSyntaxTree { |
| Michael Hanl | 27e5058 | 2013-12-07 18:04:13 +0000 | [diff] [blame] | 21 | |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 22 | private static Logger log = LoggerFactory.getLogger(PoliqarpPlusTree.class); |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 23 | |
/**
 * Processes the given query string and builds its map-based tree representation.
 * <br/>
 * Most centrally, this class maintains a set of nested maps and lists which represent the JSON tree, which is
 * built by the JSON serialiser on basis of the {@link #requestMap} at the root of the tree.
 * <br/>
 * The class further maintains a set of stacks which effectively keep track of which objects to embed in which
 * containing objects.
 *
 * @param query The query string to be processed (passed on to {@link #process(String)})
 * @throws QueryException if {@link #process(String)} fails
 */
public PoliqarpPlusTree(String query) throws QueryException {
    process(query);
    // Report the resulting query object through the logger only; the former unconditional
    // dump of the whole request map to stdout was debugging residue.
    log.info(">>> {} <<<", requestMap.get("query"));
}
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 38 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 39 | @Override |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 40 | public void process(String query) throws QueryException { |
| 41 | ParseTree tree; |
| 42 | tree = parsePoliqarpQuery(query); |
| 43 | super.parser = this.parser; |
| 44 | log.info("Processing PoliqarpPlus"); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 45 | processNode(tree); |
| 46 | } |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 47 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 48 | /** |
| 49 | * Recursively calls itself with the children of the currently active node, traversing the tree nodes in a top-down, depth-first fashion. |
| 50 | * A list is maintained that contains all visited nodes |
| Joachim Bingel | bea1ec6 | 2014-07-11 15:00:14 +0000 | [diff] [blame] | 51 | * which have been directly addressed by their (grand-/grand-grand-/...) parent nodes, such that some processing time is saved, as these node will |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 52 | * not be processed. This method is effectively a list of if-statements that are responsible for treating the different node types correctly and filling the |
| 53 | * respective maps/lists. |
| 54 | * |
| 55 | * @param node The currently processed node. The process(String query) method calls this method with the root. |
| 56 | * @throws QueryException |
| 57 | */ |
| Joachim Bingel | 14239d8 | 2014-07-22 09:55:04 +0000 | [diff] [blame] | 58 | @SuppressWarnings("unchecked") |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 59 | private void processNode(ParseTree node) throws QueryException { |
| 60 | // Top-down processing |
| 61 | if (visited.contains(node)) return; |
| 62 | else visited.add(node); |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 63 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 64 | currentNode = node; |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 65 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 66 | String nodeCat = getNodeCat(node); |
| 67 | openNodeCats.push(nodeCat); |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 68 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 69 | stackedObjects = 0; |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 70 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 71 | if (verbose) { |
| 72 | System.err.println(" " + objectStack); |
| 73 | System.out.println(openNodeCats); |
| 74 | } |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 75 | |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 76 | /* |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 77 | **************************************************************** |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 78 | **************************************************************** |
| 79 | * Processing individual node categories * |
| 80 | **************************************************************** |
| 81 | **************************************************************** |
| 82 | */ |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 83 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 84 | if (nodeCat.equals("segment")) { |
| 85 | // Cover possible quantification (i.e. repetition) of segment |
| 86 | ParseTree quantification = getFirstChildWithCat(node, "repetition"); |
| 87 | if (quantification != null) { |
| 88 | LinkedHashMap<String,Object> quantGroup = makeGroup("repetition"); |
| Joachim Bingel | 6163156 | 2014-07-24 14:26:02 +0000 | [diff] [blame] | 89 | Integer[] minmax = parseRepetition(quantification); |
| 90 | quantGroup.put("boundary", makeBoundary(minmax[0], minmax[1])); |
| 91 | if (minmax[0] != null) quantGroup.put("min", minmax[0]); |
| 92 | if (minmax[1] != null) quantGroup.put("max", minmax[1]); |
| Joachim Bingel | aaabb72 | 2014-09-24 14:29:10 +0000 | [diff] [blame] | 93 | announcements.add("Deprecated 2014-07-24: 'min' and 'max' to be " + |
| 94 | "supported until 3 months from deprecation date."); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 95 | putIntoSuperObject(quantGroup); |
| 96 | objectStack.push(quantGroup); |
| 97 | stackedObjects++; |
| 98 | } |
| 99 | } |
| 100 | |
| 101 | if (nodeCat.equals("sequence")) { |
| 102 | LinkedHashMap<String,Object> sequence = makeGroup("sequence"); |
| Joachim Bingel | 14239d8 | 2014-07-22 09:55:04 +0000 | [diff] [blame] | 103 | ParseTree distanceNode = getFirstChildWithCat(node, "distance"); |
| 104 | if (distanceNode!=null) { |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 105 | Integer[] minmax = parseDistance(distanceNode); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 106 | LinkedHashMap<String,Object> distance = makeDistance("w", minmax[0], minmax[1]); |
| 107 | sequence.put("inOrder", true); |
| 108 | ArrayList<Object> distances = new ArrayList<Object>(); |
| 109 | distances.add(distance); |
| 110 | sequence.put("distances", distances); |
| Joachim Bingel | aaabb72 | 2014-09-24 14:29:10 +0000 | [diff] [blame] | 111 | visited.add(distanceNode.getChild(0)); // don't re-visit the emptyTokenSequence node |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 112 | } |
| 113 | putIntoSuperObject(sequence); |
| 114 | objectStack.push(sequence); |
| 115 | stackedObjects++; |
| 116 | } |
| Joachim Bingel | 14239d8 | 2014-07-22 09:55:04 +0000 | [diff] [blame] | 117 | |
| Joachim Bingel | aaabb72 | 2014-09-24 14:29:10 +0000 | [diff] [blame] | 118 | /* |
| 119 | * empty tokens at beginning/end of sequence |
| 120 | */ |
| Joachim Bingel | 14239d8 | 2014-07-22 09:55:04 +0000 | [diff] [blame] | 121 | if (nodeCat.equals("emptyTokenSequence")) { |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 122 | Integer[] minmax = parseEmptySegments(node); |
| Joachim Bingel | aaabb72 | 2014-09-24 14:29:10 +0000 | [diff] [blame] | 123 | // object will be either a repetition group or a single empty token |
| 124 | LinkedHashMap<String,Object> object; |
| Joachim Bingel | 14239d8 | 2014-07-22 09:55:04 +0000 | [diff] [blame] | 125 | LinkedHashMap<String,Object> emptyToken = makeToken(); |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 126 | if (minmax[0] != 1 || minmax[1] == null || minmax[1] != 1) { |
| Joachim Bingel | 14239d8 | 2014-07-22 09:55:04 +0000 | [diff] [blame] | 127 | object = makeRepetition(minmax[0], minmax[1]); |
| 128 | ((ArrayList<Object>) object.get("operands")).add(emptyToken); |
| 129 | } else { |
| 130 | object = emptyToken; |
| 131 | } |
| 132 | putIntoSuperObject(object); |
| 133 | objectStack.push(object); |
| 134 | stackedObjects++; |
| 135 | } |
| 136 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 137 | |
| 138 | if (nodeCat.equals("token")) { |
| 139 | LinkedHashMap<String,Object> token = makeToken(); |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 140 | // handle negation |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 141 | List<ParseTree> negations = getChildrenWithCat(node, "!"); |
| 142 | boolean negated = false; |
| Joachim Bingel | 723ced0 | 2014-07-14 16:17:22 +0000 | [diff] [blame] | 143 | boolean isRegex = false; |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 144 | if (negations.size() % 2 == 1) negated = true; |
| 145 | if (getNodeCat(node.getChild(0)).equals("key")) { |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 146 | // no 'term' child, but direct key specification: process here |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 147 | LinkedHashMap<String,Object> term = makeTerm(); |
| 148 | |
| 149 | String key = node.getChild(0).getText(); |
| 150 | if (getNodeCat(node.getChild(0).getChild(0)).equals("regex")) { |
| Joachim Bingel | 723ced0 | 2014-07-14 16:17:22 +0000 | [diff] [blame] | 151 | isRegex = true; |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 152 | term.put("type", "type:regex"); |
| 153 | key = key.substring(1,key.length()-1); |
| 154 | } |
| 155 | term.put("layer", "orth"); |
| 156 | term.put("key", key); |
| 157 | String matches = negated ? "ne" : "eq"; |
| 158 | term.put("match", "match:"+matches); |
| 159 | ParseTree flagNode = getFirstChildWithCat(node, "flag"); |
| 160 | if (flagNode != null) { |
| Joachim Bingel | aaabb72 | 2014-09-24 14:29:10 +0000 | [diff] [blame] | 161 | // substring removes leading slash '/' |
| 162 | String flag = getNodeCat(flagNode.getChild(0)).substring(1); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 163 | if (flag.contains("i")) term.put("caseInsensitive", true); |
| 164 | else if (flag.contains("I")) term.put("caseInsensitive", false); |
| 165 | if (flag.contains("x")) { |
| Joachim Bingel | 899bdcf | 2014-07-14 14:55:06 +0000 | [diff] [blame] | 166 | term.put("type", "type:regex"); |
| Joachim Bingel | 723ced0 | 2014-07-14 16:17:22 +0000 | [diff] [blame] | 167 | if (!isRegex) { |
| Joachim Bingel | a83f8cc | 2014-08-05 14:12:59 +0000 | [diff] [blame] | 168 | key = QueryUtils.escapeRegexSpecialChars(key); |
| Joachim Bingel | 723ced0 | 2014-07-14 16:17:22 +0000 | [diff] [blame] | 169 | } |
| 170 | term.put("key", ".*?"+key+".*?"); // overwrite key |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 171 | } |
| 172 | } |
| 173 | token.put("wrap", term); |
| 174 | } else { |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 175 | // child is 'term' or 'termGroup' -> process in extra method |
| Joachim Bingel | aaabb72 | 2014-09-24 14:29:10 +0000 | [diff] [blame] | 176 | LinkedHashMap<String,Object> termOrTermGroup = |
| 177 | parseTermOrTermGroup(node.getChild(1), negated); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 178 | token.put("wrap", termOrTermGroup); |
| 179 | } |
| 180 | putIntoSuperObject(token); |
| 181 | visited.add(node.getChild(0)); |
| 182 | visited.add(node.getChild(2)); |
| 183 | } |
| 184 | |
| 185 | if (nodeCat.equals("alignment")) { |
| 186 | LinkedHashMap<String,Object> aligned = makeGroup("alignment"); |
| 187 | aligned.put("align", "align:left"); |
| 188 | putIntoSuperObject(aligned); |
| 189 | objectStack.push(aligned); |
| 190 | stackedObjects++; |
| 191 | } |
| 192 | |
| 193 | if (nodeCat.equals("span")) { |
| Joachim Bingel | 3a41a44 | 2014-07-22 12:16:16 +0000 | [diff] [blame] | 194 | List<ParseTree> negations = getChildrenWithCat(node, "!"); |
| 195 | boolean negated = false; |
| 196 | boolean isRegex = false; |
| 197 | if (negations.size() % 2 == 1) negated = true; |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 198 | LinkedHashMap<String,Object> span = makeSpan(); |
| 199 | ParseTree keyNode = getFirstChildWithCat(node, "key"); |
| 200 | ParseTree layerNode = getFirstChildWithCat(node, "layer"); |
| 201 | ParseTree foundryNode = getFirstChildWithCat(node, "foundry"); |
| 202 | ParseTree termOpNode = getFirstChildWithCat(node, "termOp"); |
| Joachim Bingel | 3a41a44 | 2014-07-22 12:16:16 +0000 | [diff] [blame] | 203 | ParseTree termNode = getFirstChildWithCat(node, "term"); |
| 204 | ParseTree termGroupNode = getFirstChildWithCat(node, "termGroup"); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 205 | if (foundryNode != null) span.put("foundry", foundryNode.getText()); |
| 206 | if (layerNode != null) { |
| 207 | String layer = layerNode.getText(); |
| 208 | if (layer.equals("base")) layer="lemma"; |
| 209 | span.put("layer", layer); |
| 210 | } |
| 211 | span.put("key", keyNode.getText()); |
| 212 | if (termOpNode != null) { |
| 213 | String termOp = termOpNode.getText(); |
| Joachim Bingel | 3a41a44 | 2014-07-22 12:16:16 +0000 | [diff] [blame] | 214 | if (termOp.equals("==")) span.put("match", "match:eq"); |
| 215 | else if (termOp.equals("!=")) span.put("match", "match:ne"); |
| 216 | } |
| 217 | if (termNode != null) { |
| Joachim Bingel | aaabb72 | 2014-09-24 14:29:10 +0000 | [diff] [blame] | 218 | LinkedHashMap<String,Object> termOrTermGroup = |
| 219 | parseTermOrTermGroup(termNode, negated, "span"); |
| Joachim Bingel | 3a41a44 | 2014-07-22 12:16:16 +0000 | [diff] [blame] | 220 | span.put("attr", termOrTermGroup); |
| 221 | } |
| 222 | if (termGroupNode != null) { |
| Joachim Bingel | aaabb72 | 2014-09-24 14:29:10 +0000 | [diff] [blame] | 223 | LinkedHashMap<String,Object> termOrTermGroup = |
| 224 | parseTermOrTermGroup(termGroupNode, negated, "span"); |
| Joachim Bingel | 3a41a44 | 2014-07-22 12:16:16 +0000 | [diff] [blame] | 225 | span.put("attr", termOrTermGroup); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 226 | } |
| 227 | putIntoSuperObject(span); |
| 228 | objectStack.push(span); |
| 229 | stackedObjects++; |
| 230 | } |
| 231 | |
| 232 | if (nodeCat.equals("disjunction")) { |
| 233 | LinkedHashMap<String,Object> disjunction = makeGroup("or"); |
| 234 | putIntoSuperObject(disjunction); |
| 235 | objectStack.push(disjunction); |
| 236 | stackedObjects++; |
| 237 | } |
| 238 | |
| 239 | if (nodeCat.equals("position")) { |
| Joachim Bingel | ceb7906 | 2014-09-22 11:50:37 +0000 | [diff] [blame] | 240 | LinkedHashMap<String,Object> position = parseFrame(node.getChild(0)); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 241 | putIntoSuperObject(position); |
| 242 | objectStack.push(position); |
| 243 | stackedObjects++; |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 244 | } |
| 245 | |
| Joachim Bingel | 23c31ad | 2014-08-11 09:44:46 +0000 | [diff] [blame] | 246 | if (nodeCat.equals("relation")) { |
| 247 | LinkedHashMap<String, Object> relationGroup = makeGroup("relation"); |
| 248 | LinkedHashMap<String, Object> relation = makeRelation(); |
| 249 | relationGroup.put("relation", relation); |
| 250 | if (node.getChild(0).getText().equals("dominates")) { |
| 251 | relation.put("layer", "c"); |
| 252 | } |
| 253 | ParseTree relSpec = getFirstChildWithCat(node, "relSpec"); |
| 254 | ParseTree repetition = getFirstChildWithCat(node, "repetition"); |
| 255 | if (relSpec != null) { |
| 256 | ParseTree foundry = getFirstChildWithCat(relSpec, "foundry"); |
| 257 | ParseTree layer = getFirstChildWithCat(relSpec, "layer"); |
| 258 | ParseTree key = getFirstChildWithCat(relSpec, "key"); |
| 259 | if (foundry != null) relation.put("foundry", foundry.getText()); |
| 260 | if (layer != null) relation.put("layer", layer.getText()); |
| 261 | if (key != null) relation.put("key", key.getText()); |
| 262 | } |
| 263 | if (repetition != null) { |
| 264 | Integer[] minmax = parseRepetition(repetition); |
| 265 | relation.put("boundary", makeBoundary(minmax[0], minmax[1])); |
| 266 | } |
| 267 | putIntoSuperObject(relationGroup); |
| 268 | objectStack.push(relationGroup); |
| 269 | stackedObjects++; |
| 270 | } |
| 271 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 272 | if (nodeCat.equals("spanclass")) { |
| 273 | // Step I: get info |
| 274 | int classId = 0; |
| 275 | if (getNodeCat(node.getChild(1)).equals("spanclass_id")) { |
| 276 | String ref = node.getChild(1).getChild(0).toStringTree(parser); |
| 277 | try { |
| 278 | classId = Integer.parseInt(ref); |
| 279 | } catch (NumberFormatException e) { |
| Joachim Bingel | aaabb72 | 2014-09-24 14:29:10 +0000 | [diff] [blame] | 280 | String msg = "The specified class reference in the " + |
| 281 | "focus/split-Operator is not a number: " + ref; |
| 282 | log.error(msg); |
| 283 | throw new QueryException(msg); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 284 | } |
| 285 | // only allow class id up to 255 |
| 286 | if (classId > 255) { |
| 287 | classId = 0; |
| 288 | } |
| 289 | } |
| 290 | LinkedHashMap<String, Object> classGroup = makeSpanClass(classId); |
| 291 | putIntoSuperObject(classGroup); |
| 292 | objectStack.push(classGroup); |
| 293 | stackedObjects++; |
| 294 | } |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 295 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 296 | if (nodeCat.equals("matching")) { |
| 297 | // Step I: get info |
| 298 | ArrayList<Integer> classRefs = new ArrayList<Integer>(); |
| 299 | String classRefOp = null; |
| 300 | if (getNodeCat(node.getChild(2)).equals("spanclass_id")) { |
| 301 | ParseTree spanNode = node.getChild(2); |
| 302 | for (int i = 0; i < spanNode.getChildCount() - 1; i++) { |
| 303 | String ref = spanNode.getChild(i).getText(); |
| 304 | if (ref.equals("|") || ref.equals("&")) { |
| 305 | classRefOp = ref.equals("|") ? "intersection" : "union"; |
| 306 | } else { |
| 307 | try { |
| 308 | int classRef = Integer.parseInt(ref); |
| 309 | classRefs.add(classRef); |
| 310 | } catch (NumberFormatException e) { |
| Joachim Bingel | aaabb72 | 2014-09-24 14:29:10 +0000 | [diff] [blame] | 311 | String err = "The specified class reference in the " + |
| 312 | "shrink/split-Operator is not a number."; |
| Joachim Bingel | 23c31ad | 2014-08-11 09:44:46 +0000 | [diff] [blame] | 313 | errorMsgs.add(err); |
| 314 | throw new QueryException(err); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 315 | } |
| 316 | } |
| 317 | } |
| 318 | } else { |
| 319 | classRefs.add(0); |
| 320 | } |
| 321 | LinkedHashMap<String, Object> referenceGroup = makeReference(classRefs); |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 322 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 323 | String type = node.getChild(0).toStringTree(parser); |
| Joachim Bingel | 23c31ad | 2014-08-11 09:44:46 +0000 | [diff] [blame] | 324 | // Default is focus(), if deviating catch here |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 325 | if (type.equals("split")) referenceGroup.put("operation", "operation:split"); |
| Joachim Bingel | 899bdcf | 2014-07-14 14:55:06 +0000 | [diff] [blame] | 326 | if (type.equals("submatch") || type.equals("shrink")) { |
| Joachim Bingel | aaabb72 | 2014-09-24 14:29:10 +0000 | [diff] [blame] | 327 | String warning = "Deprecated 2014-07-24: "+type + "() as a match reducer " + |
| 328 | "to a specific class is deprecated in favor of focus() and will " + |
| 329 | "only be supported for 3 months after deprecation date."; |
| Joachim Bingel | 899bdcf | 2014-07-14 14:55:06 +0000 | [diff] [blame] | 330 | log.warn(warning); |
| 331 | requestMap.put("warning", warning); |
| 332 | } |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 333 | if (classRefOp != null) { |
| 334 | referenceGroup.put("classRefOp", "classRefOp:" + classRefOp); |
| 335 | } |
| 336 | ArrayList<Object> referenceOperands = new ArrayList<Object>(); |
| 337 | referenceGroup.put("operands", referenceOperands); |
| 338 | // Step II: decide where to put the group |
| 339 | putIntoSuperObject(referenceGroup); |
| 340 | objectStack.push(referenceGroup); |
| 341 | stackedObjects++; |
| 342 | visited.add(node.getChild(0)); |
| 343 | } |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 344 | |
| Joachim Bingel | 9bbd4fc | 2014-08-11 14:56:48 +0000 | [diff] [blame] | 345 | if (nodeCat.equals("submatch")) { |
| 346 | LinkedHashMap<String,Object> submatch = makeReference(null); |
| 347 | submatch.put("operands", new ArrayList<Object>()); |
| Joachim Bingel | 23c31ad | 2014-08-11 09:44:46 +0000 | [diff] [blame] | 348 | ParseTree startpos = getFirstChildWithCat(node,"startpos"); |
| 349 | ParseTree length = getFirstChildWithCat(node,"length"); |
| 350 | ArrayList<Integer> spanRef = new ArrayList<Integer>(); |
| 351 | spanRef.add(Integer.parseInt(startpos.getText())); |
| 352 | if (length != null) { |
| 353 | spanRef.add(Integer.parseInt(length.getText())); |
| 354 | } |
| Joachim Bingel | 9bbd4fc | 2014-08-11 14:56:48 +0000 | [diff] [blame] | 355 | submatch.put("spanRef", spanRef); |
| 356 | putIntoSuperObject(submatch); |
| 357 | objectStack.push(submatch); |
| Joachim Bingel | 23c31ad | 2014-08-11 09:44:46 +0000 | [diff] [blame] | 358 | stackedObjects++; |
| 359 | visited.add(node.getChild(0)); |
| 360 | } |
| 361 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 362 | if (nodeCat.equals("meta")) { |
| 363 | LinkedHashMap<String, Object> metaFilter = new LinkedHashMap<String, Object>(); |
| 364 | requestMap.put("meta", metaFilter); |
| 365 | metaFilter.put("@type", "korap:meta"); |
| 366 | } |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 367 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 368 | if (nodeCat.equals("within") && !getNodeCat(node.getParent()).equals("position")) { |
| 369 | ParseTree domainNode = node.getChild(2); |
| 370 | String domain = getNodeCat(domainNode); |
| Joachim Bingel | aaabb72 | 2014-09-24 14:29:10 +0000 | [diff] [blame] | 371 | LinkedHashMap<String, Object> curObject = |
| 372 | (LinkedHashMap<String, Object>) objectStack.getFirst(); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 373 | curObject.put("within", domain); |
| 374 | visited.add(node.getChild(0)); |
| 375 | visited.add(node.getChild(1)); |
| 376 | visited.add(domainNode); |
| 377 | } |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 378 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 379 | objectsToPop.push(stackedObjects); |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 380 | |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 381 | /* |
| 382 | **************************************************************** |
| 383 | **************************************************************** |
| 384 | * recursion until 'request' node (root of tree) is processed * |
| Joachim Bingel | 7fd4b1b | 2013-12-04 09:04:40 +0000 | [diff] [blame] | 385 | **************************************************************** |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 386 | **************************************************************** |
| 387 | */ |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 388 | for (int i = 0; i < node.getChildCount(); i++) { |
| 389 | ParseTree child = node.getChild(i); |
| 390 | processNode(child); |
| 391 | } |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 392 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 393 | // Stuff that happens when leaving a node (taking items off the stacks) |
| 394 | for (int i = 0; i < objectsToPop.get(0); i++) { |
| 395 | objectStack.pop(); |
| 396 | } |
| 397 | objectsToPop.pop(); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 398 | openNodeCats.pop(); |
| 399 | } |
| 400 | |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 401 | /** |
| 402 | * Parses a repetition node |
| 403 | * @param node |
| 404 | * @return A two-element array, of which the first element is an int representing |
| 405 | * the minimal number of repetitions of the quantified element, and the second |
| 406 | * element representing the maximal number of repetitions |
| 407 | */ |
| Joachim Bingel | 6163156 | 2014-07-24 14:26:02 +0000 | [diff] [blame] | 408 | private Integer[] parseRepetition(ParseTree node) { |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 409 | Integer min = 0, max = 0; |
| 410 | boolean maxInfinite = false; |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 411 | // (repetition) node can be of two types: 'kleene' or 'range' |
| 412 | ParseTree repetitionTypeNode = node.getChild(0); |
| 413 | String repetitionType = getNodeCat(repetitionTypeNode); |
| 414 | if (repetitionType.equals("kleene")) { |
| 415 | // kleene operators (+ and *) as well as optionality (?) |
| 416 | String kleeneOp = repetitionTypeNode.getText(); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 417 | if (kleeneOp.equals("*")) { |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 418 | maxInfinite = true; |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 419 | } else if (kleeneOp.equals("+")) { |
| 420 | min = 1; |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 421 | maxInfinite = true; |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 422 | } if (kleeneOp.equals("?")) { |
| 423 | max = 1; |
| 424 | } |
| 425 | } else { |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 426 | // Range node of form "{ min , max }" or "{ max }" or "{ , max }" or "{ min , }" |
| 427 | ParseTree minNode = getFirstChildWithCat(repetitionTypeNode, "min"); |
| 428 | ParseTree maxNode = getFirstChildWithCat(repetitionTypeNode, "max"); |
| 429 | if (maxNode!=null) max = Integer.parseInt(maxNode.getText()); |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 430 | else maxInfinite = true; |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 431 | // min is optional: if not specified, min = max |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 432 | if (minNode!=null) min = Integer.parseInt(minNode.getText()); |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 433 | else if (hasChild(repetitionTypeNode, ",")) min = 0; |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 434 | else min = max; |
| 435 | } |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 436 | if (maxInfinite) { |
| 437 | max = null; |
| 438 | } |
| Joachim Bingel | 6163156 | 2014-07-24 14:26:02 +0000 | [diff] [blame] | 439 | return new Integer[]{min,max}; |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 440 | } |
| 441 | |
| Joachim Bingel | ceb7906 | 2014-09-22 11:50:37 +0000 | [diff] [blame] | 442 | private LinkedHashMap<String,Object> parseFrame(ParseTree node) { |
| Joachim Bingel | aaabb72 | 2014-09-24 14:29:10 +0000 | [diff] [blame] | 443 | String operator = node.toStringTree(parser).toLowerCase(); |
| Joachim Bingel | ceb7906 | 2014-09-22 11:50:37 +0000 | [diff] [blame] | 444 | String[] frames = new String[]{""}; |
| 445 | String[] sharedClasses = new String[]{"includes"}; |
| 446 | switch (operator) { |
| 447 | case "contains": |
| 448 | frames = new String[]{}; |
| 449 | break; |
| 450 | case "matches": |
| 451 | frames = new String[]{"matches"}; |
| 452 | break; |
| 453 | case "startswith": |
| 454 | frames = new String[]{"startswith"}; |
| 455 | break; |
| 456 | case "endswith": |
| 457 | frames = new String[]{"endswith"}; |
| 458 | break; |
| 459 | case "overlaps": |
| 460 | frames = new String[]{"overlapsLeft","overlapsRight"}; |
| 461 | sharedClasses = new String[]{"intersects"}; |
| 462 | break; |
| 463 | } |
| 464 | return makePosition(frames,sharedClasses); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 465 | } |
| 466 | |
| Joachim Bingel | 3a41a44 | 2014-07-22 12:16:16 +0000 | [diff] [blame] | 467 | |
| 468 | private LinkedHashMap<String, Object> parseTermOrTermGroup( |
| 469 | ParseTree node, boolean negated) { |
| 470 | return parseTermOrTermGroup(node, negated, "token"); |
| 471 | } |
| 472 | |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 473 | /** |
| 474 | * Parses a (term) or (termGroup) node |
| 475 | * @param node |
| 476 | * @param negatedGlobal Indicates whether the term/termGroup is globally negated, e.g. through a negation |
| 477 | * operator preceding the related token like "![base=foo]". Global negation affects the term's "match" parameter. |
| 478 | * @return A term or termGroup object, depending on input |
| 479 | */ |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 480 | @SuppressWarnings("unchecked") |
| Joachim Bingel | 3a41a44 | 2014-07-22 12:16:16 +0000 | [diff] [blame] | 481 | private LinkedHashMap<String, Object> parseTermOrTermGroup(ParseTree node, boolean negatedGlobal, String mode) { |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 482 | if (getNodeCat(node).equals("term")) { |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 483 | String key = null; |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 484 | LinkedHashMap<String,Object> term = makeTerm(); |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 485 | // handle negation |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 486 | boolean negated = negatedGlobal; |
| Joachim Bingel | 723ced0 | 2014-07-14 16:17:22 +0000 | [diff] [blame] | 487 | boolean isRegex = false; |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 488 | List<ParseTree> negations = getChildrenWithCat(node, "!"); |
| 489 | if (negations.size() % 2 == 1) negated = !negated; |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 490 | // retrieve possible nodes |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 491 | ParseTree keyNode = getFirstChildWithCat(node, "key"); |
| Joachim Bingel | 998954a | 2014-07-14 15:58:34 +0000 | [diff] [blame] | 492 | ParseTree valueNode = getFirstChildWithCat(node, "value"); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 493 | ParseTree layerNode = getFirstChildWithCat(node, "layer"); |
| 494 | ParseTree foundryNode = getFirstChildWithCat(node, "foundry"); |
| 495 | ParseTree termOpNode = getFirstChildWithCat(node, "termOp"); |
| 496 | ParseTree flagNode = getFirstChildWithCat(node, "flag"); |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 497 | // process foundry |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 498 | if (foundryNode != null) term.put("foundry", foundryNode.getText()); |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 499 | // process layer: map "base" -> "lemma" |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 500 | if (layerNode != null) { |
| 501 | String layer = layerNode.getText(); |
| 502 | if (layer.equals("base")) layer="lemma"; |
| Joachim Bingel | 3a41a44 | 2014-07-22 12:16:16 +0000 | [diff] [blame] | 503 | if (mode.equals("span")) term.put("key", layer); |
| 504 | else term.put("layer", layer); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 505 | } |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 506 | // process key: 'normal' or regex? |
| 507 | key = keyNode.getText(); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 508 | if (getNodeCat(keyNode.getChild(0)).equals("regex")) { |
| Joachim Bingel | 723ced0 | 2014-07-14 16:17:22 +0000 | [diff] [blame] | 509 | isRegex = true; |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 510 | term.put("type", "type:regex"); |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 511 | key = key.substring(1, key.length()-1); // remove leading and trailing quotes |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 512 | } |
| Joachim Bingel | 3a41a44 | 2014-07-22 12:16:16 +0000 | [diff] [blame] | 513 | if (mode.equals("span")) term.put("value", key); |
| 514 | else term.put("key", key); |
| Joachim Bingel | 998954a | 2014-07-14 15:58:34 +0000 | [diff] [blame] | 515 | // process value |
| 516 | if (valueNode != null) term.put("value", valueNode.getText()); |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 517 | // process operator ("match" property) |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 518 | if (termOpNode != null) { |
| 519 | String termOp = termOpNode.getText(); |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 520 | negated = termOp.contains("!") ? !negated : negated; |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 521 | if (!negated) term.put("match", "match:eq"); |
| 522 | else term.put("match", "match:ne"); |
| 523 | } |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 524 | // process possible flags |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 525 | if (flagNode != null) { |
| 526 | String flag = getNodeCat(flagNode.getChild(0)).substring(1); //substring removes leading slash '/' |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 527 | if (flag.contains("i")) term.put("caseInsensitive", true); |
| 528 | else if (flag.contains("I")) term.put("caseInsensitive", false); |
| 529 | if (flag.contains("x")) { |
| Joachim Bingel | 723ced0 | 2014-07-14 16:17:22 +0000 | [diff] [blame] | 530 | if (!isRegex) { |
| Joachim Bingel | a83f8cc | 2014-08-05 14:12:59 +0000 | [diff] [blame] | 531 | key = QueryUtils.escapeRegexSpecialChars(key); |
| Joachim Bingel | 723ced0 | 2014-07-14 16:17:22 +0000 | [diff] [blame] | 532 | } |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 533 | term.put("key", ".*?"+key+".*?"); // flag 'x' allows submatches: overwrite key with appended .*? |
| Joachim Bingel | 723ced0 | 2014-07-14 16:17:22 +0000 | [diff] [blame] | 534 | term.put("type", "type:regex"); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 535 | } |
| 536 | } |
| 537 | return term; |
| 538 | } else { |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 539 | // For termGroups, establish a boolean relation between operands and recursively call this function with |
| 540 | // the term or termGroup operands |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 541 | LinkedHashMap<String,Object> termGroup = null; |
| 542 | ParseTree leftOp = null; |
| 543 | ParseTree rightOp = null; |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 544 | // check for leading/trailing parantheses |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 545 | if (!getNodeCat(node.getChild(0)).equals("(")) leftOp = node.getChild(0); |
| 546 | else leftOp = node.getChild(1); |
| 547 | if (!getNodeCat(node.getChild(node.getChildCount()-1)).equals(")")) rightOp = node.getChild(node.getChildCount()-1); |
| 548 | else rightOp = node.getChild(node.getChildCount()-2); |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 549 | // establish boolean relation |
| Joachim Bingel | 23c31ad | 2014-08-11 09:44:46 +0000 | [diff] [blame] | 550 | ParseTree boolOp = getFirstChildWithCat(node, "boolOp"); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 551 | String operator = boolOp.getText().equals("&") ? "and" : "or"; |
| 552 | termGroup = makeTermGroup(operator); |
| 553 | ArrayList<Object> operands = (ArrayList<Object>) termGroup.get("operands"); |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 554 | // recursion with left/right operands |
| Joachim Bingel | 3a41a44 | 2014-07-22 12:16:16 +0000 | [diff] [blame] | 555 | operands.add(parseTermOrTermGroup(leftOp, negatedGlobal, mode)); |
| 556 | operands.add(parseTermOrTermGroup(rightOp, negatedGlobal, mode)); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 557 | return termGroup; |
| 558 | } |
| 559 | } |
| 560 | |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 561 | /** |
| 562 | * Puts an object into the operands list of its governing (or "super") object which had been placed on the |
| 563 | * {@link #objectStack} before and is still on top of the stack. If this is the top object of the tree, it is put there |
| 564 | * instead of into some (non-existent) operand stack. |
| 565 | * @param object The object to be inserted |
| 566 | */ |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 567 | private void putIntoSuperObject(LinkedHashMap<String, Object> object) { |
| Joachim Bingel | 33bd45f | 2014-06-25 15:00:54 +0000 | [diff] [blame] | 568 | putIntoSuperObject(object, 0); |
| 569 | } |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 570 | |
| Joachim Bingel | b2aa1c7 | 2014-07-01 12:48:46 +0000 | [diff] [blame] | 571 | /** |
| 572 | * Puts an object into the operands list of its governing (or "super") object which had been placed on the |
| 573 | * {@link #objectStack} before. If this is the top object of the tree, it is put there |
| 574 | * instead of into some (non-existent) operand stack. |
| 575 | * @param object The object to be inserted |
| 576 | * @param objStackPosition Indicated the position of the super object on the {@link #objectStack} (in case not the top |
| 577 | * element of the stack is the super object. |
| 578 | */ |
| Joachim Bingel | 33bd45f | 2014-06-25 15:00:54 +0000 | [diff] [blame] | 579 | @SuppressWarnings({ "unchecked" }) |
| 580 | private void putIntoSuperObject(LinkedHashMap<String, Object> object, int objStackPosition) { |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 581 | if (objectStack.size()>objStackPosition) { |
| Joachim Bingel | 33bd45f | 2014-06-25 15:00:54 +0000 | [diff] [blame] | 582 | ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(objStackPosition).get("operands"); |
| 583 | topObjectOperands.add(object); |
| Joachim Bingel | 33bd45f | 2014-06-25 15:00:54 +0000 | [diff] [blame] | 584 | } else { |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 585 | requestMap.put("query", object); |
| Joachim Bingel | 33bd45f | 2014-06-25 15:00:54 +0000 | [diff] [blame] | 586 | } |
| 587 | } |
| 588 | |
| Joachim Bingel | 14239d8 | 2014-07-22 09:55:04 +0000 | [diff] [blame] | 589 | /** |
| 590 | * Basically only increases the min and max counters as required by Poliqarp |
| 591 | * @param distanceNode |
| 592 | * @return |
| 593 | */ |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 594 | private Integer[] parseDistance(ParseTree distanceNode) { |
| 595 | Integer[] minmax = parseEmptySegments(distanceNode.getChild(0)); |
| Joachim Bingel | 14239d8 | 2014-07-22 09:55:04 +0000 | [diff] [blame] | 596 | Integer min = minmax[0]; |
| 597 | Integer max = minmax[1]; |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 598 | min++; |
| 599 | if (max != null) max++; |
| 600 | // min = cropToMaxValue(min); |
| 601 | // max = cropToMaxValue(max); |
| 602 | return new Integer[]{min, max}; |
| Joachim Bingel | 14239d8 | 2014-07-22 09:55:04 +0000 | [diff] [blame] | 603 | } |
| 604 | |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 605 | private Integer[] parseEmptySegments(ParseTree emptySegments) { |
| Joachim Bingel | 14239d8 | 2014-07-22 09:55:04 +0000 | [diff] [blame] | 606 | Integer min = 0; |
| 607 | Integer max = 0; |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 608 | ParseTree child; |
| 609 | for (int i = 0; i < emptySegments.getChildCount(); i++) { |
| 610 | child = emptySegments.getChild(i); |
| 611 | ParseTree nextSibling = emptySegments.getChild(i + 1); |
| 612 | if (child.toStringTree(parser).equals("(emptyToken [ ])")) { |
| 613 | if (nextSibling != null && getNodeCat(nextSibling).equals("repetition")) { |
| Joachim Bingel | 6163156 | 2014-07-24 14:26:02 +0000 | [diff] [blame] | 614 | Integer[] minmax = parseRepetition(nextSibling); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 615 | min += minmax[0]; |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 616 | if (minmax[1] != null) { |
| 617 | max += minmax[1]; |
| 618 | } else { |
| 619 | max = null; |
| 620 | } |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 621 | } else { |
| 622 | min++; |
| 623 | max++; |
| 624 | } |
| Joachim Bingel | bef2362 | 2014-04-29 15:04:02 +0000 | [diff] [blame] | 625 | } |
| Joachim Bingel | bef2362 | 2014-04-29 15:04:02 +0000 | [diff] [blame] | 626 | } |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 627 | // min = cropToMaxValue(min); |
| 628 | // max = cropToMaxValue(max); |
| 629 | return new Integer[]{min, max}; |
| Joachim Bingel | bef2362 | 2014-04-29 15:04:02 +0000 | [diff] [blame] | 630 | } |
| Joachim Bingel | 94a1ccd | 2013-12-10 10:37:29 +0000 | [diff] [blame] | 631 | |
| Joachim Bingel | 14239d8 | 2014-07-22 09:55:04 +0000 | [diff] [blame] | 632 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 633 | private ParserRuleContext parsePoliqarpQuery(String p) throws QueryException { |
| 634 | checkUnbalancedPars(p); |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 635 | Lexer poliqarpLexer = new PoliqarpPlusLexer((CharStream) null); |
| 636 | ParserRuleContext tree = null; |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 637 | // Like p. 111 |
| 638 | try { |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 639 | // Tokenize input data |
| 640 | ANTLRInputStream input = new ANTLRInputStream(p); |
| 641 | poliqarpLexer.setInputStream(input); |
| 642 | CommonTokenStream tokens = new CommonTokenStream(poliqarpLexer); |
| 643 | parser = new PoliqarpPlusParser(tokens); |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 644 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 645 | // Don't throw out erroneous stuff |
| 646 | parser.setErrorHandler(new BailErrorStrategy()); |
| 647 | parser.removeErrorListeners(); |
| Joachim Bingel | 4b405f5 | 2013-11-15 15:29:30 +0000 | [diff] [blame] | 648 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 649 | // Get starting rule from parser |
| 650 | Method startRule = PoliqarpPlusParser.class.getMethod("request"); |
| 651 | tree = (ParserRuleContext) startRule.invoke(parser, (Object[]) null); |
| 652 | log.debug(tree.toStringTree(parser)); |
| 653 | } |
| 654 | // Some things went wrong ... |
| 655 | catch (Exception e) { |
| 656 | log.error("Could not parse query. Please make sure it is well-formed.");; |
| 657 | log.error("Underlying error is: "+e.getMessage()); |
| 658 | System.err.println(e.getMessage()); |
| 659 | } |
| Michael Hanl | d8116e5 | 2014-04-25 20:31:29 +0000 | [diff] [blame] | 660 | |
| Joachim Bingel | 0900a89 | 2014-06-30 16:26:21 +0000 | [diff] [blame] | 661 | if (tree == null) { |
| 662 | throw new QueryException("The query you specified could not be processed. Please make sure it is well-formed."); |
| 663 | } |
| 664 | // Return the generated tree |
| 665 | return tree; |
| 666 | } |
| Nils Diewald | 4128a92 | 2014-07-18 14:39:24 +0000 | [diff] [blame] | 667 | } |