| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.serialize; |
| 2 | |
| 3 | import java.lang.reflect.Method; |
| 4 | import java.util.ArrayList; |
| Joachim Bingel | 7ee0786 | 2014-04-28 15:22:41 +0000 | [diff] [blame] | 5 | import java.util.Arrays; |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 6 | import java.util.HashMap; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 7 | import java.util.LinkedHashMap; |
| 8 | import java.util.LinkedList; |
| 9 | import java.util.List; |
| 10 | import java.util.Map; |
| Joachim Bingel | fb9d5fd | 2014-06-25 09:32:43 +0000 | [diff] [blame] | 11 | import java.util.NoSuchElementException; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 12 | |
| 13 | import org.antlr.v4.runtime.ANTLRInputStream; |
| 14 | import org.antlr.v4.runtime.BailErrorStrategy; |
| 15 | import org.antlr.v4.runtime.CharStream; |
| 16 | import org.antlr.v4.runtime.CommonTokenStream; |
| 17 | import org.antlr.v4.runtime.Lexer; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 18 | import org.antlr.v4.runtime.ParserRuleContext; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 19 | import org.antlr.v4.runtime.tree.ParseTree; |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 20 | import org.slf4j.LoggerFactory; |
| Joachim Bingel | c63f781 | 2014-07-30 09:12:25 +0000 | [diff] [blame] | 21 | import org.slf4j.Logger; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 22 | |
| Joachim Bingel | 6003b85 | 2014-12-18 14:20:55 +0000 | [diff] [blame] | 23 | import de.ids_mannheim.korap.query.parse.annis.AqlLexer; |
| 24 | import de.ids_mannheim.korap.query.parse.annis.AqlParser; |
| Joachim Bingel | 3fa584b | 2014-12-17 13:35:43 +0000 | [diff] [blame] | 25 | import de.ids_mannheim.korap.query.serialize.util.Antlr4DescriptiveErrorListener; |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 26 | import de.ids_mannheim.korap.query.serialize.util.KoralObjectGenerator; |
| Joachim Bingel | 1f8f378 | 2015-01-19 17:58:41 +0000 | [diff] [blame] | 27 | import de.ids_mannheim.korap.query.serialize.util.StatusCodes; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 28 | |
| 29 | /** |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 30 | * Map representation of ANNIS QL syntax tree as returned by ANTLR |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 31 | * @author Joachim Bingel (bingel@ids-mannheim.de) |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 32 | * |
| 33 | */ |
| Joachim Bingel | 1faf8a5 | 2015-01-09 13:17:34 +0000 | [diff] [blame] | 34 | public class AnnisQueryProcessor extends Antlr4AbstractQueryProcessor { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 35 | private static Logger log = LoggerFactory.getLogger(AnnisQueryProcessor.class); |
| 36 | /** |
| 37 | * Flag that indicates whether token fields or meta fields are currently being processed |
| 38 | */ |
| 39 | boolean inMeta = false; |
| 40 | /** |
| 41 | * Keeps track of operands that are to be integrated into yet uncreated objects. |
| 42 | */ |
| 43 | LinkedList<LinkedHashMap<String,Object>> operandStack = new LinkedList<LinkedHashMap<String,Object>>(); |
| 44 | /** |
| 45 | * Keeps track of explicitly (by #-var definition) or implicitly (number as reference) introduced entities (for later reference by #-operator) |
| 46 | */ |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 47 | Map<String, LinkedHashMap<String,Object>> nodeVariables = new LinkedHashMap<String, LinkedHashMap<String,Object>>(); |
| 48 | /** |
| 49 | * Keeps track of explicitly (by #-var definition) or implicitly (number as reference) introduced entities (for later reference by #-operator) |
| 50 | */ |
| 51 | Map<ParseTree, String> nodes2refs= new LinkedHashMap<ParseTree, String>(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 52 | /** |
| 53 | * Counter for variable definitions. |
| 54 | */ |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 55 | Integer variableCount = 1; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 56 | /** |
| 57 | * Marks the currently active token in order to know where to add flags (might already have been taken away from token stack). |
| 58 | */ |
| 59 | LinkedHashMap<String,Object> curToken = new LinkedHashMap<String,Object>(); |
| 60 | /** |
| 61 | * Keeps track of operands lists that are to be serialised in an inverted |
| 62 | * order (e.g. the IN() operator) compared to their AST representation. |
| 63 | */ |
| 64 | private LinkedList<ArrayList<Object>> invertedOperandsLists = new LinkedList<ArrayList<Object>>(); |
| 65 | /** |
| 66 | * Keeps track of operation:class numbers. |
| 67 | */ |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 68 | int classCounter = 1; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 69 | /** |
| 70 | * Keeps track of numers of relations processed (important when dealing with multiple predications). |
| 71 | */ |
| 72 | int relationCounter = 0; |
| 73 | /** |
| 74 | * Keeps track of references to nodes that are operands of groups (e.g. tree relations). Those nodes appear on the top level of the parse tree |
| 75 | * but are to be integrated into the AqlTree at a later point (namely as operands of the respective group). Therefore, store references to these |
| 76 | * nodes here and exclude the operands from being written into the query map individually. |
| 77 | */ |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 78 | private int totalRelationCount = 0; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 79 | /** |
| 80 | * Keeps a record of reference-class-mapping, i.e. which 'class' has been assigned to which #n reference. This is important when introducing korap:reference |
| 81 | * spans to refer back to previously established classes for entities. |
| 82 | */ |
| 83 | private LinkedHashMap<String, Integer> refClassMapping = new LinkedHashMap<String, Integer>(); |
| 84 | /** |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 85 | * Keeps a record of unary relations on spans/tokens. |
| 86 | */ |
| Joachim Bingel | ef0b5b0 | 2015-01-30 09:37:43 +0000 | [diff] [blame] | 87 | private LinkedHashMap<String, ArrayList<ParseTree>> unaryRelations = new LinkedHashMap<String, ArrayList<ParseTree>>(); |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 88 | /** |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 89 | * Keeps track of the number of references to a node/token by means of #n. E.g. in the query |
| 90 | * <tt>tok="x" & tok="y" & tok="z" & #1 . #2 & #2 . #3</tt>, the 2nd token ("y") is referenced twice, the others once. |
| 91 | */ |
| 92 | private LinkedHashMap<String, Integer> nodeReferencesTotal = new LinkedHashMap<String, Integer>(); |
| 93 | /** |
| 94 | * Keeps track of the number of references to a node/token that have already been processed. |
| 95 | */ |
| 96 | private LinkedHashMap<String, Integer> nodeReferencesProcessed = new LinkedHashMap<String, Integer>(); |
| 97 | /** |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 98 | * Keeps track of queued relations. Relations sometimes cannot be processed directly, namely in case it does not share |
| 99 | * any operands with the previous relation. Then wait until a relation with a shared operand has been processed. |
| 100 | */ |
| 101 | private LinkedList<ParseTree> queuedRelations = new LinkedList<ParseTree>(); |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 102 | /** |
| 103 | * For some objects, it may be decided in the initial scan (processAndTopExpr()) that they |
| 104 | * need to be wrapped in a class operation when retrieved later. This map stores this information. |
| 105 | * More precisely, it stores for every node in the tree which class ID its derived Koral |
| 106 | * object will receive. |
| 107 | */ |
| 108 | private LinkedHashMap<ParseTree, Integer> objectsToWrapInClass = new LinkedHashMap<ParseTree, Integer>(); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 109 | |
/**
 * Creates a processor for an ANNIS QL query and processes the query immediately.
 * This instance is registered with {@link KoralObjectGenerator} before processing starts.
 *
 * @param query the ANNIS QL query string to process
 */
public AnnisQueryProcessor(String query) {
    KoralObjectGenerator.setQueryProcessor(this);
    this.process(query);
}
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 114 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 115 | @Override |
| 116 | public void process(String query) { |
| 117 | ParseTree tree = parseAnnisQuery(query); |
| 118 | if (this.parser != null) { |
| 119 | super.parser = this.parser; |
| 120 | } else { |
| 121 | throw new NullPointerException("Parser has not been instantiated!"); |
| 122 | } |
| 123 | log.info("Processing Annis query: "+query); |
| 124 | if (tree != null) { |
| 125 | log.debug("ANTLR parse tree: "+tree.toStringTree(parser)); |
| 126 | processNode(tree); |
| 127 | // Last check to see if all relations have left the queue |
| 128 | if (!queuedRelations.isEmpty()) { |
| Joachim Bingel | 1f8f378 | 2015-01-19 17:58:41 +0000 | [diff] [blame] | 129 | ParseTree queued = queuedRelations.pop(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 130 | if (verbose) System.out.println("Taking off queue (last rel): "+ queued.getText()); |
| Joachim Bingel | 1f8f378 | 2015-01-19 17:58:41 +0000 | [diff] [blame] | 131 | if (checkOperandsProcessedPreviously(queued)) { |
| 132 | processNode(queued); |
| 133 | } else { |
| 134 | addError(StatusCodes.UNBOUND_ANNIS_RELATION, "The relation "+queued.getText() |
| 135 | +" is not bound to any other relations."); |
| 136 | requestMap.put("query", new LinkedHashMap<String, Object>()); |
| 137 | } |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 138 | } |
| 139 | } |
| 140 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 141 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 142 | private void processNode(ParseTree node) { |
| 143 | String nodeCat = getNodeCat(node); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 144 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 145 | // Top-down processing |
| 146 | if (visited.contains(node)) return; |
| 147 | openNodeCats.push(nodeCat); |
| 148 | stackedObjects = 0; |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 149 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 150 | // Before doing anything else, check if any relations are queued |
| 151 | // and need to be processed first |
| 152 | if (nodeCat.equals("n_ary_linguistic_term")) { |
| 153 | if (!queuedRelations.isEmpty()) { |
| 154 | ParseTree queued = queuedRelations.getFirst(); |
| 155 | if (checkOperandsProcessedPreviously(queued)) { |
| 156 | if (verbose) System.out.println("Taking off queue: "+ |
| 157 | queued.getText()); |
| 158 | queuedRelations.removeFirst(); |
| 159 | processNode(queued); |
| 160 | } |
| 161 | } |
| 162 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 163 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 164 | if (verbose) { |
| 165 | System.err.println(" "+objectStack); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 166 | System.out.println(openNodeCats); |
| 167 | } |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 168 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 169 | /* |
| 170 | **************************************************************** |
| 171 | **************************************************************** |
| 172 | * Processing individual node categories * |
| 173 | **************************************************************** |
| 174 | **************************************************************** |
| 175 | */ |
| 176 | if (nodeCat.equals("exprTop")) { |
| 177 | processExprTop(node); |
| 178 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 179 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 180 | if (nodeCat.equals("andTopExpr")) { |
| 181 | processAndTopExpr(node); |
| 182 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 183 | |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 184 | // if (nodeCat.equals("unary_linguistic_term")) { |
| 185 | // processUnary_linguistic_term(node); |
| 186 | // } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 187 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 188 | if (nodeCat.equals("n_ary_linguistic_term")) { |
| 189 | processN_ary_linguistic_term(node); |
| 190 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 191 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 192 | if (nodeCat.equals("variableExpr")) { |
| 193 | processVariableExpr(node); |
| 194 | } |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 195 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 196 | objectsToPop.push(stackedObjects); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 197 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 198 | /* |
| 199 | **************************************************************** |
| 200 | **************************************************************** |
| 201 | * recursion until 'request' node (root of tree) is processed * |
| 202 | **************************************************************** |
| 203 | **************************************************************** |
| 204 | */ |
| 205 | for (int i=0; i<node.getChildCount(); i++) { |
| 206 | ParseTree child = node.getChild(i); |
| 207 | processNode(child); |
| 208 | } |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 209 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 210 | /* |
| 211 | ************************************************************** |
| 212 | * Stuff that happens after processing the children of a node * |
| 213 | ************************************************************** |
| 214 | */ |
| 215 | if (!objectsToPop.isEmpty()) { |
| 216 | for (int i=0; i<objectsToPop.pop(); i++) { |
| 217 | objectStack.pop(); |
| 218 | } |
| 219 | } |
| 220 | openNodeCats.pop(); |
| 221 | } |
| Joachim Bingel | 9c3ddb9 | 2014-06-23 13:49:58 +0000 | [diff] [blame] | 222 | |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 223 | |
| 224 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 225 | private void processAndTopExpr(ParseTree node) { |
| 226 | // Before processing any child expr node, check if it has one or more "*ary_linguistic_term" nodes. |
| 227 | // Those nodes may use references to earlier established operand nodes. |
| 228 | // Those operand nodes are not to be included into the query map individually but |
| 229 | // naturally as operands of the relations/groups introduced by the |
| 230 | // *node. For that purpose, this section mines all used references |
| 231 | // and stores them in a list for later reference. |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 232 | for (ParseTree unaryTermNode : getDescendantsWithCat(node, "unary_linguistic_term")) { |
| 233 | String ref = getNodeCat(unaryTermNode.getChild(0)).substring(1); |
| Joachim Bingel | ef0b5b0 | 2015-01-30 09:37:43 +0000 | [diff] [blame] | 234 | ArrayList<ParseTree> unaryTermsForRef = unaryRelations.get(ref); |
| 235 | if (unaryTermsForRef == null) unaryTermsForRef = new ArrayList<ParseTree>(); |
| 236 | unaryTermsForRef.add(unaryTermNode); |
| 237 | unaryRelations.put(ref, unaryTermsForRef); |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 238 | } |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 239 | for (ParseTree lingTermNode : getDescendantsWithCat(node, "n_ary_linguistic_term")) { |
| 240 | for (ParseTree refOrNode : getChildrenWithCat(lingTermNode, "refOrNode")) { |
| 241 | String refOrNodeString = refOrNode.getChild(0).toStringTree(parser); |
| 242 | if (refOrNodeString.startsWith("#")) { |
| 243 | String ref = refOrNode.getChild(0).toStringTree(parser).substring(1); |
| 244 | if (nodeReferencesTotal.containsKey(ref)) { |
| 245 | nodeReferencesTotal.put(ref, nodeReferencesTotal.get(ref)+1); |
| 246 | } else { |
| 247 | nodeReferencesTotal.put(ref, 1); |
| 248 | nodeReferencesProcessed.put(ref, 0); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 249 | } |
| 250 | } |
| 251 | } |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 252 | totalRelationCount++; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 253 | } |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 254 | // Then, mine all object definitions. |
| 255 | for (ParseTree variableExprNode : getDescendantsWithCat(node, "variableExpr")) { |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 256 | String ref; |
| 257 | // might be a ref label rather than a counting number |
| 258 | ParseTree varDef = getFirstChildWithCat(variableExprNode.getParent(), "varDef"); |
| 259 | if (varDef != null) { |
| 260 | ref = varDef.getText().replaceFirst("#", ""); // remove trailing # |
| 261 | } else { |
| 262 | ref = variableCount.toString(); |
| 263 | } |
| 264 | nodes2refs.put(variableExprNode, ref); |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 265 | LinkedHashMap<String,Object> object = processVariableExpr(variableExprNode); |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 266 | nodeVariables.put(ref, object); |
| 267 | variableCount++; |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 268 | // Check if this object definition is part of a "direct declaration relation", |
| 269 | // i.e. a relation which declares its operands directly rather than using |
| 270 | // references to earlier declared objects. These objects must still be |
| 271 | // available for later reference, handle this here. |
| 272 | // Direct declaration relation is present when grandparent is n_ary_linguistic_term node. |
| 273 | if (getNodeCat(variableExprNode.getParent().getParent()).equals("n_ary_linguistic_term")) { |
| 274 | if (nodeReferencesTotal.containsKey(ref)) { |
| 275 | nodeReferencesTotal.put(ref, nodeReferencesTotal.get(ref)+1); |
| 276 | } else { |
| 277 | nodeReferencesTotal.put(ref, 1); |
| 278 | } |
| 279 | // This is important for later relations wrapping the present relation. |
| 280 | // If the object isn't registered as processed, it won't be available |
| 281 | // for referencing. |
| 282 | nodeReferencesProcessed.put(ref, 1); |
| 283 | // Register this node for latter wrapping in class. |
| 284 | if (nodeReferencesTotal.get(ref) > 1) { |
| 285 | refClassMapping.put(ref, classCounter); |
| 286 | objectsToWrapInClass.put(variableExprNode, classCounter++); |
| 287 | } |
| 288 | } |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 289 | } |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 290 | } |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 291 | // |
| 292 | // private void processUnary_linguistic_term(ParseTree node) { |
| 293 | // LinkedHashMap<String, Object> unaryOperator = parseUnaryOperator(node); |
| 294 | // String reference = node.getChild(0).toStringTree(parser).substring(1); |
| 295 | // LinkedHashMap<String, Object> object = nodeVariables.get(reference); |
| 296 | // object.putAll(unaryOperator); |
| 297 | // } |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 298 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 299 | private void processExprTop(ParseTree node) { |
| 300 | List<ParseTree> andTopExprs = getChildrenWithCat(node, "andTopExpr"); |
| 301 | if (andTopExprs.size() > 1) { |
| 302 | LinkedHashMap<String, Object> topOr = KoralObjectGenerator.makeGroup("or"); |
| 303 | requestMap.put("query", topOr); |
| 304 | objectStack.push(topOr); |
| 305 | } |
| 306 | } |
| Joachim Bingel | eee549e | 2014-04-29 11:15:37 +0000 | [diff] [blame] | 307 | |
| Joachim Bingel | ef0b5b0 | 2015-01-30 09:37:43 +0000 | [diff] [blame] | 308 | @SuppressWarnings("unchecked") |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 309 | private LinkedHashMap<String, Object> processVariableExpr(ParseTree node) { |
| 310 | // simplex word or complex assignment (like qname = textSpec)? |
| 311 | String firstChildNodeCat = getNodeCat(node.getChild(0)); |
| 312 | LinkedHashMap<String, Object> object = null; |
| 313 | if (firstChildNodeCat.equals("node")) { |
| 314 | object = KoralObjectGenerator.makeSpan(); |
| 315 | } else if (firstChildNodeCat.equals("tok")) { |
| 316 | object = KoralObjectGenerator.makeToken(); |
| 317 | if (node.getChildCount() > 1) { // empty tokens do not wrap a term |
| 318 | LinkedHashMap<String, Object> term = KoralObjectGenerator.makeTerm(); |
| 319 | term.put("layer", "orth"); |
| 320 | object.put("wrap", term); |
| 321 | } |
| 322 | } else if (firstChildNodeCat.equals("qName")) { // only (foundry/)?layer specified |
| 323 | // may be token or span, depending on indicated layer! (e.g. cnx/cat=NP vs mate/pos=NN) |
| 324 | // TODO generalize the list below -> look up layers associated with tokens rather than spans somewhere |
| 325 | HashMap<String, Object> qNameParse = parseQNameNode(node.getChild(0)); |
| 326 | if (Arrays.asList(new String[]{"p", "lemma", "m", "orth"}).contains(qNameParse.get("layer"))) { |
| 327 | object = KoralObjectGenerator.makeToken(); |
| 328 | LinkedHashMap<String, Object> term = KoralObjectGenerator.makeTerm(); |
| 329 | object.put("wrap", term); |
| 330 | term.putAll(qNameParse); |
| 331 | } else { |
| 332 | object = KoralObjectGenerator.makeSpan(); |
| 333 | object.putAll(qNameParse); |
| 334 | } |
| 335 | } else if (firstChildNodeCat.equals("textSpec")) { |
| 336 | object = KoralObjectGenerator.makeToken(); |
| 337 | LinkedHashMap<String, Object> term = KoralObjectGenerator.makeTerm(); |
| 338 | object.put("wrap", term); |
| 339 | term.put("layer", "orth"); |
| 340 | term.putAll(parseTextSpec(node.getChild(0))); |
| 341 | } |
| Joachim Bingel | eee549e | 2014-04-29 11:15:37 +0000 | [diff] [blame] | 342 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 343 | if (node.getChildCount() == 3) { // (foundry/)?layer=key specification |
| 344 | if (object.get("@type").equals("korap:token")) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 345 | HashMap<String, Object> term = (HashMap<String, Object>) object.get("wrap"); |
| 346 | term.putAll(parseTextSpec(node.getChild(2))); |
| 347 | term.put("match", parseMatchOperator(getFirstChildWithCat(node, "eqOperator"))); |
| 348 | } else { |
| 349 | object.putAll(parseTextSpec(node.getChild(2))); |
| 350 | object.put("match", parseMatchOperator(getFirstChildWithCat(node, "eqOperator"))); |
| 351 | } |
| 352 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 353 | |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 354 | // Check if there's a unary relation defined for this node |
| 355 | // If yes, parse and retrieve it and put it in the object. |
| 356 | String ref = nodes2refs.get(node); |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 357 | if (unaryRelations.containsKey(ref)) { |
| Joachim Bingel | ef0b5b0 | 2015-01-30 09:37:43 +0000 | [diff] [blame] | 358 | ArrayList<ParseTree> unaryTermsForRef = unaryRelations.get(ref); |
| 359 | if (unaryTermsForRef.size() == 1) { |
| 360 | object.put("attr", |
| 361 | parseUnaryOperator(unaryTermsForRef.get(0))); |
| 362 | } else { |
| 363 | LinkedHashMap<String, Object> termGroup = KoralObjectGenerator.makeTermGroup("and"); |
| 364 | ArrayList<Object> operands = (ArrayList<Object>) termGroup.get("operands"); |
| 365 | for (ParseTree unaryTerm : unaryTermsForRef) { |
| 366 | operands.add(parseUnaryOperator(unaryTerm)); |
| 367 | } |
| 368 | object.put("attr", termGroup); |
| 369 | } |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 370 | } |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 371 | if (object != null) { |
| Joachim Bingel | 1f8f378 | 2015-01-19 17:58:41 +0000 | [diff] [blame] | 372 | // query: object only, no relation |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 373 | if (totalRelationCount == 0) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 374 | putIntoSuperObject(object); |
| 375 | } |
| 376 | ParseTree parentsFirstChild = node.getParent().getChild(0); |
| 377 | if (getNodeCat(parentsFirstChild).endsWith("#")) { |
| 378 | nodeVariables.put(getNodeCat(parentsFirstChild).replaceAll("#", ""), object); |
| 379 | } |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 380 | if (objectsToWrapInClass.containsKey(node)) { |
| 381 | int classId = objectsToWrapInClass.get(node); |
| 382 | object = KoralObjectGenerator.wrapInClass(object, classId); |
| 383 | } |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 384 | } |
| 385 | return object; |
| 386 | } |
| Joachim Bingel | a07b8e7 | 2014-05-09 15:06:07 +0000 | [diff] [blame] | 387 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 388 | /** |
| 389 | * Processes an operand node, creating a map for the operand containing all its information |
| 390 | * given in the node definition (referenced via '#'). If this node has been referred to and used earlier, |
| 391 | * a reference is created in its place. |
| 392 | * The operand will be wrapped in a class group if necessary. |
| 393 | * @param operandNode |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 394 | * @return A map object with the appropriate Koral representation of the operand |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 395 | */ |
| 396 | private LinkedHashMap<String, Object> retrieveOperand(ParseTree operandNode) { |
| 397 | LinkedHashMap<String, Object> operand = null; |
| 398 | if (!getNodeCat(operandNode.getChild(0)).equals("variableExpr")) { |
| 399 | String ref = operandNode.getChild(0).toStringTree(parser).substring(1); |
| 400 | operand = nodeVariables.get(ref); |
| 401 | if (nodeReferencesTotal.get(ref) > 1) { |
| 402 | if (nodeReferencesProcessed.get(ref)==0) { |
| 403 | refClassMapping.put(ref, classCounter); |
| 404 | operand = KoralObjectGenerator.wrapInClass(operand, classCounter++); |
| 405 | } else if (nodeReferencesProcessed.get(ref)>0 && nodeReferencesTotal.get(ref)>1) { |
| 406 | try { |
| 407 | operand = KoralObjectGenerator.wrapInReference(operandStack.pop(), refClassMapping.get(ref), true); |
| 408 | } catch (NoSuchElementException e) { |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 409 | operand = KoralObjectGenerator.makeReference(refClassMapping.get(ref)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 410 | } |
| 411 | } |
| 412 | nodeReferencesProcessed.put(ref, nodeReferencesProcessed.get(ref)+1); |
| 413 | } |
| 414 | } else { |
| 415 | operand = processVariableExpr(operandNode.getChild(0)); |
| 416 | } |
| 417 | return operand; |
| 418 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 419 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 420 | /** |
| 421 | * @param node |
| 422 | * @return |
| 423 | */ |
| 424 | private boolean checkOperandsProcessedPreviously(ParseTree node) { |
| 425 | // We can assume two operands. |
| 426 | ParseTree operand1 = node.getChild(0); |
| 427 | ParseTree operand2 = node.getChild(2); |
| Joachim Bingel | 4acf246 | 2015-01-27 11:49:57 +0000 | [diff] [blame] | 428 | if (checkOperandProcessedPreviously(operand1) || checkOperandProcessedPreviously(operand2)) { |
| 429 | return true; |
| 430 | } |
| 431 | return false; |
| 432 | } |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 433 | |
| Joachim Bingel | 4acf246 | 2015-01-27 11:49:57 +0000 | [diff] [blame] | 434 | /** |
| 435 | * @param operand |
| 436 | * @return |
| 437 | */ |
| 438 | private boolean checkOperandProcessedPreviously(ParseTree operand) { |
| 439 | String operandRef = operand.getText(); |
| 440 | if (operandRef.startsWith("#")) { |
| 441 | operandRef = operandRef.substring(1, operandRef.length()); |
| Joachim Bingel | 4acf246 | 2015-01-27 11:49:57 +0000 | [diff] [blame] | 442 | if (nodeReferencesProcessed.get(operandRef) > 0) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 443 | return true; |
| 444 | } |
| 445 | } |
| 446 | return false; |
| 447 | } |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 448 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 449 | @SuppressWarnings("unchecked") |
| 450 | private void processN_ary_linguistic_term(ParseTree node) { |
| 451 | relationCounter++; |
| 452 | // get operator and determine type of group (sequence/treeRelation/relation/...) |
| 453 | // It's possible in Annis QL to concatenate operators, so there may be several operators under one n_ary_linguistic_term node. |
| 454 | // Counter 'i' will iteratively point to all operator nodes (odd-numbered) under this node. |
| 455 | for (int i=1; i<node.getChildCount(); i = i+2) { |
| 456 | ParseTree operandTree1 = node.getChild(i-1); |
| 457 | ParseTree operandTree2 = node.getChild(i+1); |
| 458 | String reltype = getNodeCat(node.getChild(i).getChild(0)); |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 459 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 460 | LinkedHashMap<String,Object> group = null; |
| 461 | ArrayList<Object> operands = null; |
| 462 | // make sure one of the operands has already been put into a |
| Joachim Bingel | 1f8f378 | 2015-01-19 17:58:41 +0000 | [diff] [blame] | 463 | // relation (if this is not the 1st relation). If none of the |
| 464 | // operands has been ingested at a lower level (and is therefore |
| 465 | // unavailable for refrencing), queue this relation for later |
| 466 | // processing. |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 467 | if (relationCounter != 1) { |
| 468 | if (! checkOperandsProcessedPreviously(node)) { |
| 469 | queuedRelations.add(node); |
| 470 | relationCounter--; |
| Joachim Bingel | 4acf246 | 2015-01-27 11:49:57 +0000 | [diff] [blame] | 471 | if (verbose) { |
| Joachim Bingel | 4acf246 | 2015-01-27 11:49:57 +0000 | [diff] [blame] | 472 | System.out.println("Adding to queue: "+node.getText()); |
| 473 | } |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 474 | objectsToPop.push(stackedObjects); |
| 475 | return; |
| 476 | } |
| 477 | } |
| 478 | // Retrieve operands. |
| 479 | LinkedHashMap<String, Object> operand1 = retrieveOperand(operandTree1); |
| 480 | LinkedHashMap<String, Object> operand2 = retrieveOperand(operandTree2); |
| 481 | // 'Proper' n_ary_linguistic_operators receive a considerably different serialisation than 'commonparent' and 'commonancestor'. |
| 482 | // For the latter cases, a dummy span is introduced and declared as a span class that has a dominance relation towards |
| 483 | // the two operands, one after the other, thus resulting in two nested relations! A Poliqarp+ equivalent for A $ B would be |
| 484 | // contains(focus(1:contains({1:<>},A)), B). |
| 485 | // This is modeled here... |
| 486 | if (reltype.equals("commonparent") || reltype.equals("commonancestor")) { |
| 487 | // make an (outer) group and an inner group containing the dummy node or previous relations |
| 488 | group = KoralObjectGenerator.makeGroup("relation"); |
| 489 | LinkedHashMap<String,Object> innerGroup = KoralObjectGenerator.makeGroup("relation"); |
| 490 | LinkedHashMap<String,Object> relation = KoralObjectGenerator.makeRelation(); |
| 491 | LinkedHashMap<String,Object> term = KoralObjectGenerator.makeTerm(); |
| 492 | term.put("layer", "c"); |
| 493 | relation.put("wrap", term); |
| 494 | // commonancestor is an indirect commonparent relation |
| 495 | if (reltype.equals("commonancestor")) relation.put("boundary", KoralObjectGenerator.makeBoundary(1, null)); |
| 496 | group.put("relation", relation); |
| 497 | innerGroup.put("relation", relation); |
| 498 | // Get operands list before possible re-assignment of 'group' (see following 'if') |
| 499 | ArrayList<Object> outerOperands = (ArrayList<Object>) group.get("operands"); |
| 500 | ArrayList<Object> innerOperands = (ArrayList<Object>) innerGroup.get("operands"); |
| 501 | // for lowest level, add the underspecified node as first operand and wrap it in a class group |
| 502 | if (i == 1) { |
| 503 | innerOperands.add(KoralObjectGenerator.wrapInClass(KoralObjectGenerator.makeSpan(), classCounter)); |
| 504 | // add the first operand and wrap the whole group in a focusing reference |
| 505 | innerOperands.add(operand1); |
| 506 | innerGroup = KoralObjectGenerator.wrapInReference(innerGroup, classCounter, true); |
| 507 | outerOperands.add(innerGroup); |
| 508 | } else { |
| 509 | outerOperands.add(operandStack.pop()); |
| 510 | } |
| 511 | // Lookahead: if next operator is not commonparent or commonancestor, wrap in class for accessibility |
| 512 | if (i < node.getChildCount()-2 && !getNodeCat(node.getChild(i+2).getChild(0)).startsWith("common")) { |
| 513 | operand2 = KoralObjectGenerator.wrapInClass(operand2, ++classCounter); |
| 514 | } |
| 515 | outerOperands.add(operand2); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 516 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 517 | // Wrap in another reference object in case other relations are following |
| 518 | if (i < node.getChildCount()-2) { |
| 519 | group = KoralObjectGenerator.wrapInReference(group, classCounter, true); |
| 520 | } |
| 521 | // All other n-ary linguistic relations have special 'relation' attributes defined in CQLF and can be |
| 522 | // handled more easily... |
| 523 | } else { |
| 524 | LinkedHashMap<String, Object> operatorGroup = parseOperatorNode(node.getChild(i).getChild(0)); |
| 525 | String groupType; |
| 526 | try { |
| 527 | groupType = (String) operatorGroup.get("groupType"); |
| 528 | } catch (ClassCastException | NullPointerException n) { |
| 529 | groupType = "relation"; |
| 530 | } |
| 531 | if (groupType.equals("relation") || groupType.equals("treeRelation")) { |
| 532 | group = KoralObjectGenerator.makeGroup(groupType); |
| 533 | LinkedHashMap<String, Object> relation = new LinkedHashMap<String, Object>(); |
| 534 | putAllButGroupType(relation, operatorGroup); |
| 535 | group.put("relation", relation); |
| 536 | } else if (groupType.equals("sequence")) { |
| 537 | group = KoralObjectGenerator.makeGroup(groupType); |
| 538 | putAllButGroupType(group, operatorGroup); |
| 539 | } else if (groupType.equals("position")) { |
| 540 | group = new LinkedHashMap<String,Object>(); |
| 541 | putAllButGroupType(group, operatorGroup); |
| 542 | } |
| Joachim Bingel | 9c3ddb9 | 2014-06-23 13:49:58 +0000 | [diff] [blame] | 543 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 544 | // Get operands list before possible re-assignment of 'group' (see following 'if') |
| 545 | operands = (ArrayList<Object>) group.get("operands"); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 546 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 547 | ParseTree leftChildSpec = getFirstChildWithCat(node.getChild(i).getChild(0), "@l"); |
| 548 | ParseTree rightChildSpec = getFirstChildWithCat(node.getChild(i).getChild(0), "@r"); |
| 549 | if (leftChildSpec != null || rightChildSpec != null) { |
| Joachim Bingel | 1d79104 | 2015-02-03 10:19:47 +0000 | [diff] [blame] | 550 | String frame = (leftChildSpec!=null) ? "frames:startsWith" : "frames:endsWith"; |
| 551 | LinkedHashMap<String,Object> positionGroup = KoralObjectGenerator.makePosition(new String[]{frame}); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 552 | operand2 = KoralObjectGenerator.wrapInClass(operand2, ++classCounter); |
| 553 | ((ArrayList<Object>) positionGroup.get("operands")).add(group); |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 554 | ((ArrayList<Object>) positionGroup.get("operands")).add(KoralObjectGenerator.makeReference(classCounter)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 555 | group = positionGroup; |
| 556 | } |
| 557 | |
| 558 | // Wrap in reference object in case other relations are following |
| 559 | if (i < node.getChildCount()-2) { |
| 560 | group = KoralObjectGenerator.wrapInReference(group, classCounter, true); |
| 561 | } |
| 562 | |
| 563 | // Inject operands. |
| 564 | // -> Case distinction: |
| 565 | if (node.getChildCount()==3) { |
| 566 | // Things are easy when there's just one operator (thus 3 children incl. operands)... |
| 567 | if (operand1 != null) operands.add(operand1); |
| 568 | if (operand2 != null) operands.add(operand2); |
| 569 | } else { |
| 570 | // ... but things get a little more complicated here. The AST is of this form: (operand1 operator1 operand2 operator2 operand3 operator3 ...) |
| 571 | // but we'll have to serialize it in a nested, binary way: (((operand1 operator1 operand2) operator2 operand3) operator3 ...) |
| 572 | // the following code will do just that: |
| 573 | if (i == 1) { |
| 574 | // for the first operator, include both operands |
| 575 | if (operand1 != null) operands.add(operand1); |
| 576 | if (operand2 != null) operands.add(KoralObjectGenerator.wrapInClass(operand2, classCounter++)); |
| 577 | // Don't put this into the super object directly but store on operandStack |
| 578 | // (because this group will have to be an operand of a subsequent operator) |
| 579 | operandStack.push(group); |
| 580 | // for all subsequent operators, only take the 2nd operand (first was already added by previous operator) |
| 581 | } else if (i < node.getChildCount()-2) { |
| 582 | // for all intermediate operators, include other previous groups and 2nd operand. Store this on the operandStack, too. |
| 583 | if (operand2 != null) operands.add(KoralObjectGenerator.wrapInClass(operand2, classCounter++)); |
| 584 | operands.add(0, operandStack.pop()); |
| 585 | operandStack.push(group); |
| 586 | } else if (i == node.getChildCount()-2) { |
| 587 | // This is the last operator. Include 2nd operand only |
| 588 | if (operand2 != null) operands.add(operand2); |
| 589 | } |
| 590 | } |
| 591 | } |
| 592 | // Final step: decide what to do with the 'group' object, depending on whether all relations have been processed |
| 593 | if (i == node.getChildCount()-2 && relationCounter == totalRelationCount) { |
| 594 | putIntoSuperObject(group); |
| 595 | if (!operandStack.isEmpty()) { |
| 596 | operands.add(0, operandStack.pop()); |
| 597 | } |
| 598 | objectStack.push(group); |
| 599 | stackedObjects++; |
| 600 | } else { |
| 601 | operandStack.push(group); |
| 602 | } |
| 603 | } |
| 604 | } |
| 605 | |
| 606 | |
| 607 | |
| 608 | /** |
| 609 | * Parses a unary_linguistic_operator node. Possible operators are: root, arity, tokenarity. |
| 610 | * Operators are embedded into a korap:term, in turn wrapped by an 'attr' property in a korap:span. |
| 611 | * @param node The unary_linguistic_operator node |
| 612 | * @return A map containing the attr key, to be inserted into korap:span |
| 613 | */ |
| 614 | private LinkedHashMap<String, Object> parseUnaryOperator(ParseTree node) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 615 | LinkedHashMap<String, Object> term = KoralObjectGenerator.makeTerm(); |
| 616 | String op = node.getChild(1).toStringTree(parser).substring(1); |
| 617 | if (op.equals("arity") || op.equals("tokenarity")) { |
| 618 | LinkedHashMap<String, Object> boundary = boundaryFromRangeSpec(node.getChild(3), false); |
| 619 | term.put(op, boundary); |
| 620 | } else { |
| 621 | term.put(op, true); |
| 622 | } |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 623 | return term; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 624 | } |
| 625 | |
| 626 | @SuppressWarnings("unchecked") |
| 627 | private LinkedHashMap<String, Object> parseOperatorNode(ParseTree operatorNode) { |
| 628 | LinkedHashMap<String, Object> relation = null; |
| 629 | String operator = getNodeCat(operatorNode); |
| 630 | // DOMINANCE |
| 631 | if (operator.equals("dominance")) { |
| 632 | relation = KoralObjectGenerator.makeRelation(); |
| 633 | relation.put("groupType", "relation"); |
| 634 | ParseTree qName = getFirstChildWithCat(operatorNode, "qName"); |
| 635 | ParseTree edgeSpecNode = getFirstChildWithCat(operatorNode, "edgeSpec"); |
| 636 | ParseTree star = getFirstChildWithCat(operatorNode, "*"); |
| 637 | ParseTree rangeSpec = getFirstChildWithCat(operatorNode, "rangeSpec"); |
| 638 | LinkedHashMap<String,Object> term = KoralObjectGenerator.makeTerm(); |
| 639 | term.put("layer", "c"); |
| 640 | if (qName != null) term = parseQNameNode(qName); |
| 641 | if (edgeSpecNode != null) { |
| 642 | LinkedHashMap<String,Object> edgeSpec = parseEdgeSpec(edgeSpecNode); |
| 643 | String edgeSpecType = (String) edgeSpec.get("@type"); |
| 644 | if (edgeSpecType.equals("korap:termGroup")) { |
| 645 | ((ArrayList<Object>) edgeSpec.get("operands")).add(term); |
| 646 | term = edgeSpec; |
| 647 | } else { |
| 648 | term = KoralObjectGenerator.makeTermGroup("and"); |
| 649 | ArrayList<Object> termGroupOperands = (ArrayList<Object>) term.get("operands"); |
| 650 | termGroupOperands.add(edgeSpec); |
| 651 | LinkedHashMap<String,Object> constTerm = KoralObjectGenerator.makeTerm(); |
| 652 | constTerm.put("layer", "c"); |
| 653 | termGroupOperands.add(constTerm); |
| 654 | } |
| 655 | } |
| 656 | if (star != null) relation.put("boundary", KoralObjectGenerator.makeBoundary(0, null)); |
| 657 | if (rangeSpec != null) relation.put("boundary", boundaryFromRangeSpec(rangeSpec)); |
| 658 | relation.put("wrap", term); |
| 659 | } |
| 660 | else if (operator.equals("pointing")) { |
| 661 | // String reltype = operatorNode.getChild(1).toStringTree(parser); |
| 662 | relation = KoralObjectGenerator.makeRelation(); |
| 663 | relation.put("groupType", "relation"); |
| 664 | ParseTree qName = getFirstChildWithCat(operatorNode, "qName"); |
| 665 | ParseTree edgeSpec = getFirstChildWithCat(operatorNode, "edgeSpec"); |
| 666 | ParseTree star = getFirstChildWithCat(operatorNode, "*"); |
| 667 | ParseTree rangeSpec = getFirstChildWithCat(operatorNode, "rangeSpec"); |
| 668 | // if (qName != null) relation.putAll(parseQNameNode(qName)); |
| 669 | LinkedHashMap<String,Object> term = KoralObjectGenerator.makeTerm(); |
| 670 | if (qName != null) term.putAll(parseQNameNode(qName)); |
| 671 | if (edgeSpec != null) term.putAll(parseEdgeSpec(edgeSpec)); |
| 672 | if (star != null) relation.put("boundary", KoralObjectGenerator.makeBoundary(0, null)); |
| 673 | if (rangeSpec != null) relation.put("boundary", boundaryFromRangeSpec(rangeSpec)); |
| 674 | relation.put("wrap", term); |
| 675 | } |
| 676 | else if (operator.equals("precedence")) { |
| 677 | relation = new LinkedHashMap<String, Object>(); |
| 678 | relation.put("groupType", "sequence"); |
| 679 | ParseTree rangeSpec = getFirstChildWithCat(operatorNode, "rangeSpec"); |
| 680 | ParseTree star = getFirstChildWithCat(operatorNode, "*"); |
| 681 | ArrayList<Object> distances = new ArrayList<Object>(); |
| 682 | if (star != null) { |
| 683 | distances.add(KoralObjectGenerator.makeDistance("w", 0, null)); |
| 684 | relation.put("distances", distances); |
| 685 | } |
| 686 | if (rangeSpec != null) { |
| 687 | distances.add(parseDistance(rangeSpec)); |
| 688 | relation.put("distances", distances); |
| 689 | } |
| 690 | relation.put("inOrder", true); |
| 691 | } |
| 692 | else if (operator.equals("spanrelation")) { |
| 693 | // relation = makeGroup("position"); |
| 694 | // relation.put("groupType", "position"); |
| 695 | String reltype = operatorNode.getChild(0).toStringTree(parser); |
| 696 | String[] frames = new String[]{}; |
| 697 | switch (reltype) { |
| 698 | case "_=_": |
| 699 | frames = new String[]{"frames:matches"}; |
| 700 | break; |
| 701 | case "_l_": |
| Joachim Bingel | 1d79104 | 2015-02-03 10:19:47 +0000 | [diff] [blame] | 702 | frames = new String[]{"frames:startsWith", "frames:matches"}; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 703 | break; |
| 704 | case "_r_": |
| Joachim Bingel | 1d79104 | 2015-02-03 10:19:47 +0000 | [diff] [blame] | 705 | frames = new String[]{"frames:endsWith", "frames:matches"}; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 706 | break; |
| 707 | case "_i_": |
| Joachim Bingel | 1d79104 | 2015-02-03 10:19:47 +0000 | [diff] [blame] | 708 | frames = new String[]{"frames:isAround"}; |
| 709 | break; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 710 | case "_o_": |
| 711 | frames = new String[]{"frames:overlapsLeft", "frames:overlapsRight"}; |
| 712 | break; |
| 713 | case "_ol_": |
| 714 | frames = new String[]{"frames:overlapsLeft"}; |
| 715 | break; |
| 716 | case "_or_": |
| 717 | frames = new String[]{"frames:overlapsRight"}; |
| 718 | break; |
| 719 | } |
| 720 | // relation.put("frames", frames); |
| 721 | // relation.put("sharedClasses", sharedClasses); |
| Joachim Bingel | 1d79104 | 2015-02-03 10:19:47 +0000 | [diff] [blame] | 722 | relation = KoralObjectGenerator.makePosition(frames); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 723 | relation.put("groupType", "position"); |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 724 | } |
| 725 | else if (operator.equals("near")) { |
| 726 | relation = new LinkedHashMap<String, Object>(); |
| 727 | relation.put("groupType", "sequence"); |
| 728 | ParseTree rangeSpec = getFirstChildWithCat(operatorNode, "rangeSpec"); |
| 729 | ParseTree star = getFirstChildWithCat(operatorNode, "*"); |
| 730 | ArrayList<Object> distances = new ArrayList<Object>(); |
| 731 | if (star != null) { |
| 732 | distances.add(KoralObjectGenerator.makeDistance("w", 0, null)); |
| 733 | relation.put("distances", distances); |
| 734 | } |
| 735 | if (rangeSpec != null) { |
| 736 | distances.add(parseDistance(rangeSpec)); |
| 737 | relation.put("distances", distances); |
| 738 | } |
| 739 | relation.put("inOrder", false); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 740 | } |
| 741 | else if (operator.equals("identity")) { |
| 742 | //TODO since ANNIS v. 3.1.6 |
| 743 | } |
| 744 | else if (operator.equals("equalvalue")) { |
| 745 | //TODO since ANNIS v. 3.1.6 |
| 746 | } |
| 747 | else if (operator.equals("notequalvalue")) { |
| 748 | //TODO since ANNIS v. 3.1.6 |
| 749 | } |
| 750 | return relation; |
| 751 | } |
| 752 | |
| 753 | @SuppressWarnings("unchecked") |
| 754 | private LinkedHashMap<String,Object> parseEdgeSpec(ParseTree edgeSpec) { |
| 755 | List<ParseTree> annos = getChildrenWithCat(edgeSpec, "edgeAnno"); |
| 756 | if (annos.size() == 1) return parseEdgeAnno(annos.get(0)); |
| 757 | else { |
| 758 | LinkedHashMap<String,Object> termGroup = KoralObjectGenerator.makeTermGroup("and"); |
| 759 | ArrayList<Object> operands = (ArrayList<Object>) termGroup.get("operands"); |
| 760 | for (ParseTree anno : annos) { |
| 761 | operands.add(parseEdgeAnno(anno)); |
| 762 | } |
| 763 | return termGroup; |
| 764 | } |
| 765 | } |
| 766 | |
| 767 | private LinkedHashMap<String, Object> parseEdgeAnno(ParseTree edgeAnnoSpec) { |
| 768 | LinkedHashMap<String, Object> edgeAnno = new LinkedHashMap<String, Object>(); |
| 769 | edgeAnno.put("@type", "korap:term"); |
| 770 | ParseTree textSpecNode = getFirstChildWithCat(edgeAnnoSpec, "textSpec"); |
| 771 | ParseTree layerNode = getFirstChildWithCat(edgeAnnoSpec, "layer"); |
| 772 | ParseTree foundryNode = getFirstChildWithCat(edgeAnnoSpec, "foundry"); |
| 773 | ParseTree matchOperatorNode = getFirstChildWithCat(edgeAnnoSpec, "eqOperator"); |
| 774 | if (foundryNode!=null) edgeAnno.put("foundry", foundryNode.getChild(0).toStringTree(parser)); |
| 775 | if (layerNode!=null) edgeAnno.put("layer", layerNode.getChild(0).toStringTree(parser)); |
| 776 | edgeAnno.putAll(parseTextSpec(textSpecNode)); |
| 777 | edgeAnno.put("match", parseMatchOperator(matchOperatorNode)); |
| 778 | return edgeAnno; |
| 779 | } |
| 780 | |
| 781 | private LinkedHashMap<String, Object> boundaryFromRangeSpec(ParseTree rangeSpec) { |
| 782 | return boundaryFromRangeSpec(rangeSpec, true); |
| 783 | } |
| 784 | |
| 785 | private LinkedHashMap<String, Object> boundaryFromRangeSpec(ParseTree rangeSpec, boolean expandToMax) { |
| 786 | Integer min = Integer.parseInt(rangeSpec.getChild(0).toStringTree(parser)); |
| 787 | Integer max = min; |
| 788 | if (expandToMax) max = null; |
| 789 | if (rangeSpec.getChildCount()==3) |
| 790 | max = Integer.parseInt(rangeSpec.getChild(2).toStringTree(parser)); |
| 791 | return KoralObjectGenerator.makeBoundary(min, max); |
| 792 | } |
| 793 | |
| 794 | private LinkedHashMap<String, Object> parseDistance(ParseTree rangeSpec) { |
| 795 | Integer min = Integer.parseInt(rangeSpec.getChild(0).toStringTree(parser)); |
| 796 | Integer max = null; |
| 797 | if (rangeSpec.getChildCount()==3) |
| 798 | max = Integer.parseInt(rangeSpec.getChild(2).toStringTree(parser)); |
| 799 | return KoralObjectGenerator.makeDistance("w", min, max); |
| 800 | } |
| 801 | |
| 802 | private LinkedHashMap<String, Object> parseTextSpec(ParseTree node) { |
| 803 | LinkedHashMap<String, Object> term = new LinkedHashMap<String, Object>(); |
| 804 | if (hasChild(node, "regex")) { |
| 805 | term.put("type", "type:regex"); |
| 806 | term.put("key", node.getChild(0).getChild(0).toStringTree(parser).replaceAll("/", "")); |
| 807 | } else { |
| 808 | term.put("key", node.getChild(1).toStringTree(parser)); |
| 809 | } |
| 810 | term.put("match", "match:eq"); |
| 811 | return term; |
| 812 | } |
| 813 | |
| 814 | /** |
| 815 | * Parses the match operator (= or !=) |
| 816 | * @param node |
| 817 | * @return |
| 818 | */ |
| 819 | private String parseMatchOperator(ParseTree node) { |
| 820 | if (node.getChildCount()>0) { |
| Joachim Bingel | 6e4e9f3 | 2015-01-30 18:59:31 +0000 | [diff] [blame] | 821 | return node.getChild(0).getText().equals("=") ? "match:eq" : "match:ne"; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 822 | } |
| Joachim Bingel | 6e4e9f3 | 2015-01-30 18:59:31 +0000 | [diff] [blame] | 823 | return "match:eq"; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 824 | } |
| 825 | |
| 826 | private LinkedHashMap<String, Object> parseQNameNode(ParseTree node) { |
| 827 | LinkedHashMap<String, Object> fields = new LinkedHashMap<String, Object>(); |
| 828 | ParseTree layerNode = getFirstChildWithCat(node, "layer"); |
| 829 | ParseTree foundryNode = getFirstChildWithCat(node, "foundry"); |
| 830 | if (foundryNode != null) fields.put("foundry", foundryNode.getChild(0).toStringTree(parser)); |
| 831 | String layer = layerNode.getChild(0).toStringTree(parser); |
| 832 | if (layer.equals("pos")) layer = "p"; |
| 833 | if (layer.equals("cat")) layer = "c"; |
| 834 | fields.put("layer", layer); |
| 835 | return fields; |
| 836 | } |
| 837 | |
| 838 | private void putIntoSuperObject(LinkedHashMap<String, Object> object) { |
| 839 | putIntoSuperObject(object, 0); |
| 840 | } |
| 841 | |
| 842 | @SuppressWarnings({ "unchecked" }) |
| 843 | private void putIntoSuperObject(LinkedHashMap<String, Object> object, int objStackPosition) { |
| 844 | if (objectStack.size()>objStackPosition) { |
| 845 | ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(objStackPosition).get("operands"); |
| 846 | if (!invertedOperandsLists.contains(topObjectOperands)) { |
| 847 | topObjectOperands.add(object); |
| 848 | } else { |
| 849 | topObjectOperands.add(0, object); |
| 850 | } |
| 851 | } else { |
| 852 | requestMap.put("query", object); |
| 853 | } |
| 854 | } |
| 855 | |
| 856 | private void putAllButGroupType(Map<String, Object> container, Map<String, Object> input) { |
| 857 | for (String key : input.keySet()) { |
| 858 | if (!key.equals("groupType")) { |
| 859 | container.put(key, input.get(key)); |
| 860 | } |
| 861 | } |
| 862 | } |
| 863 | |
| 864 | private ParserRuleContext parseAnnisQuery (String query) { |
| 865 | Lexer lexer = new AqlLexer((CharStream)null); |
| 866 | ParserRuleContext tree = null; |
| 867 | Antlr4DescriptiveErrorListener errorListener = new Antlr4DescriptiveErrorListener(query); |
| 868 | // Like p. 111 |
| 869 | try { |
| 870 | // Tokenize input data |
| 871 | ANTLRInputStream input = new ANTLRInputStream(query); |
| 872 | lexer.setInputStream(input); |
| 873 | CommonTokenStream tokens = new CommonTokenStream(lexer); |
| 874 | parser = new AqlParser(tokens); |
| 875 | // Don't throw out erroneous stuff |
| 876 | parser.setErrorHandler(new BailErrorStrategy()); |
| 877 | lexer.removeErrorListeners(); |
| Joachim Bingel | 3fa584b | 2014-12-17 13:35:43 +0000 | [diff] [blame] | 878 | lexer.addErrorListener(errorListener); |
| 879 | parser.removeErrorListeners(); |
| 880 | parser.addErrorListener(errorListener); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 881 | // Get starting rule from parser |
| 882 | Method startRule = AqlParser.class.getMethod("start"); |
| 883 | tree = (ParserRuleContext) startRule.invoke(parser, (Object[])null); |
| 884 | } |
| 885 | // Some things went wrong ... |
| 886 | catch (Exception e) { |
| 887 | log.error("Could not parse query. Please make sure it is well-formed."); |
| 888 | log.error(errorListener.generateFullErrorMsg().toString()); |
| 889 | addError(errorListener.generateFullErrorMsg()); |
| 890 | } |
| 891 | return tree; |
| 892 | } |
| Joachim Bingel | 761d1c1 | 2014-12-17 14:02:40 +0000 | [diff] [blame] | 893 | } |