| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.serialize; |
| 2 | |
| 3 | import java.lang.reflect.Method; |
| 4 | import java.util.ArrayList; |
| Joachim Bingel | 7ee0786 | 2014-04-28 15:22:41 +0000 | [diff] [blame] | 5 | import java.util.Arrays; |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 6 | import java.util.HashMap; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 7 | import java.util.LinkedHashMap; |
| 8 | import java.util.LinkedList; |
| 9 | import java.util.List; |
| 10 | import java.util.Map; |
| Joachim Bingel | fb9d5fd | 2014-06-25 09:32:43 +0000 | [diff] [blame] | 11 | import java.util.NoSuchElementException; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 12 | |
| 13 | import org.antlr.v4.runtime.ANTLRInputStream; |
| 14 | import org.antlr.v4.runtime.BailErrorStrategy; |
| 15 | import org.antlr.v4.runtime.CharStream; |
| 16 | import org.antlr.v4.runtime.CommonTokenStream; |
| 17 | import org.antlr.v4.runtime.Lexer; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 18 | import org.antlr.v4.runtime.ParserRuleContext; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 19 | import org.antlr.v4.runtime.tree.ParseTree; |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 20 | import org.slf4j.LoggerFactory; |
| Joachim Bingel | c63f781 | 2014-07-30 09:12:25 +0000 | [diff] [blame] | 21 | import org.slf4j.Logger; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 22 | |
| 23 | import de.ids_mannheim.korap.query.annis.AqlLexer; |
| 24 | import de.ids_mannheim.korap.query.annis.AqlParser; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 25 | import de.ids_mannheim.korap.util.QueryException; |
| 26 | |
| 27 | /** |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 28 | * Map representation of ANNIS QL syntax tree as returned by ANTLR |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 29 | * @author joachim |
| 30 | * |
| 31 | */ |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 32 | public class AqlTree extends Antlr4AbstractSyntaxTree { |
| Joachim Bingel | c63f781 | 2014-07-30 09:12:25 +0000 | [diff] [blame] | 33 | private static Logger log = LoggerFactory.getLogger(AqlTree.class); |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 34 | /** |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 35 | * Flag that indicates whether token fields or meta fields are currently being processed |
| 36 | */ |
| 37 | boolean inMeta = false; |
| 38 | /** |
| Joachim Bingel | 949e3a8 | 2014-06-16 13:20:43 +0000 | [diff] [blame] | 39 | * Keeps track of operands that are to be integrated into yet uncreated objects. |
| 40 | */ |
| 41 | LinkedList<LinkedHashMap<String,Object>> operandStack = new LinkedList<LinkedHashMap<String,Object>>(); |
| 42 | /** |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 43 | * Keeps track of explicitly (by #-var definition) or implicitly (number as reference) introduced entities (for later reference by #-operator) |
| 44 | */ |
| Joachim Bingel | ca4944e | 2014-06-13 13:55:10 +0000 | [diff] [blame] | 45 | Map<String, LinkedHashMap<String,Object>> variableReferences = new LinkedHashMap<String, LinkedHashMap<String,Object>>(); |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 46 | /** |
| 47 | * Counter for variable definitions. |
| 48 | */ |
| 49 | Integer variableCounter = 1; |
| 50 | /** |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 51 | * Marks the currently active token in order to know where to add flags (might already have been taken away from token stack). |
| 52 | */ |
| 53 | LinkedHashMap<String,Object> curToken = new LinkedHashMap<String,Object>(); |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 54 | /** |
| 55 | * Keeps track of operands lists that are to be serialised in an inverted |
| 56 | * order (e.g. the IN() operator) compared to their AST representation. |
| 57 | */ |
| 58 | private LinkedList<ArrayList<Object>> invertedOperandsLists = new LinkedList<ArrayList<Object>>(); |
| Joachim Bingel | 77ac5cb | 2014-04-22 14:03:28 +0000 | [diff] [blame] | 59 | /** |
| Joachim Bingel | fc42881 | 2014-06-18 14:50:14 +0000 | [diff] [blame] | 60 | * Keeps track of operation:class numbers. |
| 61 | */ |
| 62 | int classCounter = 0; |
| 63 | /** |
| Joachim Bingel | 9c3ddb9 | 2014-06-23 13:49:58 +0000 | [diff] [blame] | 64 | * Keeps track of numers of relations processed (important when dealing with multiple predications). |
| 65 | */ |
| 66 | int relationCounter = 0; |
| 67 | /** |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 68 | * Keeps track of references to nodes that are operands of groups (e.g. tree relations). Those nodes appear on the top level of the parse tree |
| 69 | * but are to be integrated into the AqlTree at a later point (namely as operands of the respective group). Therefore, store references to these |
| 70 | * nodes here and exclude the operands from being written into the query map individually. |
| 71 | */ |
| 72 | private LinkedList<String> operandOnlyNodeRefs = new LinkedList<String>(); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 73 | private List<ParseTree> globalLingTermNodes = new ArrayList<ParseTree>(); |
| Joachim Bingel | 9c3ddb9 | 2014-06-23 13:49:58 +0000 | [diff] [blame] | 74 | private int totalRelationCount; |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 75 | /** |
| 76 | * Keeps a record of reference-class-mapping, i.e. which 'class' has been assigned to which #n reference. This is important when introducing korap:reference |
| 77 | * spans to refer back to previously established classes for entities. |
| 78 | */ |
| Joachim Bingel | b001d19 | 2014-06-25 11:34:36 +0000 | [diff] [blame] | 79 | private LinkedHashMap<String, Integer> refClassMapping = new LinkedHashMap<String, Integer>(); |
| Joachim Bingel | 9c3ddb9 | 2014-06-23 13:49:58 +0000 | [diff] [blame] | 80 | private LinkedHashMap<String, Integer> nodeReferencesTotal = new LinkedHashMap<String, Integer>(); |
| 81 | private LinkedHashMap<String, Integer> nodeReferencesProcessed = new LinkedHashMap<String, Integer>(); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 82 | |
/**
 * Creates the tree for an ANNIS QL query by processing the query string right away.
 * A {@link QueryException} raised by {@link #process(String)} is logged and not
 * propagated, so this constructor does not throw that exception.
 *
 * @param query The ANNIS QL query string to be processed
 */
public AqlTree(String query) {
    try {
        process(query);
    } catch (QueryException e) {
        // The constructor cannot declare the checked exception without breaking
        // existing callers, so report it through the logger with its cause attached.
        log.error("Could not process query: " + query, e);
    }
    // Former unconditional stdout dump of the serialised query, now routed to the logger.
    log.debug(">>> {} <<<", requestMap.get("query"));
}
| 96 | |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 97 | @Override |
| 98 | public void process(String query) throws QueryException { |
| 99 | ParseTree tree = parseAnnisQuery(query); |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 100 | if (this.parser != null) { |
| 101 | super.parser = this.parser; |
| 102 | } else { |
| 103 | throw new NullPointerException("Parser has not been instantiated!"); |
| 104 | } |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 105 | log.info("Processing Annis query."); |
| Joachim Bingel | ca4944e | 2014-06-13 13:55:10 +0000 | [diff] [blame] | 106 | log.info("AST is: "+tree.toStringTree(parser)); |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 107 | System.out.println("Processing Annis QL"); |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 108 | if (verbose) System.out.println(tree.toStringTree(parser)); |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 109 | processNode(tree); |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 110 | log.info(requestMap.toString()); |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 111 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 112 | |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 113 | @SuppressWarnings("unchecked") |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 114 | private void processNode(ParseTree node) { |
| 115 | // Top-down processing |
| 116 | if (visited.contains(node)) return; |
| 117 | else visited.add(node); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 118 | |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 119 | String nodeCat = getNodeCat(node); |
| 120 | openNodeCats.push(nodeCat); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 121 | |
| Joachim Bingel | 77ac5cb | 2014-04-22 14:03:28 +0000 | [diff] [blame] | 122 | stackedObjects = 0; |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 123 | |
| Joachim Bingel | 77ac5cb | 2014-04-22 14:03:28 +0000 | [diff] [blame] | 124 | if (verbose) { |
| 125 | System.err.println(" "+objectStack); |
| 126 | System.out.println(openNodeCats); |
| 127 | } |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 128 | |
| 129 | /* |
| 130 | **************************************************************** |
| 131 | **************************************************************** |
| 132 | * Processing individual node categories * |
| 133 | **************************************************************** |
| 134 | **************************************************************** |
| 135 | */ |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 136 | if (nodeCat.equals("exprTop")) { |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 137 | List<ParseTree> andTopExprs = getChildrenWithCat(node, "andTopExpr"); |
| 138 | if (andTopExprs.size() > 1) { |
| 139 | LinkedHashMap<String, Object> topOr = makeGroup("or"); |
| 140 | requestMap.put("query", topOr); |
| 141 | objectStack.push(topOr); |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 142 | } |
| 143 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 144 | |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 145 | if (nodeCat.equals("andTopExpr")) { |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 146 | // Before processing any child expr node, check if it has one or more "*ary_linguistic_term" nodes. |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 147 | // Those nodes may use references to earlier established operand nodes. |
| 148 | // Those operand nodes are not to be included into the query map individually but |
| 149 | // naturally as operands of the relations/groups introduced by the |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 150 | // *node. For that purpose, this section mines all used references |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 151 | // and stores them in a list for later reference. |
| 152 | for (ParseTree exprNode : getChildrenWithCat(node,"expr")) { |
| Joachim Bingel | fc42881 | 2014-06-18 14:50:14 +0000 | [diff] [blame] | 153 | // Pre-process any 'variableExpr' such that the variableReferences map can be filled |
| 154 | List<ParseTree> definitionNodes = new ArrayList<ParseTree>(); |
| 155 | definitionNodes.addAll(getChildrenWithCat(exprNode, "variableExpr")); |
| 156 | for (ParseTree definitionNode : definitionNodes) { |
| 157 | processNode(definitionNode); |
| 158 | } |
| 159 | // Then, mine all relations between nodes |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 160 | List<ParseTree> lingTermNodes = new ArrayList<ParseTree>(); |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 161 | lingTermNodes.addAll(getChildrenWithCat(exprNode, "n_ary_linguistic_term")); |
| Joachim Bingel | c89952e | 2014-06-06 12:08:42 +0000 | [diff] [blame] | 162 | globalLingTermNodes.addAll(lingTermNodes); |
| Joachim Bingel | 9c3ddb9 | 2014-06-23 13:49:58 +0000 | [diff] [blame] | 163 | totalRelationCount = globalLingTermNodes.size(); |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 164 | // Traverse refOrNode nodes under *ary_linguistic_term nodes and extract references |
| 165 | for (ParseTree lingTermNode : lingTermNodes) { |
| 166 | for (ParseTree refOrNode : getChildrenWithCat(lingTermNode, "refOrNode")) { |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 167 | String refOrNodeString = refOrNode.getChild(0).toStringTree(parser); |
| 168 | if (refOrNodeString.startsWith("#")) { |
| Joachim Bingel | 9c3ddb9 | 2014-06-23 13:49:58 +0000 | [diff] [blame] | 169 | String ref = refOrNode.getChild(0).toStringTree(parser).substring(1); |
| 170 | if (nodeReferencesTotal.containsKey(ref)) { |
| 171 | nodeReferencesTotal.put(ref, nodeReferencesTotal.get(ref)+1); |
| 172 | } else { |
| 173 | nodeReferencesTotal.put(ref, 1); |
| 174 | nodeReferencesProcessed.put(ref, 0); |
| 175 | } |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 176 | } |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 177 | } |
| 178 | } |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 179 | } |
| 180 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 181 | |
| Joachim Bingel | c89952e | 2014-06-06 12:08:42 +0000 | [diff] [blame] | 182 | if (nodeCat.equals("unary_linguistic_term")) { |
| Joachim Bingel | ca4944e | 2014-06-13 13:55:10 +0000 | [diff] [blame] | 183 | LinkedHashMap<String, Object> unaryOperator = parseUnaryOperator(node); |
| 184 | String reference = node.getChild(0).toStringTree(parser).substring(1); |
| 185 | LinkedHashMap<String, Object> object = variableReferences.get(reference); |
| 186 | object.putAll(unaryOperator); |
| Joachim Bingel | c89952e | 2014-06-06 12:08:42 +0000 | [diff] [blame] | 187 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 188 | |
| Joachim Bingel | c89952e | 2014-06-06 12:08:42 +0000 | [diff] [blame] | 189 | if (nodeCat.equals("n_ary_linguistic_term")) { |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 190 | processN_ary_linguistic_term(node); |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 191 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 192 | |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 193 | if (nodeCat.equals("variableExpr")) { |
| 194 | // simplex word or complex assignment (like qname = textSpec)? |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 195 | String firstChildNodeCat = getNodeCat(node.getChild(0)); |
| 196 | LinkedHashMap<String, Object> object = null; |
| 197 | if (firstChildNodeCat.equals("node")) { |
| 198 | object = makeSpan(); |
| 199 | } else if (firstChildNodeCat.equals("tok")) { |
| 200 | object = makeToken(); |
| Joachim Bingel | c9c0cf9 | 2014-10-02 12:03:59 +0000 | [diff] [blame^] | 201 | if (node.getChildCount() > 1) { // empty tokens do not wrap a term |
| 202 | LinkedHashMap<String, Object> term = makeTerm(); |
| 203 | term.put("layer", "orth"); |
| 204 | object.put("wrap", term); |
| 205 | } |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 206 | } else if (firstChildNodeCat.equals("qName")) { // only (foundry/)?layer specified |
| Joachim Bingel | 7ee0786 | 2014-04-28 15:22:41 +0000 | [diff] [blame] | 207 | // may be token or span, depending on indicated layer! (e.g. cnx/cat=NP or mate/pos=NN) |
| 208 | HashMap<String, Object> qNameParse = parseQNameNode(node.getChild(0)); |
| 209 | if (Arrays.asList(new String[]{"pos", "lemma", "morph", "tok"}).contains(qNameParse.get("layer"))) { |
| 210 | object = makeToken(); |
| 211 | LinkedHashMap<String, Object> term = makeTerm(); |
| 212 | object.put("wrap", term); |
| 213 | term.putAll(qNameParse); |
| 214 | } else { |
| 215 | object = makeSpan(); |
| 216 | object.putAll(qNameParse); |
| 217 | } |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 218 | } else if (firstChildNodeCat.equals("textSpec")) { |
| 219 | object = makeToken(); |
| 220 | LinkedHashMap<String, Object> term = makeTerm(); |
| 221 | object.put("wrap", term); |
| Joachim Bingel | b6d6781 | 2014-08-13 11:37:09 +0000 | [diff] [blame] | 222 | term.put("layer", "orth"); |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 223 | term.putAll(parseTextSpec(node.getChild(0))); |
| 224 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 225 | |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 226 | if (node.getChildCount() == 3) { // (foundry/)?layer=key specification |
| Joachim Bingel | 7ee0786 | 2014-04-28 15:22:41 +0000 | [diff] [blame] | 227 | if (object.get("@type").equals("korap:token")) { |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 228 | HashMap<String, Object> term = (HashMap<String, Object>) object.get("wrap"); |
| 229 | term.putAll(parseTextSpec(node.getChild(2))); |
| 230 | term.put("match", parseMatchOperator(node.getChild(1))); |
| 231 | } else { |
| 232 | object.putAll(parseTextSpec(node.getChild(2))); |
| 233 | object.put("match", parseMatchOperator(node.getChild(1))); |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 234 | } |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 235 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 236 | |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 237 | if (object != null) { |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 238 | if (! operandOnlyNodeRefs.contains(variableCounter.toString())) { |
| 239 | putIntoSuperObject(object); |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 240 | } |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 241 | ParseTree parentsFirstChild = node.getParent().getChild(0); |
| 242 | if (getNodeCat(parentsFirstChild).endsWith("#")) { |
| 243 | variableReferences.put(getNodeCat(parentsFirstChild).replaceAll("#", ""), object); |
| 244 | } |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 245 | variableReferences.put(variableCounter.toString(), object); |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 246 | variableCounter++; |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 247 | System.out.println(variableReferences); |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 248 | } |
| 249 | } |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 250 | |
| Joachim Bingel | 77ac5cb | 2014-04-22 14:03:28 +0000 | [diff] [blame] | 251 | objectsToPop.push(stackedObjects); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 252 | |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 253 | /* |
| 254 | **************************************************************** |
| 255 | **************************************************************** |
| 256 | * recursion until 'request' node (root of tree) is processed * |
| 257 | **************************************************************** |
| 258 | **************************************************************** |
| 259 | */ |
| 260 | for (int i=0; i<node.getChildCount(); i++) { |
| 261 | ParseTree child = node.getChild(i); |
| 262 | processNode(child); |
| 263 | } |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 264 | |
| Joachim Bingel | 77ac5cb | 2014-04-22 14:03:28 +0000 | [diff] [blame] | 265 | /* |
| 266 | ************************************************************** |
| 267 | * Stuff that happens after processing the children of a node * |
| 268 | ************************************************************** |
| 269 | */ |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 270 | if (!objectsToPop.isEmpty()) { |
| 271 | for (int i=0; i<objectsToPop.pop(); i++) { |
| 272 | objectStack.pop(); |
| 273 | } |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 274 | } |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 275 | openNodeCats.pop(); |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 276 | } |
| 277 | |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 278 | |
| 279 | |
| 280 | /** |
| 281 | * Processes an operand node, creating a map for the operand containing all its information |
| 282 | * given in the node definition (referenced via '#'). If this node has been referred to and used earlier, |
| 283 | * a korap:reference is created in its place. |
| 284 | * The operand will be wrapped in a class group if necessary. |
| 285 | * @param operandTree |
| 286 | * @return A map object with the appropriate CQLF representation of the operand |
| 287 | */ |
| 288 | private LinkedHashMap<String, Object> retrieveOperand(ParseTree operandTree) { |
| 289 | LinkedHashMap<String, Object> operand = null; |
| 290 | if (!getNodeCat(operandTree.getChild(0)).equals("variableExpr")) { |
| 291 | String ref = operandTree.getChild(0).toStringTree(parser).substring(1); |
| 292 | operand = variableReferences.get(ref); |
| 293 | if (nodeReferencesTotal.get(ref) > 1) { |
| 294 | if (nodeReferencesProcessed.get(ref)==0) { |
| 295 | refClassMapping.put(ref, classCounter); |
| 296 | operand = wrapInClass(operand, classCounter++); |
| 297 | nodeReferencesProcessed.put(ref, nodeReferencesProcessed.get(ref)+1); |
| 298 | } else if (nodeReferencesProcessed.get(ref)>0 && nodeReferencesTotal.get(ref)>1) { |
| 299 | try { |
| 300 | operand = wrapInReference(operandStack.pop(), refClassMapping.get(ref)); |
| 301 | } catch (NoSuchElementException e) { |
| 302 | operand = makeReference(refClassMapping.get(ref)); |
| 303 | } |
| 304 | } |
| 305 | } |
| 306 | } |
| 307 | return operand; |
| Joachim Bingel | 9c3ddb9 | 2014-06-23 13:49:58 +0000 | [diff] [blame] | 308 | } |
| 309 | |
| 310 | @SuppressWarnings("unchecked") |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 311 | private void processN_ary_linguistic_term(ParseTree node) { |
| 312 | relationCounter++; |
| 313 | // get operator and determine type of group (sequence/treeRelation/relation/...) |
| 314 | // It's possible in Annis QL to concatenate operators, so there may be several operators under one n_ary_linguistic_term node. |
| 315 | // Counter 'i' will iteratively point to all operator nodes (odd-numbered) under this node. |
| 316 | for (int i=1; i<node.getChildCount(); i = i+2) { |
| 317 | ParseTree operandTree1 = node.getChild(i-1); |
| 318 | ParseTree operandTree2 = node.getChild(i+1); |
| 319 | String reltype = getNodeCat(node.getChild(i).getChild(0)); |
| 320 | |
| 321 | LinkedHashMap<String,Object> group = null; |
| 322 | ArrayList<Object> operands = null; |
| 323 | // Retrieve operands. |
| 324 | LinkedHashMap<String, Object> operand1 = retrieveOperand(operandTree1); |
| 325 | LinkedHashMap<String, Object> operand2 = retrieveOperand(operandTree2); |
| 326 | |
| 327 | // 'Proper' n_ary_linguistic_operators receive a considerably different serialisation than 'commonparent' and 'commonancestor'. |
| 328 | // For the latter cases, a dummy span is introduced and declared as a span class that has a dominance relation towards |
| 329 | // the two operands, one after the other, thus resulting in two nested relations! A Poliqarp+ equivalent for A $ B would be |
| 330 | // contains(focus(1:contains({1:<>},A)), B). |
| 331 | // This is modeled here... |
| 332 | if (reltype.equals("commonparent") || reltype.equals("commonancestor")) { |
| 333 | // make an (outer) group and an inner group containing the dummy node or previous relations |
| 334 | group = makeGroup("relation"); |
| 335 | LinkedHashMap<String,Object> innerGroup = makeGroup("relation"); |
| Joachim Bingel | 6289241 | 2014-08-06 15:18:59 +0000 | [diff] [blame] | 336 | LinkedHashMap<String,Object> relation = makeRelation(); |
| 337 | LinkedHashMap<String,Object> term = makeTerm(); |
| 338 | term.put("layer", "c"); |
| 339 | relation.put("wrap", term); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 340 | // commonancestor is an indirect commonparent relation |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 341 | if (reltype.equals("commonancestor")) relation.put("boundary", makeBoundary(1, null)); |
| Joachim Bingel | 6289241 | 2014-08-06 15:18:59 +0000 | [diff] [blame] | 342 | group.put("relation", relation); |
| 343 | innerGroup.put("relation", relation); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 344 | // Get operands list before possible re-assignment of 'group' (see following 'if') |
| 345 | ArrayList<Object> outerOperands = (ArrayList<Object>) group.get("operands"); |
| 346 | ArrayList<Object> innerOperands = (ArrayList<Object>) innerGroup.get("operands"); |
| 347 | // for lowest level, add the underspecified node as first operand and wrap it in a class group |
| 348 | if (i == 1) { |
| 349 | innerOperands.add(wrapInClass(makeSpan(), classCounter)); |
| 350 | // add the first operand and wrap the whole group in a focusing reference |
| 351 | innerOperands.add(operand1); |
| 352 | innerGroup = wrapInReference(innerGroup, classCounter); |
| 353 | outerOperands.add(innerGroup); |
| 354 | } else { |
| 355 | outerOperands.add(operandStack.pop()); |
| 356 | } |
| 357 | // Lookahead: if next operator is not commonparent or commonancestor, wrap in class for accessibility |
| 358 | if (i < node.getChildCount()-2 && !getNodeCat(node.getChild(i+2).getChild(0)).startsWith("common")) { |
| 359 | operand2 = wrapInClass(operand2, ++classCounter); |
| 360 | } |
| 361 | outerOperands.add(operand2); |
| 362 | |
| 363 | // Wrap in another reference object in case other relations are following |
| 364 | if (i < node.getChildCount()-2) { |
| 365 | group = wrapInReference(group, classCounter); |
| 366 | } |
| Joachim Bingel | 6289241 | 2014-08-06 15:18:59 +0000 | [diff] [blame] | 367 | // All other n-ary linguistic relations have special 'relation' attributes defined in CQLF and can be |
| 368 | // handled more easily... |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 369 | } else { |
| 370 | LinkedHashMap<String, Object> operatorGroup = parseOperatorNode(node.getChild(i).getChild(0)); |
| 371 | String groupType; |
| 372 | try { |
| 373 | groupType = (String) operatorGroup.get("groupType"); |
| 374 | } catch (ClassCastException | NullPointerException n) { |
| 375 | groupType = "relation"; |
| 376 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 377 | if (groupType.equals("relation") || groupType.equals("treeRelation")) { |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 378 | group = makeGroup(groupType); |
| Joachim Bingel | 6289241 | 2014-08-06 15:18:59 +0000 | [diff] [blame] | 379 | LinkedHashMap<String, Object> relation = new LinkedHashMap<String, Object>(); |
| 380 | putAllButGroupType(relation, operatorGroup); |
| 381 | System.err.println(relation); |
| 382 | group.put("relation", relation); |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 383 | } else if (groupType.equals("sequence")) { |
| 384 | group = makeGroup(groupType); |
| 385 | putAllButGroupType(group, operatorGroup); |
| 386 | } else if (groupType.equals("position")) { |
| 387 | group = new LinkedHashMap<String,Object>(); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 388 | putAllButGroupType(group, operatorGroup); |
| 389 | } |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 390 | |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 391 | // Get operands list before possible re-assignment of 'group' (see following 'if') |
| 392 | operands = (ArrayList<Object>) group.get("operands"); |
| 393 | // Wrap in reference object in case other relations are following |
| 394 | if (i < node.getChildCount()-2) { |
| 395 | group = wrapInReference(group, classCounter); |
| 396 | } |
| 397 | |
| 398 | // Inject operands. |
| 399 | // -> Case distinction: |
| 400 | if (node.getChildCount()==3) { |
| 401 | // Things are easy when there's just one operator (thus 3 children incl. operands)... |
| 402 | if (operand1 != null) operands.add(operand1); |
| 403 | if (operand2 != null) operands.add(operand2); |
| 404 | } else { |
| 405 | // ... but things get a little more complicated here. The AST is of this form: (operand1 operator 1 operand2 operator2 operand3 operator3 ...) |
| 406 | // but we'll have to serialize it in a nested, binary way: (((operand1 operator1 operand2) operator2 operand3) operator3 ...) |
| 407 | // the following code will do just that: |
| 408 | if (i == 1) { |
| 409 | // for the first operator, include both operands |
| 410 | if (operand1 != null) operands.add(operand1); |
| 411 | if (operand2 != null) operands.add(wrapInClass(operand2, classCounter++)); |
| 412 | // Don't put this into the super object directly but store on operandStack |
| 413 | // (because this group will have to be an operand of a subsequent operator) |
| 414 | operandStack.push(group); |
| 415 | // for all subsequent operators, only take the 2nd operand (first was already added by previous operator) |
| 416 | } else if (i < node.getChildCount()-2) { |
| 417 | // for all intermediate operators, include other previous groups and 2nd operand. Store this on the operandStack, too. |
| 418 | if (operand2 != null) operands.add(wrapInClass(operand2, classCounter++)); |
| 419 | operands.add(0, operandStack.pop()); |
| 420 | operandStack.push(group); |
| 421 | } else if (i == node.getChildCount()-2) { |
| 422 | // This is the last operator. Include 2nd operand only |
| 423 | if (operand2 != null) operands.add(operand2); |
| 424 | } |
| 425 | } |
| 426 | } |
| 427 | // Final step: decide what to do with the 'group' object, depending on whether all relations have been processed |
| 428 | if (i == node.getChildCount()-2 && relationCounter == totalRelationCount) { |
| 429 | putIntoSuperObject(group); |
| 430 | if (!operandStack.isEmpty()) { |
| 431 | operands.add(0, operandStack.pop()); |
| 432 | } |
| 433 | objectStack.push(group); |
| 434 | stackedObjects++; |
| 435 | } else { |
| 436 | operandStack.push(group); |
| 437 | } |
| 438 | } |
| Joachim Bingel | 9c3ddb9 | 2014-06-23 13:49:58 +0000 | [diff] [blame] | 439 | } |
| 440 | |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 441 | |
| 442 | |
| Joachim Bingel | ca4944e | 2014-06-13 13:55:10 +0000 | [diff] [blame] | 443 | /** |
| 444 | * Parses a unary_linguistic_operator node. Possible operators are: root, arity, tokenarity. |
| 445 | * Operators are embedded into a korap:term, in turn wrapped by an 'attr' property in a korap:span. |
| 446 | * @param node The unary_linguistic_operator node |
| 447 | * @return A map containing the attr key, to be inserted into korap:span |
| 448 | */ |
| 449 | private LinkedHashMap<String, Object> parseUnaryOperator(ParseTree node) { |
| 450 | LinkedHashMap<String, Object> attr = new LinkedHashMap<String, Object>(); |
| 451 | LinkedHashMap<String, Object> term = makeTerm(); |
| 452 | String op = node.getChild(1).toStringTree(parser).substring(1); |
| 453 | if (op.equals("arity") || op.equals("tokenarity")) { |
| 454 | LinkedHashMap<String, Object> boundary = boundaryFromRangeSpec(node.getChild(3), false); |
| 455 | term.put(op, boundary); |
| 456 | } else { |
| 457 | term.put(op, true); |
| 458 | } |
| Joachim Bingel | ca4944e | 2014-06-13 13:55:10 +0000 | [diff] [blame] | 459 | attr.put("attr", term); |
| 460 | return attr; |
| 461 | } |
| 462 | |
| Joachim Bingel | eee549e | 2014-04-29 11:15:37 +0000 | [diff] [blame] | 463 | private LinkedHashMap<String, Object> parseOperatorNode(ParseTree operatorNode) { |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 464 | LinkedHashMap<String, Object> relation = null; |
| Joachim Bingel | eee549e | 2014-04-29 11:15:37 +0000 | [diff] [blame] | 465 | String operator = getNodeCat(operatorNode); |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 466 | // DOMINANCE |
| Joachim Bingel | eee549e | 2014-04-29 11:15:37 +0000 | [diff] [blame] | 467 | if (operator.equals("dominance")) { |
| Joachim Bingel | 6289241 | 2014-08-06 15:18:59 +0000 | [diff] [blame] | 468 | relation = makeRelation(); |
| Joachim Bingel | 13421ef | 2014-05-07 16:22:58 +0000 | [diff] [blame] | 469 | relation.put("groupType", "relation"); |
| Joachim Bingel | eee549e | 2014-04-29 11:15:37 +0000 | [diff] [blame] | 470 | ParseTree leftChildSpec = getFirstChildWithCat(operatorNode, "@l"); |
| 471 | ParseTree rightChildSpec = getFirstChildWithCat(operatorNode, "@r"); |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 472 | ParseTree qName = getFirstChildWithCat(operatorNode, "qName"); |
| Joachim Bingel | eee549e | 2014-04-29 11:15:37 +0000 | [diff] [blame] | 473 | ParseTree edgeSpec = getFirstChildWithCat(operatorNode, "edgeSpec"); |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 474 | ParseTree star = getFirstChildWithCat(operatorNode, "*"); |
| 475 | ParseTree rangeSpec = getFirstChildWithCat(operatorNode, "rangeSpec"); |
| Joachim Bingel | 6289241 | 2014-08-06 15:18:59 +0000 | [diff] [blame] | 476 | LinkedHashMap<String,Object> term = makeTerm(); |
| 477 | term.put("layer", "c"); |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 478 | if (leftChildSpec != null) relation.put("index", 0); |
| 479 | if (rightChildSpec != null) relation.put("index", -1); |
| Joachim Bingel | 6289241 | 2014-08-06 15:18:59 +0000 | [diff] [blame] | 480 | if (qName != null) term = parseQNameNode(qName); |
| 481 | if (edgeSpec != null) term.putAll(parseEdgeSpec(edgeSpec)); |
| 482 | if (star != null) relation.put("boundary", makeBoundary(0, null)); |
| Joachim Bingel | 86195a5 | 2014-07-08 14:29:48 +0000 | [diff] [blame] | 483 | if (rangeSpec != null) relation.put("boundary", boundaryFromRangeSpec(rangeSpec)); |
| Joachim Bingel | 6289241 | 2014-08-06 15:18:59 +0000 | [diff] [blame] | 484 | relation.put("wrap", term); |
| Joachim Bingel | eee549e | 2014-04-29 11:15:37 +0000 | [diff] [blame] | 485 | } |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 486 | else if (operator.equals("pointing")) { |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 487 | // String reltype = operatorNode.getChild(1).toStringTree(parser); |
| Joachim Bingel | 6289241 | 2014-08-06 15:18:59 +0000 | [diff] [blame] | 488 | relation = makeRelation(); |
| Joachim Bingel | 13421ef | 2014-05-07 16:22:58 +0000 | [diff] [blame] | 489 | relation.put("groupType", "relation"); |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 490 | ParseTree qName = getFirstChildWithCat(operatorNode, "qName"); |
| 491 | ParseTree edgeSpec = getFirstChildWithCat(operatorNode, "edgeSpec"); |
| 492 | ParseTree star = getFirstChildWithCat(operatorNode, "*"); |
| 493 | ParseTree rangeSpec = getFirstChildWithCat(operatorNode, "rangeSpec"); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 494 | // if (qName != null) relation.putAll(parseQNameNode(qName)); |
| Joachim Bingel | 6289241 | 2014-08-06 15:18:59 +0000 | [diff] [blame] | 495 | LinkedHashMap<String,Object> term = makeTerm(); |
| 496 | if (qName != null) term.putAll(parseQNameNode(qName)); |
| 497 | if (edgeSpec != null) term.putAll(parseEdgeSpec(edgeSpec)); |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 498 | if (star != null) relation.put("boundary", makeBoundary(0, null)); |
| Joachim Bingel | 86195a5 | 2014-07-08 14:29:48 +0000 | [diff] [blame] | 499 | if (rangeSpec != null) relation.put("boundary", boundaryFromRangeSpec(rangeSpec)); |
| Joachim Bingel | 6289241 | 2014-08-06 15:18:59 +0000 | [diff] [blame] | 500 | relation.put("wrap", term); |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 501 | } |
| 502 | else if (operator.equals("precedence")) { |
| Joachim Bingel | 13421ef | 2014-05-07 16:22:58 +0000 | [diff] [blame] | 503 | relation = new LinkedHashMap<String, Object>(); |
| 504 | relation.put("groupType", "sequence"); |
| 505 | ParseTree rangeSpec = getFirstChildWithCat(operatorNode, "rangeSpec"); |
| 506 | ParseTree star = getFirstChildWithCat(operatorNode, "*"); |
| 507 | ArrayList<Object> distances = new ArrayList<Object>(); |
| 508 | if (star != null) { |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 509 | distances.add(makeDistance("w", 0, null)); |
| Joachim Bingel | 13421ef | 2014-05-07 16:22:58 +0000 | [diff] [blame] | 510 | relation.put("distances", distances); |
| 511 | } |
| 512 | if (rangeSpec != null) { |
| 513 | distances.add(parseDistance(rangeSpec)); |
| 514 | relation.put("distances", distances); |
| 515 | } |
| 516 | relation.put("inOrder", true); |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 517 | } |
| 518 | else if (operator.equals("spanrelation")) { |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 519 | // relation = makeGroup("position"); |
| 520 | // relation.put("groupType", "position"); |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 521 | String reltype = operatorNode.getChild(0).toStringTree(parser); |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 522 | String[] frames = new String[]{}; |
| 523 | String[] sharedClasses = new String[]{"sharedClasses:includes"}; |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 524 | switch (reltype) { |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 525 | case "_=_": |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 526 | frames = new String[]{"frame:matches"}; |
| 527 | sharedClasses = new String[]{"sharedClasses:equals"}; |
| 528 | break; |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 529 | case "_l_": |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 530 | frames = new String[]{"frame:startswith"}; |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 531 | break; |
| 532 | case "_r_": |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 533 | frames = new String[]{"frame:endswith"}; |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 534 | break; |
| 535 | case "_i_": |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 536 | frames = new String[]{"frame:contains"};break; |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 537 | case "_o_": |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 538 | frames = new String[]{"frame:overlapsLeft", "frame:overlapsRight"}; |
| 539 | sharedClasses = new String[]{"sharedClasses:intersects"}; |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 540 | break; |
| 541 | case "_ol_": |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 542 | frames = new String[]{"frame:overlapsLeft"}; |
| 543 | sharedClasses = new String[]{"sharedClasses:intersects"}; |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 544 | break; |
| 545 | case "_or_": |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 546 | frames = new String[]{"frame:overlapsRight"}; |
| 547 | sharedClasses = new String[]{"sharedClasses:intersects"}; |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 548 | break; |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 549 | } |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 550 | // relation.put("frames", frames); |
| 551 | // relation.put("sharedClasses", sharedClasses); |
| 552 | relation = makePosition(frames, sharedClasses); |
| 553 | relation.put("groupType", "position"); |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 554 | } |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 555 | else if (operator.equals("identity")) { |
| Joachim Bingel | c89952e | 2014-06-06 12:08:42 +0000 | [diff] [blame] | 556 | //TODO |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 557 | } |
| 558 | else if (operator.equals("equalvalue")) { |
| Joachim Bingel | c89952e | 2014-06-06 12:08:42 +0000 | [diff] [blame] | 559 | //TODO |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 560 | } |
| 561 | else if (operator.equals("notequalvalue")) { |
| Joachim Bingel | c89952e | 2014-06-06 12:08:42 +0000 | [diff] [blame] | 562 | //TODO |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 563 | } |
| 564 | return relation; |
| 565 | } |
| 566 | |
| Joachim Bingel | ceb7906 | 2014-09-22 11:50:37 +0000 | [diff] [blame] | 567 | @SuppressWarnings("unchecked") |
| Joachim Bingel | 6289241 | 2014-08-06 15:18:59 +0000 | [diff] [blame] | 568 | private LinkedHashMap<String,Object> parseEdgeSpec(ParseTree edgeSpec) { |
| 569 | List<ParseTree> annos = getChildrenWithCat(edgeSpec, "edgeAnno"); |
| 570 | if (annos.size() == 1) return parseEdgeAnno(annos.get(0)); |
| 571 | else { |
| 572 | LinkedHashMap<String,Object> termGroup = makeTermGroup("and"); |
| 573 | ArrayList<Object> operands = (ArrayList<Object>) termGroup.get("operands"); |
| 574 | for (ParseTree anno : annos) { |
| 575 | operands.add(parseEdgeAnno(anno)); |
| 576 | } |
| 577 | return termGroup; |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 578 | } |
| Joachim Bingel | eee549e | 2014-04-29 11:15:37 +0000 | [diff] [blame] | 579 | } |
| 580 | |
| 581 | private LinkedHashMap<String, Object> parseEdgeAnno( |
| 582 | ParseTree edgeAnnoSpec) { |
| 583 | LinkedHashMap<String, Object> edgeAnno = new LinkedHashMap<String, Object>(); |
| Joachim Bingel | a07b8e7 | 2014-05-09 15:06:07 +0000 | [diff] [blame] | 584 | edgeAnno.put("@type", "korap:term"); |
| Joachim Bingel | 639df9c | 2014-05-12 15:27:00 +0000 | [diff] [blame] | 585 | ParseTree qNameNode = edgeAnnoSpec.getChild(0); |
| Joachim Bingel | eee549e | 2014-04-29 11:15:37 +0000 | [diff] [blame] | 586 | ParseTree matchOperatorNode = edgeAnnoSpec.getChild(1); |
| 587 | ParseTree textSpecNode = edgeAnnoSpec.getChild(2); |
| Joachim Bingel | 639df9c | 2014-05-12 15:27:00 +0000 | [diff] [blame] | 588 | ParseTree layerNode = getFirstChildWithCat(qNameNode, "layer"); |
| 589 | ParseTree foundryNode = getFirstChildWithCat(qNameNode, "foundry"); |
| 590 | if (foundryNode!=null) edgeAnno.put("foundry", foundryNode.getChild(0).toStringTree(parser)); |
| 591 | if (layerNode!=null) edgeAnno.put("layer", layerNode.getChild(0).toStringTree(parser)); |
| Joachim Bingel | eee549e | 2014-04-29 11:15:37 +0000 | [diff] [blame] | 592 | edgeAnno.putAll(parseTextSpec(textSpecNode)); |
| 593 | edgeAnno.put("match", parseMatchOperator(matchOperatorNode)); |
| 594 | return edgeAnno; |
| 595 | } |
| 596 | |
| Joachim Bingel | a07b8e7 | 2014-05-09 15:06:07 +0000 | [diff] [blame] | 597 | private LinkedHashMap<String, Object> boundaryFromRangeSpec(ParseTree rangeSpec) { |
| Joachim Bingel | ca4944e | 2014-06-13 13:55:10 +0000 | [diff] [blame] | 598 | return boundaryFromRangeSpec(rangeSpec, true); |
| 599 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 600 | |
| Joachim Bingel | ca4944e | 2014-06-13 13:55:10 +0000 | [diff] [blame] | 601 | private LinkedHashMap<String, Object> boundaryFromRangeSpec(ParseTree rangeSpec, boolean expandToMax) { |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 602 | Integer min = Integer.parseInt(rangeSpec.getChild(0).toStringTree(parser)); |
| Joachim Bingel | ca4944e | 2014-06-13 13:55:10 +0000 | [diff] [blame] | 603 | Integer max = min; |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 604 | if (expandToMax) max = null; |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 605 | if (rangeSpec.getChildCount()==3) |
| 606 | max = Integer.parseInt(rangeSpec.getChild(2).toStringTree(parser)); |
| 607 | return makeBoundary(min, max); |
| 608 | } |
| Joachim Bingel | a07b8e7 | 2014-05-09 15:06:07 +0000 | [diff] [blame] | 609 | |
| Joachim Bingel | 13421ef | 2014-05-07 16:22:58 +0000 | [diff] [blame] | 610 | private LinkedHashMap<String, Object> parseDistance(ParseTree rangeSpec) { |
| 611 | Integer min = Integer.parseInt(rangeSpec.getChild(0).toStringTree(parser)); |
| Joachim Bingel | 687e4d4 | 2014-07-30 09:34:18 +0000 | [diff] [blame] | 612 | Integer max = null; |
| Joachim Bingel | 13421ef | 2014-05-07 16:22:58 +0000 | [diff] [blame] | 613 | if (rangeSpec.getChildCount()==3) |
| 614 | max = Integer.parseInt(rangeSpec.getChild(2).toStringTree(parser)); |
| 615 | return makeDistance("w", min, max); |
| 616 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 617 | |
| Joachim Bingel | eee549e | 2014-04-29 11:15:37 +0000 | [diff] [blame] | 618 | private LinkedHashMap<String, Object> parseTextSpec(ParseTree node) { |
| 619 | LinkedHashMap<String, Object> term = new LinkedHashMap<String, Object>(); |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 620 | if (hasChild(node, "regex")) { |
| 621 | term.put("type", "type:regex"); |
| 622 | term.put("key", node.getChild(0).getChild(0).toStringTree(parser).replaceAll("/", "")); |
| 623 | } else { |
| 624 | term.put("key", node.getChild(1).toStringTree(parser)); |
| 625 | } |
| 626 | term.put("match", "match:eq"); |
| 627 | return term; |
| 628 | } |
| 629 | |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 630 | /** |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 631 | * Parses the match operator (= or !=) |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 632 | * @param node |
| 633 | * @return |
| 634 | */ |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 635 | private String parseMatchOperator(ParseTree node) { |
| 636 | return node.toStringTree(parser).equals("=") ? "match:eq" : "match:ne"; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 637 | } |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 638 | |
| 639 | private LinkedHashMap<String, Object> parseQNameNode(ParseTree node) { |
| 640 | LinkedHashMap<String, Object> fields = new LinkedHashMap<String, Object>(); |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 641 | ParseTree layerNode = getFirstChildWithCat(node, "layer"); |
| 642 | ParseTree foundryNode = getFirstChildWithCat(node, "foundry"); |
| 643 | if (foundryNode != null) fields.put("foundry", foundryNode.getChild(0).toStringTree(parser)); |
| 644 | fields.put("layer", layerNode.getChild(0).toStringTree(parser)); |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 645 | return fields; |
| 646 | } |
| 647 | |
| Joachim Bingel | 77ac5cb | 2014-04-22 14:03:28 +0000 | [diff] [blame] | 648 | private void putIntoSuperObject(LinkedHashMap<String, Object> object) { |
| 649 | putIntoSuperObject(object, 0); |
| 650 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 651 | |
| Joachim Bingel | 77ac5cb | 2014-04-22 14:03:28 +0000 | [diff] [blame] | 652 | @SuppressWarnings({ "unchecked" }) |
| 653 | private void putIntoSuperObject(LinkedHashMap<String, Object> object, int objStackPosition) { |
| Joachim Bingel | 9c3ddb9 | 2014-06-23 13:49:58 +0000 | [diff] [blame] | 654 | if (objectStack.size()>objStackPosition) { |
| Joachim Bingel | 77ac5cb | 2014-04-22 14:03:28 +0000 | [diff] [blame] | 655 | ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(objStackPosition).get("operands"); |
| Joachim Bingel | 7503879 | 2014-05-19 15:12:23 +0000 | [diff] [blame] | 656 | if (!invertedOperandsLists.contains(topObjectOperands)) { |
| 657 | topObjectOperands.add(object); |
| 658 | } else { |
| 659 | topObjectOperands.add(0, object); |
| 660 | } |
| Joachim Bingel | 77ac5cb | 2014-04-22 14:03:28 +0000 | [diff] [blame] | 661 | } else { |
| 662 | requestMap.put("query", object); |
| 663 | } |
| 664 | } |
| Joachim Bingel | 9c3ddb9 | 2014-06-23 13:49:58 +0000 | [diff] [blame] | 665 | |
| Joachim Bingel | 13421ef | 2014-05-07 16:22:58 +0000 | [diff] [blame] | 666 | private void putAllButGroupType(Map<String, Object> container, Map<String, Object> input) { |
| 667 | for (String key : input.keySet()) { |
| 668 | if (!key.equals("groupType")) { |
| 669 | container.put(key, input.get(key)); |
| 670 | } |
| 671 | } |
| 672 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 673 | |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 674 | private ParserRuleContext parseAnnisQuery (String p) throws QueryException { |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 675 | Lexer poliqarpLexer = new AqlLexer((CharStream)null); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 676 | ParserRuleContext tree = null; |
| 677 | // Like p. 111 |
| 678 | try { |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 679 | |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 680 | // Tokenize input data |
| 681 | ANTLRInputStream input = new ANTLRInputStream(p); |
| 682 | poliqarpLexer.setInputStream(input); |
| 683 | CommonTokenStream tokens = new CommonTokenStream(poliqarpLexer); |
| 684 | parser = new AqlParser(tokens); |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 685 | |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 686 | // Don't throw out erroneous stuff |
| 687 | parser.setErrorHandler(new BailErrorStrategy()); |
| 688 | parser.removeErrorListeners(); |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 689 | |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 690 | // Get starting rule from parser |
| 691 | Method startRule = AqlParser.class.getMethod("start"); |
| 692 | tree = (ParserRuleContext) startRule.invoke(parser, (Object[])null); |
| 693 | } |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 694 | |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 695 | // Some things went wrong ... |
| 696 | catch (Exception e) { |
| 697 | log.error(e.getMessage()); |
| 698 | System.err.println( e.getMessage() ); |
| 699 | } |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 700 | |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 701 | if (tree == null) { |
| 702 | log.error("Could not parse query. Make sure it is correct ANNIS QL syntax."); |
| 703 | throw new QueryException("Could not parse query. Make sure it is correct ANNIS QL syntax."); |
| 704 | } |
| 705 | |
| 706 | // Return the generated tree |
| 707 | return tree; |
| 708 | } |
| 709 | |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 710 | public static void main(String[] args) { |
| 711 | /* |
| 712 | * For testing |
| 713 | */ |
| 714 | String[] queries = new String[] { |
| Joachim Bingel | aaabb72 | 2014-09-24 14:29:10 +0000 | [diff] [blame] | 715 | // "cat=\"NP\" & cat=\"VP\" & #1 $ #2 ", |
| 716 | // "Haus", |
| 717 | // "lemma=\"Haus\"", |
| 718 | // "Katze=\"Hund\"", |
| 719 | // "cnx/c=\"NP\"", |
| 720 | // "cat=\"NP\"", |
| 721 | // "node & node & #1 .+ #2", |
| 722 | // " #1 > #2 & cnx/cat=\"VP\" & cnx/cat=\"NP\"", |
| 723 | // "\"Mann\" & node & #2 >[cat=\"NP\"] #1", |
| 724 | // "node & node & #2 ->coref[val=\"true\"] #1", |
| 725 | // "cat=\"NP\" & cat=\"VP\" & cat=\"PP\" & #1 $ #2 > #3", |
| 726 | // "tok=\"Mann\" & tok=\"geht\" & #1 .* #2", |
| 727 | // "\"Sonne\"", |
| Joachim Bingel | e6d73b1 | 2014-09-30 15:34:59 +0000 | [diff] [blame] | 728 | // "\"so\" & ( \"nicht\" | \"doch\" ) & #1 .1,6 #2", |
| 729 | // |
| 730 | // "NP#cat=\"NP\" & PP1#cat=\"PP\" . PP2#cat=\"PP\" & #NP > #PP1 & #NP > #PP2 ", |
| 731 | // "cat=\"NP\" > cat=\"VP\" & #1 _l_ #2", |
| 732 | // "cat=\"NP\" > cat=\"VP\" & #1 . tok=\"foo\"", |
| 733 | "cat=\"NP\" & cat=\"VP\" & #1 > #2 & #1 _l_ #2", |
| Joachim Bingel | c9c0cf9 | 2014-10-02 12:03:59 +0000 | [diff] [blame^] | 734 | "tok" |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 735 | }; |
| 736 | // AqlTree.verbose=true; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 737 | for (String q : queries) { |
| 738 | try { |
| 739 | System.out.println(q); |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 740 | AqlTree at = new AqlTree(q); |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 741 | System.out.println(at.parseAnnisQuery(q).toStringTree(at.parser)); |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 742 | System.out.println(); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 743 | |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 744 | } catch (NullPointerException | QueryException npe) { |
| 745 | npe.printStackTrace(); |
| 746 | } |
| 747 | } |
| 748 | } |
| 749 | |
| 750 | } |