| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.serialize; |
| 2 | |
| 3 | import java.lang.reflect.Method; |
| 4 | import java.util.ArrayList; |
| Joachim Bingel | 7ee0786 | 2014-04-28 15:22:41 +0000 | [diff] [blame] | 5 | import java.util.Arrays; |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 6 | import java.util.HashMap; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 7 | import java.util.LinkedList; |
| 8 | import java.util.List; |
| 9 | import java.util.Map; |
| Joachim Bingel | fb9d5fd | 2014-06-25 09:32:43 +0000 | [diff] [blame] | 10 | import java.util.NoSuchElementException; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 11 | |
| 12 | import org.antlr.v4.runtime.ANTLRInputStream; |
| 13 | import org.antlr.v4.runtime.BailErrorStrategy; |
| 14 | import org.antlr.v4.runtime.CharStream; |
| 15 | import org.antlr.v4.runtime.CommonTokenStream; |
| 16 | import org.antlr.v4.runtime.Lexer; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 17 | import org.antlr.v4.runtime.ParserRuleContext; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 18 | import org.antlr.v4.runtime.tree.ParseTree; |
| Joachim Bingel | c63f781 | 2014-07-30 09:12:25 +0000 | [diff] [blame] | 19 | import org.slf4j.Logger; |
| margaretha | d7e75b5 | 2017-01-20 13:52:28 +0100 | [diff] [blame] | 20 | import org.slf4j.LoggerFactory; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 21 | |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 22 | import de.ids_mannheim.korap.query.object.KoralFrame; |
| 23 | import de.ids_mannheim.korap.query.object.KoralOperation; |
| 24 | import de.ids_mannheim.korap.query.object.KoralTermGroupRelation; |
| Joachim Bingel | 6003b85 | 2014-12-18 14:20:55 +0000 | [diff] [blame] | 25 | import de.ids_mannheim.korap.query.parse.annis.AqlLexer; |
| 26 | import de.ids_mannheim.korap.query.parse.annis.AqlParser; |
| Joachim Bingel | 3fa584b | 2014-12-17 13:35:43 +0000 | [diff] [blame] | 27 | import de.ids_mannheim.korap.query.serialize.util.Antlr4DescriptiveErrorListener; |
| Joachim Bingel | aa4ab2f | 2015-01-16 14:26:51 +0000 | [diff] [blame] | 28 | import de.ids_mannheim.korap.query.serialize.util.KoralObjectGenerator; |
| Joachim Bingel | 1f8f378 | 2015-01-19 17:58:41 +0000 | [diff] [blame] | 29 | import de.ids_mannheim.korap.query.serialize.util.StatusCodes; |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 30 | |
| 31 | /** |
 * Processor class for ANNIS QL queries. This class uses an ANTLR v4
 * grammar for query parsing and therefore extends
 * {@link Antlr4AbstractQueryProcessor}. The parser object is
 * inherited from the parent class and instantiated in
 * {@link #parseAnnisQuery(String)} as an {@link AqlParser}.
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 39 | * |
 * @see <a href="http://annis-tools.org/aql.html">ANNIS Query Language (AQL)</a>
| 41 | * |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 42 | * @author Joachim Bingel (bingel@ids-mannheim.de) |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 43 | * @author Eliza Margaretha (margaretha@ids-mannheim.de) |
| Joachim Bingel | 7cb346e | 2015-03-09 10:56:20 +0100 | [diff] [blame] | 44 | * @version 0.3.0 |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 45 | * @since 0.1.0 |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 46 | */ |
| Joachim Bingel | 1faf8a5 | 2015-01-09 13:17:34 +0000 | [diff] [blame] | 47 | public class AnnisQueryProcessor extends Antlr4AbstractQueryProcessor { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 48 | private static Logger log = LoggerFactory |
| 49 | .getLogger(AnnisQueryProcessor.class); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 50 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 51 | * Flag that indicates whether token fields or meta fields are |
| 52 | * currently |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 53 | * being processed |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 54 | */ |
| 55 | boolean inMeta = false; |
| 56 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 57 | * Keeps track of operands that are to be integrated into yet |
| 58 | * uncreated |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 59 | * objects. |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 60 | */ |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 61 | LinkedList<Map<String, Object>> operandStack = new LinkedList<Map<String, Object>>(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 62 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 63 | * Keeps track of explicitly (by #-var definition) or implicitly |
| 64 | * (number |
| 65 | * as reference) introduced entities (for later reference by |
| 66 | * #-operator) |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 67 | */ |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 68 | Map<String, Map<String, Object>> nodeVariables = new HashMap<String, Map<String, Object>>(); |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 69 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 70 | * Keeps track of explicitly (by #-var definition) or implicitly |
| 71 | * (number |
| 72 | * as reference) introduced entities (for later reference by |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 73 | * #-operator)s |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 74 | */ |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 75 | Map<ParseTree, String> nodes2refs = new HashMap<ParseTree, String>(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 76 | /** |
| 77 | * Counter for variable definitions. |
| 78 | */ |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 79 | Integer variableCount = 1; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 80 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 81 | * Marks the currently active token in order to know where to add |
| 82 | * flags |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 83 | * (might already have been taken away from token stack). |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 84 | */ |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 85 | Map<String, Object> curToken = new HashMap<String, Object>(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 86 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 87 | * Keeps track of operands lists that are to be serialised in an |
| 88 | * inverted |
| 89 | * order (e.g. the IN() operator) compared to their AST |
| 90 | * representation. |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 91 | */ |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 92 | private LinkedList<ArrayList<Object>> invertedOperandsLists = new LinkedList<ArrayList<Object>>(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 93 | /** |
| 94 | * Keeps track of operation:class numbers. |
| 95 | */ |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 96 | int classCounter = 1; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 97 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 98 | * Keeps track of numers of relations processed (important when |
| 99 | * dealing |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 100 | * with multiple predications). |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 101 | */ |
| 102 | int relationCounter = 0; |
| 103 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 104 | * Keeps track of references to nodes that are operands of groups |
| 105 | * (e.g. |
| 106 | * tree relations). Those nodes appear on the top level of the |
| 107 | * parse tree |
| 108 | * but are to be integrated into the AqlTree at a later point |
| 109 | * (namely as |
| 110 | * operands of the respective group). Therefore, store references |
| 111 | * to these |
| 112 | * nodes here and exclude the operands from being written into the |
| 113 | * query |
| 114 | * map individually. |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 115 | */ |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 116 | private int totalRelationCount = 0; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 117 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 118 | * Keeps a record of reference-class-mapping, i.e. which 'class' |
| 119 | * has been |
| 120 | * assigned to which #n reference. This is important when |
| 121 | * introducing |
| 122 | * koral:reference spans to refer back to previously established |
| 123 | * classes for |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 124 | * entities. |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 125 | */ |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 126 | private Map<String, Integer> refClassMapping = new HashMap<String, Integer>(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 127 | /** |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 128 | * Keeps a record of unary relations on spans/tokens. |
| 129 | */ |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 130 | private Map<String, ArrayList<ParseTree>> unaryRelations = new HashMap<String, ArrayList<ParseTree>>(); |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 131 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 132 | * Keeps track of the number of references to a node/token by |
| 133 | * means of #n. |
| 134 | * E.g. in the query <tt>tok="x" & tok="y" & tok="z" & #1 . #2 & |
| 135 | * #2 . #3</tt>, |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 136 | * the 2nd token ("y") is referenced twice, the others once. |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 137 | */ |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 138 | private Map<String, Integer> nodeReferencesTotal = new HashMap<String, Integer>(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 139 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 140 | * Keeps track of the number of references to a node/token that |
| 141 | * have |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 142 | * already been processed. |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 143 | */ |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 144 | private Map<String, Integer> nodeReferencesProcessed = new HashMap<String, Integer>(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 145 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 146 | * Keeps track of queued relations. Relations sometimes cannot be |
| 147 | * processed |
| 148 | * directly, namely in case it does not share any operands with |
| 149 | * the |
| 150 | * previous relation. Then wait until a relation with a shared |
| 151 | * operand has |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 152 | * been processed. |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 153 | */ |
| 154 | private LinkedList<ParseTree> queuedRelations = new LinkedList<ParseTree>(); |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 155 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 156 | * For some objects, it may be decided in the initial scan |
| 157 | * ({@link #processAndTopExpr(ParseTree)} that they need to be |
| 158 | * wrapped in a |
| 159 | * class operation when retrieved later. This map stores this |
| 160 | * information. |
| 161 | * More precisely, it stores for every node in the tree which |
| 162 | * class ID its |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 163 | * derived KoralQuery object will receive. |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 164 | */ |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 165 | private Map<ParseTree, Integer> objectsToWrapInClass = new HashMap<ParseTree, Integer>(); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 166 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 167 | |
/**
 * Creates a processor for the given ANNIS QL query and processes the
 * query right away. The new instance is registered with
 * {@link KoralObjectGenerator} before processing starts.
 *
 * @param query
 *            the ANNIS QL query string to process
 */
public AnnisQueryProcessor (String query) {
    KoralObjectGenerator.setQueryProcessor(this);
    this.process(query);
}
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 172 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 173 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 174 | @Override |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 175 | public void process (String query) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 176 | ParseTree tree = parseAnnisQuery(query); |
| 177 | if (this.parser != null) { |
| 178 | super.parser = this.parser; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 179 | } |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 180 | else { |
| 181 | throw new NullPointerException("Parser has not been instantiated!"); |
| 182 | } |
| 183 | log.info("Processing Annis query: " + query); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 184 | if (tree != null) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 185 | log.debug("ANTLR parse tree: " + tree.toStringTree(parser)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 186 | processNode(tree); |
| 187 | // Last check to see if all relations have left the queue |
| 188 | if (!queuedRelations.isEmpty()) { |
| Joachim Bingel | 1f8f378 | 2015-01-19 17:58:41 +0000 | [diff] [blame] | 189 | ParseTree queued = queuedRelations.pop(); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 190 | if (verbose) |
| 191 | System.out.println("Taking off queue (last rel): " |
| 192 | + queued.getText()); |
| Joachim Bingel | 1f8f378 | 2015-01-19 17:58:41 +0000 | [diff] [blame] | 193 | if (checkOperandsProcessedPreviously(queued)) { |
| 194 | processNode(queued); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 195 | } |
| 196 | else { |
| 197 | addError(StatusCodes.UNBOUND_ANNIS_RELATION, |
| 198 | "The relation " + queued.getText() |
| 199 | + " is not bound to any other relations."); |
| 200 | requestMap |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 201 | .put("query", new HashMap<String, Object>()); |
| Joachim Bingel | 1f8f378 | 2015-01-19 17:58:41 +0000 | [diff] [blame] | 202 | } |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 203 | } |
| 204 | } |
| 205 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 206 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 207 | |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 208 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 209 | * Traverses the parse tree by recursively calling itself, |
| 210 | * starting with |
| 211 | * the root node of the tree and calling itself with the children |
| 212 | * of its |
| 213 | * current node in a depth-first, left-to-right fashion. In each |
| 214 | * call, |
| 215 | * depending on the category of the current node, special |
| 216 | * processor |
| 217 | * methods for the respective node category are called to process |
| 218 | * the node. |
| 219 | * |
| 220 | * @param node |
| 221 | * The node currently visited in the parse tree |
| 222 | * traversal. |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 223 | */ |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 224 | private void processNode (ParseTree node) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 225 | String nodeCat = getNodeCat(node); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 226 | // Top-down processing |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 227 | if (visited.contains(node)) |
| 228 | return; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 229 | openNodeCats.push(nodeCat); |
| 230 | stackedObjects = 0; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 231 | // Before doing anything else, check if any relations are queued |
| 232 | // and need to be processed first |
| 233 | if (nodeCat.equals("n_ary_linguistic_term")) { |
| 234 | if (!queuedRelations.isEmpty()) { |
| 235 | ParseTree queued = queuedRelations.getFirst(); |
| 236 | if (checkOperandsProcessedPreviously(queued)) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 237 | if (verbose) |
| 238 | System.out.println("Taking off queue: " |
| 239 | + queued.getText()); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 240 | queuedRelations.removeFirst(); |
| 241 | processNode(queued); |
| 242 | } |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 243 | } |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 244 | } |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 245 | if (verbose) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 246 | System.err.println(" " + objectStack); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 247 | System.out.println(openNodeCats); |
| 248 | } |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 249 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 250 | /* |
| 251 | **************************************************************** |
| 252 | **************************************************************** |
| 253 | * Processing individual node categories * |
| 254 | **************************************************************** |
| 255 | **************************************************************** |
| 256 | */ |
| 257 | if (nodeCat.equals("exprTop")) { |
| 258 | processExprTop(node); |
| 259 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 260 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 261 | if (nodeCat.equals("andTopExpr")) { |
| 262 | processAndTopExpr(node); |
| 263 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 264 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 265 | if (nodeCat.equals("n_ary_linguistic_term")) { |
| 266 | processN_ary_linguistic_term(node); |
| 267 | } |
| Joachim Bingel | dc03c00 | 2014-04-17 13:40:40 +0000 | [diff] [blame] | 268 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 269 | objectsToPop.push(stackedObjects); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 270 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 271 | /* |
| 272 | **************************************************************** |
| 273 | **************************************************************** |
| 274 | * recursion until 'request' node (root of tree) is processed * |
| 275 | **************************************************************** |
| 276 | **************************************************************** |
| 277 | */ |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 278 | for (int i = 0; i < node.getChildCount(); i++) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 279 | ParseTree child = node.getChild(i); |
| 280 | processNode(child); |
| 281 | } |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 282 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 283 | /* |
| 284 | ************************************************************** |
| 285 | * Stuff that happens after processing the children of a node * |
| 286 | ************************************************************** |
| 287 | */ |
| 288 | if (!objectsToPop.isEmpty()) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 289 | for (int i = 0; i < objectsToPop.pop(); i++) { |
| 290 | objectStack.pop(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 291 | } |
| 292 | } |
| 293 | openNodeCats.pop(); |
| 294 | } |
| Joachim Bingel | 9c3ddb9 | 2014-06-23 13:49:58 +0000 | [diff] [blame] | 295 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 296 | |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 297 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 298 | * Processes an <tt>andTopExpr</tt> node. This is a child of the |
| 299 | * root |
| 300 | * and contains a set of expressions connected by logical |
| 301 | * conjunction. |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 302 | * Several of these nodes are possibly connected via disjunction. |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 303 | * |
| 304 | * @param node |
| 305 | * The current parse tree node (must be of category |
| 306 | * <tt>andTopExpr</tt>). |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 307 | */ |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 308 | private void processAndTopExpr (ParseTree node) { |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 309 | // Before processing any child expr node, check if it has one or more |
| 310 | // "*ary_linguistic_term" nodes. |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 311 | // Those nodes may use references to earlier established operand nodes. |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 312 | // Those operand nodes are not to be included into the query map |
| 313 | // individually but naturally as operands of the relations/groups |
| 314 | // introduced by the node. For that purpose, this section mines all |
| 315 | // used references and stores them in a list for later reference. |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 316 | for (ParseTree unaryTermNode : getDescendantsWithCat(node, |
| 317 | "unary_linguistic_term")) { |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 318 | String ref = getNodeCat(unaryTermNode.getChild(0)).substring(1); |
| Joachim Bingel | ef0b5b0 | 2015-01-30 09:37:43 +0000 | [diff] [blame] | 319 | ArrayList<ParseTree> unaryTermsForRef = unaryRelations.get(ref); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 320 | if (unaryTermsForRef == null) |
| 321 | unaryTermsForRef = new ArrayList<ParseTree>(); |
| Joachim Bingel | ef0b5b0 | 2015-01-30 09:37:43 +0000 | [diff] [blame] | 322 | unaryTermsForRef.add(unaryTermNode); |
| 323 | unaryRelations.put(ref, unaryTermsForRef); |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 324 | } |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 325 | for (ParseTree lingTermNode : getDescendantsWithCat(node, |
| 326 | "n_ary_linguistic_term")) { |
| 327 | for (ParseTree refOrNode : getChildrenWithCat(lingTermNode, |
| 328 | "refOrNode")) { |
| 329 | String refOrNodeString = refOrNode.getChild(0).toStringTree( |
| 330 | parser); |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 331 | if (refOrNodeString.startsWith("#")) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 332 | String ref = refOrNode.getChild(0).toStringTree(parser) |
| 333 | .substring(1); |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 334 | if (nodeReferencesTotal.containsKey(ref)) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 335 | nodeReferencesTotal.put(ref, |
| 336 | nodeReferencesTotal.get(ref) + 1); |
| 337 | } |
| 338 | else { |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 339 | nodeReferencesTotal.put(ref, 1); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 340 | nodeReferencesProcessed.put(ref, 0); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 341 | } |
| 342 | } |
| 343 | } |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 344 | totalRelationCount++; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 345 | } |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 346 | // Then, mine all object definitions. |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 347 | for (ParseTree variableExprNode : getDescendantsWithCat(node, |
| 348 | "variableExpr")) { |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 349 | String ref; |
| 350 | // might be a ref label rather than a counting number |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 351 | ParseTree varDef = getFirstChildWithCat( |
| 352 | variableExprNode.getParent(), "varDef"); |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 353 | if (varDef != null) { |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 354 | // remove trailing # |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 355 | ref = varDef.getText().replaceFirst("#", ""); |
| 356 | } |
| 357 | else { |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 358 | ref = variableCount.toString(); |
| 359 | } |
| 360 | nodes2refs.put(variableExprNode, ref); |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 361 | Map<String, Object> object = processVariableExpr(variableExprNode); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 362 | nodeVariables.put(ref, object); |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 363 | variableCount++; |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 364 | // Check if this object definition is part of a "direct declaration |
| 365 | // relation", i.e. a relation which declares its operands directly |
| 366 | // rather than using references to earlier declared objects. These |
| 367 | // objects must still be available for later reference, handle this |
| 368 | // here. Direct declaration relation is present when grandparent is |
| 369 | // n_ary_linguistic_term node. |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 370 | if (getNodeCat(variableExprNode.getParent().getParent()).equals( |
| 371 | "n_ary_linguistic_term")) { |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 372 | if (nodeReferencesTotal.containsKey(ref)) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 373 | nodeReferencesTotal.put(ref, |
| 374 | nodeReferencesTotal.get(ref) + 1); |
| 375 | } |
| 376 | else { |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 377 | nodeReferencesTotal.put(ref, 1); |
| 378 | } |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 379 | // This is important for later relations wrapping the present |
| 380 | // relation. If the object isn't registered as processed, it |
| 381 | // won't be available for referencing. |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 382 | nodeReferencesProcessed.put(ref, 1); |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 383 | // Register this node for latter wrapping in class. |
| 384 | if (nodeReferencesTotal.get(ref) > 1) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 385 | refClassMapping.put(ref, classCounter + 128); |
| 386 | objectsToWrapInClass.put(variableExprNode, |
| 387 | 128 + classCounter++); |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 388 | } |
| 389 | } |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 390 | } |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 391 | } |
| Joachim Bingel | d4ae5fd | 2014-04-29 15:00:16 +0000 | [diff] [blame] | 392 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 393 | |
| 394 | private void processExprTop (ParseTree node) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 395 | List<ParseTree> andTopExprs = getChildrenWithCat(node, "andTopExpr"); |
| 396 | if (andTopExprs.size() > 1) { |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 397 | Map<String, Object> topOr = KoralObjectGenerator |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 398 | .makeGroup(KoralOperation.DISJUNCTION); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 399 | requestMap.put("query", topOr); |
| 400 | objectStack.push(topOr); |
| 401 | } |
| 402 | } |
| Joachim Bingel | eee549e | 2014-04-29 11:15:37 +0000 | [diff] [blame] | 403 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 404 | |
| Joachim Bingel | ef0b5b0 | 2015-01-30 09:37:43 +0000 | [diff] [blame] | 405 | @SuppressWarnings("unchecked") |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 406 | private Map<String, Object> processVariableExpr (ParseTree node) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 407 | // simplex word or complex assignment (like qname = textSpec)? |
| 408 | String firstChildNodeCat = getNodeCat(node.getChild(0)); |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 409 | Map<String, Object> object = null; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 410 | if (firstChildNodeCat.equals("node")) { |
| 411 | object = KoralObjectGenerator.makeSpan(); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 412 | } |
| 413 | else if (firstChildNodeCat.equals("tok")) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 414 | object = KoralObjectGenerator.makeToken(); |
| 415 | if (node.getChildCount() > 1) { // empty tokens do not wrap a term |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 416 | Map<String, Object> term = KoralObjectGenerator |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 417 | .makeTerm(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 418 | term.put("layer", "orth"); |
| 419 | object.put("wrap", term); |
| 420 | } |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 421 | } |
| 422 | else if (firstChildNodeCat.equals("qName")) { |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 423 | // Only (foundry/)?layer specified. |
| 424 | // May be token or span, depending on indicated layer! |
| 425 | // (e.g. cnx/cat=NP vs mate/pos=NN) |
| 426 | // TODO generalize the list below -> look up layers associated with |
| 427 | // tokens rather than spans somewhere |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 428 | Map<String, Object> qNameParse = parseQNameNode(node |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 429 | .getChild(0)); |
| 430 | if (Arrays.asList(new String[] { "p", "lemma", "m", "orth" }) |
| 431 | .contains(qNameParse.get("layer"))) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 432 | object = KoralObjectGenerator.makeToken(); |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 433 | Map<String, Object> term = KoralObjectGenerator |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 434 | .makeTerm(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 435 | object.put("wrap", term); |
| 436 | term.putAll(qNameParse); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 437 | } |
| 438 | else { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 439 | object = KoralObjectGenerator.makeSpan(); |
| 440 | object.putAll(qNameParse); |
| 441 | } |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 442 | } |
| 443 | else if (firstChildNodeCat.equals("textSpec")) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 444 | object = KoralObjectGenerator.makeToken(); |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 445 | Map<String, Object> term = KoralObjectGenerator |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 446 | .makeTerm(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 447 | object.put("wrap", term); |
| 448 | term.put("layer", "orth"); |
| 449 | term.putAll(parseTextSpec(node.getChild(0))); |
| 450 | } |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 451 | if (node.getChildCount() == 3) { |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 452 | // (foundry/)?layer=key specification |
| Joachim Bingel | b9814e3 | 2015-02-24 16:18:10 +0100 | [diff] [blame] | 453 | if (object.get("@type").equals("koral:token")) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 454 | HashMap<String, Object> term = (HashMap<String, Object>) object |
| 455 | .get("wrap"); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 456 | term.putAll(parseTextSpec(node.getChild(2))); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 457 | term.put( |
| 458 | "match", |
| 459 | parseMatchOperator(getFirstChildWithCat(node, |
| 460 | "eqOperator"))); |
| 461 | } |
| 462 | else { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 463 | object.putAll(parseTextSpec(node.getChild(2))); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 464 | object.put( |
| 465 | "match", |
| 466 | parseMatchOperator(getFirstChildWithCat(node, |
| 467 | "eqOperator"))); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 468 | } |
| 469 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 470 | |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 471 | // Check if there's a unary relation defined for this node |
| 472 | // If yes, parse and retrieve it and put it in the object. |
| 473 | String ref = nodes2refs.get(node); |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 474 | if (unaryRelations.containsKey(ref)) { |
| Joachim Bingel | ef0b5b0 | 2015-01-30 09:37:43 +0000 | [diff] [blame] | 475 | ArrayList<ParseTree> unaryTermsForRef = unaryRelations.get(ref); |
| 476 | if (unaryTermsForRef.size() == 1) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 477 | object.put("attr", parseUnaryOperator(unaryTermsForRef.get(0))); |
| 478 | } |
| 479 | else { |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 480 | Map<String, Object> termGroup = KoralObjectGenerator |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 481 | .makeTermGroup(KoralTermGroupRelation.AND); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 482 | ArrayList<Object> operands = (ArrayList<Object>) termGroup |
| 483 | .get("operands"); |
| Joachim Bingel | ef0b5b0 | 2015-01-30 09:37:43 +0000 | [diff] [blame] | 484 | for (ParseTree unaryTerm : unaryTermsForRef) { |
| 485 | operands.add(parseUnaryOperator(unaryTerm)); |
| 486 | } |
| 487 | object.put("attr", termGroup); |
| 488 | } |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 489 | } |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 490 | if (object != null) { |
| Joachim Bingel | 1f8f378 | 2015-01-19 17:58:41 +0000 | [diff] [blame] | 491 | // query: object only, no relation |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 492 | if (totalRelationCount == 0) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 493 | putIntoSuperObject(object); |
| 494 | } |
| 495 | ParseTree parentsFirstChild = node.getParent().getChild(0); |
| 496 | if (getNodeCat(parentsFirstChild).endsWith("#")) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 497 | nodeVariables.put( |
| 498 | getNodeCat(parentsFirstChild).replaceAll("#", ""), |
| 499 | object); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 500 | } |
| Joachim Bingel | c273a44 | 2015-01-26 14:03:51 +0000 | [diff] [blame] | 501 | if (objectsToWrapInClass.containsKey(node)) { |
| 502 | int classId = objectsToWrapInClass.get(node); |
| 503 | object = KoralObjectGenerator.wrapInClass(object, classId); |
| 504 | } |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 505 | } |
| 506 | return object; |
| 507 | } |
| Joachim Bingel | a07b8e7 | 2014-05-09 15:06:07 +0000 | [diff] [blame] | 508 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 509 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 510 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 511 | * Processes an operand node, creating a map for the operand |
| 512 | * containing |
| 513 | * all its information given in the node definition (referenced |
| 514 | * via '#'). |
| 515 | * If this node has been referred to and used earlier, a reference |
| 516 | * is |
| 517 | * created in its place. The operand will be wrapped in a class |
| 518 | * group if |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 519 | * necessary. |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 520 | * |
| 521 | * @param operandNode |
| 522 | * The operand node of a relation, e.g. '#1' |
| 523 | * @return A map object with the appropriate KoralQuery |
| 524 | * representation |
| 525 | * of the operand |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 526 | */ |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 527 | private Map<String, Object> retrieveOperand (ParseTree operandNode) { |
| 528 | Map<String, Object> operand = null; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 529 | if (!getNodeCat(operandNode.getChild(0)).equals("variableExpr")) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 530 | String ref = operandNode.getChild(0).toStringTree(parser) |
| 531 | .substring(1); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 532 | operand = nodeVariables.get(ref); |
| 533 | if (nodeReferencesTotal.get(ref) > 1) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 534 | if (nodeReferencesProcessed.get(ref) == 0) { |
| 535 | refClassMapping.put(ref, classCounter + 128); |
| 536 | operand = KoralObjectGenerator.wrapInClass(operand, |
| 537 | 128 + classCounter++); |
| 538 | } |
| 539 | else if (nodeReferencesProcessed.get(ref) > 0 |
| 540 | && nodeReferencesTotal.get(ref) > 1) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 541 | try { |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 542 | operand = KoralObjectGenerator.wrapInReference( |
| 543 | operandStack.pop(), refClassMapping.get(ref)); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 544 | } |
| 545 | catch (NoSuchElementException e) { |
| 546 | operand = KoralObjectGenerator |
| 547 | .makeReference(refClassMapping.get(ref)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 548 | } |
| 549 | } |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 550 | nodeReferencesProcessed.put(ref, |
| 551 | nodeReferencesProcessed.get(ref) + 1); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 552 | } |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 553 | } |
| 554 | else { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 555 | operand = processVariableExpr(operandNode.getChild(0)); |
| 556 | } |
| 557 | return operand; |
| 558 | } |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 559 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 560 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 561 | /** |
| 562 | * @param node |
| 563 | * @return |
| 564 | */ |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 565 | private boolean checkOperandsProcessedPreviously (ParseTree node) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 566 | // We can assume two operands. |
| 567 | ParseTree operand1 = node.getChild(0); |
| 568 | ParseTree operand2 = node.getChild(2); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 569 | if (checkOperandProcessedPreviously(operand1) |
| 570 | || checkOperandProcessedPreviously(operand2)) { |
| Joachim Bingel | 4acf246 | 2015-01-27 11:49:57 +0000 | [diff] [blame] | 571 | return true; |
| 572 | } |
| 573 | return false; |
| 574 | } |
| Joachim Bingel | 019ba5c | 2014-04-28 14:59:04 +0000 | [diff] [blame] | 575 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 576 | |
| Joachim Bingel | 4acf246 | 2015-01-27 11:49:57 +0000 | [diff] [blame] | 577 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 578 | * @param operand |
| Joachim Bingel | 4acf246 | 2015-01-27 11:49:57 +0000 | [diff] [blame] | 579 | * @return |
| 580 | */ |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 581 | private boolean checkOperandProcessedPreviously (ParseTree operand) { |
| Joachim Bingel | 4acf246 | 2015-01-27 11:49:57 +0000 | [diff] [blame] | 582 | String operandRef = operand.getText(); |
| 583 | if (operandRef.startsWith("#")) { |
| 584 | operandRef = operandRef.substring(1, operandRef.length()); |
| Joachim Bingel | 4acf246 | 2015-01-27 11:49:57 +0000 | [diff] [blame] | 585 | if (nodeReferencesProcessed.get(operandRef) > 0) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 586 | return true; |
| 587 | } |
| 588 | } |
| 589 | return false; |
| 590 | } |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 591 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 592 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 593 | @SuppressWarnings("unchecked") |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 594 | private void processN_ary_linguistic_term (ParseTree node) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 595 | relationCounter++; |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 596 | // Get operator and determine type of group (sequence/treeRelation/ |
| 597 | // relation/...). It's possible in Annis QL to concatenate operatiors, |
| 598 | // so there may be several operators under one n_ary_linguistic_term |
| 599 | // node. Counter 'i' will iteratively point to all operator nodes |
| 600 | // (odd-numbered children) under this node. |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 601 | for (int i = 1; i < node.getChildCount(); i = i + 2) { |
| 602 | ParseTree operandTree1 = node.getChild(i - 1); |
| 603 | ParseTree operandTree2 = node.getChild(i + 1); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 604 | String reltype = getNodeCat(node.getChild(i).getChild(0)); |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 605 | |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 606 | Map<String, Object> group = null; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 607 | ArrayList<Object> operands = null; |
| 608 | // make sure one of the operands has already been put into a |
| Joachim Bingel | 1f8f378 | 2015-01-19 17:58:41 +0000 | [diff] [blame] | 609 | // relation (if this is not the 1st relation). If none of the |
| 610 | // operands has been ingested at a lower level (and is therefore |
| 611 | // unavailable for refrencing), queue this relation for later |
| 612 | // processing. |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 613 | if (relationCounter != 1) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 614 | if (!checkOperandsProcessedPreviously(node)) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 615 | queuedRelations.add(node); |
| 616 | relationCounter--; |
| Joachim Bingel | 4acf246 | 2015-01-27 11:49:57 +0000 | [diff] [blame] | 617 | if (verbose) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 618 | System.out |
| 619 | .println("Adding to queue: " + node.getText()); |
| Joachim Bingel | 4acf246 | 2015-01-27 11:49:57 +0000 | [diff] [blame] | 620 | } |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 621 | objectsToPop.push(stackedObjects); |
| 622 | return; |
| 623 | } |
| 624 | } |
| 625 | // Retrieve operands. |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 626 | Map<String, Object> operand1 = retrieveOperand(operandTree1); |
| 627 | Map<String, Object> operand2 = retrieveOperand(operandTree2); |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 628 | // 'Proper' n_ary_linguistic_operators receive a considerably |
| 629 | // different serialisation than 'commonparent' and 'commonancestor' |
| 630 | // For the latter cases, a dummy span is introduced and declared as |
| 631 | // a span class that has a dominance relation towards the two |
| 632 | // operands, one after the other, thus resulting in two nested |
| 633 | // relations! A Poliqarp+ equivalent for A $ B would be |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 634 | // contains(focus(1:contains({1:<>},A)), B). |
| 635 | // This is modeled here... |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 636 | if (reltype.equals("commonparent") |
| 637 | || reltype.equals("commonancestor")) { |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 638 | // make an (outer) group and an inner group containing the dummy |
| 639 | // node or previous relations |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 640 | group = KoralObjectGenerator.makeGroup(KoralOperation.RELATION); |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 641 | Map<String, Object> innerGroup = KoralObjectGenerator |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 642 | .makeGroup(KoralOperation.RELATION); |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 643 | Map<String, Object> relation = KoralObjectGenerator |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 644 | .makeRelation(); |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 645 | Map<String, Object> term = KoralObjectGenerator |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 646 | .makeTerm(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 647 | term.put("layer", "c"); |
| 648 | relation.put("wrap", term); |
| 649 | // commonancestor is an indirect commonparent relation |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 650 | if (reltype.equals("commonancestor")) |
| 651 | relation.put("boundary", |
| 652 | KoralObjectGenerator.makeBoundary(1, null)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 653 | group.put("relation", relation); |
| 654 | innerGroup.put("relation", relation); |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 655 | // Get operands list before possible re-assignment of 'group' |
| 656 | // (see following 'if') |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 657 | ArrayList<Object> outerOperands = (ArrayList<Object>) group |
| 658 | .get("operands"); |
| 659 | ArrayList<Object> innerOperands = (ArrayList<Object>) innerGroup |
| 660 | .get("operands"); |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 661 | // for lowest level, add the underspecified node as first |
| 662 | // operand and wrap it in a class group |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 663 | if (i == 1) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 664 | innerOperands |
| 665 | .add(KoralObjectGenerator.wrapInClass( |
| 666 | KoralObjectGenerator.makeSpan(), |
| 667 | classCounter + 128)); |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 668 | // add the first operand and wrap the whole group in a |
| 669 | // focusing reference |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 670 | innerOperands.add(operand1); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 671 | innerGroup = KoralObjectGenerator.wrapInReference( |
| 672 | innerGroup, classCounter + 128); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 673 | outerOperands.add(innerGroup); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 674 | } |
| 675 | else { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 676 | outerOperands.add(operandStack.pop()); |
| 677 | } |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 678 | // Lookahead: if next operator is not commonparent or |
| 679 | // commonancestor, wrap in class for accessibility |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 680 | if (i < node.getChildCount() - 2 |
| 681 | && !getNodeCat(node.getChild(i + 2).getChild(0)) |
| 682 | .startsWith("common")) { |
| 683 | operand2 = KoralObjectGenerator.wrapInClass(operand2, |
| 684 | ++classCounter + 128); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 685 | } |
| 686 | outerOperands.add(operand2); |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 687 | // Wrap in another reference object in case other relations |
| 688 | // are following |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 689 | if (i < node.getChildCount() - 2) { |
| 690 | group = KoralObjectGenerator.wrapInReference(group, |
| 691 | classCounter + 128); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 692 | } |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 693 | // All other n-ary linguistic relations have special 'relation' |
| 694 | // attributes defined in KoralQ. and can be handled more easily |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 695 | } |
| 696 | else { |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 697 | Map<String, Object> operatorGroup = parseOperatorNode(node |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 698 | .getChild(i).getChild(0)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 699 | String groupType; |
| 700 | try { |
| 701 | groupType = (String) operatorGroup.get("groupType"); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 702 | } |
| 703 | catch (ClassCastException | NullPointerException n) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 704 | groupType = "relation"; |
| 705 | } |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 706 | if (groupType.equals("relation") |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 707 | // || groupType.equals("treeRelation") |
| 708 | ) { |
| 709 | group = KoralObjectGenerator.makeGroup(KoralOperation.RELATION); |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 710 | Map<String, Object> relation = new HashMap<String, Object>(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 711 | putAllButGroupType(relation, operatorGroup); |
| margaretha | c588963 | 2017-04-28 16:04:38 +0200 | [diff] [blame^] | 712 | group.put("relType", relation); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 713 | } |
| 714 | else if (groupType.equals("sequence")) { |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 715 | group = KoralObjectGenerator.makeGroup(KoralOperation.SEQUENCE); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 716 | putAllButGroupType(group, operatorGroup); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 717 | } |
| margaretha | 549fb0a | 2017-04-27 18:35:54 +0200 | [diff] [blame] | 718 | else if (groupType.equals("hierarchy")) { |
| 719 | group = KoralObjectGenerator.makeGroup(KoralOperation.HIERARCHY); |
| 720 | putAllButGroupType(group, operatorGroup); |
| 721 | } |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 722 | else if (groupType.equals("position")) { |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 723 | group = new HashMap<String, Object>(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 724 | putAllButGroupType(group, operatorGroup); |
| 725 | } |
| Joachim Bingel | 9c3ddb9 | 2014-06-23 13:49:58 +0000 | [diff] [blame] | 726 | |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 727 | // Get operands list before possible re-assignment of 'group' |
| 728 | // (see following 'if') |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 729 | operands = (ArrayList<Object>) group.get("operands"); |
| Joachim Bingel | 1846c8c | 2014-07-08 14:13:31 +0000 | [diff] [blame] | 730 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 731 | ParseTree leftChildSpec = getFirstChildWithCat(node.getChild(i) |
| 732 | .getChild(0), "@l"); |
| 733 | ParseTree rightChildSpec = getFirstChildWithCat(node |
| 734 | .getChild(i).getChild(0), "@r"); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 735 | if (leftChildSpec != null || rightChildSpec != null) { |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 736 | KoralFrame frame = (leftChildSpec != null) ? KoralFrame.STARTS_WITH |
| 737 | : KoralFrame.ENDS_WITH; |
| 738 | ArrayList<KoralFrame> frames = new ArrayList<KoralFrame>(); |
| 739 | frames.add(frame); |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 740 | Map<String, Object> positionGroup = KoralObjectGenerator |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 741 | .makePosition(frames); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 742 | operand2 = KoralObjectGenerator.wrapInClass(operand2, |
| 743 | ++classCounter + 128); |
| 744 | ((ArrayList<Object>) positionGroup.get("operands")) |
| 745 | .add(group); |
| 746 | ((ArrayList<Object>) positionGroup.get("operands")) |
| 747 | .add(KoralObjectGenerator |
| 748 | .makeReference(classCounter + 128)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 749 | group = positionGroup; |
| 750 | } |
| 751 | |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 752 | // Wrap in reference object in case other relations follow |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 753 | if (i < node.getChildCount() - 2) { |
| 754 | group = KoralObjectGenerator.wrapInReference(group, |
| 755 | classCounter + 128); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 756 | } |
| 757 | |
| 758 | // Inject operands. |
| 759 | // -> Case distinction: |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 760 | if (node.getChildCount() == 3) { |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 761 | // Things are easy when there's just one operator |
| 762 | // (thus 3 children incl. operands)... |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 763 | if (operand1 != null) |
| 764 | operands.add(operand1); |
| 765 | if (operand2 != null) |
| 766 | operands.add(operand2); |
| 767 | } |
| 768 | else { |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 769 | // ... but things get a little more complicated here. The |
| 770 | // AST is of this form: (operand1 operator1 operand2 |
| 771 | // operator2 operand3 operator3 ...), but we'll have |
| 772 | // to serialize it in a nested, binary way: (((operand1 |
| 773 | // operator1 operand2) operator2 operand3) operator3 ...). |
| 774 | // The following code will do just that: |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 775 | if (i == 1) { |
| 776 | // for the first operator, include both operands |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 777 | if (operand1 != null) |
| 778 | operands.add(operand1); |
| 779 | if (operand2 != null) |
| 780 | operands.add(KoralObjectGenerator.wrapInClass( |
| 781 | operand2, 128 + classCounter++)); |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 782 | // Don't put this into the super object directly but |
| 783 | // store on operandStack (because this group will have |
| 784 | // to be an operand of a subsequent operator) |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 785 | operandStack.push(group); |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 786 | // for all subsequent operators, only take 2nd operand |
| 787 | // (1st was already added by previous operator) |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 788 | } |
| 789 | else if (i < node.getChildCount() - 2) { |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 790 | // for all intermediate operators, include other |
| 791 | // previous groups and 2nd operand. Store this on the |
| 792 | // operandStack, too. |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 793 | if (operand2 != null) |
| 794 | operands.add(KoralObjectGenerator.wrapInClass( |
| 795 | operand2, 128 + classCounter++)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 796 | operands.add(0, operandStack.pop()); |
| 797 | operandStack.push(group); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 798 | } |
| 799 | else if (i == node.getChildCount() - 2) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 800 | // This is the last operator. Include 2nd operand only |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 801 | if (operand2 != null) |
| 802 | operands.add(operand2); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 803 | } |
| 804 | } |
| 805 | } |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 806 | // Final step: decide what to do with the 'group' object, depending |
| 807 | // on whether all relations have been processed |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 808 | if (i == node.getChildCount() - 2 |
| 809 | && relationCounter == totalRelationCount) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 810 | putIntoSuperObject(group); |
| 811 | if (!operandStack.isEmpty()) { |
| 812 | operands.add(0, operandStack.pop()); |
| 813 | } |
| 814 | objectStack.push(group); |
| 815 | stackedObjects++; |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 816 | } |
| 817 | else { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 818 | operandStack.push(group); |
| 819 | } |
| 820 | } |
| 821 | } |
| 822 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 823 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 824 | /** |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 825 | * Parses a unary_linguistic_operator node. Possible operators |
| 826 | * are: |
| 827 | * root, arity, tokenarity. Operators are embedded into a |
| 828 | * koral:term, |
| Joachim Bingel | b9814e3 | 2015-02-24 16:18:10 +0100 | [diff] [blame] | 829 | * in turn wrapped by an 'attr' property in a koral:span. |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 830 | * |
| 831 | * @param node |
| 832 | * The unary_linguistic_operator node |
| 833 | * @return A map containing the attr key, to be inserted into |
| 834 | * koral:span |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 835 | */ |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 836 | private Map<String, Object> parseUnaryOperator (ParseTree node) { |
| 837 | Map<String, Object> term = KoralObjectGenerator.makeTerm(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 838 | String op = node.getChild(1).toStringTree(parser).substring(1); |
| 839 | if (op.equals("arity") || op.equals("tokenarity")) { |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 840 | Map<String, Object> boundary = boundaryFromRangeSpec( |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 841 | node.getChild(3), false); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 842 | term.put(op, boundary); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 843 | } |
| 844 | else { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 845 | term.put(op, true); |
| 846 | } |
| Joachim Bingel | 0fae220 | 2015-01-28 15:53:55 +0000 | [diff] [blame] | 847 | return term; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 848 | } |
| 849 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 850 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 851 | @SuppressWarnings("unchecked") |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 852 | private Map<String, Object> parseOperatorNode ( |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 853 | ParseTree operatorNode) { |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 854 | Map<String, Object> relation = null; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 855 | String operator = getNodeCat(operatorNode); |
| 856 | // DOMINANCE |
| 857 | if (operator.equals("dominance")) { |
| margaretha | 549fb0a | 2017-04-27 18:35:54 +0200 | [diff] [blame] | 858 | // relation = KoralObjectGenerator.makeRelation(); |
| 859 | relation = new HashMap<String, Object>(); |
| 860 | relation.put("groupType", "hierarchy"); |
| 861 | // ParseTree qName = getFirstChildWithCat(operatorNode, "qName"); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 862 | ParseTree edgeSpecNode = getFirstChildWithCat(operatorNode, |
| 863 | "edgeSpec"); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 864 | ParseTree star = getFirstChildWithCat(operatorNode, "*"); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 865 | ParseTree rangeSpec = getFirstChildWithCat(operatorNode, |
| 866 | "rangeSpec"); |
| margaretha | 549fb0a | 2017-04-27 18:35:54 +0200 | [diff] [blame] | 867 | |
| 868 | // term.put("layer", "c"); |
| 869 | // if (qName != null) |
| 870 | // term = parseQNameNode(qName); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 871 | if (edgeSpecNode != null) { |
| margaretha | 549fb0a | 2017-04-27 18:35:54 +0200 | [diff] [blame] | 872 | Map<String, Object> term = KoralObjectGenerator |
| 873 | .makeTerm(); |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 874 | Map<String, Object> edgeSpec = parseEdgeSpec(edgeSpecNode); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 875 | String edgeSpecType = (String) edgeSpec.get("@type"); |
| Joachim Bingel | b9814e3 | 2015-02-24 16:18:10 +0100 | [diff] [blame] | 876 | if (edgeSpecType.equals("koral:termGroup")) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 877 | ((ArrayList<Object>) edgeSpec.get("operands")).add(term); |
| margaretha | c588963 | 2017-04-28 16:04:38 +0200 | [diff] [blame^] | 878 | // term = edgeSpec; |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 879 | } |
| margaretha | c588963 | 2017-04-28 16:04:38 +0200 | [diff] [blame^] | 880 | // else { |
| margaretha | 549fb0a | 2017-04-27 18:35:54 +0200 | [diff] [blame] | 881 | // term = KoralObjectGenerator.makeTermGroup(KoralTermGroupRelation.AND); |
| 882 | // ArrayList<Object> termGroupOperands = (ArrayList<Object>) term |
| 883 | // .get("operands"); |
| 884 | // termGroupOperands.add(edgeSpec); |
| 885 | // Map<String, Object> constTerm = KoralObjectGenerator |
| 886 | // .makeTerm(); |
| 887 | // constTerm.put("layer", "c"); |
| 888 | // termGroupOperands.add(constTerm); |
| margaretha | c588963 | 2017-04-28 16:04:38 +0200 | [diff] [blame^] | 889 | // } |
| 890 | term = edgeSpec; |
| 891 | Map<String, Object> relType = KoralObjectGenerator.makeRelation(); |
| 892 | relType.put("wrap", term); |
| 893 | relation.put("relType", relType); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 894 | } |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 895 | if (star != null) |
| 896 | relation.put("boundary", |
| 897 | KoralObjectGenerator.makeBoundary(0, null)); |
| 898 | if (rangeSpec != null) |
| 899 | relation.put("boundary", boundaryFromRangeSpec(rangeSpec)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 900 | } |
| 901 | else if (operator.equals("pointing")) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 902 | relation = KoralObjectGenerator.makeRelation(); |
| 903 | relation.put("groupType", "relation"); |
| 904 | ParseTree qName = getFirstChildWithCat(operatorNode, "qName"); |
| margaretha | c588963 | 2017-04-28 16:04:38 +0200 | [diff] [blame^] | 905 | ParseTree edgeSpec = getFirstChildWithCat(operatorNode, "edgeAnno"); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 906 | ParseTree star = getFirstChildWithCat(operatorNode, "*"); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 907 | ParseTree rangeSpec = getFirstChildWithCat(operatorNode, |
| 908 | "rangeSpec"); |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 909 | Map<String, Object> term = KoralObjectGenerator |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 910 | .makeTerm(); |
| margaretha | c588963 | 2017-04-28 16:04:38 +0200 | [diff] [blame^] | 911 | if (qName != null){ |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 912 | term.putAll(parseQNameNode(qName)); |
| margaretha | c588963 | 2017-04-28 16:04:38 +0200 | [diff] [blame^] | 913 | relation.put("wrap", term); |
| 914 | } |
| 915 | if (edgeSpec != null){ |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 916 | term.putAll(parseEdgeSpec(edgeSpec)); |
| margaretha | c588963 | 2017-04-28 16:04:38 +0200 | [diff] [blame^] | 917 | Map<String, Object> relType = KoralObjectGenerator.makeRelation(); |
| 918 | relType.put("wrap", term); |
| 919 | relation.put("relType", relType); |
| 920 | } |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 921 | if (star != null) |
| 922 | relation.put("boundary", |
| 923 | KoralObjectGenerator.makeBoundary(0, null)); |
| 924 | if (rangeSpec != null) |
| 925 | relation.put("boundary", boundaryFromRangeSpec(rangeSpec)); |
| margaretha | c588963 | 2017-04-28 16:04:38 +0200 | [diff] [blame^] | 926 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 927 | } |
| 928 | else if (operator.equals("precedence")) { |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 929 | relation = new HashMap<String, Object>(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 930 | relation.put("groupType", "sequence"); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 931 | ParseTree rangeSpec = getFirstChildWithCat(operatorNode, |
| 932 | "rangeSpec"); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 933 | ParseTree star = getFirstChildWithCat(operatorNode, "*"); |
| 934 | ArrayList<Object> distances = new ArrayList<Object>(); |
| 935 | if (star != null) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 936 | distances.add(KoralObjectGenerator.makeDistance("w", 0, null)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 937 | relation.put("distances", distances); |
| 938 | } |
| 939 | if (rangeSpec != null) { |
| 940 | distances.add(parseDistance(rangeSpec)); |
| 941 | relation.put("distances", distances); |
| 942 | } |
| 943 | relation.put("inOrder", true); |
| 944 | } |
| 945 | else if (operator.equals("spanrelation")) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 946 | String reltype = operatorNode.getChild(0).toStringTree(parser); |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 947 | ArrayList<KoralFrame> frames = new ArrayList<>(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 948 | switch (reltype) { |
| 949 | case "_=_": |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 950 | frames.add(KoralFrame.MATCHES); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 951 | break; |
| 952 | case "_l_": |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 953 | frames.add(KoralFrame.STARTS_WITH); |
| 954 | frames.add(KoralFrame.MATCHES); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 955 | break; |
| 956 | case "_r_": |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 957 | frames.add(KoralFrame.ENDS_WITH); |
| 958 | frames.add(KoralFrame.MATCHES); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 959 | break; |
| 960 | case "_i_": |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 961 | frames.add(KoralFrame.IS_AROUND); |
| Joachim Bingel | 1d79104 | 2015-02-03 10:19:47 +0000 | [diff] [blame] | 962 | break; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 963 | case "_o_": |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 964 | frames.add(KoralFrame.OVERLAPS_LEFT); |
| 965 | frames.add(KoralFrame.OVERLAPS_RIGHT); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 966 | break; |
| 967 | case "_ol_": |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 968 | frames.add(KoralFrame.OVERLAPS_LEFT); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 969 | break; |
| 970 | case "_or_": |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 971 | frames.add(KoralFrame.OVERLAPS_RIGHT); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 972 | break; |
| 973 | } |
| Joachim Bingel | 1d79104 | 2015-02-03 10:19:47 +0000 | [diff] [blame] | 974 | relation = KoralObjectGenerator.makePosition(frames); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 975 | relation.put("groupType", "position"); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 976 | } |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 977 | else if (operator.equals("near")) { |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 978 | relation = new HashMap<String, Object>(); |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 979 | relation.put("groupType", "sequence"); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 980 | ParseTree rangeSpec = getFirstChildWithCat(operatorNode, |
| 981 | "rangeSpec"); |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 982 | ParseTree star = getFirstChildWithCat(operatorNode, "*"); |
| 983 | ArrayList<Object> distances = new ArrayList<Object>(); |
| 984 | if (star != null) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 985 | distances.add(KoralObjectGenerator.makeDistance("w", 0, null)); |
| Joachim Bingel | 5fd0932 | 2015-01-29 14:01:30 +0000 | [diff] [blame] | 986 | relation.put("distances", distances); |
| 987 | } |
| 988 | if (rangeSpec != null) { |
| 989 | distances.add(parseDistance(rangeSpec)); |
| 990 | relation.put("distances", distances); |
| 991 | } |
| 992 | relation.put("inOrder", false); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 993 | } |
| 994 | else if (operator.equals("identity")) { |
| 995 | //TODO since ANNIS v. 3.1.6 |
| 996 | } |
| 997 | else if (operator.equals("equalvalue")) { |
| 998 | //TODO since ANNIS v. 3.1.6 |
| 999 | } |
| 1000 | else if (operator.equals("notequalvalue")) { |
| 1001 | //TODO since ANNIS v. 3.1.6 |
| 1002 | } |
| 1003 | return relation; |
| 1004 | } |
| 1005 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1006 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1007 | @SuppressWarnings("unchecked") |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 1008 | private Map<String, Object> parseEdgeSpec (ParseTree edgeSpec) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1009 | List<ParseTree> annos = getChildrenWithCat(edgeSpec, "edgeAnno"); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1010 | if (annos.size() == 1) |
| 1011 | return parseEdgeAnno(annos.get(0)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1012 | else { |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 1013 | Map<String, Object> termGroup = KoralObjectGenerator |
| margaretha | fe7fc45 | 2017-01-17 17:19:30 +0100 | [diff] [blame] | 1014 | .makeTermGroup(KoralTermGroupRelation.AND); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1015 | ArrayList<Object> operands = (ArrayList<Object>) termGroup |
| 1016 | .get("operands"); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1017 | for (ParseTree anno : annos) { |
| 1018 | operands.add(parseEdgeAnno(anno)); |
| 1019 | } |
| 1020 | return termGroup; |
| 1021 | } |
| 1022 | } |
| 1023 | |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 1024 | private Map<String, Object> parseEdgeAnno (ParseTree edgeAnnoSpec) { |
| 1025 | Map<String, Object> edgeAnno = KoralObjectGenerator |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1026 | .makeTerm(); |
| 1027 | ParseTree textSpecNode = getFirstChildWithCat(edgeAnnoSpec, "textSpec"); |
| margaretha | c588963 | 2017-04-28 16:04:38 +0200 | [diff] [blame^] | 1028 | ParseTree keyNode = getFirstChildWithCat(edgeAnnoSpec, "key"); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1029 | ParseTree layerNode = getFirstChildWithCat(edgeAnnoSpec, "layer"); |
| 1030 | ParseTree foundryNode = getFirstChildWithCat(edgeAnnoSpec, "foundry"); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1031 | ParseTree matchOperatorNode = getFirstChildWithCat(edgeAnnoSpec, |
| 1032 | "eqOperator"); |
| 1033 | if (foundryNode != null) |
| 1034 | edgeAnno.put("foundry", foundryNode.getChild(0) |
| 1035 | .toStringTree(parser)); |
| 1036 | if (layerNode != null) |
| 1037 | edgeAnno.put("layer", layerNode.getChild(0).toStringTree(parser)); |
| margaretha | c588963 | 2017-04-28 16:04:38 +0200 | [diff] [blame^] | 1038 | if (keyNode != null) |
| 1039 | edgeAnno.put("key", keyNode.getChild(0).toStringTree(parser)); |
| 1040 | edgeAnno.putAll(parseTextSpec(textSpecNode, "value")); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1041 | edgeAnno.put("match", parseMatchOperator(matchOperatorNode)); |
| 1042 | return edgeAnno; |
| 1043 | } |
| 1044 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1045 | |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 1046 | private Map<String, Object> boundaryFromRangeSpec ( |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 1047 | ParseTree rangeSpec) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1048 | return boundaryFromRangeSpec(rangeSpec, true); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1049 | } |
| 1050 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1051 | |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 1052 | private Map<String, Object> boundaryFromRangeSpec ( |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 1053 | ParseTree rangeSpec, boolean expandToMax) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1054 | Integer min = Integer.parseInt(rangeSpec.getChild(0).toStringTree( |
| 1055 | parser)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1056 | Integer max = min; |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1057 | if (expandToMax) |
| 1058 | max = null; |
| 1059 | if (rangeSpec.getChildCount() == 3) |
| 1060 | max = Integer.parseInt(rangeSpec.getChild(2).toStringTree(parser)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1061 | return KoralObjectGenerator.makeBoundary(min, max); |
| 1062 | } |
| 1063 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1064 | |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 1065 | private Map<String, Object> parseDistance (ParseTree rangeSpec) { |
| Joachim Bingel | 019190b | 2015-03-20 10:41:24 +0100 | [diff] [blame] | 1066 | String minString = rangeSpec.getChild(0).toStringTree(parser); |
| 1067 | String maxString = null; // not always given, prevent NPE |
| 1068 | if (minString.equals("0")) { |
| 1069 | addError(StatusCodes.MALFORMED_QUERY, "Distance may not be 0!"); |
| 1070 | return KoralObjectGenerator.makeDistance("w", 0, 0); |
| 1071 | } |
| 1072 | // decrease by 1 to account for disparity between ANNIS distance and |
| 1073 | // koral:distance (ANNIS "x .1,3 y" means distance range 0,2 in KoralQ) |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1074 | Integer min = Integer.parseInt(minString) - 1; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1075 | Integer max = null; |
| Joachim Bingel | 019190b | 2015-03-20 10:41:24 +0100 | [diff] [blame] | 1076 | if (rangeSpec.getChildCount() == 3) { |
| 1077 | maxString = rangeSpec.getChild(2).toStringTree(parser); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1078 | max = Integer.parseInt(maxString) - 1; |
| Joachim Bingel | 019190b | 2015-03-20 10:41:24 +0100 | [diff] [blame] | 1079 | } |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1080 | return KoralObjectGenerator.makeDistance("w", min, max); |
| 1081 | } |
| 1082 | |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 1083 | private Map<String, Object> parseTextSpec (ParseTree node) { |
| margaretha | c588963 | 2017-04-28 16:04:38 +0200 | [diff] [blame^] | 1084 | return parseTextSpec(node, "key"); |
| 1085 | } |
| 1086 | |
| 1087 | private Map<String, Object> parseTextSpec (ParseTree node, String name) { |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 1088 | Map<String, Object> term = new HashMap<String, Object>(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1089 | if (hasChild(node, "regex")) { |
| 1090 | term.put("type", "type:regex"); |
| margaretha | c588963 | 2017-04-28 16:04:38 +0200 | [diff] [blame^] | 1091 | term.put(name, node.getChild(0).getChild(0).toStringTree(parser) |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1092 | .replaceAll("/", "")); |
| 1093 | } |
| 1094 | else { |
| margaretha | c588963 | 2017-04-28 16:04:38 +0200 | [diff] [blame^] | 1095 | term.put(name, node.getChild(1).toStringTree(parser)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1096 | } |
| 1097 | term.put("match", "match:eq"); |
| 1098 | return term; |
| 1099 | } |
| 1100 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1101 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1102 | /** |
| 1103 | * Parses the match operator (= or !=) |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1104 | * |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1105 | * @param node |
| 1106 | * @return |
| 1107 | */ |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1108 | private String parseMatchOperator (ParseTree node) { |
| 1109 | if (node.getChildCount() > 0) { |
| 1110 | return node.getChild(0).getText().equals("=") ? "match:eq" |
| 1111 | : "match:ne"; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1112 | } |
| Joachim Bingel | 6e4e9f3 | 2015-01-30 18:59:31 +0000 | [diff] [blame] | 1113 | return "match:eq"; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1114 | } |
| 1115 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1116 | |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 1117 | private Map<String, Object> parseQNameNode (ParseTree node) { |
| 1118 | Map<String, Object> fields = new HashMap<String, Object>(); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1119 | ParseTree layerNode = getFirstChildWithCat(node, "layer"); |
| 1120 | ParseTree foundryNode = getFirstChildWithCat(node, "foundry"); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1121 | if (foundryNode != null) |
| 1122 | fields.put("foundry", foundryNode.getChild(0).toStringTree(parser)); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1123 | String layer = layerNode.getChild(0).toStringTree(parser); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1124 | if (layer.equals("pos")) |
| 1125 | layer = "p"; |
| 1126 | if (layer.equals("cat")) |
| 1127 | layer = "c"; |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1128 | fields.put("layer", layer); |
| 1129 | return fields; |
| 1130 | } |
| 1131 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1132 | |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 1133 | private void putIntoSuperObject (Map<String, Object> object) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1134 | putIntoSuperObject(object, 0); |
| 1135 | } |
| 1136 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1137 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1138 | @SuppressWarnings({ "unchecked" }) |
| margaretha | 67eec73 | 2017-01-18 17:45:16 +0100 | [diff] [blame] | 1139 | private void putIntoSuperObject (Map<String, Object> object, |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1140 | int objStackPosition) { |
| 1141 | if (objectStack.size() > objStackPosition) { |
| 1142 | ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack |
| 1143 | .get(objStackPosition).get("operands"); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1144 | if (!invertedOperandsLists.contains(topObjectOperands)) { |
| 1145 | topObjectOperands.add(object); |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1146 | } |
| 1147 | else { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1148 | topObjectOperands.add(0, object); |
| 1149 | } |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1150 | } |
| 1151 | else { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1152 | requestMap.put("query", object); |
| 1153 | } |
| 1154 | } |
| 1155 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1156 | |
| 1157 | private void putAllButGroupType (Map<String, Object> container, |
| 1158 | Map<String, Object> input) { |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1159 | for (String key : input.keySet()) { |
| 1160 | if (!key.equals("groupType")) { |
| 1161 | container.put(key, input.get(key)); |
| 1162 | } |
| 1163 | } |
| 1164 | } |
| 1165 | |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1166 | |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1167 | private ParserRuleContext parseAnnisQuery (String query) { |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1168 | Lexer lexer = new AqlLexer((CharStream) null); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1169 | ParserRuleContext tree = null; |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1170 | Antlr4DescriptiveErrorListener errorListener = new Antlr4DescriptiveErrorListener( |
| 1171 | query); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1172 | // Like p. 111 |
| 1173 | try { |
| 1174 | // Tokenize input data |
| 1175 | ANTLRInputStream input = new ANTLRInputStream(query); |
| 1176 | lexer.setInputStream(input); |
| 1177 | CommonTokenStream tokens = new CommonTokenStream(lexer); |
| 1178 | parser = new AqlParser(tokens); |
| 1179 | // Don't throw out erroneous stuff |
| 1180 | parser.setErrorHandler(new BailErrorStrategy()); |
| 1181 | lexer.removeErrorListeners(); |
| Joachim Bingel | 3fa584b | 2014-12-17 13:35:43 +0000 | [diff] [blame] | 1182 | lexer.addErrorListener(errorListener); |
| 1183 | parser.removeErrorListeners(); |
| 1184 | parser.addErrorListener(errorListener); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1185 | // Get starting rule from parser |
| Joachim Bingel | a6954de | 2015-03-20 16:37:37 +0100 | [diff] [blame] | 1186 | Method startRule = AqlParser.class.getMethod("start"); |
| 1187 | tree = (ParserRuleContext) startRule |
| 1188 | .invoke(parser, (Object[]) null); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1189 | } |
| 1190 | // Some things went wrong ... |
| 1191 | catch (Exception e) { |
| Joachim Bingel | a145c98 | 2015-02-18 18:31:57 +0100 | [diff] [blame] | 1192 | log.error("Could not parse query. " |
| 1193 | + "Please make sure it is well-formed."); |
| Joachim Bingel | c9551b3 | 2015-01-19 14:26:58 +0000 | [diff] [blame] | 1194 | log.error(errorListener.generateFullErrorMsg().toString()); |
| 1195 | addError(errorListener.generateFullErrorMsg()); |
| 1196 | } |
| 1197 | return tree; |
| 1198 | } |
| Joachim Bingel | 761d1c1 | 2014-12-17 14:02:40 +0000 | [diff] [blame] | 1199 | } |