| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.serialize; |
| 2 | |
| 3 | import java.util.ArrayList; |
| Joachim Bingel | d5161a1 | 2014-01-08 11:15:49 +0000 | [diff] [blame] | 4 | import java.util.Arrays; |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 5 | import java.util.LinkedHashMap; |
| 6 | import java.util.LinkedList; |
| 7 | import java.util.List; |
| 8 | import java.util.Map; |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 9 | |
| 10 | import org.antlr.runtime.ANTLRStringStream; |
| 11 | import org.antlr.runtime.RecognitionException; |
| 12 | import org.antlr.runtime.tree.Tree; |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 13 | import org.antlr.v4.runtime.tree.ParseTree; |
| 14 | import org.slf4j.Logger; |
| 15 | import org.slf4j.LoggerFactory; |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 16 | |
| 17 | import de.ids_mannheim.korap.query.cosmas2.c2psLexer; |
| 18 | import de.ids_mannheim.korap.query.cosmas2.c2psParser; |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 19 | import de.ids_mannheim.korap.query.serialize.AbstractSyntaxTree; |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 20 | import de.ids_mannheim.korap.query.serialize.util.CosmasCondition; |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 21 | import de.ids_mannheim.korap.util.QueryException; |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 22 | |
| 23 | /** |
| 24 | * Map representation of CosmasII syntax tree as returned by ANTLR |
| 25 | * @author joachim |
| 26 | * |
| 27 | */ |
| 28 | public class CosmasTree extends AbstractSyntaxTree { |
| 29 | |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 30 | Logger log = LoggerFactory.getLogger(CosmasTree.class); |
| 31 | |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 32 | private static c2psParser cosmasParser; |
| 33 | /* |
| 34 | * Following collections have the following functions: |
| 35 | * - the request is a map with two keys (meta/query): {meta=[], query=[]} |
| 36 | * - the query is a list of token group maps: {meta=[], query=[tg1=[], tg2=[]]} |
| 37 | * - each token group is a list of tokens: {meta=[], query=[tg1=[t1_1, t1_2], tg2=[t2_1, t2_2, t2_3]]} |
| 38 | * - each token corresponds to a single 'fields' linked list {meta=[], query=[tg1=[t1_1=[], t1_2=[]], ... ]} |
| 39 | * - each fields list contains a logical operator and 'field maps' defining attributes and values |
| 40 | * {meta=[], query=[tg1=[t1_1=[[disj, {base=foo}, {base=bar}]], t1_2=[]], ... ]} |
| 41 | */ |
| 42 | String query; |
| 43 | LinkedHashMap<String,Object> requestMap = new LinkedHashMap<String,Object>(); |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 44 | /** |
| 45 | * Keeps track of active object. |
| 46 | */ |
| 47 | LinkedList<LinkedHashMap<String,Object>> objectStack = new LinkedList<LinkedHashMap<String,Object>>(); |
| 48 | /** |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 49 | * Makes it possible to store several distantTokenGroups |
| 50 | */ |
| 51 | LinkedList<ArrayList<List<Object>>> distantTokensStack = new LinkedList<ArrayList<List<Object>>>(); |
| 52 | /** |
| 53 | * Field for repetition query (Kleene + or * operations, or min/max queries: {2,4} |
| 54 | */ |
| 55 | String repetition = ""; |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 56 | /** |
| 57 | * Keeps track of open node categories |
| 58 | */ |
| 59 | LinkedList<String> openNodeCats = new LinkedList<String>(); |
| 60 | /** |
| 61 | * Global control structure for fieldGroups, keeps track of open fieldGroups. |
| 62 | */ |
| 63 | LinkedList<ArrayList<Object>> openFieldGroups = new LinkedList<ArrayList<Object>>(); |
| 64 | /** |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 65 | * Keeps track of how many objects there are to pop after every recursion of {@link #processNode(ParseTree)} |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 66 | */ |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 67 | LinkedList<Integer> objectsToPop = new LinkedList<Integer>(); |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 68 | /** |
| 69 | * Flag that indicates whether token fields or meta fields are currently being processed |
| 70 | */ |
| 71 | boolean inMeta = false; |
| 72 | boolean negate = false; |
| 73 | |
| 74 | Tree cosmasTree; |
| 75 | |
| 76 | LinkedHashMap<String,Object> treeMap = new LinkedHashMap<String,Object>(); |
| 77 | /** |
| 78 | * Keeps track of all visited nodes in a tree |
| 79 | */ |
| 80 | List<Tree> visited = new ArrayList<Tree>(); |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 81 | |
| 82 | Integer stackedObjects = 0; |
| 83 | |
| 84 | private static boolean debug = false; |
| Joachim Bingel | d5161a1 | 2014-01-08 11:15:49 +0000 | [diff] [blame] | 85 | /** |
| 86 | * A list of node categories that can be sequenced (i.e. which can be in a sequence with any number of other nodes in this list) |
| 87 | */ |
| 88 | private final List<String> sequentiableCats = Arrays.asList(new String[] {"OPWF", "OPLEM", "OPMORPH", "OPBEG", "OPEND", "OPIN"}); |
| 89 | /** |
| 90 | * Keeps track of sequenced nodes, i.e. nodes that implicitly govern a sequence, as in (C2PQ (OPWF der) (OPWF Mann)). |
| 91 | * This is necessary in order to know when to take the sequence off the object stack, as the sequence is introduced by the |
| 92 | * first child but cannot be closed after this first child in order not to lose its siblings |
| 93 | */ |
| 94 | private LinkedList<Tree> sequencedNodes = new LinkedList<Tree>(); |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 95 | |
| 96 | private boolean hasSequentiableSiblings; |
| 97 | |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 98 | /** |
| 99 | * Keeps track of operands lists that are to be serialised in an inverted |
| 100 | * order (e.g. the IN() operator) compared to their AST representation. |
| 101 | */ |
| Joachim Bingel | cc1dc24 | 2014-01-15 09:32:38 +0000 | [diff] [blame] | 102 | private LinkedList<ArrayList<Object>> invertedOperandsLists = new LinkedList<ArrayList<Object>>(); |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 103 | |
| 104 | private LinkedList<ArrayList<ArrayList<Object>>> distributedOperandsLists = new LinkedList<ArrayList<ArrayList<Object>>>(); |
/**
 * Creates the tree for a COSMAS II query and processes the query right away,
 * so that the serialised request is available via {@link #getRequestMap()}
 * once the constructor returns.
 *
 * @param query the COSMAS II query string to process
 * @throws QueryException if {@link #process(String)} rejects the query
 */
public CosmasTree(String query) throws QueryException {
    this.query = query;
    process(query);
    // Diagnostic output goes through the logger rather than unconditionally to stdout.
    log.debug("Serialised query: {}", requestMap.get("query"));
}
| 116 | |
| 117 | @Override |
| 118 | public Map<String, Object> getRequestMap() { |
| 119 | return this.requestMap; |
| 120 | } |
| 121 | |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 122 | private void prepareContext() { |
| 123 | LinkedHashMap<String,Object> context = new LinkedHashMap<String,Object>(); |
| 124 | LinkedHashMap<String,Object> operands = new LinkedHashMap<String,Object>(); |
| 125 | LinkedHashMap<String,Object> relation = new LinkedHashMap<String,Object>(); |
| 126 | LinkedHashMap<String,Object> classMap = new LinkedHashMap<String,Object>(); |
| 127 | |
| 128 | operands.put("@id", "korap:operands"); |
| 129 | operands.put("@container", "@list"); |
| 130 | |
| 131 | relation.put("@id", "korap:relation"); |
| 132 | relation.put("@type", "korap:relation#types"); |
| 133 | |
| 134 | classMap.put("@id", "korap:class"); |
| 135 | classMap.put("@type", "xsd:integer"); |
| 136 | |
| 137 | context.put("korap", "http://korap.ids-mannheim.de/ns/query"); |
| 138 | context.put("@language", "de"); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 139 | context.put("@operands", operands); |
| 140 | context.put("@relation", relation); |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 141 | context.put("class", classMap); |
| 142 | context.put("query", "korap:query"); |
| 143 | context.put("filter", "korap:filter"); |
| 144 | context.put("meta", "korap:meta"); |
| 145 | |
| 146 | requestMap.put("@context", context); |
| 147 | } |
| 148 | |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 149 | @Override |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 150 | public void process(String query) throws QueryException { |
| 151 | Tree tree = null; |
| 152 | try { |
| 153 | tree = parseCosmasQuery(query); |
| 154 | } catch (RecognitionException e) { |
| 155 | throw new QueryException("Your query could not be processed. Please make sure it is well-formed."); |
| 156 | } catch (NullPointerException e) { |
| 157 | throw new QueryException("Your query could not be processed. Please make sure it is well-formed."); |
| 158 | } |
| 159 | |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 160 | System.out.println("Processing Cosmas"); |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 161 | prepareContext(); |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 162 | processNode(tree); |
| 163 | } |
| 164 | |
| 165 | private void processNode(Tree node) { |
| 166 | |
| 167 | // Top-down processing |
| 168 | if (visited.contains(node)) return; |
| 169 | else visited.add(node); |
| 170 | |
| 171 | |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 172 | String nodeCat = QueryUtils.getNodeCat(node); |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 173 | openNodeCats.push(nodeCat); |
| 174 | |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 175 | stackedObjects = 0; |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 176 | |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 177 | if (debug) { |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 178 | System.err.println(" "+objectStack); |
| 179 | System.out.println(openNodeCats); |
| 180 | } |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 181 | |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 182 | |
| 183 | /* *************************************** |
| 184 | * Processing individual node categories * |
| 185 | *****************************************/ |
| Joachim Bingel | d5161a1 | 2014-01-08 11:15:49 +0000 | [diff] [blame] | 186 | |
| 187 | |
| 188 | // Check for potential implicit sequences as in (C2PQ (OPWF der) (OPWF Mann)). The sequence is introduced |
| 189 | // by the first child if it (and its siblings) is sequentiable. |
| 190 | if (sequentiableCats.contains(nodeCat)) { |
| 191 | // for each node, check if parent has more than one child (-> could be implicit sequence) |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 192 | Tree parent = node.getParent(); |
| 193 | if (parent.getChildCount()>1) { |
| Joachim Bingel | d5161a1 | 2014-01-08 11:15:49 +0000 | [diff] [blame] | 194 | // if node is first child of parent... |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 195 | if (node == parent.getChild(0)) { |
| 196 | hasSequentiableSiblings = false; |
| 197 | for (int i=1; i<parent.getChildCount() ;i++) { |
| 198 | if (sequentiableCats.contains(QueryUtils.getNodeCat(parent.getChild(i)))) { |
| 199 | hasSequentiableSiblings = true; |
| 200 | } |
| Joachim Bingel | d5161a1 | 2014-01-08 11:15:49 +0000 | [diff] [blame] | 201 | } |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 202 | if (hasSequentiableSiblings) { |
| 203 | // Step I: create sequence |
| 204 | LinkedHashMap<String, Object> sequence = new LinkedHashMap<String, Object>(); |
| 205 | sequence.put("@type", "korap:sequence"); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 206 | sequence.put("@operands", new ArrayList<Object>()); |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 207 | // push sequence on object stack but don't increment stackedObjects counter since |
| 208 | // we've got to wait until the parent node is processed - therefore, add the parent |
| 209 | // to the sequencedNodes list and remove the sequence from the stack when the parent |
| 210 | // has been processed |
| 211 | objectStack.push(sequence); |
| 212 | sequencedNodes.push(parent); |
| 213 | // Step II: decide where to put sequence |
| Joachim Bingel | cc1dc24 | 2014-01-15 09:32:38 +0000 | [diff] [blame] | 214 | putIntoSuperObject(sequence, 1); |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 215 | } |
| Joachim Bingel | d5161a1 | 2014-01-08 11:15:49 +0000 | [diff] [blame] | 216 | } |
| 217 | } |
| 218 | } |
| 219 | |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 220 | // Nodes introducing tokens. Process all in the same manner, except for the fieldMap entry |
| Joachim Bingel | ffd65e3 | 2014-01-22 14:22:57 +0000 | [diff] [blame] | 221 | if (nodeCat.equals("OPWF") || nodeCat.equals("OPLEM")) { |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 222 | |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 223 | //Step I: get info |
| 224 | LinkedHashMap<String, Object> token = new LinkedHashMap<String, Object>(); |
| 225 | token.put("@type", "korap:token"); |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 226 | objectStack.push(token); |
| 227 | stackedObjects++; |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 228 | LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>(); |
| 229 | token.put("@value", fieldMap); |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 230 | |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 231 | fieldMap.put("@type", "korap:term"); |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 232 | // make category-specific fieldMap entry |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 233 | String attr = nodeCat.equals("OPWF") ? "orth" : "lemma"; |
| 234 | String value = node.getChild(0).toStringTree().replaceAll("\"", ""); |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 235 | fieldMap.put("@value", value); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 236 | fieldMap.put("@attr", attr); |
| Joachim Bingel | ffd65e3 | 2014-01-22 14:22:57 +0000 | [diff] [blame] | 237 | |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 238 | // negate field (see above) |
| 239 | if (negate) { |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 240 | fieldMap.put("@relation", "!="); |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 241 | } else { |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 242 | fieldMap.put("@relation", "="); |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 243 | } |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 244 | //Step II: decide where to put |
| Joachim Bingel | cc1dc24 | 2014-01-15 09:32:38 +0000 | [diff] [blame] | 245 | putIntoSuperObject(token, 1); |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 246 | } |
| 247 | |
| Joachim Bingel | ffd65e3 | 2014-01-22 14:22:57 +0000 | [diff] [blame] | 248 | if (nodeCat.equals("OPMORPH")) { |
| 249 | //Step I: get info |
| 250 | LinkedHashMap<String, Object> token = new LinkedHashMap<String, Object>(); |
| 251 | token.put("@type", "korap:token"); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 252 | LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>(); |
| 253 | token.put("@value", fieldMap); |
| Joachim Bingel | ffd65e3 | 2014-01-22 14:22:57 +0000 | [diff] [blame] | 254 | |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 255 | fieldMap.put("@type", "korap:term"); |
| 256 | // fieldMap.put("@value", "morph:"+node.getChild(0).toString().replace(" ", "_")); |
| 257 | fieldMap.put("@value", node.getChild(0).toString().replace(" ", "_")); |
| 258 | // make category-specific fieldMap entry |
| 259 | // negate field (see above) |
| 260 | if (negate) { |
| 261 | fieldMap.put("@relation", "!="); |
| Joachim Bingel | ffd65e3 | 2014-01-22 14:22:57 +0000 | [diff] [blame] | 262 | } else { |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 263 | fieldMap.put("@relation", "="); |
| Joachim Bingel | ffd65e3 | 2014-01-22 14:22:57 +0000 | [diff] [blame] | 264 | } |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 265 | // List<String> morphValues = QueryUtils.parseMorph(node.getChild(0).toStringTree()); |
| 266 | // System.err.println(morphValues); |
| 267 | // if (morphValues.size() == 1) { |
| 268 | // LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>(); |
| 269 | // token.put("@value", fieldMap); |
| 270 | // |
| 271 | // fieldMap.put("@type", "korap:term"); |
| 272 | // fieldMap.put("@value", morphValues.get(0)); |
| 273 | // // make category-specific fieldMap entry |
| 274 | // // negate field (see above) |
| 275 | // if (negate) { |
| 276 | // fieldMap.put("@relation", "!="); |
| 277 | // } else { |
| 278 | // fieldMap.put("@relation", "="); |
| 279 | // } |
| 280 | // } else { |
| 281 | // LinkedHashMap<String, Object> conjGroup = new LinkedHashMap<String, Object>(); |
| 282 | // token.put("@value", conjGroup); |
| 283 | // ArrayList<Object> conjOperands = new ArrayList<Object>(); |
| 284 | // conjGroup.put("@type", "korap:group"); |
| 285 | // conjGroup.put("@relation", "and"); |
| 286 | // conjGroup.put("@operands", conjOperands); |
| 287 | // for (String value : morphValues) { |
| 288 | // LinkedHashMap<String, Object> fieldMap = new LinkedHashMap<String, Object>(); |
| 289 | // token.put("@value", fieldMap); |
| 290 | // |
| 291 | // fieldMap.put("@type", "korap:term"); |
| 292 | // fieldMap.put("@value", value); |
| 293 | // // make category-specific fieldMap entry |
| 294 | // // negate field (see above) |
| 295 | // if (negate) { |
| 296 | // fieldMap.put("@relation", "!="); |
| 297 | // } else { |
| 298 | // fieldMap.put("@relation", "="); |
| 299 | // } |
| 300 | // } |
| 301 | // } |
| Joachim Bingel | ffd65e3 | 2014-01-22 14:22:57 +0000 | [diff] [blame] | 302 | |
| 303 | |
| 304 | //Step II: decide where to put |
| 305 | putIntoSuperObject(token, 0); |
| 306 | } |
| 307 | |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 308 | if (nodeCat.equals("OPELEM")) { |
| 309 | // Step I: create element |
| 310 | LinkedHashMap<String, Object> elem = new LinkedHashMap<String, Object>(); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 311 | elem.put("@type", "korap:span"); |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 312 | elem.put("@value", node.getChild(0).getChild(0).toStringTree().toLowerCase()); |
| 313 | //Step II: decide where to put |
| Joachim Bingel | cc1dc24 | 2014-01-15 09:32:38 +0000 | [diff] [blame] | 314 | putIntoSuperObject(elem); |
| 315 | } |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 316 | |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 317 | if (nodeCat.equals("OPLABEL")) { |
| 318 | // Step I: create element |
| 319 | LinkedHashMap<String, Object> elem = new LinkedHashMap<String, Object>(); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 320 | elem.put("@type", "korap:span"); |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 321 | elem.put("@value", node.getChild(0).toStringTree().replaceAll("<|>", "")); |
| 322 | //Step II: decide where to put |
| Joachim Bingel | cc1dc24 | 2014-01-15 09:32:38 +0000 | [diff] [blame] | 323 | putIntoSuperObject(elem); |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 324 | } |
| 325 | |
| Joachim Bingel | 3a9f793 | 2014-01-07 17:11:31 +0000 | [diff] [blame] | 326 | if (nodeCat.equals("OPOR") || nodeCat.equals("OPAND") || nodeCat.equals("OPNOT")) { |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 327 | // Step I: create group |
| 328 | LinkedHashMap<String, Object> disjunction = new LinkedHashMap<String, Object>(); |
| 329 | disjunction.put("@type", "korap:group"); |
| Joachim Bingel | 3a9f793 | 2014-01-07 17:11:31 +0000 | [diff] [blame] | 330 | String relation = "or"; |
| 331 | if (nodeCat.equals("OPAND")) relation = "and"; |
| 332 | if (nodeCat.equals("OPNOT")) relation = "not"; |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 333 | disjunction.put("@relation", relation); |
| 334 | disjunction.put("@operands", new ArrayList<Object>()); |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 335 | objectStack.push(disjunction); |
| 336 | stackedObjects++; |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 337 | // Step II: decide where to put |
| Joachim Bingel | cc1dc24 | 2014-01-15 09:32:38 +0000 | [diff] [blame] | 338 | putIntoSuperObject(disjunction, 1); |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 339 | } |
| 340 | |
| 341 | if (nodeCat.equals("OPPROX")) { |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 342 | //TODO direction "both": wrap in "or" group with operands once flipped, once not |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 343 | // collect info |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 344 | Tree prox_opts = node.getChild(0); |
| 345 | Tree typ = prox_opts.getChild(0); |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 346 | Tree dist_list = prox_opts.getChild(1); |
| Joachim Bingel | 89cceac | 2014-01-08 15:51:08 +0000 | [diff] [blame] | 347 | // Step I: create group |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 348 | LinkedHashMap<String, Object> proxSequence = new LinkedHashMap<String, Object>(); |
| 349 | proxSequence.put("@type", "korap:sequence"); |
| 350 | objectStack.push(proxSequence); |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 351 | stackedObjects++; |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 352 | // if (openNodeCats.get(1).equals("OPALL")) proxSequence.put("match", "all"); |
| 353 | // else if (openNodeCats.get(1).equals("OPNHIT")) proxSequence.put("match", "between"); |
| 354 | // else proxSequence.put("match", "operands"); |
| Joachim Bingel | 89cceac | 2014-01-08 15:51:08 +0000 | [diff] [blame] | 355 | ArrayList<Object> constraints = new ArrayList<Object>(); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 356 | boolean exclusion = ! typ.getChild(0).toStringTree().equals("PROX"); |
| 357 | |
| 358 | String inOrder = "true"; |
| 359 | proxSequence.put("@inOrder", inOrder); |
| 360 | proxSequence.put("@constraints", constraints); |
| 361 | |
| Joachim Bingel | 3f0850c | 2014-01-17 16:50:10 +0000 | [diff] [blame] | 362 | ArrayList<Object> operands = new ArrayList<Object>(); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 363 | proxSequence.put("@operands", operands); |
| Joachim Bingel | 89cceac | 2014-01-08 15:51:08 +0000 | [diff] [blame] | 364 | |
| 365 | // if only one dist_info, put directly into constraints |
| 366 | if (dist_list.getChildCount()==1) { |
| 367 | String direction = dist_list.getChild(0).getChild(0).getChild(0).toStringTree().toLowerCase(); |
| 368 | String min = dist_list.getChild(0).getChild(1).getChild(0).toStringTree(); |
| 369 | String max = dist_list.getChild(0).getChild(1).getChild(1).toStringTree(); |
| 370 | String meas = dist_list.getChild(0).getChild(2).getChild(0).toStringTree(); |
| 371 | if (min.equals("VAL0")) { |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 372 | min="0"; |
| Joachim Bingel | 89cceac | 2014-01-08 15:51:08 +0000 | [diff] [blame] | 373 | } |
| Joachim Bingel | 3f0850c | 2014-01-17 16:50:10 +0000 | [diff] [blame] | 374 | if (direction.equals("minus")) { |
| 375 | direction = "plus"; |
| 376 | invertedOperandsLists.add(operands); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 377 | } else if (direction.equals("both")) { |
| 378 | inOrder="false"; |
| Joachim Bingel | 3f0850c | 2014-01-17 16:50:10 +0000 | [diff] [blame] | 379 | } |
| Joachim Bingel | 89cceac | 2014-01-08 15:51:08 +0000 | [diff] [blame] | 380 | LinkedHashMap<String, Object> distance = new LinkedHashMap<String, Object>(); |
| 381 | distance.put("@type", "korap:distance"); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 382 | distance.put("@measure", meas); |
| 383 | distance.put("@min", Integer.parseInt(min)); |
| 384 | distance.put("@max", Integer.parseInt(max)); |
| 385 | if (exclusion) { |
| 386 | distance.put("@exclude", exclusion); |
| 387 | } |
| Joachim Bingel | 89cceac | 2014-01-08 15:51:08 +0000 | [diff] [blame] | 388 | constraints.add(distance); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 389 | proxSequence.put("@inOrder", inOrder); |
| Joachim Bingel | 89cceac | 2014-01-08 15:51:08 +0000 | [diff] [blame] | 390 | } |
| 391 | // otherwise, create group and add info there |
| 392 | else { |
| 393 | LinkedHashMap<String, Object> distanceGroup = new LinkedHashMap<String, Object>(); |
| 394 | ArrayList<Object> groupOperands = new ArrayList<Object>(); |
| 395 | distanceGroup.put("@type", "korap:group"); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 396 | distanceGroup.put("@relation", "and"); |
| 397 | distanceGroup.put("@operands", groupOperands); |
| Joachim Bingel | 89cceac | 2014-01-08 15:51:08 +0000 | [diff] [blame] | 398 | constraints.add(distanceGroup); |
| 399 | for (int i=0; i<dist_list.getChildCount(); i++) { |
| 400 | String direction = dist_list.getChild(i).getChild(0).getChild(0).toStringTree().toLowerCase(); |
| 401 | String min = dist_list.getChild(i).getChild(1).getChild(0).toStringTree(); |
| 402 | String max = dist_list.getChild(i).getChild(1).getChild(1).toStringTree(); |
| 403 | String meas = dist_list.getChild(i).getChild(2).getChild(0).toStringTree(); |
| 404 | if (min.equals("VAL0")) { |
| 405 | min=max; |
| 406 | } |
| 407 | LinkedHashMap<String, Object> distance = new LinkedHashMap<String, Object>(); |
| 408 | distance.put("@type", "korap:distance"); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 409 | distance.put("@measure", meas); |
| 410 | distance.put("@min", min); |
| 411 | distance.put("@max", max); |
| 412 | if (exclusion) { |
| 413 | distance.put("@exclude", exclusion); |
| 414 | } |
| Joachim Bingel | 89cceac | 2014-01-08 15:51:08 +0000 | [diff] [blame] | 415 | groupOperands.add(distance); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 416 | if (direction.equals("plus")) { |
| 417 | inOrder="true"; |
| 418 | } else if (direction.equals("minus")) { |
| 419 | inOrder="true"; |
| 420 | invertedOperandsLists.add(operands); |
| 421 | } |
| Joachim Bingel | 89cceac | 2014-01-08 15:51:08 +0000 | [diff] [blame] | 422 | } |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 423 | proxSequence.put("@inOrder", inOrder); |
| 424 | |
| Joachim Bingel | 89cceac | 2014-01-08 15:51:08 +0000 | [diff] [blame] | 425 | } |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 426 | // Step II: decide where to put |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 427 | putIntoSuperObject(proxSequence, 1); |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 428 | } |
| 429 | |
| Joachim Bingel | d5161a1 | 2014-01-08 11:15:49 +0000 | [diff] [blame] | 430 | // inclusion or overlap |
| 431 | if (nodeCat.equals("OPIN") || nodeCat.equals("OPOV")) { |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 432 | // Step I: create group |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 433 | LinkedHashMap<String, Object> submatchgroup = new LinkedHashMap<String, Object>(); |
| 434 | submatchgroup.put("@type", "korap:group"); |
| 435 | submatchgroup.put("@relation", "submatch"); |
| 436 | submatchgroup.put("@classRef", "1"); |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 437 | |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 438 | ArrayList<Object> submatchoperands = new ArrayList<Object>(); |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 439 | LinkedHashMap<String, Object> posgroup = new LinkedHashMap<String, Object>(); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 440 | submatchgroup.put("@operands", submatchoperands); |
| 441 | submatchoperands.add(posgroup); |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 442 | posgroup.put("@type", "korap:group"); |
| Joachim Bingel | 84e33df | 2014-01-31 14:02:46 +0000 | [diff] [blame] | 443 | // String relation = nodeCat.equals("OPIN") ? "position" : "overlaps"; |
| 444 | posgroup.put("@relation", "position"); |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 445 | |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 446 | if (nodeCat.equals("OPIN")) { |
| 447 | parseOPINOptions(node, posgroup); |
| Joachim Bingel | cc1dc24 | 2014-01-15 09:32:38 +0000 | [diff] [blame] | 448 | } else { |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 449 | parseOPOVOptions(node, posgroup); |
| Joachim Bingel | d5161a1 | 2014-01-08 11:15:49 +0000 | [diff] [blame] | 450 | } |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 451 | |
| 452 | |
| Joachim Bingel | cc1dc24 | 2014-01-15 09:32:38 +0000 | [diff] [blame] | 453 | ArrayList<Object> posoperands = new ArrayList<Object>(); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 454 | posgroup.put("@operands", posoperands); |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 455 | objectStack.push(posgroup); |
| Joachim Bingel | cc1dc24 | 2014-01-15 09:32:38 +0000 | [diff] [blame] | 456 | // mark this an inverted list |
| 457 | invertedOperandsLists.push(posoperands); |
| Joachim Bingel | d5161a1 | 2014-01-08 11:15:49 +0000 | [diff] [blame] | 458 | stackedObjects++; |
| 459 | |
| 460 | // Step II: decide where to put |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 461 | putIntoSuperObject(submatchgroup, 1); |
| Joachim Bingel | d5161a1 | 2014-01-08 11:15:49 +0000 | [diff] [blame] | 462 | } |
| 463 | |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 464 | |
| 465 | // Wrap the first argument of an #IN operator in a class group |
| Joachim Bingel | cc1dc24 | 2014-01-15 09:32:38 +0000 | [diff] [blame] | 466 | if (nodeCat.equals("ARG1") && (openNodeCats.get(1).equals("OPIN") || openNodeCats.get(1).equals("OPOV"))) { |
| 467 | // Step I: create group |
| 468 | LinkedHashMap<String, Object> classGroup = new LinkedHashMap<String, Object>(); |
| 469 | classGroup.put("@type", "korap:group"); |
| 470 | classGroup.put("class", "1"); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 471 | classGroup.put("@operands", new ArrayList<Object>()); |
| Joachim Bingel | cc1dc24 | 2014-01-15 09:32:38 +0000 | [diff] [blame] | 472 | objectStack.push(classGroup); |
| 473 | stackedObjects++; |
| 474 | // Step II: decide where to put |
| 475 | putIntoSuperObject(classGroup, 1); |
| 476 | } |
| 477 | |
| 478 | |
| Joachim Bingel | 89cceac | 2014-01-08 15:51:08 +0000 | [diff] [blame] | 479 | if (nodeCat.equals("OPALL") || nodeCat.equals("OPNHIT")) { |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 480 | // proxGroupMatching = nodeCat.equals("OPALL") ? "all" : "exlcude"; |
| Joachim Bingel | 89cceac | 2014-01-08 15:51:08 +0000 | [diff] [blame] | 481 | } |
| 482 | |
| Joachim Bingel | d5161a1 | 2014-01-08 11:15:49 +0000 | [diff] [blame] | 483 | if (nodeCat.equals("OPEND") || nodeCat.equals("OPBEG")) { |
| 484 | // Step I: create group |
| Joachim Bingel | 3f0850c | 2014-01-17 16:50:10 +0000 | [diff] [blame] | 485 | LinkedHashMap<String, Object> beggroup = new LinkedHashMap<String, Object>(); |
| 486 | beggroup.put("@type", "korap:group"); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 487 | beggroup.put("@relation", "submatch"); |
| 488 | ArrayList<Integer> spanRef = new ArrayList<Integer>(); |
| 489 | if (nodeCat.equals("OPBEG")) { |
| 490 | spanRef.add(0); spanRef.add(1); |
| 491 | } else { |
| 492 | spanRef.add(-1); spanRef.add(1); |
| 493 | } |
| 494 | beggroup.put("@spanRef", spanRef); |
| 495 | beggroup.put("@operands", new ArrayList<Object>()); |
| Joachim Bingel | 3f0850c | 2014-01-17 16:50:10 +0000 | [diff] [blame] | 496 | objectStack.push(beggroup); |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 497 | stackedObjects++; |
| 498 | |
| 499 | // Step II: decide where to put |
| Joachim Bingel | 3f0850c | 2014-01-17 16:50:10 +0000 | [diff] [blame] | 500 | putIntoSuperObject(beggroup, 1); |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 501 | } |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 502 | |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 503 | if (nodeCat.equals("OPBED")) { |
| 504 | // Step I: create group |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 505 | int optsChild = node.getChildCount()-1; |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 506 | Tree conditions = node.getChild(optsChild).getChild(0); |
| 507 | // Distinguish two cases. Normal case: query has just one condition, like #BED(XY, sa) ... |
| Joachim Bingel | 3f0850c | 2014-01-17 16:50:10 +0000 | [diff] [blame] | 508 | if (conditions.getChildCount()==1) { |
| 509 | LinkedHashMap<String, Object> posgroup = new LinkedHashMap<String, Object>(); |
| 510 | posgroup.put("@type", "korap:group"); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 511 | posgroup.put("@relation", "position"); |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 512 | CosmasCondition c = new CosmasCondition(conditions.getChild(0)); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 513 | posgroup.put("@position", c.position); |
| 514 | if (c.negated) posgroup.put("@relation", "!="); |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 515 | ArrayList<Object> operands = new ArrayList<Object>(); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 516 | posgroup.put("@operands", operands); |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 517 | LinkedHashMap<String, Object> bedElem = new LinkedHashMap<String, Object>(); |
| 518 | operands.add(bedElem); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 519 | bedElem.put("@type", "korap:span"); |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 520 | bedElem.put("@value", c.elem); |
| Joachim Bingel | 3f0850c | 2014-01-17 16:50:10 +0000 | [diff] [blame] | 521 | objectStack.push(posgroup); |
| 522 | stackedObjects++; |
| 523 | // Step II: decide where to put |
| 524 | putIntoSuperObject(posgroup, 1); |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 525 | // ... or the query has several conditions specified, like #BED(XY, sa,-pa). In that case, |
| 526 | // create an 'and' group and embed the position groups in its operands |
| Joachim Bingel | 3f0850c | 2014-01-17 16:50:10 +0000 | [diff] [blame] | 527 | } else { |
| 528 | // node has several conditions (like 'sa, -pa') |
| 529 | // -> create 'and' group and embed all position groups there |
| 530 | LinkedHashMap<String, Object> conjunct = new LinkedHashMap<String, Object>(); |
| 531 | conjunct.put("@type", "korap:group"); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 532 | conjunct.put("@relation", "and"); |
| Joachim Bingel | 3f0850c | 2014-01-17 16:50:10 +0000 | [diff] [blame] | 533 | ArrayList<Object> operands = new ArrayList<Object>(); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 534 | conjunct.put("@operands", operands); |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 535 | ArrayList<ArrayList<Object>> distributedOperands = new ArrayList<ArrayList<Object>>(); |
| 536 | |
| Joachim Bingel | 3f0850c | 2014-01-17 16:50:10 +0000 | [diff] [blame] | 537 | for (int i=0; i<conditions.getChildCount(); i++) { |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 538 | // for each condition, create a position group. problem: how to get argument into every operands list? |
| 539 | // -> use distributedOperandsLists |
| 540 | LinkedHashMap<String, Object> posGroup = new LinkedHashMap<String, Object>(); |
| 541 | operands.add(posGroup); |
| 542 | |
| 543 | CosmasCondition c = new CosmasCondition(conditions.getChild(i)); |
| 544 | posGroup.put("@type", "korap:group"); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 545 | posGroup.put("@relation", "position"); |
| 546 | posGroup.put("@position", c.position); |
| 547 | if (c.negated) posGroup.put("@exclude", "true"); |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 548 | ArrayList<Object> posOperands = new ArrayList<Object>(); |
| 549 | distributedOperands.add(posOperands); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 550 | posGroup.put("@operands", posOperands); |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 551 | LinkedHashMap<String, Object> bedElem = new LinkedHashMap<String, Object>(); |
| 552 | posOperands.add(bedElem); |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 553 | bedElem.put("@type", "korap:span"); |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 554 | bedElem.put("@value", c.elem); |
| 555 | |
| 556 | |
| Joachim Bingel | 3f0850c | 2014-01-17 16:50:10 +0000 | [diff] [blame] | 557 | } |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 558 | putIntoSuperObject(conjunct, 0); |
| 559 | distributedOperandsLists.push(distributedOperands); |
| Joachim Bingel | 3f0850c | 2014-01-17 16:50:10 +0000 | [diff] [blame] | 560 | } |
| 561 | |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 562 | } |
| 563 | |
| Joachim Bingel | cc1dc24 | 2014-01-15 09:32:38 +0000 | [diff] [blame] | 564 | |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 565 | objectsToPop.push(stackedObjects); |
| 566 | |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 567 | /* |
| 568 | **************************************************************** |
| 569 | **************************************************************** |
| 570 | * recursion until 'request' node (root of tree) is processed * |
| 571 | **************************************************************** |
| 572 | **************************************************************** |
| 573 | */ |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 574 | for (int i=0; i<node.getChildCount(); i++) { |
| 575 | Tree child = node.getChild(i); |
| 576 | processNode(child); |
| 577 | } |
| 578 | |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 579 | /* |
| 580 | ************************************************************** |
| 581 | * Stuff that happens after processing the children of a node * |
| 582 | ************************************************************** |
| 583 | */ |
| 584 | |
| Joachim Bingel | d5161a1 | 2014-01-08 11:15:49 +0000 | [diff] [blame] | 585 | // remove sequence from object stack if node is implicitly sequenced |
| 586 | if (sequencedNodes.size()>0) { |
| 587 | if (node == sequencedNodes.getFirst()) { |
| 588 | objectStack.pop(); |
| 589 | sequencedNodes.pop(); |
| 590 | } |
| 591 | } |
| 592 | |
| Joachim Bingel | eecc765 | 2014-01-11 17:21:07 +0000 | [diff] [blame] | 593 | for (int i=0; i<objectsToPop.get(0); i++) { |
| 594 | objectStack.pop(); |
| 595 | } |
| 596 | objectsToPop.pop(); |
| 597 | |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 598 | if (nodeCat.equals("ARG2") && openNodeCats.get(1).equals("OPNOT")) { |
| 599 | negate = false; |
| 600 | } |
| 601 | |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 602 | openNodeCats.pop(); |
| 603 | |
| 604 | } |
| 605 | |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 606 | |
| Joachim Bingel | ffd65e3 | 2014-01-22 14:22:57 +0000 | [diff] [blame] | 607 | |
| 608 | |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 609 | private void parseOPINOptions(Tree node, LinkedHashMap<String, Object> posgroup) { |
| 610 | Tree posnode = QueryUtils.getFirstChildWithCat(node, "POS"); |
| 611 | Tree rangenode = QueryUtils.getFirstChildWithCat(node, "RANGE"); |
| 612 | Tree exclnode = QueryUtils.getFirstChildWithCat(node, "EXCL"); |
| 613 | Tree groupnode = QueryUtils.getFirstChildWithCat(node, "GROUP"); |
| 614 | boolean negatePosition = false; |
| 615 | |
| 616 | String position = ""; |
| 617 | if (posnode != null) { |
| 618 | String value = posnode.getChild(0).toStringTree(); |
| Joachim Bingel | 84e33df | 2014-01-31 14:02:46 +0000 | [diff] [blame] | 619 | position = translateTextAreaArgument(value, "in"); |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 620 | if (value.equals("N")) { |
| 621 | negatePosition = !negatePosition; |
| 622 | } |
| 623 | } else { |
| 624 | position = "contains"; |
| 625 | } |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 626 | posgroup.put("@position", position); |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 627 | position = openNodeCats.get(1).equals("OPIN") ? "contains" : "full"; |
| 628 | |
| 629 | if (rangenode != null) { |
| 630 | String range = rangenode.getChild(0).toStringTree(); |
| 631 | posgroup.put("range", range.toLowerCase()); |
| 632 | } |
| 633 | |
| 634 | if (exclnode != null) { |
| 635 | if (exclnode.getChild(0).toStringTree().equals("YES")) { |
| 636 | negatePosition = !negatePosition; |
| 637 | } |
| 638 | } |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 639 | System.err.println(negatePosition); |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 640 | if (negatePosition) { |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 641 | posgroup.put("@exclude", "true"); |
| 642 | // negate = !negate; |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 643 | } |
| 644 | |
| 645 | if (groupnode != null) { |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 646 | String grouping = groupnode.getChild(0).toStringTree().equals("max") ? "true" : "false"; |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 647 | posgroup.put("grouping", grouping); |
| 648 | } |
| 649 | } |
| 650 | |
| 651 | private void parseOPOVOptions(Tree node, LinkedHashMap<String, Object> posgroup) { |
| 652 | Tree posnode = QueryUtils.getFirstChildWithCat(node, "POS"); |
| 653 | Tree exclnode = QueryUtils.getFirstChildWithCat(node, "EXCL"); |
| 654 | Tree groupnode = QueryUtils.getFirstChildWithCat(node, "GROUP"); |
| 655 | |
| 656 | String position = ""; |
| 657 | if (posnode != null) { |
| 658 | String value = posnode.getChild(0).toStringTree(); |
| Joachim Bingel | 84e33df | 2014-01-31 14:02:46 +0000 | [diff] [blame] | 659 | position = "-"+translateTextAreaArgument(value, "ov"); |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 660 | } |
| Joachim Bingel | 84e33df | 2014-01-31 14:02:46 +0000 | [diff] [blame] | 661 | posgroup.put("@position", "overlaps"+position); |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 662 | |
| 663 | if (exclnode != null) { |
| 664 | if (exclnode.getChild(0).toStringTree().equals("YES")) { |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 665 | posgroup.put("@relation", "!="); |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 666 | } |
| 667 | } |
| 668 | if (groupnode != null) { |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 669 | String grouping = groupnode.getChild(0).toStringTree().equals("@max") ? "true" : "false"; |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 670 | posgroup.put("grouping", grouping); |
| 671 | } |
| 672 | |
| 673 | } |
| 674 | |
| 675 | /** |
| 676 | * Translates the text area specifications (position option arguments) to terms used in serealisation. |
| 677 | * For the allowed argument types and their values for OPIN and OPOV, see |
| 678 | * http://www.ids-mannheim.de/cosmas2/win-app/hilfe/suchanfrage/eingabe-grafisch/syntax/ARGUMENT_I.html or |
| 679 | * http://www.ids-mannheim.de/cosmas2/win-app/hilfe/suchanfrage/eingabe-grafisch/syntax/ARGUMENT_O.html, respectively. |
| 680 | * @param argument |
| Joachim Bingel | 84e33df | 2014-01-31 14:02:46 +0000 | [diff] [blame] | 681 | * @param mode |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 682 | * @return |
| 683 | */ |
| Joachim Bingel | 84e33df | 2014-01-31 14:02:46 +0000 | [diff] [blame] | 684 | private String translateTextAreaArgument(String argument, String mode) { |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 685 | String position = ""; |
| 686 | switch (argument) { |
| 687 | case "L": |
| Joachim Bingel | 84e33df | 2014-01-31 14:02:46 +0000 | [diff] [blame] | 688 | position = mode.equals("in") ? "startswith" : "left"; |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 689 | break; |
| 690 | case "R": |
| Joachim Bingel | 84e33df | 2014-01-31 14:02:46 +0000 | [diff] [blame] | 691 | position = mode.equals("in") ? "endswith" : "right"; |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 692 | break; |
| 693 | case "F": |
| 694 | position = "leftrightmatch"; |
| 695 | break; |
| 696 | case "FE": |
| Joachim Bingel | 84e33df | 2014-01-31 14:02:46 +0000 | [diff] [blame] | 697 | position = "matches"; |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 698 | break; |
| 699 | case "FI": |
| 700 | position = "leftrightmatch-noident"; |
| 701 | break; |
| 702 | case "N": // for OPIN only - exclusion constraint formulated in parseOPINOptions |
| 703 | position = "leftrightmatch"; |
| 704 | break; |
| 705 | case "X": // for OPOV only |
| 706 | position = "residual"; |
| 707 | break; |
| 708 | } |
| 709 | return position; |
| 710 | } |
| 711 | |
| Joachim Bingel | cc1dc24 | 2014-01-15 09:32:38 +0000 | [diff] [blame] | 712 | @SuppressWarnings("unchecked") |
| 713 | private void putIntoSuperObject(LinkedHashMap<String, Object> object, int objStackPosition) { |
| Joachim Bingel | e98d088 | 2014-01-21 12:58:54 +0000 | [diff] [blame] | 714 | if (distributedOperandsLists.size()>0) { |
| 715 | ArrayList<ArrayList<Object>> distributedOperands = distributedOperandsLists.pop(); |
| 716 | for (ArrayList<Object> operands : distributedOperands) { |
| 717 | operands.add(object); |
| 718 | } |
| 719 | } else if (objectStack.size()>objStackPosition) { |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 720 | ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(objStackPosition).get("@operands"); |
| Joachim Bingel | cc1dc24 | 2014-01-15 09:32:38 +0000 | [diff] [blame] | 721 | if (!invertedOperandsLists.contains(topObjectOperands)) { |
| 722 | topObjectOperands.add(object); |
| 723 | } else { |
| 724 | topObjectOperands.add(0, object); |
| 725 | } |
| 726 | |
| 727 | } else { |
| 728 | requestMap.put("query", object); |
| 729 | } |
| 730 | } |
| 731 | |
| 732 | private void putIntoSuperObject(LinkedHashMap<String, Object> object) { |
| 733 | putIntoSuperObject(object, 0); |
| 734 | } |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 735 | |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 736 | |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 737 | private static Tree parseCosmasQuery(String p) throws RecognitionException { |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 738 | Tree tree = null; |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 739 | ANTLRStringStream ss = new ANTLRStringStream(p); |
| 740 | c2psLexer lex = new c2psLexer(ss); |
| 741 | org.antlr.runtime.CommonTokenStream tokens = new org.antlr.runtime.CommonTokenStream(lex); //v3 |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 742 | cosmasParser = new c2psParser(tokens); |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 743 | c2psParser.c2ps_query_return c2Return = cosmasParser.c2ps_query(); // statt t(). |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 744 | // AST Tree anzeigen: |
| 745 | tree = (Tree)c2Return.getTree(); |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 746 | |
| 747 | String treestring = tree.toStringTree(); |
| 748 | if (treestring.contains("<mismatched token") || treestring.contains("<error") || treestring.contains("<unexpected")) { |
| 749 | throw new RecognitionException(); |
| 750 | } |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 751 | return tree; |
| 752 | } |
| 753 | |
| 754 | /** |
| 755 | * @param args |
| 756 | */ |
| 757 | public static void main(String[] args) { |
| 758 | /* |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 759 | * For debugging |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 760 | */ |
| 761 | String[] queries = new String[] { |
| 762 | /* COSMAS 2 */ |
| Joachim Bingel | ba9a0ab | 2014-01-29 10:12:25 +0000 | [diff] [blame] | 763 | "MORPH(V)", |
| 764 | "MORPH(V PRES)", |
| 765 | "wegen #IN(%, L) <s>", |
| 766 | "wegen #IN(%) <s>", |
| 767 | "(Mann oder Frau) #IN <s>", |
| Joachim Bingel | 84e33df | 2014-01-31 14:02:46 +0000 | [diff] [blame] | 768 | "#BEG(der /w3:5 Mann) /+w10 kommt", |
| 769 | "&würde /w0 MORPH(V)" |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 770 | }; |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 771 | CosmasTree.debug=true; |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 772 | for (String q : queries) { |
| 773 | try { |
| 774 | System.out.println(q); |
| Joachim Bingel | 87480d0 | 2014-01-17 14:07:46 +0000 | [diff] [blame] | 775 | try { |
| 776 | System.out.println(parseCosmasQuery(q).toStringTree()); |
| 777 | @SuppressWarnings("unused") |
| 778 | CosmasTree act = new CosmasTree(q); |
| 779 | } catch (RecognitionException e) { |
| 780 | e.printStackTrace(); |
| 781 | } catch (QueryException e) { |
| 782 | e.printStackTrace(); |
| 783 | } |
| Joachim Bingel | 5c93f90 | 2013-11-19 14:49:04 +0000 | [diff] [blame] | 784 | System.out.println(); |
| 785 | |
| 786 | } catch (NullPointerException npe) { |
| 787 | npe.printStackTrace(); |
| 788 | System.out.println("null\n"); |
| 789 | } |
| 790 | } |
| 791 | } |
| 792 | } |