| Michael Hanl | 4fe41cc | 2013-12-10 17:59:51 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.serialize; |
| 2 | |
| Michael Hanl | 4dc6679 | 2014-02-14 10:47:58 +0000 | [diff] [blame] | 3 | import de.ids_mannheim.korap.util.QueryException; |
| Michael Hanl | 4dc6679 | 2014-02-14 10:47:58 +0000 | [diff] [blame] | 4 | import org.apache.commons.lang.StringUtils; |
| 5 | |
| Michael Hanl | 4fe41cc | 2013-12-10 17:59:51 +0000 | [diff] [blame] | 6 | import java.util.ArrayList; |
| 7 | import java.util.LinkedHashMap; |
| 8 | import java.util.List; |
| 9 | import java.util.Map; |
| Joachim Bingel | b5f7bf0 | 2014-01-07 16:36:54 +0000 | [diff] [blame] | 10 | |
| Michael Hanl | 4fe41cc | 2013-12-10 17:59:51 +0000 | [diff] [blame] | 11 | /** |
| 12 | * @author hanl |
| 13 | * @date 10/12/2013 |
| 14 | */ |
| 15 | public class QueryUtils { |
| Joachim Bingel | c8a28e4 | 2014-04-24 15:06:42 +0000 | [diff] [blame] | 16 | // |
| 17 | // /** |
| 18 | // * Returns the category (or 'label') of the root of a ParseTree (ANTLR 4). |
| 19 | // * |
| 20 | // * @param node |
| 21 | // * @return |
| 22 | // */ |
| 23 | // public static String getNodeCat(ParseTree node) { |
| 24 | // String nodeCat = node.toStringTree(parser); |
| 25 | // Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace |
| 26 | // Matcher m = p.matcher(node.toStringTree(parser)); |
| 27 | // if (m.find()) { |
| 28 | // nodeCat = m.group(1); |
| 29 | // } |
| 30 | // return nodeCat; |
| 31 | // } |
| 32 | // |
| 33 | // /** |
| 34 | // * Returns the category (or 'label') of the root of a ParseTree (ANTLR 3). |
| 35 | // * |
| 36 | // * @param node |
| 37 | // * @return |
| 38 | // */ |
| 39 | // public static String getNodeCat(Tree node) { |
| 40 | // String nodeCat = node.toStringTree(); |
| 41 | // Pattern p = Pattern.compile("\\((.*?)\\s"); // from opening parenthesis to 1st whitespace |
| 42 | // Matcher m = p.matcher(node.toStringTree()); |
| 43 | // if (m.find()) { |
| 44 | // nodeCat = m.group(1); |
| 45 | // } |
| 46 | // return nodeCat; |
| 47 | // } |
| 48 | // |
| 49 | // |
| 50 | // /** |
| 51 | // * Tests whether a certain node has a child by a certain name |
| 52 | // * |
| 53 | // * @param node The parent node. |
| 54 | // * @param childCat The category of the potential child. |
| 55 | // * @return true iff one or more children belong to the specified category |
| 56 | // */ |
| 57 | // public static boolean hasChild(Tree node, String childCat) { |
| 58 | // for (int i = 0; i < node.getChildCount(); i++) { |
| 59 | // if (getNodeCat(node.getChild(i)).equals(childCat)) { |
| 60 | // return true; |
| 61 | // } |
| 62 | // } |
| 63 | // return false; |
| 64 | // } |
| 65 | // |
| 66 | // /** |
| 67 | // * Tests whether a certain node has a child by a certain name |
| 68 | // * |
| 69 | // * @param node The parent node. |
| 70 | // * @param childCat The category of the potential child. |
| 71 | // * @return true iff one or more children belong to the specified category |
| 72 | // */ |
| 73 | // public static boolean hasChild(ParseTree node, String childCat) { |
| 74 | // for (int i = 0; i < node.getChildCount(); i++) { |
| 75 | // if (getNodeCat(node.getChild(i)).equals(childCat)) { |
| 76 | // return true; |
| 77 | // } |
| 78 | // } |
| 79 | // return false; |
| 80 | // } |
| 81 | // |
| 82 | // public static boolean hasDescendant(ParseTree node, String childCat) { |
| 83 | // for (int i = 0; i < node.getChildCount(); i++) { |
| 84 | // ParseTree child = node.getChild(i); |
| 85 | // if (getNodeCat(child).equals(childCat)) { |
| 86 | // return true; |
| 87 | // } |
| 88 | // if (hasDescendant(child, childCat)) { |
| 89 | // return true; |
| 90 | // } |
| 91 | // } |
| 92 | // return false; |
| 93 | // } |
| 94 | // |
| 95 | // public static List<Tree> getChildrenWithCat(Tree node, String nodeCat) { |
| 96 | // ArrayList<Tree> children = new ArrayList<Tree>(); |
| 97 | // for (int i = 0; i < node.getChildCount(); i++) { |
| 98 | // if (getNodeCat(node.getChild(i)).equals(nodeCat)) { |
| 99 | // children.add(node.getChild(i)); |
| 100 | // } |
| 101 | // } |
| 102 | // return children; |
| 103 | // } |
| 104 | // |
| 105 | // public static List<ParseTree> getChildrenWithCat(ParseTree node, String nodeCat) { |
| 106 | // ArrayList<ParseTree> children = new ArrayList<ParseTree>(); |
| 107 | // for (int i = 0; i < node.getChildCount(); i++) { |
| 108 | // if (getNodeCat(node.getChild(i)).equals(nodeCat)) { |
| 109 | // children.add(node.getChild(i)); |
| 110 | // } |
| 111 | // } |
| 112 | // return children; |
| 113 | // } |
| 114 | // |
| 115 | // public static List<ParseTree> getChildren(ParseTree node) { |
| 116 | // ArrayList<ParseTree> children = new ArrayList<ParseTree>(); |
| 117 | // for (int i = 0; i < node.getChildCount(); i++) { |
| 118 | // children.add(node.getChild(i)); |
| 119 | // } |
| 120 | // return children; |
| 121 | // } |
| 122 | // |
| 123 | // public static Tree getFirstChildWithCat(Tree node, String nodeCat) { |
| 124 | // for (int i = 0; i < node.getChildCount(); i++) { |
| 125 | // if (getNodeCat(node.getChild(i)).equals(nodeCat)) { |
| 126 | // return node.getChild(i); |
| 127 | // } |
| 128 | // } |
| 129 | // return null; |
| 130 | // } |
| 131 | // |
| 132 | // public static ParseTree getFirstChildWithCat(ParseTree node, String nodeCat) { |
| 133 | // for (int i = 0; i < node.getChildCount(); i++) { |
| 134 | // if (getNodeCat(node.getChild(i)).equals(nodeCat)) { |
| 135 | // return node.getChild(i); |
| 136 | // } |
| 137 | // } |
| 138 | // return null; |
| 139 | // } |
| 140 | // |
| 141 | // /** |
| 142 | // * Checks whether a node only serves as a container for another node, e.g. in (cq_segment (cq_seg_occ ...)) the cq_segment node does not carry |
| 143 | // * any information of its own and only contains the cq_seg_occ node. |
| 144 | // * @param node The node to check |
| 145 | // * @return true iff the node is a container only. |
| 146 | // */ |
| 147 | // public static boolean isContainerOnly(ParseTree node) { |
| 148 | // String[] validNodeNamesArray = "cq_segment sq_segment element empty_segments".split(" "); |
| 149 | // List<String> validNodeNames = Arrays.asList(validNodeNamesArray); |
| 150 | // List<ParseTree> children = getChildren(node); |
| 151 | // for (ParseTree child : children) { |
| 152 | // if (validNodeNames.contains(getNodeCat(child))) { |
| 153 | // return false; |
| 154 | // } |
| 155 | // } |
| 156 | // return true; |
| 157 | // } |
| Michael Hanl | 4dc6679 | 2014-02-14 10:47:58 +0000 | [diff] [blame] | 158 | |
| 159 | public static void checkUnbalancedPars(String q) throws QueryException { |
| 160 | int openingPars = StringUtils.countMatches(q, "("); |
| 161 | int closingPars = StringUtils.countMatches(q, ")"); |
| 162 | int openingBrkts = StringUtils.countMatches(q, "["); |
| 163 | int closingBrkts = StringUtils.countMatches(q, "]"); |
| 164 | int openingBrcs = StringUtils.countMatches(q, "{"); |
| 165 | int closingBrcs = StringUtils.countMatches(q, "}"); |
| 166 | if (openingPars != closingPars) throw new QueryException( |
| 167 | "Your query string contains an unbalanced number of parantheses."); |
| 168 | if (openingBrkts != closingBrkts) throw new QueryException( |
| 169 | "Your query string contains an unbalanced number of brackets."); |
| 170 | if (openingBrcs != closingBrcs) throw new QueryException( |
| 171 | "Your query string contains an unbalanced number of braces."); |
| 172 | } |
| 173 | |
| 174 | public static List<String> parseMorph(String stringTree) { |
| 175 | |
| 176 | ArrayList<String> morph = new ArrayList<String>(); |
| 177 | return morph; |
| 178 | } |
| Michael Hanl | 4fe41cc | 2013-12-10 17:59:51 +0000 | [diff] [blame] | 179 | |
| 180 | |
| 181 | public static String buildCypherQuery(String cypher, String ctypel, String ctyper, |
| Michael Hanl | 4dc6679 | 2014-02-14 10:47:58 +0000 | [diff] [blame] | 182 | int cl, int cr, int page, int limit) { |
| Michael Hanl | 4fe41cc | 2013-12-10 17:59:51 +0000 | [diff] [blame] | 183 | //todo: implies that there is only one type allowed! |
| 184 | String sctypel = "", sctyper = ""; |
| 185 | switch (ctypel) { |
| 186 | case "C": |
| 187 | sctypel = "chars"; |
| 188 | break; |
| 189 | case "T": |
| 190 | sctypel = "tokens"; |
| 191 | break; |
| 192 | } |
| 193 | switch (ctyper) { |
| 194 | case "C": |
| 195 | sctyper = "chars"; |
| 196 | break; |
| 197 | case "T": |
| 198 | sctyper = "tokens"; |
| 199 | break; |
| 200 | } |
| 201 | |
| 202 | StringBuffer buffer = new StringBuffer(); |
| 203 | buffer.append("<query><cypher><![CDATA["); |
| 204 | buffer.append(cypher); |
| 205 | buffer.append("]]></cypher>"); |
| 206 | buffer.append("<wordAliasPrefix>wtok_</wordAliasPrefix>"); |
| 207 | buffer.append("<contextColumn>sent</contextColumn>"); |
| 208 | buffer.append("<contextIdColumn>sid</contextIdColumn>"); |
| 209 | buffer.append("<textColumn>txt</textColumn>"); |
| 210 | buffer.append("<startIndex>"); |
| 211 | buffer.append(page); |
| 212 | buffer.append("</startIndex>"); |
| 213 | buffer.append("<itemsPerPage>"); |
| 214 | buffer.append(limit); |
| 215 | buffer.append("</itemsPerPage>"); |
| 216 | buffer.append("<context>"); |
| 217 | buffer.append("<left>"); |
| 218 | buffer.append("<" + sctypel + ">"); |
| 219 | buffer.append(cl); |
| 220 | buffer.append("</" + sctypel + ">"); |
| 221 | buffer.append("</left>"); |
| 222 | buffer.append("<right>"); |
| 223 | buffer.append("<" + sctyper + ">"); |
| 224 | buffer.append(cr); |
| 225 | buffer.append("</" + sctyper + ">"); |
| 226 | buffer.append("</right>"); |
| 227 | buffer.append("</context>"); |
| 228 | buffer.append("</query>"); |
| 229 | return buffer.toString(); |
| 230 | } |
| 231 | |
| 232 | public static String buildDotQuery(long sid, String graphdb_id) { |
| 233 | StringBuffer b = new StringBuffer(); |
| 234 | b.append("<query>"); |
| 235 | b.append("<sentenceId>"); |
| 236 | b.append(sid); |
| 237 | b.append("</sentenceId>"); |
| 238 | b.append("<gdbId>"); |
| 239 | b.append(graphdb_id); |
| 240 | b.append("</gdbId>"); |
| 241 | b.append("<hls>"); |
| 242 | b.append("<hl>"); |
| 243 | b.append(40857); |
| 244 | b.append("</hl>"); |
| 245 | b.append("<hl>"); |
| 246 | b.append(40856); |
| 247 | b.append("</hl>"); |
| 248 | b.append("</hls>"); |
| 249 | b.append("</query>"); |
| 250 | |
| 251 | return b.toString(); |
| 252 | } |
| 253 | |
| 254 | public String buildaggreQuery(String query) { |
| 255 | StringBuffer b = new StringBuffer(); |
| 256 | b.append("<query><cypher><![CDATA["); |
| 257 | b.append(query); |
| 258 | b.append("]]></cypher>"); |
| 259 | b.append("<columns>"); |
| 260 | b.append("<column agg='true' sum='false'>"); |
| 261 | b.append("<cypherAlias>"); |
| 262 | b.append("aggBy"); |
| 263 | b.append("</cypherAlias>"); |
| 264 | b.append("<displayName>"); |
| 265 | b.append("Aggregate"); |
| 266 | b.append("</displayName>"); |
| 267 | b.append("</column>"); |
| 268 | |
| 269 | b.append("<column agg='fals' sum='true'>"); |
| 270 | b.append("<cypherAlias>"); |
| 271 | b.append("cnt"); |
| 272 | b.append("</cypherAlias>"); |
| 273 | b.append("<displayName>"); |
| 274 | b.append("Count"); |
| 275 | b.append("</displayName>"); |
| 276 | b.append("</column>"); |
| 277 | b.append("</columns>"); |
| 278 | |
| 279 | b.append("</query>"); |
| 280 | return b.toString(); |
| 281 | } |
| 282 | |
| Michael Hanl | 4dc6679 | 2014-02-14 10:47:58 +0000 | [diff] [blame] | 283 | @Deprecated |
| Michael Hanl | 4fe41cc | 2013-12-10 17:59:51 +0000 | [diff] [blame] | 284 | public static Map addParameters(Map request, int page, int num, String cli, String cri, |
| Michael Hanl | 5c0c819 | 2013-12-17 10:16:27 +0000 | [diff] [blame] | 285 | int cls, int crs, boolean cutoff) { |
| Michael Hanl | 4fe41cc | 2013-12-10 17:59:51 +0000 | [diff] [blame] | 286 | Map ctx = new LinkedHashMap(); |
| 287 | List left = new ArrayList(); |
| 288 | left.add(cli); |
| 289 | left.add(cls); |
| 290 | List right = new ArrayList(); |
| 291 | right.add(cri); |
| 292 | right.add(crs); |
| 293 | ctx.put("left", left); |
| 294 | ctx.put("right", right); |
| 295 | |
| 296 | request.put("startPage", page); |
| 297 | request.put("count", num); |
| 298 | request.put("context", ctx); |
| Michael Hanl | 5c0c819 | 2013-12-17 10:16:27 +0000 | [diff] [blame] | 299 | request.put("cutOff", cutoff); |
| Michael Hanl | 4fe41cc | 2013-12-10 17:59:51 +0000 | [diff] [blame] | 300 | |
| 301 | return request; |
| 302 | } |
| Michael Hanl | 4dc6679 | 2014-02-14 10:47:58 +0000 | [diff] [blame] | 303 | |
| Joachim Bingel | 11d5b15 | 2014-02-11 21:33:47 +0000 | [diff] [blame] | 304 | public static void prepareContext(LinkedHashMap<String, Object> requestMap) { |
| Michael Hanl | 4dc6679 | 2014-02-14 10:47:58 +0000 | [diff] [blame] | 305 | LinkedHashMap<String, Object> context = new LinkedHashMap<String, Object>(); |
| Michael Hanl | 4fe41cc | 2013-12-10 17:59:51 +0000 | [diff] [blame] | 306 | |
| Michael Hanl | 4dc6679 | 2014-02-14 10:47:58 +0000 | [diff] [blame] | 307 | LinkedHashMap<String, Object> classMap = new LinkedHashMap<String, Object>(); |
| 308 | LinkedHashMap<String, Object> operands = new LinkedHashMap<String, Object>(); |
| 309 | LinkedHashMap<String, Object> operation = new LinkedHashMap<String, Object>(); |
| 310 | LinkedHashMap<String, Object> frame = new LinkedHashMap<String, Object>(); |
| 311 | LinkedHashMap<String, Object> classRef = new LinkedHashMap<String, Object>(); |
| 312 | LinkedHashMap<String, Object> spanRef = new LinkedHashMap<String, Object>(); |
| 313 | LinkedHashMap<String, Object> classRefOp = new LinkedHashMap<String, Object>(); |
| 314 | LinkedHashMap<String, Object> min = new LinkedHashMap<String, Object>(); |
| 315 | LinkedHashMap<String, Object> max = new LinkedHashMap<String, Object>(); |
| 316 | LinkedHashMap<String, Object> exclude = new LinkedHashMap<String, Object>(); |
| 317 | LinkedHashMap<String, Object> distances = new LinkedHashMap<String, Object>(); |
| 318 | LinkedHashMap<String, Object> inOrder = new LinkedHashMap<String, Object>(); |
| Joachim Bingel | ffd65e3 | 2014-01-22 14:22:57 +0000 | [diff] [blame] | 319 | |
| Michael Hanl | 4dc6679 | 2014-02-14 10:47:58 +0000 | [diff] [blame] | 320 | operation.put("@id", "group:operation/"); |
| 321 | operation.put("@type", "@id"); |
| Joachim Bingel | ffd65e3 | 2014-01-22 14:22:57 +0000 | [diff] [blame] | 322 | |
| Michael Hanl | 4dc6679 | 2014-02-14 10:47:58 +0000 | [diff] [blame] | 323 | classMap.put("@id", "group:class"); |
| 324 | classMap.put("@type", "xsd:integer"); |
| 325 | |
| 326 | operands.put("@id", "group:operands"); |
| 327 | operands.put("@container", "@list"); |
| 328 | |
| 329 | frame.put("@id", "group:frame/"); |
| 330 | frame.put("@type", "@id"); |
| 331 | |
| 332 | classRef.put("@id", "group:classRef"); |
| 333 | classRef.put("@type", "xsd:integer"); |
| 334 | |
| 335 | spanRef.put("@id", "group:spanRef"); |
| 336 | spanRef.put("@type", "xsd:integer"); |
| 337 | |
| 338 | classRefOp.put("@id", "group:classRefOp"); |
| 339 | classRefOp.put("@type", "@id"); |
| 340 | |
| 341 | min.put("@id", "boundary:min"); |
| 342 | min.put("@type", "xsd:integer"); |
| 343 | |
| 344 | max.put("@id", "boundary:max"); |
| 345 | max.put("@type", "xsd:integer"); |
| 346 | |
| 347 | exclude.put("@id", "group:exclude"); |
| 348 | exclude.put("@type", "xsd:boolean"); |
| 349 | |
| 350 | distances.put("@id", "group:distances"); |
| 351 | distances.put("@container", "@list"); |
| 352 | |
| 353 | inOrder.put("@id", "group:inOrder"); |
| 354 | inOrder.put("@type", "xsd:boolean"); |
| 355 | |
| 356 | context.put("korap", "http://korap.ids-mannheim.de/ns/KorAP/json-ld/v0.1/"); |
| 357 | context.put("boundary", "korap:boundary/"); |
| 358 | context.put("group", "korap:group/"); |
| 359 | context.put("operation", operation); |
| 360 | context.put("class", classMap); |
| 361 | context.put("operands", operands); |
| 362 | context.put("frame", frame); |
| 363 | context.put("classRef", classRef); |
| 364 | context.put("spanRef", spanRef); |
| 365 | context.put("classRefOp", classRefOp); |
| 366 | context.put("min", min); |
| 367 | context.put("max", max); |
| 368 | context.put("exclude", exclude); |
| 369 | context.put("distances", distances); |
| 370 | context.put("inOrder", inOrder); |
| 371 | |
| 372 | requestMap.put("@context", context); |
| 373 | } |
| Michael Hanl | 4fe41cc | 2013-12-10 17:59:51 +0000 | [diff] [blame] | 374 | |
| 375 | |
| Joachim Bingel | 8181263 | 2014-02-18 08:55:22 +0000 | [diff] [blame] | 376 | |
| Michael Hanl | 4fe41cc | 2013-12-10 17:59:51 +0000 | [diff] [blame] | 377 | } |