| Joachim Bingel | 43607ed | 2014-05-19 12:39:55 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.serialize; |
| 2 | |
| 3 | import de.ids_mannheim.korap.query.serialize.util.CollectionQueryParser; |
| 4 | import de.ids_mannheim.korap.query.serialize.util.CollectionQueryLexer; |
| 5 | import de.ids_mannheim.korap.util.QueryException; |
| 6 | |
| 7 | import java.lang.reflect.Method; |
| 8 | import java.util.*; |
| 9 | import java.util.regex.Matcher; |
| 10 | import java.util.regex.Pattern; |
| 11 | |
| 12 | import org.antlr.v4.runtime.ANTLRInputStream; |
| 13 | import org.antlr.v4.runtime.BailErrorStrategy; |
| 14 | import org.antlr.v4.runtime.CharStream; |
| 15 | import org.antlr.v4.runtime.CommonTokenStream; |
| 16 | import org.antlr.v4.runtime.Lexer; |
| 17 | import org.antlr.v4.runtime.Parser; |
| 18 | import org.antlr.v4.runtime.ParserRuleContext; |
| 19 | import org.antlr.v4.runtime.tree.ParseTree; |
| 20 | import org.slf4j.LoggerFactory; |
| 21 | |
| 22 | /** |
| 23 | * @author bingel |
| 24 | * @date 12/05/2014 |
| 25 | */ |
| 26 | public class ExpertFilter extends Antlr4AbstractSyntaxTree { |
| 27 | |
| 28 | private org.slf4j.Logger log = LoggerFactory |
| 29 | .getLogger(ExpertFilter.class); |
| 30 | |
| 31 | private Parser parser; |
| 32 | private boolean verbose = false; |
| 33 | private List<ParseTree> visited = new ArrayList<ParseTree>(); |
| 34 | /** |
| 35 | * Top-level map representing the whole request. |
| 36 | */ |
| 37 | LinkedHashMap<String,Object> requestMap = new LinkedHashMap<String,Object>(); |
| 38 | /** |
| 39 | * Keeps track of active object. |
| 40 | */ |
| 41 | LinkedList<LinkedHashMap<String,Object>> objectStack = new LinkedList<LinkedHashMap<String,Object>>(); |
| 42 | /** |
| 43 | * Keeps track of open node categories |
| 44 | */ |
| 45 | LinkedList<String> openNodeCats = new LinkedList<String>(); |
| 46 | /** |
| 47 | * Keeps track of how many objects there are to pop after every recursion of {@link #processNode(ParseTree)} |
| 48 | */ |
| 49 | LinkedList<Integer> objectsToPop = new LinkedList<Integer>(); |
| 50 | Integer stackedObjects = 0; |
| 51 | |
| 52 | |
/**
 * Creates a filter processor with an empty request map. Call
 * {@link #process(String)} to parse a query and fill the map.
 */
public ExpertFilter() {
    super();
}
| 55 | |
| 56 | @Override |
| 57 | public void process(String query) throws QueryException { |
| 58 | ParseTree tree = parseCollectionQuery(query); |
| 59 | if (this.parser != null) { |
| 60 | super.parser = this.parser; |
| 61 | } else { |
| 62 | throw new NullPointerException("Parser has not been instantiated!"); |
| 63 | } |
| 64 | |
| 65 | log.info("Processing collection query: "+query); |
| 66 | if (verbose) System.out.println(tree.toStringTree(parser)); |
| 67 | requestMap.put("@type", "korap:filter"); |
| 68 | processNode(tree); |
| 69 | log.info(requestMap.toString()); |
| 70 | } |
| 71 | |
| 72 | private void processNode(ParseTree node) { |
| 73 | // Top-down processing |
| 74 | String nodeCat = getNodeCat(node); |
| 75 | openNodeCats.push(nodeCat); |
| 76 | |
| 77 | stackedObjects = 0; |
| 78 | |
| 79 | if (verbose) { |
| 80 | System.err.println(" "+objectStack); |
| 81 | System.out.println(openNodeCats); |
| 82 | } |
| 83 | |
| 84 | /* |
| 85 | **************************************************************** |
| 86 | **************************************************************** |
| 87 | * Processing individual node categories * |
| 88 | **************************************************************** |
| 89 | **************************************************************** |
| 90 | */ |
| 91 | |
| 92 | if (nodeCat.equals("andGroup")) { |
| 93 | LinkedHashMap<String, Object> exprGroup = makeTermGroup("and"); |
| 94 | objectStack.push(exprGroup); |
| 95 | stackedObjects++; |
| 96 | putIntoSuperObject(exprGroup,1); |
| 97 | } |
| 98 | |
| 99 | if (nodeCat.equals("orGroup")) { |
| 100 | LinkedHashMap<String, Object> exprGroup = makeTermGroup("or"); |
| 101 | objectStack.push(exprGroup); |
| 102 | stackedObjects++; |
| 103 | putIntoSuperObject(exprGroup,1); |
| 104 | } |
| 105 | |
| 106 | if (nodeCat.equals("expr")) { |
| 107 | ParseTree fieldNode = getFirstChildWithCat(node, "field"); |
| 108 | String field = fieldNode.getChild(0).toStringTree(parser); |
| 109 | List<ParseTree> operatorNodes = getChildrenWithCat(node, "operator"); |
| 110 | List<ParseTree> valueNodes = getChildrenWithCat(node, "value"); |
| 111 | |
| 112 | if (valueNodes.size()==1) { |
| 113 | LinkedHashMap<String, Object> term = makeTerm(); |
| 114 | term.put("attribute", field); |
| 115 | term.putAll(parseValue(valueNodes.get(0))); |
| 116 | String match = operatorNodes.get(0).getChild(0).toStringTree(parser); |
| 117 | term.put("match", "match:"+interpretMatch(match)); |
| 118 | putIntoSuperObject(term); |
| 119 | } else { // (valueNodes.size()==2) |
| 120 | LinkedHashMap<String, Object> termGroup = makeTermGroup("and"); |
| 121 | @SuppressWarnings("unchecked") |
| 122 | ArrayList<Object> termGroupOperands = (ArrayList<Object>) termGroup.get("operands"); |
| 123 | |
| 124 | LinkedHashMap<String, Object> term1 = makeTerm(); |
| 125 | term1.put("attribute", field); |
| 126 | term1.putAll(parseValue(valueNodes.get(0))); |
| 127 | String match1 = operatorNodes.get(0).getChild(0).toStringTree(parser); |
| 128 | term1.put("match", "match:"+invertInequation(interpretMatch(match1))); |
| 129 | termGroupOperands.add(term1); |
| 130 | |
| 131 | LinkedHashMap<String, Object> term2 = makeTerm(); |
| 132 | term2.put("attribute", field); |
| 133 | term2.putAll(parseValue(valueNodes.get(1))); |
| 134 | String match2 = operatorNodes.get(1).getChild(0).toStringTree(parser); |
| 135 | term2.put("match", "match:"+interpretMatch(match2)); |
| 136 | termGroupOperands.add(term2); |
| 137 | |
| 138 | putIntoSuperObject(termGroup); |
| 139 | } |
| 140 | |
| 141 | } |
| 142 | |
| 143 | objectsToPop.push(stackedObjects); |
| 144 | |
| 145 | /* |
| 146 | **************************************************************** |
| 147 | **************************************************************** |
| 148 | * recursion until 'request' node (root of tree) is processed * |
| 149 | **************************************************************** |
| 150 | **************************************************************** |
| 151 | */ |
| 152 | for (int i=0; i<node.getChildCount(); i++) { |
| 153 | ParseTree child = node.getChild(i); |
| 154 | processNode(child); |
| 155 | } |
| 156 | |
| 157 | /* |
| 158 | ************************************************************** |
| 159 | * Stuff that happens after processing the children of a node * |
| 160 | ************************************************************** |
| 161 | */ |
| 162 | |
| 163 | if (!objectsToPop.isEmpty()) { |
| 164 | int toPop = objectsToPop.pop(); |
| 165 | for (int i=0; i<toPop; i++) { |
| 166 | objectStack.pop(); |
| 167 | } |
| 168 | } |
| 169 | openNodeCats.pop(); |
| 170 | |
| 171 | |
| 172 | } |
| 173 | |
| 174 | |
| 175 | |
| 176 | private LinkedHashMap<String, Object> parseValue(ParseTree node) { |
| 177 | LinkedHashMap<String, Object> map = new LinkedHashMap<String, Object>(); |
| 178 | String key = ""; |
| 179 | if (getNodeCat(node.getChild(0)).equals("regex")) { |
| 180 | key = node.getChild(0).getChild(0).toStringTree(parser); |
| 181 | key = key.substring(1,key.length()-1); //remove leading and trailing slashes |
| 182 | map.put("key", key); |
| 183 | map.put("type", "type:regex"); |
| 184 | } |
| 185 | else { |
| 186 | if (node.getChildCount() == 1) { |
| 187 | key = node.getChild(0).toStringTree(parser); |
| 188 | } else { |
| 189 | Pattern p = Pattern.compile("\" (.*) \""); |
| 190 | Matcher m = p.matcher(node.toStringTree(parser)); |
| 191 | if (m.find()) { |
| 192 | key = m.group(1); |
| 193 | } |
| 194 | } |
| 195 | map.put("key", key); |
| 196 | } |
| 197 | return map; |
| 198 | } |
| 199 | |
| 200 | private String interpretMatch(String match) { |
| 201 | String out = null; |
| 202 | if (match.equals("<")) { |
| 203 | out = "lt"; |
| 204 | } else if (match.equals(">")) { |
| 205 | out = "gt"; |
| 206 | } else if (match.equals("<=")) { |
| 207 | out = "leq"; |
| 208 | } else if (match.equals(">=")) { |
| 209 | out = "geq"; |
| 210 | } else if (match.equals("=")) { |
| 211 | out = "eq"; |
| 212 | } else if (match.equals("!=")) { |
| 213 | out = "ne"; |
| 214 | } |
| 215 | return out; |
| 216 | } |
| 217 | |
| 218 | private String invertInequation(String op) { |
| 219 | String inv = null; |
| 220 | if (op.equals("lt")) { |
| 221 | inv = "gt"; |
| 222 | } else if (op.equals("leq")) { |
| 223 | inv = "geq"; |
| 224 | } else if (op.equals("gt")) { |
| 225 | inv = "lt"; |
| 226 | } else if (op.equals("geq")) { |
| 227 | inv = "leq"; |
| 228 | } |
| 229 | return inv; |
| 230 | } |
| 231 | |
| 232 | private void putIntoSuperObject(LinkedHashMap<String, Object> object) { |
| 233 | putIntoSuperObject(object, 0); |
| 234 | } |
| 235 | |
| 236 | @SuppressWarnings({ "unchecked" }) |
| 237 | private void putIntoSuperObject(LinkedHashMap<String, Object> object, int objStackPosition) { |
| 238 | if (objectStack.size()>objStackPosition) { |
| 239 | ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(objStackPosition).get("operands"); |
| 240 | topObjectOperands.add(object); |
| 241 | |
| 242 | } else { |
| 243 | requestMap.put("filter", object); |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | private ParserRuleContext parseCollectionQuery (String p) throws QueryException { |
| 248 | Lexer collectionQueryLexer = new CollectionQueryLexer((CharStream)null); |
| 249 | ParserRuleContext tree = null; |
| 250 | // Like p. 111 |
| 251 | try { |
| 252 | |
| 253 | // Tokenize input data |
| 254 | ANTLRInputStream input = new ANTLRInputStream(p); |
| 255 | collectionQueryLexer.setInputStream(input); |
| 256 | CommonTokenStream tokens = new CommonTokenStream(collectionQueryLexer); |
| 257 | parser = new CollectionQueryParser(tokens); |
| 258 | |
| 259 | // Don't throw out erroneous stuff |
| 260 | parser.setErrorHandler(new BailErrorStrategy()); |
| 261 | parser.removeErrorListeners(); |
| 262 | // Get starting rule from parser |
| 263 | Method startRule = CollectionQueryParser.class.getMethod("start"); |
| 264 | tree = (ParserRuleContext) startRule.invoke(parser, (Object[])null); |
| 265 | |
| 266 | } |
| 267 | // Some things went wrong ... |
| 268 | catch (Exception e) { |
| 269 | System.err.println( e.getMessage() ); |
| 270 | log.error(e.getMessage()); |
| 271 | } |
| 272 | if (tree == null) { |
| 273 | log.error("Could not parse expert filter query. Make sure it is correct syntax."); |
| 274 | throw new QueryException("Could not parse expert filter query. Make sure it is correct syntax."); |
| 275 | } |
| 276 | // Return the generated tree |
| 277 | return tree; |
| 278 | } |
| 279 | |
| 280 | |
| 281 | @Override |
| 282 | public Map<String, Object> getRequestMap() { |
| 283 | return requestMap; |
| 284 | } |
| 285 | |
| 286 | |
| 287 | public static void main(String[] args) { |
| 288 | String query = "foo=bar&c=d"; |
| 289 | query = "(1990<year<2010&genre=Sport)|textClass=politk"; |
| 290 | query = "(textClass=wissenschaft & textClass=politik) | textClass=ausland"; |
| 291 | query = "1990<year<2010 oder genre=Sport"; |
| 292 | query = "title=\"Der Titel\""; |
| 293 | query = "(corpusID=A00 & corpusID=WPD) | textClass=wissenschaft "; |
| 294 | query = "(corpusID=A00 | corpusID=WPD) & (textClass=wissenschaft & textClass=politik)"; |
| 295 | // query = "corpusID=A00 & corpusID=WPD & textClass=wissenschaft"; |
| 296 | // query = "corpusID=A00 | corpusID=WPD"; |
| 297 | query = "(textClass=wissenschaft & textClass=politik) & (corpusID=A00 | corpusID=WPD)"; |
| 298 | query = "textClass=wissenschaft | (textClass=politik | corpusID=A00)"; |
| 299 | ExpertFilter filter = new ExpertFilter(); |
| 300 | filter.verbose = true; |
| 301 | try { |
| 302 | filter.process(query); |
| 303 | } catch (QueryException e) { |
| 304 | e.printStackTrace(); |
| 305 | } |
| 306 | System.out.println(filter.getRequestMap()); |
| 307 | |
| 308 | } |
| 309 | |
| 310 | |
| 311 | } |