blob: 8cb87c7295be700ec65c12dfa8019a4134d0efd9 [file] [log] [blame]
Joachim Bingel43607ed2014-05-19 12:39:55 +00001package de.ids_mannheim.korap.query.serialize;
2
3import de.ids_mannheim.korap.query.serialize.util.CollectionQueryParser;
4import de.ids_mannheim.korap.query.serialize.util.CollectionQueryLexer;
5import de.ids_mannheim.korap.util.QueryException;
6
7import java.lang.reflect.Method;
8import java.util.*;
9import java.util.regex.Matcher;
10import java.util.regex.Pattern;
11
12import org.antlr.v4.runtime.ANTLRInputStream;
13import org.antlr.v4.runtime.BailErrorStrategy;
14import org.antlr.v4.runtime.CharStream;
15import org.antlr.v4.runtime.CommonTokenStream;
16import org.antlr.v4.runtime.Lexer;
17import org.antlr.v4.runtime.Parser;
18import org.antlr.v4.runtime.ParserRuleContext;
19import org.antlr.v4.runtime.tree.ParseTree;
20import org.slf4j.LoggerFactory;
21
22/**
23 * @author bingel
24 * @date 12/05/2014
25 */
26public class ExpertFilter extends Antlr4AbstractSyntaxTree {
27
28 private org.slf4j.Logger log = LoggerFactory
29 .getLogger(ExpertFilter.class);
30
31 private Parser parser;
32 private boolean verbose = false;
33 private List<ParseTree> visited = new ArrayList<ParseTree>();
34 /**
35 * Top-level map representing the whole request.
36 */
37 LinkedHashMap<String,Object> requestMap = new LinkedHashMap<String,Object>();
38 /**
39 * Keeps track of active object.
40 */
41 LinkedList<LinkedHashMap<String,Object>> objectStack = new LinkedList<LinkedHashMap<String,Object>>();
42 /**
43 * Keeps track of open node categories
44 */
45 LinkedList<String> openNodeCats = new LinkedList<String>();
46 /**
47 * Keeps track of how many objects there are to pop after every recursion of {@link #processNode(ParseTree)}
48 */
49 LinkedList<Integer> objectsToPop = new LinkedList<Integer>();
50 Integer stackedObjects = 0;
51
52
53 public ExpertFilter() {
54 }
55
56 @Override
57 public void process(String query) throws QueryException {
58 ParseTree tree = parseCollectionQuery(query);
59 if (this.parser != null) {
60 super.parser = this.parser;
61 } else {
62 throw new NullPointerException("Parser has not been instantiated!");
63 }
64
65 log.info("Processing collection query: "+query);
66 if (verbose) System.out.println(tree.toStringTree(parser));
67 requestMap.put("@type", "korap:filter");
68 processNode(tree);
69 log.info(requestMap.toString());
70 }
71
72 private void processNode(ParseTree node) {
73 // Top-down processing
74 String nodeCat = getNodeCat(node);
75 openNodeCats.push(nodeCat);
76
77 stackedObjects = 0;
78
79 if (verbose) {
80 System.err.println(" "+objectStack);
81 System.out.println(openNodeCats);
82 }
83
84 /*
85 ****************************************************************
86 ****************************************************************
87 * Processing individual node categories *
88 ****************************************************************
89 ****************************************************************
90 */
91
92 if (nodeCat.equals("andGroup")) {
93 LinkedHashMap<String, Object> exprGroup = makeTermGroup("and");
94 objectStack.push(exprGroup);
95 stackedObjects++;
96 putIntoSuperObject(exprGroup,1);
97 }
98
99 if (nodeCat.equals("orGroup")) {
100 LinkedHashMap<String, Object> exprGroup = makeTermGroup("or");
101 objectStack.push(exprGroup);
102 stackedObjects++;
103 putIntoSuperObject(exprGroup,1);
104 }
105
106 if (nodeCat.equals("expr")) {
107 ParseTree fieldNode = getFirstChildWithCat(node, "field");
108 String field = fieldNode.getChild(0).toStringTree(parser);
109 List<ParseTree> operatorNodes = getChildrenWithCat(node, "operator");
110 List<ParseTree> valueNodes = getChildrenWithCat(node, "value");
111
112 if (valueNodes.size()==1) {
113 LinkedHashMap<String, Object> term = makeTerm();
114 term.put("attribute", field);
115 term.putAll(parseValue(valueNodes.get(0)));
116 String match = operatorNodes.get(0).getChild(0).toStringTree(parser);
117 term.put("match", "match:"+interpretMatch(match));
118 putIntoSuperObject(term);
119 } else { // (valueNodes.size()==2)
120 LinkedHashMap<String, Object> termGroup = makeTermGroup("and");
121 @SuppressWarnings("unchecked")
122 ArrayList<Object> termGroupOperands = (ArrayList<Object>) termGroup.get("operands");
123
124 LinkedHashMap<String, Object> term1 = makeTerm();
125 term1.put("attribute", field);
126 term1.putAll(parseValue(valueNodes.get(0)));
127 String match1 = operatorNodes.get(0).getChild(0).toStringTree(parser);
128 term1.put("match", "match:"+invertInequation(interpretMatch(match1)));
129 termGroupOperands.add(term1);
130
131 LinkedHashMap<String, Object> term2 = makeTerm();
132 term2.put("attribute", field);
133 term2.putAll(parseValue(valueNodes.get(1)));
134 String match2 = operatorNodes.get(1).getChild(0).toStringTree(parser);
135 term2.put("match", "match:"+interpretMatch(match2));
136 termGroupOperands.add(term2);
137
138 putIntoSuperObject(termGroup);
139 }
140
141 }
142
143 objectsToPop.push(stackedObjects);
144
145 /*
146 ****************************************************************
147 ****************************************************************
148 * recursion until 'request' node (root of tree) is processed *
149 ****************************************************************
150 ****************************************************************
151 */
152 for (int i=0; i<node.getChildCount(); i++) {
153 ParseTree child = node.getChild(i);
154 processNode(child);
155 }
156
157 /*
158 **************************************************************
159 * Stuff that happens after processing the children of a node *
160 **************************************************************
161 */
162
163 if (!objectsToPop.isEmpty()) {
164 int toPop = objectsToPop.pop();
165 for (int i=0; i<toPop; i++) {
166 objectStack.pop();
167 }
168 }
169 openNodeCats.pop();
170
171
172 }
173
174
175
176 private LinkedHashMap<String, Object> parseValue(ParseTree node) {
177 LinkedHashMap<String, Object> map = new LinkedHashMap<String, Object>();
178 String key = "";
179 if (getNodeCat(node.getChild(0)).equals("regex")) {
180 key = node.getChild(0).getChild(0).toStringTree(parser);
181 key = key.substring(1,key.length()-1); //remove leading and trailing slashes
182 map.put("key", key);
183 map.put("type", "type:regex");
184 }
185 else {
186 if (node.getChildCount() == 1) {
187 key = node.getChild(0).toStringTree(parser);
188 } else {
189 Pattern p = Pattern.compile("\" (.*) \"");
190 Matcher m = p.matcher(node.toStringTree(parser));
191 if (m.find()) {
192 key = m.group(1);
193 }
194 }
195 map.put("key", key);
196 }
197 return map;
198 }
199
200 private String interpretMatch(String match) {
201 String out = null;
202 if (match.equals("<")) {
203 out = "lt";
204 } else if (match.equals(">")) {
205 out = "gt";
206 } else if (match.equals("<=")) {
207 out = "leq";
208 } else if (match.equals(">=")) {
209 out = "geq";
210 } else if (match.equals("=")) {
211 out = "eq";
212 } else if (match.equals("!=")) {
213 out = "ne";
214 }
215 return out;
216 }
217
218 private String invertInequation(String op) {
219 String inv = null;
220 if (op.equals("lt")) {
221 inv = "gt";
222 } else if (op.equals("leq")) {
223 inv = "geq";
224 } else if (op.equals("gt")) {
225 inv = "lt";
226 } else if (op.equals("geq")) {
227 inv = "leq";
228 }
229 return inv;
230 }
231
232 private void putIntoSuperObject(LinkedHashMap<String, Object> object) {
233 putIntoSuperObject(object, 0);
234 }
235
236 @SuppressWarnings({ "unchecked" })
237 private void putIntoSuperObject(LinkedHashMap<String, Object> object, int objStackPosition) {
238 if (objectStack.size()>objStackPosition) {
239 ArrayList<Object> topObjectOperands = (ArrayList<Object>) objectStack.get(objStackPosition).get("operands");
240 topObjectOperands.add(object);
241
242 } else {
243 requestMap.put("filter", object);
244 }
245 }
246
247 private ParserRuleContext parseCollectionQuery (String p) throws QueryException {
248 Lexer collectionQueryLexer = new CollectionQueryLexer((CharStream)null);
249 ParserRuleContext tree = null;
250 // Like p. 111
251 try {
252
253 // Tokenize input data
254 ANTLRInputStream input = new ANTLRInputStream(p);
255 collectionQueryLexer.setInputStream(input);
256 CommonTokenStream tokens = new CommonTokenStream(collectionQueryLexer);
257 parser = new CollectionQueryParser(tokens);
258
259 // Don't throw out erroneous stuff
260 parser.setErrorHandler(new BailErrorStrategy());
261 parser.removeErrorListeners();
262 // Get starting rule from parser
263 Method startRule = CollectionQueryParser.class.getMethod("start");
264 tree = (ParserRuleContext) startRule.invoke(parser, (Object[])null);
265
266 }
267 // Some things went wrong ...
268 catch (Exception e) {
269 System.err.println( e.getMessage() );
270 log.error(e.getMessage());
271 }
272 if (tree == null) {
273 log.error("Could not parse expert filter query. Make sure it is correct syntax.");
274 throw new QueryException("Could not parse expert filter query. Make sure it is correct syntax.");
275 }
276 // Return the generated tree
277 return tree;
278 }
279
280
281 @Override
282 public Map<String, Object> getRequestMap() {
283 return requestMap;
284 }
285
286
287 public static void main(String[] args) {
288 String query = "foo=bar&c=d";
289 query = "(1990<year<2010&genre=Sport)|textClass=politk";
290 query = "(textClass=wissenschaft & textClass=politik) | textClass=ausland";
291 query = "1990<year<2010 oder genre=Sport";
292 query = "title=\"Der Titel\"";
293 query = "(corpusID=A00 & corpusID=WPD) | textClass=wissenschaft ";
294 query = "(corpusID=A00 | corpusID=WPD) & (textClass=wissenschaft & textClass=politik)";
295// query = "corpusID=A00 & corpusID=WPD & textClass=wissenschaft";
296// query = "corpusID=A00 | corpusID=WPD";
297 query = "(textClass=wissenschaft & textClass=politik) & (corpusID=A00 | corpusID=WPD)";
298 query = "textClass=wissenschaft | (textClass=politik | corpusID=A00)";
299 ExpertFilter filter = new ExpertFilter();
300 filter.verbose = true;
301 try {
302 filter.process(query);
303 } catch (QueryException e) {
304 e.printStackTrace();
305 }
306 System.out.println(filter.getRequestMap());
307
308 }
309
310
311}