blob: a447ccd8b6fa0c2955b8b4b5c4a542b934d9f807 [file] [log] [blame]
Nils Diewaldf399a672013-11-18 17:55:22 +00001package de.ids_mannheim.korap.filter;
2
3import java.util.*;
4
Nils Diewald2cd1c3d2014-01-08 22:53:08 +00005import org.apache.lucene.index.Term;
6
Nils Diewaldf399a672013-11-18 17:55:22 +00007import org.apache.lucene.search.BooleanClause;
8import org.apache.lucene.search.BooleanQuery;
9import org.apache.lucene.search.Query;
10import org.apache.lucene.search.TermQuery;
11import org.apache.lucene.search.RegexpQuery;
Nils Diewaldbaf68c52013-11-20 13:22:19 +000012import org.apache.lucene.search.NumericRangeQuery;
13
14import de.ids_mannheim.korap.util.KorapDate;
15import de.ids_mannheim.korap.filter.RegexFilter;
16import de.ids_mannheim.korap.KorapFilter;
17
Nils Diewald01b4ce32013-12-05 22:39:25 +000018import com.fasterxml.jackson.databind.ObjectMapper;
19import com.fasterxml.jackson.databind.JsonNode;
20
Nils Diewaldb1c3b652013-12-28 22:47:00 +000021import de.ids_mannheim.korap.util.QueryException;
22
Nils Diewaldbaf68c52013-11-20 13:22:19 +000023import org.slf4j.Logger;
24import org.slf4j.LoggerFactory;
25
Nils Diewaldf399a672013-11-18 17:55:22 +000026
/*
 * TODO: Implement negation (!not).
 *
 * TODO: The JSON handling definitely belongs in KorapFilter.
 */
33
34/**
Nils Diewaldbaf68c52013-11-20 13:22:19 +000035 * @author Nils Diewald
Nils Diewaldf399a672013-11-18 17:55:22 +000036 *
37 * BooleanFilter implements a simple API for boolean operations
38 * on constraints for KorapFilter.
39 */
40public class BooleanFilter {
41 private String type;
Nils Diewaldf399a672013-11-18 17:55:22 +000042
Nils Diewaldbaf68c52013-11-20 13:22:19 +000043 // Logger
Nils Diewaldb1c3b652013-12-28 22:47:00 +000044 private final static Logger log = LoggerFactory.getLogger(KorapFilter.class);
Nils Diewaldbaf68c52013-11-20 13:22:19 +000045
46 private BooleanQuery bool;
Nils Diewaldb1c3b652013-12-28 22:47:00 +000047 private String error;
Nils Diewaldbaf68c52013-11-20 13:22:19 +000048
49 public BooleanFilter () {
50 bool = new BooleanQuery();
Nils Diewaldf399a672013-11-18 17:55:22 +000051 };
52
Nils Diewaldb1c3b652013-12-28 22:47:00 +000053 public BooleanFilter (JsonNode json) throws QueryException {
Nils Diewald01b4ce32013-12-05 22:39:25 +000054 bool = new BooleanQuery();
Nils Diewaldb1c3b652013-12-28 22:47:00 +000055 this.fromJSON(json, "tokens");
56 /*
Nils Diewald01b4ce32013-12-05 22:39:25 +000057 String type = json.get("@type").asText();
58 String field = _getField(json);
59
60 if (type.equals("korap:term")) {
61 this.fromJSON(json, field);
62 }
63 else if (type.equals("korap:group")) {
64 // TODO: relation
65 for (JsonNode operand : json.get("operands")) {
66 this.fromJSON(operand, field);
67 };
68 };
Nils Diewaldb1c3b652013-12-28 22:47:00 +000069 */
Nils Diewald01b4ce32013-12-05 22:39:25 +000070 };
71
72
Nils Diewaldb1c3b652013-12-28 22:47:00 +000073 private void fromJSON (JsonNode json, String field) throws QueryException {
Nils Diewald01b4ce32013-12-05 22:39:25 +000074 String type = json.get("@type").asText();
75
Nils Diewaldb1c3b652013-12-28 22:47:00 +000076 log.trace("@type: " + type);
77
Nils Diewald01b4ce32013-12-05 22:39:25 +000078 if (json.has("@field"))
79 field = _getField(json);
80
81 if (type.equals("korap:term")) {
82 if (field != null && json.has("@value"))
83 this.and(field, json.get("@value").asText());
84 return;
85 }
86 else if (type.equals("korap:group")) {
87 if (!json.has("relation"))
88 return;
89
90 String date, till;
91
Nils Diewaldb1c3b652013-12-28 22:47:00 +000092 log.trace("relation: " + json.get("relation").asText());
93
Nils Diewald01b4ce32013-12-05 22:39:25 +000094 switch (json.get("relation").asText()) {
Nils Diewaldb1c3b652013-12-28 22:47:00 +000095
Nils Diewald01b4ce32013-12-05 22:39:25 +000096 case "between":
97 date = _getDate(json, 0);
98 till = _getDate(json, 1);
99 if (date != null && till != null)
100 this.between(date, till);
101 break;
102
103 case "until":
104 date = _getDate(json, 0);
105 if (date != null)
106 this.till(date);
107 break;
108
109 case "since":
110 date = _getDate(json, 0);
111 if (date != null)
112 this.since(date);
113 break;
114
115 case "equals":
116 date = _getDate(json, 0);
117 if (date != null)
118 this.date(date);
119 break;
Nils Diewaldb1c3b652013-12-28 22:47:00 +0000120
121 case "and":
122 if (!json.has("operands"))
123 return;
124
125 for (JsonNode operand : json.get("operands")) {
126 this.fromJSON(operand, field);
127 };
128 break;
129
130 default:
131 throw new QueryException(json.get("relation").asText() + " is not a supported relation");
Nils Diewald01b4ce32013-12-05 22:39:25 +0000132 };
133 }
Nils Diewaldb1c3b652013-12-28 22:47:00 +0000134 else {
135 throw new QueryException(type + " is not a supported group");
136 };
Nils Diewald01b4ce32013-12-05 22:39:25 +0000137 };
138
139 private static String _getField (JsonNode json) {
140 if (!json.has("@field"))
141 return (String) null;
142
143 String field = json.get("@field").asText();
144 return field.replaceFirst("korap:field#", "");
145 };
146
147 private static String _getDate (JsonNode json, int index) {
148 if (!json.has("operands"))
149 return (String) null;
150
151 if (!json.get("operands").has(index))
152 return (String) null;
153
154 JsonNode date = json.get("operands").get(index);
155 if (!date.get("@type").asText().equals("korap:date"))
156 return (String) null;
157
158 if (!date.has("@value"))
159 return (String) null;
160
161 return date.get("@value").asText();
162 };
163
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000164 public BooleanFilter or (String type, String ... terms) {
165 for (String term : terms) {
166 bool.add(
167 new TermQuery(new Term(type, term)),
168 BooleanClause.Occur.SHOULD
169 );
Nils Diewaldf399a672013-11-18 17:55:22 +0000170 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000171 return this;
172 };
173
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000174 public BooleanFilter or (String type, RegexFilter value) {
175 bool.add(
176 value.toQuery(type),
177 BooleanClause.Occur.SHOULD
178 );
Nils Diewaldf399a672013-11-18 17:55:22 +0000179 return this;
180 };
181
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000182 public BooleanFilter or (BooleanFilter bf) {
183 bool.add(
184 bf.toQuery(),
185 BooleanClause.Occur.SHOULD
186 );
Nils Diewaldf399a672013-11-18 17:55:22 +0000187 return this;
188 };
189
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000190 public BooleanFilter or (NumericRangeQuery<Integer> nrq) {
191 bool.add(nrq, BooleanClause.Occur.SHOULD);
Nils Diewaldf399a672013-11-18 17:55:22 +0000192 return this;
193 };
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000194
195 public BooleanFilter and (String type, String ... terms) {
196 for (String term : terms) {
197 bool.add(
198 new TermQuery(new Term(type, term)),
199 BooleanClause.Occur.MUST
200 );
201 };
202 return this;
203 };
204
205 public BooleanFilter and (String type, RegexFilter value) {
206 bool.add(
207 value.toQuery(type),
208 BooleanClause.Occur.MUST
209 );
210 return this;
211 };
212
213 public BooleanFilter and (BooleanFilter bf) {
214 bool.add(
215 bf.toQuery(),
216 BooleanClause.Occur.MUST
217 );
218 return this;
219 };
220
221 public BooleanFilter since (String date) {
222 int since = new KorapDate(date).floor();
223
224 if (since == 0 || since == KorapDate.BEGINNING)
225 return this;
226
227 bool.add(
228 NumericRangeQuery.newIntRange(
229 "pubDate",
230 since,
231 KorapDate.END,
232 true,
233 true
234 ),
235 BooleanClause.Occur.MUST
236 );
237
238 return this;
239 };
240
241
242 public BooleanFilter till (String date) {
243 try {
244 int till = new KorapDate(date).ceil();
245 if (till == 0 || till == KorapDate.END)
246 return this;
247
248 bool.add(
249 NumericRangeQuery.newIntRange(
250 "pubDate",
251 KorapDate.BEGINNING,
252 till,
253 true,
254 true
255 ),
256 BooleanClause.Occur.MUST
257 );
258 }
259 catch (NumberFormatException e) {
Nils Diewaldb1c3b652013-12-28 22:47:00 +0000260 log.warn("Parameter of till(date) is invalid");
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000261 };
262 return this;
263 };
264
265
266 public BooleanFilter between (String beginStr, String endStr) {
267 KorapDate beginDF = new KorapDate(beginStr);
268
269 int begin = beginDF.floor();
270
271 int end = new KorapDate(endStr).ceil();
272
273 if (end == 0)
274 return this;
275
276 if (begin == KorapDate.BEGINNING && end == KorapDate.END)
277 return this;
278
279 if (begin == end) {
280 this.and("pubDate", beginDF.toString());
281 return this;
282 };
283
284 this.bool.add(
285 NumericRangeQuery.newIntRange(
286 "pubDate",
287 begin,
288 end,
289 true,
290 true
291 ),
292 BooleanClause.Occur.MUST
293 );
294 return this;
295 };
296
297
298 public BooleanFilter date (String date) {
299 KorapDate dateDF = new KorapDate(date);
300
301 if (dateDF.year() == 0)
302 return this;
303
304 if (dateDF.day() == 0 || dateDF.month() == 0) {
305 int begin = dateDF.floor();
306 int end = dateDF.ceil();
307
308 if (end == 0 || (begin == KorapDate.BEGINNING && end == KorapDate.END))
309 return this;
310
311 this.bool.add(
312 NumericRangeQuery.newIntRange(
313 "pubDate",
314 begin,
315 end,
316 true,
317 true
318 ),
319 BooleanClause.Occur.MUST
320 );
321 return this;
322 };
323
324 this.and("pubDate", dateDF.toString());
325 return this;
326 };
327
Nils Diewaldf399a672013-11-18 17:55:22 +0000328
329 public Query toQuery () {
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000330 return this.bool;
Nils Diewaldf399a672013-11-18 17:55:22 +0000331 };
332
333 public String toString () {
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000334 return this.bool.toString();
Nils Diewaldf399a672013-11-18 17:55:22 +0000335 };
336};