| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.filter; |
| 2 | |
| 3 | import java.util.*; |
| 4 | |
| Nils Diewald | 2cd1c3d | 2014-01-08 22:53:08 +0000 | [diff] [blame] | 5 | import org.apache.lucene.index.Term; |
| 6 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 7 | import org.apache.lucene.search.BooleanClause; |
| 8 | import org.apache.lucene.search.BooleanQuery; |
| 9 | import org.apache.lucene.search.Query; |
| 10 | import org.apache.lucene.search.TermQuery; |
| 11 | import org.apache.lucene.search.RegexpQuery; |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 12 | import org.apache.lucene.search.NumericRangeQuery; |
| 13 | |
| 14 | import de.ids_mannheim.korap.util.KorapDate; |
| 15 | import de.ids_mannheim.korap.filter.RegexFilter; |
| 16 | import de.ids_mannheim.korap.KorapFilter; |
| 17 | |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 18 | import com.fasterxml.jackson.databind.ObjectMapper; |
| 19 | import com.fasterxml.jackson.databind.JsonNode; |
| 20 | |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 21 | import de.ids_mannheim.korap.util.QueryException; |
| 22 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 23 | import org.slf4j.Logger; |
| 24 | import org.slf4j.LoggerFactory; |
| 25 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 26 | |
| 27 | /* |
| 28 | Todo: !not |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 29 | |
| 30 | THE JSON STUFF DEFINITELY BELONGS IN KORAPFILTER |
| 31 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 32 | */ |
| 33 | |
| 34 | /** |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 35 | * @author Nils Diewald |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 36 | * |
| 37 | * BooleanFilter implements a simple API for boolean operations |
| 38 | * on constraints for KorapFilter. |
| 39 | */ |
| 40 | public class BooleanFilter { |
| 41 | private String type; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 42 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 43 | // Logger |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 44 | private final static Logger log = LoggerFactory.getLogger(KorapFilter.class); |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 45 | |
| 46 | private BooleanQuery bool; |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 47 | private String error; |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 48 | |
| 49 | public BooleanFilter () { |
| 50 | bool = new BooleanQuery(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 51 | }; |
| 52 | |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 53 | public BooleanFilter (JsonNode json) throws QueryException { |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 54 | bool = new BooleanQuery(); |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 55 | this.fromJSON(json, "tokens"); |
| 56 | /* |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 57 | String type = json.get("@type").asText(); |
| 58 | String field = _getField(json); |
| 59 | |
| 60 | if (type.equals("korap:term")) { |
| 61 | this.fromJSON(json, field); |
| 62 | } |
| 63 | else if (type.equals("korap:group")) { |
| 64 | // TODO: relation |
| 65 | for (JsonNode operand : json.get("operands")) { |
| 66 | this.fromJSON(operand, field); |
| 67 | }; |
| 68 | }; |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 69 | */ |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 70 | }; |
| 71 | |
| 72 | |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 73 | private void fromJSON (JsonNode json, String field) throws QueryException { |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 74 | String type = json.get("@type").asText(); |
| 75 | |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 76 | log.trace("@type: " + type); |
| 77 | |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 78 | if (json.has("@field")) |
| 79 | field = _getField(json); |
| 80 | |
| 81 | if (type.equals("korap:term")) { |
| 82 | if (field != null && json.has("@value")) |
| 83 | this.and(field, json.get("@value").asText()); |
| 84 | return; |
| 85 | } |
| 86 | else if (type.equals("korap:group")) { |
| 87 | if (!json.has("relation")) |
| 88 | return; |
| 89 | |
| 90 | String date, till; |
| 91 | |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 92 | log.trace("relation: " + json.get("relation").asText()); |
| 93 | |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 94 | switch (json.get("relation").asText()) { |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 95 | |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 96 | case "between": |
| 97 | date = _getDate(json, 0); |
| 98 | till = _getDate(json, 1); |
| 99 | if (date != null && till != null) |
| 100 | this.between(date, till); |
| 101 | break; |
| 102 | |
| 103 | case "until": |
| 104 | date = _getDate(json, 0); |
| 105 | if (date != null) |
| 106 | this.till(date); |
| 107 | break; |
| 108 | |
| 109 | case "since": |
| 110 | date = _getDate(json, 0); |
| 111 | if (date != null) |
| 112 | this.since(date); |
| 113 | break; |
| 114 | |
| 115 | case "equals": |
| 116 | date = _getDate(json, 0); |
| 117 | if (date != null) |
| 118 | this.date(date); |
| 119 | break; |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 120 | |
| 121 | case "and": |
| 122 | if (!json.has("operands")) |
| 123 | return; |
| 124 | |
| 125 | for (JsonNode operand : json.get("operands")) { |
| 126 | this.fromJSON(operand, field); |
| 127 | }; |
| 128 | break; |
| 129 | |
| 130 | default: |
| 131 | throw new QueryException(json.get("relation").asText() + " is not a supported relation"); |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 132 | }; |
| 133 | } |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 134 | else { |
| 135 | throw new QueryException(type + " is not a supported group"); |
| 136 | }; |
| Nils Diewald | 01b4ce3 | 2013-12-05 22:39:25 +0000 | [diff] [blame] | 137 | }; |
| 138 | |
| 139 | private static String _getField (JsonNode json) { |
| 140 | if (!json.has("@field")) |
| 141 | return (String) null; |
| 142 | |
| 143 | String field = json.get("@field").asText(); |
| 144 | return field.replaceFirst("korap:field#", ""); |
| 145 | }; |
| 146 | |
| 147 | private static String _getDate (JsonNode json, int index) { |
| 148 | if (!json.has("operands")) |
| 149 | return (String) null; |
| 150 | |
| 151 | if (!json.get("operands").has(index)) |
| 152 | return (String) null; |
| 153 | |
| 154 | JsonNode date = json.get("operands").get(index); |
| 155 | if (!date.get("@type").asText().equals("korap:date")) |
| 156 | return (String) null; |
| 157 | |
| 158 | if (!date.has("@value")) |
| 159 | return (String) null; |
| 160 | |
| 161 | return date.get("@value").asText(); |
| 162 | }; |
| 163 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 164 | public BooleanFilter or (String type, String ... terms) { |
| 165 | for (String term : terms) { |
| 166 | bool.add( |
| 167 | new TermQuery(new Term(type, term)), |
| 168 | BooleanClause.Occur.SHOULD |
| 169 | ); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 170 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 171 | return this; |
| 172 | }; |
| 173 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 174 | public BooleanFilter or (String type, RegexFilter value) { |
| 175 | bool.add( |
| 176 | value.toQuery(type), |
| 177 | BooleanClause.Occur.SHOULD |
| 178 | ); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 179 | return this; |
| 180 | }; |
| 181 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 182 | public BooleanFilter or (BooleanFilter bf) { |
| 183 | bool.add( |
| 184 | bf.toQuery(), |
| 185 | BooleanClause.Occur.SHOULD |
| 186 | ); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 187 | return this; |
| 188 | }; |
| 189 | |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 190 | public BooleanFilter or (NumericRangeQuery<Integer> nrq) { |
| 191 | bool.add(nrq, BooleanClause.Occur.SHOULD); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 192 | return this; |
| 193 | }; |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 194 | |
| 195 | public BooleanFilter and (String type, String ... terms) { |
| 196 | for (String term : terms) { |
| 197 | bool.add( |
| 198 | new TermQuery(new Term(type, term)), |
| 199 | BooleanClause.Occur.MUST |
| 200 | ); |
| 201 | }; |
| 202 | return this; |
| 203 | }; |
| 204 | |
| 205 | public BooleanFilter and (String type, RegexFilter value) { |
| 206 | bool.add( |
| 207 | value.toQuery(type), |
| 208 | BooleanClause.Occur.MUST |
| 209 | ); |
| 210 | return this; |
| 211 | }; |
| 212 | |
| 213 | public BooleanFilter and (BooleanFilter bf) { |
| 214 | bool.add( |
| 215 | bf.toQuery(), |
| 216 | BooleanClause.Occur.MUST |
| 217 | ); |
| 218 | return this; |
| 219 | }; |
| 220 | |
| 221 | public BooleanFilter since (String date) { |
| 222 | int since = new KorapDate(date).floor(); |
| 223 | |
| 224 | if (since == 0 || since == KorapDate.BEGINNING) |
| 225 | return this; |
| 226 | |
| 227 | bool.add( |
| 228 | NumericRangeQuery.newIntRange( |
| 229 | "pubDate", |
| 230 | since, |
| 231 | KorapDate.END, |
| 232 | true, |
| 233 | true |
| 234 | ), |
| 235 | BooleanClause.Occur.MUST |
| 236 | ); |
| 237 | |
| 238 | return this; |
| 239 | }; |
| 240 | |
| 241 | |
| 242 | public BooleanFilter till (String date) { |
| 243 | try { |
| 244 | int till = new KorapDate(date).ceil(); |
| 245 | if (till == 0 || till == KorapDate.END) |
| 246 | return this; |
| 247 | |
| 248 | bool.add( |
| 249 | NumericRangeQuery.newIntRange( |
| 250 | "pubDate", |
| 251 | KorapDate.BEGINNING, |
| 252 | till, |
| 253 | true, |
| 254 | true |
| 255 | ), |
| 256 | BooleanClause.Occur.MUST |
| 257 | ); |
| 258 | } |
| 259 | catch (NumberFormatException e) { |
| Nils Diewald | b1c3b65 | 2013-12-28 22:47:00 +0000 | [diff] [blame] | 260 | log.warn("Parameter of till(date) is invalid"); |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 261 | }; |
| 262 | return this; |
| 263 | }; |
| 264 | |
| 265 | |
| 266 | public BooleanFilter between (String beginStr, String endStr) { |
| 267 | KorapDate beginDF = new KorapDate(beginStr); |
| 268 | |
| 269 | int begin = beginDF.floor(); |
| 270 | |
| 271 | int end = new KorapDate(endStr).ceil(); |
| 272 | |
| 273 | if (end == 0) |
| 274 | return this; |
| 275 | |
| 276 | if (begin == KorapDate.BEGINNING && end == KorapDate.END) |
| 277 | return this; |
| 278 | |
| 279 | if (begin == end) { |
| 280 | this.and("pubDate", beginDF.toString()); |
| 281 | return this; |
| 282 | }; |
| 283 | |
| 284 | this.bool.add( |
| 285 | NumericRangeQuery.newIntRange( |
| 286 | "pubDate", |
| 287 | begin, |
| 288 | end, |
| 289 | true, |
| 290 | true |
| 291 | ), |
| 292 | BooleanClause.Occur.MUST |
| 293 | ); |
| 294 | return this; |
| 295 | }; |
| 296 | |
| 297 | |
| 298 | public BooleanFilter date (String date) { |
| 299 | KorapDate dateDF = new KorapDate(date); |
| 300 | |
| 301 | if (dateDF.year() == 0) |
| 302 | return this; |
| 303 | |
| 304 | if (dateDF.day() == 0 || dateDF.month() == 0) { |
| 305 | int begin = dateDF.floor(); |
| 306 | int end = dateDF.ceil(); |
| 307 | |
| 308 | if (end == 0 || (begin == KorapDate.BEGINNING && end == KorapDate.END)) |
| 309 | return this; |
| 310 | |
| 311 | this.bool.add( |
| 312 | NumericRangeQuery.newIntRange( |
| 313 | "pubDate", |
| 314 | begin, |
| 315 | end, |
| 316 | true, |
| 317 | true |
| 318 | ), |
| 319 | BooleanClause.Occur.MUST |
| 320 | ); |
| 321 | return this; |
| 322 | }; |
| 323 | |
| 324 | this.and("pubDate", dateDF.toString()); |
| 325 | return this; |
| 326 | }; |
| 327 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 328 | |
| 329 | public Query toQuery () { |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 330 | return this.bool; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 331 | }; |
| 332 | |
| 333 | public String toString () { |
| Nils Diewald | baf68c5 | 2013-11-20 13:22:19 +0000 | [diff] [blame] | 334 | return this.bool.toString(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 335 | }; |
| 336 | }; |