blob: 350d85253fef36914d88fca6d09eea797b06321c [file] [log] [blame]
Nils Diewaldea969502015-02-16 21:10:54 +00001package de.ids_mannheim.korap.collection;
Nils Diewaldf399a672013-11-18 17:55:22 +00002
Akron176c9b12015-07-29 19:53:40 +02003import java.util.*;
4import java.io.IOException;
Akron176c9b12015-07-29 19:53:40 +02005
6import org.apache.lucene.index.Term;
7import org.apache.lucene.queries.TermsFilter;
8import org.apache.lucene.search.*;
9import org.apache.lucene.search.NumericRangeFilter;
Nils Diewaldc383ed02015-02-26 21:35:22 +000010import de.ids_mannheim.korap.util.KrillDate;
Nils Diewaldfb4d7b02014-04-09 17:56:17 +000011
Nils Diewaldf399a672013-11-18 17:55:22 +000012import org.slf4j.Logger;
13import org.slf4j.LoggerFactory;
Nils Diewaldf399a672013-11-18 17:55:22 +000014
Akron176c9b12015-07-29 19:53:40 +020015import de.ids_mannheim.korap.KrillCollection;
16import de.ids_mannheim.korap.collection.BooleanGroupFilter;
17
/*
 * TODO: Optimize!
 * - Remove identical objects in Boolean groups
 * - Flatten Boolean groups
 * - Create "between" ranges for multiple date objects
 */
24
Nils Diewaldea969502015-02-16 21:10:54 +000025public class CollectionBuilder {
Nils Diewaldf399a672013-11-18 17:55:22 +000026
27 // Logger
Akron40550172015-08-04 03:06:12 +020028 private final static Logger log = LoggerFactory
29 .getLogger(KrillCollection.class);
Nils Diewaldf399a672013-11-18 17:55:22 +000030
Nils Diewaldfb4d7b02014-04-09 17:56:17 +000031 // This advices the java compiler to ignore all loggings
32 public static final boolean DEBUG = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +000033
Akron40550172015-08-04 03:06:12 +020034
Akron60dfa7e2015-08-03 22:15:17 +020035 public CollectionBuilder.Interface term (String field, String term) {
36 return new CollectionBuilder.Term(field, term);
Nils Diewaldfb4d7b02014-04-09 17:56:17 +000037 };
Nils Diewaldbb33da22015-03-04 16:24:25 +000038
Akron40550172015-08-04 03:06:12 +020039
Akron60dfa7e2015-08-03 22:15:17 +020040 public CollectionBuilder.Interface re (String field, String term) {
41 return new CollectionBuilder.Term(field, term, true);
Nils Diewaldf399a672013-11-18 17:55:22 +000042 };
43
Akron40550172015-08-04 03:06:12 +020044
Akron60dfa7e2015-08-03 22:15:17 +020045 public CollectionBuilder.Interface since (String field, String date) {
Akron176c9b12015-07-29 19:53:40 +020046 int since = new KrillDate(date).floor();
Nils Diewaldbb33da22015-03-04 16:24:25 +000047
Akron176c9b12015-07-29 19:53:40 +020048 if (since == 0 || since == KrillDate.BEGINNING)
49 return null;
50
Akron60dfa7e2015-08-03 22:15:17 +020051 return new CollectionBuilder.Range(field, since, KrillDate.END);
Nils Diewaldf399a672013-11-18 17:55:22 +000052 };
53
Akron40550172015-08-04 03:06:12 +020054
Akron60dfa7e2015-08-03 22:15:17 +020055 public CollectionBuilder.Interface till (String field, String date) {
Akron176c9b12015-07-29 19:53:40 +020056 try {
57 int till = new KrillDate(date).ceil();
58 if (till == 0 || till == KrillDate.END)
59 return null;
Nils Diewaldbb33da22015-03-04 16:24:25 +000060
Akron60dfa7e2015-08-03 22:15:17 +020061 return new CollectionBuilder.Range(field, KrillDate.BEGINNING, till);
Akron176c9b12015-07-29 19:53:40 +020062 }
63 catch (NumberFormatException e) {
64 log.warn("Parameter of till(date) is invalid");
65 };
66 return null;
Nils Diewaldf399a672013-11-18 17:55:22 +000067 };
68
Akron40550172015-08-04 03:06:12 +020069
Akron60dfa7e2015-08-03 22:15:17 +020070 // This will be optimized away in future versions
Akron40550172015-08-04 03:06:12 +020071 public CollectionBuilder.Interface between (String field, String start,
72 String end) {
Akron60dfa7e2015-08-03 22:15:17 +020073 CollectionBuilder.Interface startObj = this.since(field, start);
74 if (startObj == null)
75 return null;
76
77 CollectionBuilder.Interface endObj = this.till(field, end);
78 if (endObj == null)
79 return null;
80
81 return this.andGroup().with(startObj).with(endObj);
82 };
83
Akron40550172015-08-04 03:06:12 +020084
Akron60dfa7e2015-08-03 22:15:17 +020085 public CollectionBuilder.Interface date (String field, String date) {
Akron176c9b12015-07-29 19:53:40 +020086 KrillDate dateDF = new KrillDate(date);
Nils Diewaldbb33da22015-03-04 16:24:25 +000087
Akron176c9b12015-07-29 19:53:40 +020088 if (dateDF.year == 0)
89 return null;
90
91 if (dateDF.day == 0 || dateDF.month == 0) {
92 int begin = dateDF.floor();
93 int end = dateDF.ceil();
94
95 if (end == 0
Akron40550172015-08-04 03:06:12 +020096 || (begin == KrillDate.BEGINNING && end == KrillDate.END))
Akron176c9b12015-07-29 19:53:40 +020097 return null;
98
Akron60dfa7e2015-08-03 22:15:17 +020099 return new CollectionBuilder.Range(field, begin, end);
Akron176c9b12015-07-29 19:53:40 +0200100 };
101
Akron60dfa7e2015-08-03 22:15:17 +0200102 return new CollectionBuilder.Range(field, dateDF.floor(), dateDF.ceil());
Nils Diewaldf399a672013-11-18 17:55:22 +0000103 };
104
Akron40550172015-08-04 03:06:12 +0200105
Akron60dfa7e2015-08-03 22:15:17 +0200106 public CollectionBuilder.Group andGroup () {
107 return new CollectionBuilder.Group(false);
Nils Diewaldf399a672013-11-18 17:55:22 +0000108 };
109
Akron40550172015-08-04 03:06:12 +0200110
Akron60dfa7e2015-08-03 22:15:17 +0200111 public CollectionBuilder.Group orGroup () {
112 return new CollectionBuilder.Group(true);
Nils Diewaldf399a672013-11-18 17:55:22 +0000113 };
114
Akron60dfa7e2015-08-03 22:15:17 +0200115 public interface Interface {
Akron176c9b12015-07-29 19:53:40 +0200116 public String toString ();
Akron40550172015-08-04 03:06:12 +0200117
118
Akron176c9b12015-07-29 19:53:40 +0200119 public Filter toFilter ();
Akron40550172015-08-04 03:06:12 +0200120
121
Akron176c9b12015-07-29 19:53:40 +0200122 public boolean isNegative ();
Akron40550172015-08-04 03:06:12 +0200123
124
Akron60dfa7e2015-08-03 22:15:17 +0200125 public CollectionBuilder.Interface not ();
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000126 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000127
Akron60dfa7e2015-08-03 22:15:17 +0200128 public class Term implements CollectionBuilder.Interface {
Akron176c9b12015-07-29 19:53:40 +0200129 private boolean isNegative = false;
130 private boolean regex = false;
131 private String field;
132 private String term;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000133
Akron40550172015-08-04 03:06:12 +0200134
Akron60dfa7e2015-08-03 22:15:17 +0200135 public Term (String field, String term) {
Akron176c9b12015-07-29 19:53:40 +0200136 this.field = field;
137 this.term = term;
138 };
139
Akron40550172015-08-04 03:06:12 +0200140
Akron60dfa7e2015-08-03 22:15:17 +0200141 public Term (String field, String term, boolean regex) {
Akron176c9b12015-07-29 19:53:40 +0200142 this.field = field;
143 this.term = term;
144 this.regex = regex;
145 };
146
Akron40550172015-08-04 03:06:12 +0200147
Akron176c9b12015-07-29 19:53:40 +0200148 public Filter toFilter () {
149 // Regular expression
150 if (this.regex)
151 return new QueryWrapperFilter(
Akron40550172015-08-04 03:06:12 +0200152 new RegexpQuery(new org.apache.lucene.index.Term(
153 this.field, this.term)));
154
Akron176c9b12015-07-29 19:53:40 +0200155 // Simple term
Akron40550172015-08-04 03:06:12 +0200156 return new TermsFilter(new org.apache.lucene.index.Term(this.field,
157 this.term));
Akron176c9b12015-07-29 19:53:40 +0200158 };
159
Akron40550172015-08-04 03:06:12 +0200160
Akron176c9b12015-07-29 19:53:40 +0200161 public String toString () {
Akron60dfa7e2015-08-03 22:15:17 +0200162 Filter filter = this.toFilter();
163 if (filter == null)
164 return "";
165 return filter.toString();
Akron176c9b12015-07-29 19:53:40 +0200166 };
167
Akron40550172015-08-04 03:06:12 +0200168
Akron176c9b12015-07-29 19:53:40 +0200169 public boolean isNegative () {
170 return this.isNegative;
171 };
172
173
Akron60dfa7e2015-08-03 22:15:17 +0200174 public CollectionBuilder.Interface not () {
Akron176c9b12015-07-29 19:53:40 +0200175 this.isNegative = true;
176 return this;
177 };
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000178 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000179
Akron60dfa7e2015-08-03 22:15:17 +0200180 public class Group implements CollectionBuilder.Interface {
Akron176c9b12015-07-29 19:53:40 +0200181 private boolean isOptional = false;
182 private boolean isNegative = true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000183
Akron40550172015-08-04 03:06:12 +0200184
Akron176c9b12015-07-29 19:53:40 +0200185 public boolean isNegative () {
186 return this.isNegative;
187 };
188
Akron40550172015-08-04 03:06:12 +0200189
Akron176c9b12015-07-29 19:53:40 +0200190 public boolean isOptional () {
191 return this.isOptional;
192 };
193
Akron60dfa7e2015-08-03 22:15:17 +0200194 private ArrayList<CollectionBuilder.Interface> operands;
Akron176c9b12015-07-29 19:53:40 +0200195
Akron40550172015-08-04 03:06:12 +0200196
Akron60dfa7e2015-08-03 22:15:17 +0200197 public Group (boolean optional) {
Akron176c9b12015-07-29 19:53:40 +0200198 this.isOptional = optional;
Akron60dfa7e2015-08-03 22:15:17 +0200199 this.operands = new ArrayList<CollectionBuilder.Interface>(3);
Akron176c9b12015-07-29 19:53:40 +0200200 };
201
Akron40550172015-08-04 03:06:12 +0200202
Akron60dfa7e2015-08-03 22:15:17 +0200203 public Group with (CollectionBuilder.Interface cb) {
Akronfd05f502015-07-30 18:34:26 +0200204 if (cb == null)
205 return this;
206
Akron176c9b12015-07-29 19:53:40 +0200207 if (!cb.isNegative())
208 this.isNegative = false;
209 this.operands.add(cb);
210 return this;
211 };
212
Akron40550172015-08-04 03:06:12 +0200213
Akron60dfa7e2015-08-03 22:15:17 +0200214 public Group with (String field, String term) {
215 if (field == null || term == null)
216 return this;
217 return this.with(new CollectionBuilder.Term(field, term));
218 };
Akron176c9b12015-07-29 19:53:40 +0200219
Akron40550172015-08-04 03:06:12 +0200220
Akron176c9b12015-07-29 19:53:40 +0200221 public Filter toFilter () {
222 if (this.operands == null || this.operands.isEmpty())
223 return null;
224
225 if (this.operands.size() == 1)
226 return this.operands.get(0).toFilter();
227
228 // BooleanFilter bool = new BooleanFilter();
229 BooleanGroupFilter bool = new BooleanGroupFilter(this.isOptional);
230
Akron60dfa7e2015-08-03 22:15:17 +0200231 Iterator<CollectionBuilder.Interface> i = this.operands.iterator();
Akron176c9b12015-07-29 19:53:40 +0200232 while (i.hasNext()) {
Akron60dfa7e2015-08-03 22:15:17 +0200233 CollectionBuilder.Interface cb = i.next();
Akron176c9b12015-07-29 19:53:40 +0200234 if (cb.isNegative()) {
235 bool.without(cb.toFilter());
236 }
237 else {
238 bool.with(cb.toFilter());
239 };
240 };
241
242 return bool;
243 };
244
Akron40550172015-08-04 03:06:12 +0200245
Akron176c9b12015-07-29 19:53:40 +0200246 public String toString () {
Akron60dfa7e2015-08-03 22:15:17 +0200247 Filter filter = this.toFilter();
248 if (filter == null)
249 return "";
250 return filter.toString();
Akron176c9b12015-07-29 19:53:40 +0200251 };
252
Akron40550172015-08-04 03:06:12 +0200253
Akron60dfa7e2015-08-03 22:15:17 +0200254 public CollectionBuilder.Interface not () {
Akron176c9b12015-07-29 19:53:40 +0200255 this.isNegative = true;
256 return this;
257 };
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000258 };
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000259
Akron60dfa7e2015-08-03 22:15:17 +0200260 public class Range implements CollectionBuilder.Interface {
Akron176c9b12015-07-29 19:53:40 +0200261 private boolean isNegative = false;
262 private String field;
263 private int start, end;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000264
Akron40550172015-08-04 03:06:12 +0200265
Akron60dfa7e2015-08-03 22:15:17 +0200266 public Range (String field, int start, int end) {
Akron176c9b12015-07-29 19:53:40 +0200267 this.field = field;
268 this.start = start;
269 this.end = end;
270 };
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000271
Akron40550172015-08-04 03:06:12 +0200272
Akron176c9b12015-07-29 19:53:40 +0200273 public boolean isNegative () {
274 return this.isNegative;
275 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000276
Akron40550172015-08-04 03:06:12 +0200277
Akron176c9b12015-07-29 19:53:40 +0200278 public String toString () {
Akron60dfa7e2015-08-03 22:15:17 +0200279 Filter filter = this.toFilter();
280 if (filter == null)
281 return "";
282 return filter.toString();
Akron176c9b12015-07-29 19:53:40 +0200283 };
Nils Diewald8db8f922014-10-24 17:43:13 +0000284
Akron40550172015-08-04 03:06:12 +0200285
Akron176c9b12015-07-29 19:53:40 +0200286 public Filter toFilter () {
Akron40550172015-08-04 03:06:12 +0200287 return NumericRangeFilter.newIntRange(this.field, this.start,
288 this.end, true, true);
Akron176c9b12015-07-29 19:53:40 +0200289 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000290
Akron40550172015-08-04 03:06:12 +0200291
Akron60dfa7e2015-08-03 22:15:17 +0200292 public CollectionBuilder.Interface not () {
Akron176c9b12015-07-29 19:53:40 +0200293 this.isNegative = true;
294 return this;
295 };
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000296 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000297};