blob: 4716804ff81b4e240c2d963d227646f5c503f8fa [file] [log] [blame]
Nils Diewaldea969502015-02-16 21:10:54 +00001package de.ids_mannheim.korap.collection;
Nils Diewaldf399a672013-11-18 17:55:22 +00002
Akron176c9b12015-07-29 19:53:40 +02003import java.util.*;
4import java.io.IOException;
Akron176c9b12015-07-29 19:53:40 +02005
6import org.apache.lucene.index.Term;
7import org.apache.lucene.queries.TermsFilter;
8import org.apache.lucene.search.*;
9import org.apache.lucene.search.NumericRangeFilter;
Nils Diewaldc383ed02015-02-26 21:35:22 +000010import de.ids_mannheim.korap.util.KrillDate;
Nils Diewaldfb4d7b02014-04-09 17:56:17 +000011
Nils Diewaldf399a672013-11-18 17:55:22 +000012import org.slf4j.Logger;
13import org.slf4j.LoggerFactory;
Nils Diewaldf399a672013-11-18 17:55:22 +000014
Akron176c9b12015-07-29 19:53:40 +020015import de.ids_mannheim.korap.KrillCollection;
16import de.ids_mannheim.korap.collection.BooleanGroupFilter;
17
/*
 * TODO: Optimize!
 * - Remove identical objects in Boolean groups
 * - Flatten Boolean groups
 * - Create "between" ranges for multiple date objects
 */
24
Nils Diewaldea969502015-02-16 21:10:54 +000025public class CollectionBuilder {
Nils Diewaldf399a672013-11-18 17:55:22 +000026
27 // Logger
Akron40550172015-08-04 03:06:12 +020028 private final static Logger log = LoggerFactory
29 .getLogger(KrillCollection.class);
Nils Diewaldf399a672013-11-18 17:55:22 +000030
Nils Diewaldfb4d7b02014-04-09 17:56:17 +000031 // This advices the java compiler to ignore all loggings
32 public static final boolean DEBUG = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +000033
Akron40550172015-08-04 03:06:12 +020034
Akron60dfa7e2015-08-03 22:15:17 +020035 public CollectionBuilder.Interface term (String field, String term) {
36 return new CollectionBuilder.Term(field, term);
Nils Diewaldfb4d7b02014-04-09 17:56:17 +000037 };
Nils Diewaldbb33da22015-03-04 16:24:25 +000038
Akron40550172015-08-04 03:06:12 +020039
Akron60dfa7e2015-08-03 22:15:17 +020040 public CollectionBuilder.Interface re (String field, String term) {
41 return new CollectionBuilder.Term(field, term, true);
Nils Diewaldf399a672013-11-18 17:55:22 +000042 };
43
Akron40550172015-08-04 03:06:12 +020044
Akron60dfa7e2015-08-03 22:15:17 +020045 public CollectionBuilder.Interface since (String field, String date) {
Akron176c9b12015-07-29 19:53:40 +020046 int since = new KrillDate(date).floor();
Nils Diewaldbb33da22015-03-04 16:24:25 +000047
Akron176c9b12015-07-29 19:53:40 +020048 if (since == 0 || since == KrillDate.BEGINNING)
49 return null;
50
Akron60dfa7e2015-08-03 22:15:17 +020051 return new CollectionBuilder.Range(field, since, KrillDate.END);
Nils Diewaldf399a672013-11-18 17:55:22 +000052 };
53
Akron5e3436f2017-07-04 15:28:03 +020054 public CollectionBuilder.Interface nothing () {
55
56 // Requires that a field with name "0---" does not exist
57 return new CollectionBuilder.Term("0---", "0");
58 };
59
Akron40550172015-08-04 03:06:12 +020060
Akron60dfa7e2015-08-03 22:15:17 +020061 public CollectionBuilder.Interface till (String field, String date) {
Akron176c9b12015-07-29 19:53:40 +020062 try {
63 int till = new KrillDate(date).ceil();
64 if (till == 0 || till == KrillDate.END)
65 return null;
Nils Diewaldbb33da22015-03-04 16:24:25 +000066
Eliza Margaretha6f989202016-10-14 21:48:29 +020067 return new CollectionBuilder.Range(field, KrillDate.BEGINNING,
68 till);
Akron176c9b12015-07-29 19:53:40 +020069 }
70 catch (NumberFormatException e) {
71 log.warn("Parameter of till(date) is invalid");
72 };
73 return null;
Nils Diewaldf399a672013-11-18 17:55:22 +000074 };
75
Akron40550172015-08-04 03:06:12 +020076
Akron60dfa7e2015-08-03 22:15:17 +020077 // This will be optimized away in future versions
Akron40550172015-08-04 03:06:12 +020078 public CollectionBuilder.Interface between (String field, String start,
79 String end) {
Akron60dfa7e2015-08-03 22:15:17 +020080 CollectionBuilder.Interface startObj = this.since(field, start);
81 if (startObj == null)
82 return null;
83
84 CollectionBuilder.Interface endObj = this.till(field, end);
85 if (endObj == null)
86 return null;
87
88 return this.andGroup().with(startObj).with(endObj);
89 };
90
Akron40550172015-08-04 03:06:12 +020091
Akron60dfa7e2015-08-03 22:15:17 +020092 public CollectionBuilder.Interface date (String field, String date) {
Akron176c9b12015-07-29 19:53:40 +020093 KrillDate dateDF = new KrillDate(date);
Nils Diewaldbb33da22015-03-04 16:24:25 +000094
Akron176c9b12015-07-29 19:53:40 +020095 if (dateDF.year == 0)
96 return null;
97
98 if (dateDF.day == 0 || dateDF.month == 0) {
99 int begin = dateDF.floor();
100 int end = dateDF.ceil();
101
102 if (end == 0
Akron40550172015-08-04 03:06:12 +0200103 || (begin == KrillDate.BEGINNING && end == KrillDate.END))
Akron176c9b12015-07-29 19:53:40 +0200104 return null;
105
Akron60dfa7e2015-08-03 22:15:17 +0200106 return new CollectionBuilder.Range(field, begin, end);
Akron176c9b12015-07-29 19:53:40 +0200107 };
108
Eliza Margaretha6f989202016-10-14 21:48:29 +0200109 return new CollectionBuilder.Range(field, dateDF.floor(),
110 dateDF.ceil());
Nils Diewaldf399a672013-11-18 17:55:22 +0000111 };
112
Akron40550172015-08-04 03:06:12 +0200113
Akron60dfa7e2015-08-03 22:15:17 +0200114 public CollectionBuilder.Group andGroup () {
115 return new CollectionBuilder.Group(false);
Nils Diewaldf399a672013-11-18 17:55:22 +0000116 };
117
Akron40550172015-08-04 03:06:12 +0200118
Akron60dfa7e2015-08-03 22:15:17 +0200119 public CollectionBuilder.Group orGroup () {
120 return new CollectionBuilder.Group(true);
Nils Diewaldf399a672013-11-18 17:55:22 +0000121 };
122
Akron60dfa7e2015-08-03 22:15:17 +0200123 public interface Interface {
Akron176c9b12015-07-29 19:53:40 +0200124 public String toString ();
Akron40550172015-08-04 03:06:12 +0200125
126
Akron176c9b12015-07-29 19:53:40 +0200127 public Filter toFilter ();
Akron40550172015-08-04 03:06:12 +0200128
129
Akron176c9b12015-07-29 19:53:40 +0200130 public boolean isNegative ();
Akron40550172015-08-04 03:06:12 +0200131
132
Akron60dfa7e2015-08-03 22:15:17 +0200133 public CollectionBuilder.Interface not ();
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000134 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000135
Akron60dfa7e2015-08-03 22:15:17 +0200136 public class Term implements CollectionBuilder.Interface {
Akron176c9b12015-07-29 19:53:40 +0200137 private boolean isNegative = false;
138 private boolean regex = false;
139 private String field;
140 private String term;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000141
Akron40550172015-08-04 03:06:12 +0200142
Akron60dfa7e2015-08-03 22:15:17 +0200143 public Term (String field, String term) {
Akron176c9b12015-07-29 19:53:40 +0200144 this.field = field;
145 this.term = term;
146 };
147
Akron40550172015-08-04 03:06:12 +0200148
Akron60dfa7e2015-08-03 22:15:17 +0200149 public Term (String field, String term, boolean regex) {
Akron176c9b12015-07-29 19:53:40 +0200150 this.field = field;
151 this.term = term;
152 this.regex = regex;
153 };
154
Akron40550172015-08-04 03:06:12 +0200155
Akron176c9b12015-07-29 19:53:40 +0200156 public Filter toFilter () {
157 // Regular expression
158 if (this.regex)
159 return new QueryWrapperFilter(
Akron40550172015-08-04 03:06:12 +0200160 new RegexpQuery(new org.apache.lucene.index.Term(
161 this.field, this.term)));
162
Akron176c9b12015-07-29 19:53:40 +0200163 // Simple term
Eliza Margaretha6f989202016-10-14 21:48:29 +0200164 return new TermsFilter(
165 new org.apache.lucene.index.Term(this.field, this.term));
Akron176c9b12015-07-29 19:53:40 +0200166 };
167
Akron40550172015-08-04 03:06:12 +0200168
Akron176c9b12015-07-29 19:53:40 +0200169 public String toString () {
Akron60dfa7e2015-08-03 22:15:17 +0200170 Filter filter = this.toFilter();
171 if (filter == null)
172 return "";
173 return filter.toString();
Akron176c9b12015-07-29 19:53:40 +0200174 };
175
Akron40550172015-08-04 03:06:12 +0200176
Akron176c9b12015-07-29 19:53:40 +0200177 public boolean isNegative () {
178 return this.isNegative;
179 };
180
181
Akron60dfa7e2015-08-03 22:15:17 +0200182 public CollectionBuilder.Interface not () {
Akron176c9b12015-07-29 19:53:40 +0200183 this.isNegative = true;
184 return this;
185 };
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000186 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000187
Akron60dfa7e2015-08-03 22:15:17 +0200188 public class Group implements CollectionBuilder.Interface {
Akron176c9b12015-07-29 19:53:40 +0200189 private boolean isOptional = false;
190 private boolean isNegative = true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000191
Akron40550172015-08-04 03:06:12 +0200192
Akron176c9b12015-07-29 19:53:40 +0200193 public boolean isNegative () {
194 return this.isNegative;
195 };
196
Akron40550172015-08-04 03:06:12 +0200197
Akron176c9b12015-07-29 19:53:40 +0200198 public boolean isOptional () {
199 return this.isOptional;
200 };
201
Akron60dfa7e2015-08-03 22:15:17 +0200202 private ArrayList<CollectionBuilder.Interface> operands;
Akron176c9b12015-07-29 19:53:40 +0200203
Akron40550172015-08-04 03:06:12 +0200204
Akron60dfa7e2015-08-03 22:15:17 +0200205 public Group (boolean optional) {
Akron176c9b12015-07-29 19:53:40 +0200206 this.isOptional = optional;
Akron60dfa7e2015-08-03 22:15:17 +0200207 this.operands = new ArrayList<CollectionBuilder.Interface>(3);
Akron176c9b12015-07-29 19:53:40 +0200208 };
209
Akron40550172015-08-04 03:06:12 +0200210
Akron60dfa7e2015-08-03 22:15:17 +0200211 public Group with (CollectionBuilder.Interface cb) {
Akronfd05f502015-07-30 18:34:26 +0200212 if (cb == null)
213 return this;
214
Akron176c9b12015-07-29 19:53:40 +0200215 if (!cb.isNegative())
216 this.isNegative = false;
217 this.operands.add(cb);
218 return this;
219 };
220
Akron40550172015-08-04 03:06:12 +0200221
Akron60dfa7e2015-08-03 22:15:17 +0200222 public Group with (String field, String term) {
223 if (field == null || term == null)
224 return this;
225 return this.with(new CollectionBuilder.Term(field, term));
226 };
Akron176c9b12015-07-29 19:53:40 +0200227
Akron40550172015-08-04 03:06:12 +0200228
Akron176c9b12015-07-29 19:53:40 +0200229 public Filter toFilter () {
230 if (this.operands == null || this.operands.isEmpty())
231 return null;
232
233 if (this.operands.size() == 1)
234 return this.operands.get(0).toFilter();
235
236 // BooleanFilter bool = new BooleanFilter();
237 BooleanGroupFilter bool = new BooleanGroupFilter(this.isOptional);
238
Akron60dfa7e2015-08-03 22:15:17 +0200239 Iterator<CollectionBuilder.Interface> i = this.operands.iterator();
Akron176c9b12015-07-29 19:53:40 +0200240 while (i.hasNext()) {
Akron60dfa7e2015-08-03 22:15:17 +0200241 CollectionBuilder.Interface cb = i.next();
Akron176c9b12015-07-29 19:53:40 +0200242 if (cb.isNegative()) {
243 bool.without(cb.toFilter());
244 }
245 else {
246 bool.with(cb.toFilter());
247 };
248 };
249
250 return bool;
251 };
252
Akron40550172015-08-04 03:06:12 +0200253
Akron176c9b12015-07-29 19:53:40 +0200254 public String toString () {
Akron60dfa7e2015-08-03 22:15:17 +0200255 Filter filter = this.toFilter();
256 if (filter == null)
257 return "";
258 return filter.toString();
Akron176c9b12015-07-29 19:53:40 +0200259 };
260
Akron40550172015-08-04 03:06:12 +0200261
Akron60dfa7e2015-08-03 22:15:17 +0200262 public CollectionBuilder.Interface not () {
Akron176c9b12015-07-29 19:53:40 +0200263 this.isNegative = true;
264 return this;
265 };
Nils Diewaldbaf68c52013-11-20 13:22:19 +0000266 };
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000267
Akron60dfa7e2015-08-03 22:15:17 +0200268 public class Range implements CollectionBuilder.Interface {
Akron176c9b12015-07-29 19:53:40 +0200269 private boolean isNegative = false;
270 private String field;
271 private int start, end;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000272
Akron40550172015-08-04 03:06:12 +0200273
Akron60dfa7e2015-08-03 22:15:17 +0200274 public Range (String field, int start, int end) {
Akron176c9b12015-07-29 19:53:40 +0200275 this.field = field;
276 this.start = start;
277 this.end = end;
278 };
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000279
Akron40550172015-08-04 03:06:12 +0200280
Akron176c9b12015-07-29 19:53:40 +0200281 public boolean isNegative () {
282 return this.isNegative;
283 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000284
Akron40550172015-08-04 03:06:12 +0200285
Akron176c9b12015-07-29 19:53:40 +0200286 public String toString () {
Akron60dfa7e2015-08-03 22:15:17 +0200287 Filter filter = this.toFilter();
288 if (filter == null)
289 return "";
290 return filter.toString();
Akron176c9b12015-07-29 19:53:40 +0200291 };
Nils Diewald8db8f922014-10-24 17:43:13 +0000292
Akron40550172015-08-04 03:06:12 +0200293
Akron176c9b12015-07-29 19:53:40 +0200294 public Filter toFilter () {
Akron40550172015-08-04 03:06:12 +0200295 return NumericRangeFilter.newIntRange(this.field, this.start,
296 this.end, true, true);
Akron176c9b12015-07-29 19:53:40 +0200297 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000298
Akron40550172015-08-04 03:06:12 +0200299
Akron60dfa7e2015-08-03 22:15:17 +0200300 public CollectionBuilder.Interface not () {
Akron176c9b12015-07-29 19:53:40 +0200301 this.isNegative = true;
302 return this;
303 };
Nils Diewaldfb4d7b02014-04-09 17:56:17 +0000304 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000305};