blob: 475b5be430fbedf90610c9c2b009a9ee4144a1a2 [file] [log] [blame]
Akron3ba74f22015-07-24 18:46:17 +02001package de.ids_mannheim.korap.collection;
2
3import java.io.IOException;
4import java.util.*;
5
6import org.apache.lucene.search.Filter;
7import org.apache.lucene.util.FixedBitSet;
8import org.apache.lucene.search.DocIdSet;
9import org.apache.lucene.search.DocIdSetIterator;
10import org.apache.lucene.search.BitsFilteredDocIdSet;
Akron700c1eb2015-09-25 16:57:30 +020011import org.apache.lucene.index.LeafReaderContext;
12import org.apache.lucene.index.LeafReader;
Akron3ba74f22015-07-24 18:46:17 +020013import org.apache.lucene.util.Bits;
Akron700c1eb2015-09-25 16:57:30 +020014import org.apache.lucene.util.BitDocIdSet;
Akron3ba74f22015-07-24 18:46:17 +020015
16import de.ids_mannheim.korap.KrillCollection;
17
18import org.slf4j.Logger;
19import org.slf4j.LoggerFactory;
20
21/**
22 * A container Filter that allows Boolean composition of Filters
23 * in groups (either or-groups or and-groups).
Akron40550172015-08-04 03:06:12 +020024 *
Akron3ba74f22015-07-24 18:46:17 +020025 * @author Nils Diewald
Akron40550172015-08-04 03:06:12 +020026 *
27 * This filter is roughly based on
28 * org.apache.lucene.queries.BooleanFilter.
Akron3ba74f22015-07-24 18:46:17 +020029 */
30public class BooleanGroupFilter extends Filter {
31 // Group is either an or- or an and-Group
32 private boolean isOptional;
33
34 // Logger
Akron40550172015-08-04 03:06:12 +020035 private final static Logger log = LoggerFactory
36 .getLogger(KrillCollection.class);
Akron3ba74f22015-07-24 18:46:17 +020037
38 // This advices the java compiler to ignore all loggings
Akron1d63f272015-07-28 12:19:49 +020039 public static final boolean DEBUG = false;
Akron3ba74f22015-07-24 18:46:17 +020040
41 // Init operands list
42 private final List<GroupFilterOperand> operands = new ArrayList<>(3);
43
44 // Operand in the filter group
45 private class GroupFilterOperand {
46 public Filter filter;
47 public boolean isNegative;
48
Akron40550172015-08-04 03:06:12 +020049
Akron3ba74f22015-07-24 18:46:17 +020050 // Operand has filter and negativity information
51 public GroupFilterOperand (Filter filter, boolean negative) {
52 this.filter = filter;
53 this.isNegative = negative;
54 };
55 };
56
Akron40550172015-08-04 03:06:12 +020057
Akron3ba74f22015-07-24 18:46:17 +020058 /**
59 * Create a new BooleanGroupFilter.
60 * Accepts a boolean parameter to make it an or-Group
61 * (<pre>true</pre>) or an and-Group (<pre>true</pre>).
62 */
63 public BooleanGroupFilter (boolean optional) {
64 this.isOptional = optional;
65 };
66
67
68 /**
69 * Add an operand to the list of filter operands.
70 * The operand is a positive filter that won't be flipped.
71 */
72 public final void with (Filter filter) {
73 this.operands.add(new GroupFilterOperand(filter, false));
74 };
75
76
77 /**
78 * Add an operand to the list of filter operands.
79 * The operand is a negative filter that will be flipped.
80 */
81 public final void without (Filter filter) {
82 this.operands.add(new GroupFilterOperand(filter, true));
83 };
84
85
86 @Override
87 public boolean equals (Object obj) {
88 if (this == obj)
89 return true;
Akron40550172015-08-04 03:06:12 +020090
Akron3ba74f22015-07-24 18:46:17 +020091 if ((obj == null) || (obj.getClass() != this.getClass()))
92 return false;
93
94 final BooleanGroupFilter other = (BooleanGroupFilter) obj;
95 return operands.equals(other.operands);
96 };
97
98
99 @Override
Akron40550172015-08-04 03:06:12 +0200100 public int hashCode () {
Akron3ba74f22015-07-24 18:46:17 +0200101 return 657153719 ^ operands.hashCode();
102 };
103
Akron40550172015-08-04 03:06:12 +0200104
Akron3ba74f22015-07-24 18:46:17 +0200105 @Override
106 public String toString () {
Eliza Margaretha6f989202016-10-14 21:48:29 +0200107 StringBuilder buffer = new StringBuilder(
108 this.isOptional ? "OrGroup(" : "AndGroup(");
Akron3ba74f22015-07-24 18:46:17 +0200109 boolean first = true;
110 for (final GroupFilterOperand operand : this.operands) {
111 if (first)
112 first = false;
113 else
114 buffer.append(" ");
115
116 if (operand.isNegative)
117 buffer.append('-');
118
119 buffer.append(operand.filter.toString());
120 };
121 return buffer.append(')').toString();
122 };
123
Akron42993552016-02-04 13:24:24 +0100124
Akron700c1eb2015-09-25 16:57:30 +0200125 /*
126 @Override
127 public String toString (String str) {
128 return this.toString();
129 };
130 */
131
Akron40550172015-08-04 03:06:12 +0200132
Akron3ba74f22015-07-24 18:46:17 +0200133 @Override
Akron700c1eb2015-09-25 16:57:30 +0200134 public DocIdSet getDocIdSet (LeafReaderContext context, Bits acceptDocs)
Akron40550172015-08-04 03:06:12 +0200135 throws IOException {
Akron700c1eb2015-09-25 16:57:30 +0200136 final LeafReader reader = context.reader();
Akron3ba74f22015-07-24 18:46:17 +0200137 int maxDoc = reader.maxDoc();
Akron40550172015-08-04 03:06:12 +0200138 FixedBitSet bitset = new FixedBitSet(maxDoc);
Akron3ba74f22015-07-24 18:46:17 +0200139 FixedBitSet combinator = new FixedBitSet(maxDoc);
140 boolean init = true;
141
142 if (DEBUG)
143 log.debug("Start trying to filter on bitset of length {}", maxDoc);
144
145 for (final GroupFilterOperand operand : this.operands) {
146 final DocIdSet docids = operand.filter.getDocIdSet(context, null);
Akron40550172015-08-04 03:06:12 +0200147 final DocIdSetIterator filterIter = (docids == null) ? null
148 : docids.iterator();
Akron3ba74f22015-07-24 18:46:17 +0200149
150 if (DEBUG)
151 log.debug("> Filter to bitset of {} ({} negative)",
Akron40550172015-08-04 03:06:12 +0200152 operand.filter.toString(), operand.isNegative);
Akron3ba74f22015-07-24 18:46:17 +0200153
154 // Filter resulted in no docs
155 if (filterIter == null) {
156
Akron40550172015-08-04 03:06:12 +0200157 if (DEBUG)
158 log.debug("- Filter is null");
Akron3ba74f22015-07-24 18:46:17 +0200159
160 // Filter matches
161 if (operand.isNegative) {
162
163 // This means, everything is allowed
164 if (this.isOptional) {
165
166 // Everything is allowed
Akron40550172015-08-04 03:06:12 +0200167 if (DEBUG)
168 log.debug("- Filter to allow all documents");
Akron3ba74f22015-07-24 18:46:17 +0200169
170 bitset.set(0, maxDoc);
Eliza Margaretha6f989202016-10-14 21:48:29 +0200171 return BitsFilteredDocIdSet
172 .wrap(new BitDocIdSet(bitset), acceptDocs);
Akron3ba74f22015-07-24 18:46:17 +0200173 };
174
175 // There is no possible match
Akron40550172015-08-04 03:06:12 +0200176 if (DEBUG)
177 log.debug("- Filter to allow no documents (1)");
Akron3ba74f22015-07-24 18:46:17 +0200178 return null;
179 }
180
181 // The result is unimportant
182 else if (this.isOptional) {
Akron40550172015-08-04 03:06:12 +0200183 if (DEBUG)
184 log.debug("- Filter is ignorable");
Akron3ba74f22015-07-24 18:46:17 +0200185 continue;
186 };
187
188 // There is no possible match
Akron40550172015-08-04 03:06:12 +0200189 if (DEBUG)
190 log.debug("- Filter to allow no documents (2)");
Akron3ba74f22015-07-24 18:46:17 +0200191 return null;
192 }
193
194 // Initialize bitset
195 else if (init) {
196
197 bitset.or(filterIter);
198
Akron40550172015-08-04 03:06:12 +0200199 if (DEBUG)
200 log.debug("- Filter is inial with card {}",
201 bitset.cardinality());
Akron3ba74f22015-07-24 18:46:17 +0200202
203 // Flip the matching documents
204 if (operand.isNegative) {
205 bitset.flip(0, maxDoc);
Akron40550172015-08-04 03:06:12 +0200206 if (DEBUG)
207 log.debug(
208 "- Filter is negative - so flipped to card {} (1)",
209 bitset.cardinality());
Akron3ba74f22015-07-24 18:46:17 +0200210 };
211
212 init = false;
213 }
214 else {
215
Akron40550172015-08-04 03:06:12 +0200216 if (DEBUG)
217 log.debug("- Filter is fine and operating");
Akron3ba74f22015-07-24 18:46:17 +0200218
219 // Operator is negative and needs to be flipped
220 if (operand.isNegative) {
221 if (this.isOptional) {
Akron40550172015-08-04 03:06:12 +0200222 if (DEBUG)
223 log.debug("- Filter is negative optional");
Akron3ba74f22015-07-24 18:46:17 +0200224
225 // Negative or ... may be slow
226 combinator.or(filterIter);
227 combinator.flip(0, maxDoc);
228
Akron40550172015-08-04 03:06:12 +0200229 if (DEBUG)
230 log.debug(
231 "- Filter is negative - so flipped to card {} (2)",
232 combinator.cardinality());
Akron3ba74f22015-07-24 18:46:17 +0200233
234 bitset.or(combinator);
235 combinator.clear(0, maxDoc);
236 }
237
238 // Negative and
239 else {
Akron40550172015-08-04 03:06:12 +0200240 if (DEBUG)
241 log.debug("- Filter is negative not optional");
Akron3ba74f22015-07-24 18:46:17 +0200242 bitset.andNot(filterIter);
Akron40550172015-08-04 03:06:12 +0200243 if (DEBUG)
244 log.debug("- Filter is negative - so andNotted");
Akron3ba74f22015-07-24 18:46:17 +0200245 }
246 }
247 else if (this.isOptional) {
Akron40550172015-08-04 03:06:12 +0200248 if (DEBUG)
249 log.debug("- Filter is simply optional");
Akron3ba74f22015-07-24 18:46:17 +0200250 bitset.or(filterIter);
251 }
252 else {
Akron40550172015-08-04 03:06:12 +0200253 if (DEBUG)
254 log.debug("- Filter is simply not optional");
Akron3ba74f22015-07-24 18:46:17 +0200255 bitset.and(filterIter);
256 // TODO: Check with nextSetBit() if the filter is not applicable
257 };
258
Akron40550172015-08-04 03:06:12 +0200259 if (DEBUG)
260 log.debug("- Subresult has card {} ", bitset.cardinality());
Akron3ba74f22015-07-24 18:46:17 +0200261 };
262 };
Akron700c1eb2015-09-25 16:57:30 +0200263 return BitsFilteredDocIdSet.wrap(new BitDocIdSet(bitset), acceptDocs);
Akron3ba74f22015-07-24 18:46:17 +0200264 };
265};