blob: 2fb8e84d7e76bd703f97690516cac9bbd49a6aea [file] [log] [blame]
Eliza Margaretha7ee76da2014-08-12 15:32:33 +00001package de.ids_mannheim.korap.query;
2
3import java.io.IOException;
4import java.util.Map;
5
6import org.apache.lucene.index.AtomicReaderContext;
7import org.apache.lucene.index.Term;
8import org.apache.lucene.index.TermContext;
9import org.apache.lucene.search.spans.SpanQuery;
10import org.apache.lucene.search.spans.Spans;
Eliza Margarethaf0171c52015-01-14 17:38:16 +000011import org.apache.lucene.search.spans.TermSpans;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000012import org.apache.lucene.util.Bits;
13
Eliza Margaretha99c72c22014-09-17 08:38:25 +000014import de.ids_mannheim.korap.query.spans.ExpandedExclusionSpans;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000015import de.ids_mannheim.korap.query.spans.ExpandedSpans;
16
Eliza Margarethaf0171c52015-01-14 17:38:16 +000017/**
Nils Diewaldbb33da22015-03-04 16:24:25 +000018 * SpanExpansionQuery makes a span longer by stretching out the start
19 * or the end
20 * position of the span. The constraints of the expansion, such as how
21 * large the
22 * expansion should be (min and max position) and the direction of the
23 * expansion
24 * with respect to the original span, are specified in
25 * ExpansionConstraint. The
26 * direction is designated with the sign of a number, namely a
27 * negative number
28 * signifies the left direction, and a positive number (including 0)
29 * signifies
Eliza Margarethadffd0592015-01-15 18:24:39 +000030 * the right direction.
Eliza Margaretha7788a982014-08-29 16:10:52 +000031 *
Eliza Margarethaf0171c52015-01-14 17:38:16 +000032 * <pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000033 * SpanTermQuery stq = new SpanTermQuery(new Term(&quot;tokens&quot;,
34 * &quot;s:lightning&quot;));
35 * SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 2, -1,
36 * true);
Eliza Margarethaf0171c52015-01-14 17:38:16 +000037 * </pre>
Eliza Margaretha7788a982014-08-29 16:10:52 +000038 *
Eliza Margarethaf0171c52015-01-14 17:38:16 +000039 * In the example above, the SpanExpansionQuery describes that the
Nils Diewaldbb33da22015-03-04 16:24:25 +000040 * {@link TermSpans} of "lightning" may be expanded up to two token
41 * positions to
Eliza Margarethaf0171c52015-01-14 17:38:16 +000042 * the left.
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000043 *
Eliza Margarethaf0171c52015-01-14 17:38:16 +000044 * <pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000045 * &quot;Trees are often struck by lightning because they are natural
46 * lightning conductors to the ground.&quot;
Eliza Margarethaf0171c52015-01-14 17:38:16 +000047 * </pre>
48 *
49 * The matches for the sample text are:
50 *
51 * <pre>
52 * [struck by lightning]
53 * [by lightning]
54 * [lightning]
55 * [are natural lightning]
56 * [natural lightning]
57 * [lightning]
58 * </pre>
59 *
60 * The expansion can also be specified to <em>not</em> contain any
Nils Diewaldbb33da22015-03-04 16:24:25 +000061 * direct/immediate /adjacent occurrence(s) of another span. Examples
62 * in
Eliza Margarethaf0171c52015-01-14 17:38:16 +000063 * Poliqarp:
64 *
65 * <pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000066 * [orth=the][orth!=lightning] "the" must not be followed by
67 * "lightning"
68 * [pos!=ADJ]{1,2}[orth=jacket] one or two adjectives cannot precedes
69 * "jacket"
Eliza Margarethadffd0592015-01-15 18:24:39 +000070 * </pre>
71 *
Nils Diewaldbb33da22015-03-04 16:24:25 +000072 * The SpanExpansionQuery for the latter Poliqarp query with left
73 * direction from
Eliza Margarethadffd0592015-01-15 18:24:39 +000074 * "jacket" example is:
75 *
76 * <pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000077 * SpanTermQuery notQuery = new SpanTermQuery(new
78 * Term(&quot;tokens&quot;, &quot;tt:p:/ADJ&quot;));
79 * SpanTermQuery stq = new SpanTermQuery(new Term(&quot;tokens&quot;,
80 * &quot;s:jacket&quot;));
81 * SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 1,
82 * 2, -1, true);
Eliza Margarethadffd0592015-01-15 18:24:39 +000083 * </pre>
84 *
85 * Matches and non matches example:
86 *
87 * <pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000088 * [a jacket] match
89 * [such a jacket] non match, where such is an ADJ
90 * [leather jacket] non match
91 * [black leather jacket] non match
92 * [large black leather jacket] non match
Eliza Margarethaf0171c52015-01-14 17:38:16 +000093 * </pre>
94 *
Nils Diewaldbb33da22015-03-04 16:24:25 +000095 * The positions of the expansion parts can be optionally stored in
96 * payloads
Eliza Margarethaafe98122015-01-23 17:37:57 +000097 * together with a class number.
Eliza Margarethaf0171c52015-01-14 17:38:16 +000098 *
99 * @author margaretha
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000100 * */
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000101public class SpanExpansionQuery extends SimpleSpanQuery {
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000102
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000103 private int min, max; // min, max expansion position
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000104
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000105 // if > 0, collect expansion offsets using this label
106 private byte classNumber;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000107
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000108 // expansion direction with regard to the main span:
109 // < 0 to the left of main span
110 // >= 0 to the right of main span
111 private int direction;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000112
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000113 // if true, no occurrence of another span
114 final boolean isExclusion;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000115
Nils Diewaldbb33da22015-03-04 16:24:25 +0000116
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000117 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000118 * Constructs a SpanExpansionQuery for simple expansion of the
119 * specified {@link SpanQuery}.
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000120 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000121 * @param firstClause
122 * a {@link SpanQuery}
123 * @param min
124 * the minimum length of the expansion
125 * @param max
126 * the maximum length of the expansion
127 * @param direction
128 * the direction of the expansion
129 * @param collectPayloads
130 * a boolean flag representing the value
131 * <code>true</code> if payloads are to be collected,
132 * otherwise
133 * <code>false</code>.
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000134 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000135 public SpanExpansionQuery (SpanQuery firstClause, int min, int max,
136 int direction, boolean collectPayloads) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000137 super(firstClause, collectPayloads);
138 if (max < min) {
139 throw new IllegalArgumentException("The max position has to be "
140 + "bigger than or the same as min position.");
141 }
142 this.min = min;
143 this.max = max;
144 this.direction = direction;
145 this.isExclusion = false;
146 }
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000147
Nils Diewaldbb33da22015-03-04 16:24:25 +0000148
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000149 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000150 * Constructs a SpanExpansionQuery for simple expansion of the
151 * specified {@link SpanQuery} and stores expansion offsets in
152 * payloads associated
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000153 * with the given class number.
154 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000155 * @param firstClause
156 * a {@link SpanQuery}
157 * @param min
158 * the minimum length of the expansion
159 * @param max
160 * the maximum length of the expansion
161 * @param direction
162 * the direction of the expansion
163 * @param classNumber
164 * the class number for storing expansion offsets in
165 * payloads
166 * @param collectPayloads
167 * a boolean flag representing the value
168 * <code>true</code> if payloads are to be collected,
169 * otherwise
170 * <code>false</code>.
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000171 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000172 public SpanExpansionQuery (SpanQuery firstClause, int min, int max,
173 int direction, byte classNumber,
174 boolean collectPayloads) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000175 this(firstClause, min, max, direction, collectPayloads);
176 this.classNumber = classNumber;
177 }
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000178
Nils Diewaldbb33da22015-03-04 16:24:25 +0000179
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000180 /**
181 * Constructs a SpanExpansionQuery for expansion of the first
Nils Diewaldbb33da22015-03-04 16:24:25 +0000182 * {@link SpanQuery} with exclusions of the second
183 * {@link SpanQuery}.
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000184 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000185 * @param firstClause
186 * the SpanQuery to be expanded
187 * @param notClause
188 * the SpanQuery to be excluded
189 * @param min
190 * the minimum length of the expansion
191 * @param max
192 * the maximum length of the expansion
193 * @param direction
194 * the direction of the expansion
195 * @param collectPayloads
196 * a boolean flag representing the value
197 * <code>true</code> if payloads are to be collected,
198 * otherwise
199 * <code>false</code>.
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000200 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000201 public SpanExpansionQuery (SpanQuery firstClause, SpanQuery notClause,
202 int min, int max, int direction,
203 boolean collectPayloads) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000204 super(firstClause, notClause, collectPayloads);
205 if (max < min) {
206 throw new IllegalArgumentException("The max position has to be "
207 + "bigger than or the same as min position.");
208 }
209 this.min = min;
210 this.max = max;
211 this.direction = direction;
212 this.isExclusion = true;
213 }
Eliza Margaretha656cb312014-08-14 12:42:26 +0000214
Nils Diewaldbb33da22015-03-04 16:24:25 +0000215
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000216 /**
217 * Constructs a SpanExpansionQuery for expansion of the first
Nils Diewaldbb33da22015-03-04 16:24:25 +0000218 * {@link SpanQuery} with exclusions of the second
219 * {@link SpanQuery}, and
220 * stores expansion offsets in payloads associated with the given
221 * class
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000222 * number.
223 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000224 * @param firstClause
225 * the SpanQuery to be expanded
226 * @param notClause
227 * the SpanQuery to be excluded
228 * @param min
229 * the minimum length of the expansion
230 * @param max
231 * the maximum length of the expansion
232 * @param direction
233 * the direction of the expansion
234 * @param classNumber
235 * the class number for storing expansion offsets in
236 * payloads
237 * @param collectPayloads
238 * a boolean flag representing the value
239 * <code>true</code> if payloads are to be collected,
240 * otherwise
241 * <code>false</code>.
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000242 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000243 public SpanExpansionQuery (SpanQuery firstClause, SpanQuery notClause,
244 int min, int max, int direction,
245 byte classNumber, boolean collectPayloads) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000246 this(firstClause, notClause, min, max, direction, collectPayloads);
247 this.classNumber = classNumber;
248 }
Eliza Margaretha7788a982014-08-29 16:10:52 +0000249
Nils Diewaldbb33da22015-03-04 16:24:25 +0000250
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000251 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000252 public SimpleSpanQuery clone () {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000253 SpanExpansionQuery sq = null;
254 if (isExclusion) {
255 sq = new SpanExpansionQuery(firstClause, secondClause, min, max,
256 direction, classNumber, collectPayloads);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000257 }
258 else {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000259 sq = new SpanExpansionQuery(firstClause, min, max, direction,
260 classNumber, collectPayloads);
261 }
262 //sq.setBoost(sq.getBoost());
263 return sq;
264 }
Eliza Margaretha7788a982014-08-29 16:10:52 +0000265
Nils Diewaldbb33da22015-03-04 16:24:25 +0000266
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000267 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000268 public Spans getSpans (AtomicReaderContext context, Bits acceptDocs,
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000269 Map<Term, TermContext> termContexts) throws IOException {
270
271 // Temporary:
272 if (isExclusion)
273 return new ExpandedExclusionSpans(this, context, acceptDocs,
274 termContexts);
275 else
276
277 return new ExpandedSpans(this, context, acceptDocs, termContexts);
278 }
279
Nils Diewaldbb33da22015-03-04 16:24:25 +0000280
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000281 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000282 public String toString (String field) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000283 StringBuilder sb = new StringBuilder();
284 sb.append("spanExpansion(");
285 sb.append(firstClause.toString());
286 if (isExclusion && secondClause != null) {
287 sb.append(", !");
288 sb.append(secondClause.toString());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000289 }
290 else {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000291 sb.append(", []");
292 }
293 sb.append("{");
294 sb.append(min);
295 sb.append(", ");
296 sb.append(max);
297 sb.append("}, ");
298 if (direction < 0)
299 sb.append("left");
300 else
301 sb.append("right");
302 if (classNumber > 0) {
303 sb.append(", class:");
304 sb.append(classNumber);
305 }
306 sb.append(")");
307 return sb.toString();
308 }
309
Nils Diewaldbb33da22015-03-04 16:24:25 +0000310
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000311 /**
312 * Returns the minimum length of the expansion.
313 *
314 * @return the minimum length of the expansion
315 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000316 public int getMin () {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000317 return min;
318 }
319
Nils Diewaldbb33da22015-03-04 16:24:25 +0000320
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000321 /**
322 * Sets the minimum length of the expansion.
323 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000324 * @param min
325 * the minimum length of the expansion
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000326 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000327 public void setMin (int min) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000328 this.min = min;
329 }
330
Nils Diewaldbb33da22015-03-04 16:24:25 +0000331
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000332 /**
333 * Returns the maximum length of the expansion.
334 *
335 * @return the maximum length of the expansion
336 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000337 public int getMax () {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000338 return max;
339 }
340
Nils Diewaldbb33da22015-03-04 16:24:25 +0000341
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000342 /**
343 * Sets the maximum length of the expansion.
344 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000345 * @param max
346 * the maximum length of the expansion
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000347 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000348 public void setMax (int max) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000349 this.max = max;
350 }
351
Nils Diewaldbb33da22015-03-04 16:24:25 +0000352
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000353 /**
354 * Returns the class number associated with the expansion offsets
355 *
356 * @return the class number associated with the expansion offsets
357 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000358 public byte getClassNumber () {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000359 return classNumber;
360 }
361
Nils Diewaldbb33da22015-03-04 16:24:25 +0000362
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000363 /**
364 * Sets the class number associated with the expansion offsets
365 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000366 * @param classNumber
367 * the class number associated with the expansion
368 * offsets
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000369 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000370 public void setClassNumber (byte classNumber) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000371 this.classNumber = classNumber;
372 }
373
Nils Diewaldbb33da22015-03-04 16:24:25 +0000374
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000375 /**
376 * Returns the direction of the expansion
377 *
378 * @return the direction of the expansion
379 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000380 public int getDirection () {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000381 return direction;
382 }
383
Nils Diewaldbb33da22015-03-04 16:24:25 +0000384
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000385 /**
386 * Sets the direction of the expansion
387 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000388 * @param direction
389 * the direction of the expansion
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000390 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000391 public void setDirection (int direction) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000392 this.direction = direction;
393 }
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000394}