blob: 6458da1b1913b1a6c6c8b89248b914d1e28e0911 [file] [log] [blame]
Eliza Margaretha7ee76da2014-08-12 15:32:33 +00001package de.ids_mannheim.korap.query;
2
3import java.io.IOException;
4import java.util.Map;
5
Akron700c1eb2015-09-25 16:57:30 +02006import org.apache.lucene.index.LeafReaderContext;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +00007import org.apache.lucene.index.Term;
8import org.apache.lucene.index.TermContext;
9import org.apache.lucene.search.spans.SpanQuery;
10import org.apache.lucene.search.spans.Spans;
Eliza Margarethaf0171c52015-01-14 17:38:16 +000011import org.apache.lucene.search.spans.TermSpans;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000012import org.apache.lucene.util.Bits;
13
Eliza Margaretha99c72c22014-09-17 08:38:25 +000014import de.ids_mannheim.korap.query.spans.ExpandedExclusionSpans;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000015import de.ids_mannheim.korap.query.spans.ExpandedSpans;
16
Eliza Margarethaf0171c52015-01-14 17:38:16 +000017/**
Nils Diewaldbb33da22015-03-04 16:24:25 +000018 * SpanExpansionQuery makes a span longer by stretching out the start
Eliza Margarethadc98dc12016-11-16 14:33:42 +010019 * or the end position of the span. The constraints of the expansion,
20 * such as how large the expansion should be (min and max position)
21 * and the direction of the expansion with respect to the original
22 * span, are specified in ExpansionConstraint. The direction is
23 * designated with the sign of a number, namely a negative number
Nils Diewaldbb33da22015-03-04 16:24:25 +000024 * signifies the left direction, and a positive number (including 0)
Eliza Margarethadc98dc12016-11-16 14:33:42 +010025 * signifies the right direction.
Eliza Margaretha7788a982014-08-29 16:10:52 +000026 *
Eliza Margarethaf0171c52015-01-14 17:38:16 +000027 * <pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000028 * SpanTermQuery stq = new SpanTermQuery(new Term(&quot;tokens&quot;,
29 * &quot;s:lightning&quot;));
30 * SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 2, -1,
31 * true);
Eliza Margarethaf0171c52015-01-14 17:38:16 +000032 * </pre>
Eliza Margaretha7788a982014-08-29 16:10:52 +000033 *
Eliza Margarethaf0171c52015-01-14 17:38:16 +000034 * In the example above, the SpanExpansionQuery describes that the
Nils Diewaldbb33da22015-03-04 16:24:25 +000035 * {@link TermSpans} of "lightning" may be expanded up to two token
36 * positions to
Eliza Margarethaf0171c52015-01-14 17:38:16 +000037 * the left.
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000038 *
Eliza Margarethaf0171c52015-01-14 17:38:16 +000039 * <pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000040 * &quot;Trees are often struck by lightning because they are natural
41 * lightning conductors to the ground.&quot;
Eliza Margarethaf0171c52015-01-14 17:38:16 +000042 * </pre>
43 *
44 * The matches for the sample text are:
45 *
46 * <pre>
47 * [struck by lightning]
48 * [by lightning]
49 * [lightning]
50 * [are natural lightning]
51 * [natural lightning]
52 * [lightning]
53 * </pre>
54 *
55 * The expansion can also be specified to <em>not</em> contain any
Nils Diewaldbb33da22015-03-04 16:24:25 +000056 * direct/immediate /adjacent occurrence(s) of another span. Examples
57 * in
Eliza Margarethaf0171c52015-01-14 17:38:16 +000058 * Poliqarp:
59 *
60 * <pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000061 * [orth=the][orth!=lightning] "the" must not be followed by
62 * "lightning"
63 * [pos!=ADJ]{1,2}[orth=jacket] one or two adjectives cannot precedes
64 * "jacket"
Eliza Margarethadffd0592015-01-15 18:24:39 +000065 * </pre>
66 *
Nils Diewaldbb33da22015-03-04 16:24:25 +000067 * The SpanExpansionQuery for the latter Poliqarp query with left
68 * direction from
Eliza Margarethadffd0592015-01-15 18:24:39 +000069 * "jacket" example is:
70 *
71 * <pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000072 * SpanTermQuery notQuery = new SpanTermQuery(new
73 * Term(&quot;tokens&quot;, &quot;tt:p:/ADJ&quot;));
74 * SpanTermQuery stq = new SpanTermQuery(new Term(&quot;tokens&quot;,
75 * &quot;s:jacket&quot;));
76 * SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 1,
77 * 2, -1, true);
Eliza Margarethadffd0592015-01-15 18:24:39 +000078 * </pre>
79 *
80 * Matches and non matches example:
81 *
82 * <pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000083 * [a jacket] match
84 * [such a jacket] non match, where such is an ADJ
85 * [leather jacket] non match
86 * [black leather jacket] non match
87 * [large black leather jacket] non match
Eliza Margarethaf0171c52015-01-14 17:38:16 +000088 * </pre>
89 *
Nils Diewaldbb33da22015-03-04 16:24:25 +000090 * The positions of the expansion parts can be optionally stored in
91 * payloads
Eliza Margarethaafe98122015-01-23 17:37:57 +000092 * together with a class number.
Eliza Margarethaf0171c52015-01-14 17:38:16 +000093 *
94 * @author margaretha
Eliza Margaretha6f989202016-10-14 21:48:29 +020095 */
Eliza Margarethaf0171c52015-01-14 17:38:16 +000096public class SpanExpansionQuery extends SimpleSpanQuery {
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000097
Eliza Margarethaf0171c52015-01-14 17:38:16 +000098 private int min, max; // min, max expansion position
Eliza Margaretha7ee76da2014-08-12 15:32:33 +000099
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000100 // if > 0, collect expansion offsets using this label
101 private byte classNumber;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000102
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000103 // expansion direction with regard to the main span:
104 // < 0 to the left of main span
105 // >= 0 to the right of main span
106 private int direction;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000107
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000108 // if true, no occurrence of another span
109 final boolean isExclusion;
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000110
Nils Diewaldbb33da22015-03-04 16:24:25 +0000111
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000112 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000113 * Constructs a SpanExpansionQuery for simple expansion of the
114 * specified {@link SpanQuery}.
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000115 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000116 * @param firstClause
117 * a {@link SpanQuery}
118 * @param min
119 * the minimum length of the expansion
120 * @param max
121 * the maximum length of the expansion
122 * @param direction
123 * the direction of the expansion
124 * @param collectPayloads
125 * a boolean flag representing the value
126 * <code>true</code> if payloads are to be collected,
127 * otherwise
128 * <code>false</code>.
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000129 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000130 public SpanExpansionQuery (SpanQuery firstClause, int min, int max,
131 int direction, boolean collectPayloads) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000132 super(firstClause, collectPayloads);
133 if (max < min) {
134 throw new IllegalArgumentException("The max position has to be "
135 + "bigger than or the same as min position.");
136 }
137 this.min = min;
138 this.max = max;
139 this.direction = direction;
140 this.isExclusion = false;
141 }
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000142
Nils Diewaldbb33da22015-03-04 16:24:25 +0000143
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000144 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000145 * Constructs a SpanExpansionQuery for simple expansion of the
146 * specified {@link SpanQuery} and stores expansion offsets in
147 * payloads associated
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000148 * with the given class number.
149 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000150 * @param firstClause
151 * a {@link SpanQuery}
152 * @param min
153 * the minimum length of the expansion
154 * @param max
155 * the maximum length of the expansion
156 * @param direction
157 * the direction of the expansion
158 * @param classNumber
159 * the class number for storing expansion offsets in
160 * payloads
161 * @param collectPayloads
162 * a boolean flag representing the value
163 * <code>true</code> if payloads are to be collected,
164 * otherwise
165 * <code>false</code>.
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000166 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000167 public SpanExpansionQuery (SpanQuery firstClause, int min, int max,
168 int direction, byte classNumber,
169 boolean collectPayloads) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000170 this(firstClause, min, max, direction, collectPayloads);
171 this.classNumber = classNumber;
172 }
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000173
Nils Diewaldbb33da22015-03-04 16:24:25 +0000174
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000175 /**
176 * Constructs a SpanExpansionQuery for expansion of the first
Nils Diewaldbb33da22015-03-04 16:24:25 +0000177 * {@link SpanQuery} with exclusions of the second
178 * {@link SpanQuery}.
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000179 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000180 * @param firstClause
181 * the SpanQuery to be expanded
182 * @param notClause
183 * the SpanQuery to be excluded
184 * @param min
185 * the minimum length of the expansion
186 * @param max
187 * the maximum length of the expansion
188 * @param direction
189 * the direction of the expansion
190 * @param collectPayloads
191 * a boolean flag representing the value
192 * <code>true</code> if payloads are to be collected,
193 * otherwise
194 * <code>false</code>.
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000195 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000196 public SpanExpansionQuery (SpanQuery firstClause, SpanQuery notClause,
197 int min, int max, int direction,
198 boolean collectPayloads) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000199 super(firstClause, notClause, collectPayloads);
200 if (max < min) {
201 throw new IllegalArgumentException("The max position has to be "
202 + "bigger than or the same as min position.");
203 }
204 this.min = min;
205 this.max = max;
206 this.direction = direction;
207 this.isExclusion = true;
208 }
Eliza Margaretha656cb312014-08-14 12:42:26 +0000209
Nils Diewaldbb33da22015-03-04 16:24:25 +0000210
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000211 /**
212 * Constructs a SpanExpansionQuery for expansion of the first
Nils Diewaldbb33da22015-03-04 16:24:25 +0000213 * {@link SpanQuery} with exclusions of the second
214 * {@link SpanQuery}, and
215 * stores expansion offsets in payloads associated with the given
216 * class
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000217 * number.
218 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000219 * @param firstClause
220 * the SpanQuery to be expanded
221 * @param notClause
222 * the SpanQuery to be excluded
223 * @param min
224 * the minimum length of the expansion
225 * @param max
226 * the maximum length of the expansion
227 * @param direction
228 * the direction of the expansion
229 * @param classNumber
230 * the class number for storing expansion offsets in
231 * payloads
232 * @param collectPayloads
233 * a boolean flag representing the value
234 * <code>true</code> if payloads are to be collected,
235 * otherwise
236 * <code>false</code>.
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000237 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000238 public SpanExpansionQuery (SpanQuery firstClause, SpanQuery notClause,
239 int min, int max, int direction,
240 byte classNumber, boolean collectPayloads) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000241 this(firstClause, notClause, min, max, direction, collectPayloads);
242 this.classNumber = classNumber;
243 }
Eliza Margaretha7788a982014-08-29 16:10:52 +0000244
Nils Diewaldbb33da22015-03-04 16:24:25 +0000245
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000246 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000247 public SimpleSpanQuery clone () {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000248 SpanExpansionQuery sq = null;
249 if (isExclusion) {
250 sq = new SpanExpansionQuery(firstClause, secondClause, min, max,
251 direction, classNumber, collectPayloads);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000252 }
253 else {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000254 sq = new SpanExpansionQuery(firstClause, min, max, direction,
255 classNumber, collectPayloads);
256 }
257 //sq.setBoost(sq.getBoost());
258 return sq;
259 }
Eliza Margaretha7788a982014-08-29 16:10:52 +0000260
Nils Diewaldbb33da22015-03-04 16:24:25 +0000261
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000262 @Override
Akron700c1eb2015-09-25 16:57:30 +0200263 public Spans getSpans (LeafReaderContext context, Bits acceptDocs,
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000264 Map<Term, TermContext> termContexts) throws IOException {
265
266 // Temporary:
267 if (isExclusion)
268 return new ExpandedExclusionSpans(this, context, acceptDocs,
269 termContexts);
270 else
271
272 return new ExpandedSpans(this, context, acceptDocs, termContexts);
273 }
274
Nils Diewaldbb33da22015-03-04 16:24:25 +0000275
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000276 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000277 public String toString (String field) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000278 StringBuilder sb = new StringBuilder();
279 sb.append("spanExpansion(");
280 sb.append(firstClause.toString());
281 if (isExclusion && secondClause != null) {
282 sb.append(", !");
283 sb.append(secondClause.toString());
Nils Diewaldbb33da22015-03-04 16:24:25 +0000284 }
285 else {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000286 sb.append(", []");
287 }
288 sb.append("{");
289 sb.append(min);
290 sb.append(", ");
291 sb.append(max);
292 sb.append("}, ");
293 if (direction < 0)
294 sb.append("left");
295 else
296 sb.append("right");
297 if (classNumber > 0) {
298 sb.append(", class:");
299 sb.append(classNumber);
300 }
301 sb.append(")");
302 return sb.toString();
303 }
304
Nils Diewaldbb33da22015-03-04 16:24:25 +0000305
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000306 /**
307 * Returns the minimum length of the expansion.
308 *
309 * @return the minimum length of the expansion
310 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000311 public int getMin () {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000312 return min;
313 }
314
Nils Diewaldbb33da22015-03-04 16:24:25 +0000315
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000316 /**
317 * Sets the minimum length of the expansion.
318 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000319 * @param min
320 * the minimum length of the expansion
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000321 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000322 public void setMin (int min) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000323 this.min = min;
324 }
325
Nils Diewaldbb33da22015-03-04 16:24:25 +0000326
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000327 /**
328 * Returns the maximum length of the expansion.
329 *
330 * @return the maximum length of the expansion
331 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000332 public int getMax () {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000333 return max;
334 }
335
Nils Diewaldbb33da22015-03-04 16:24:25 +0000336
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000337 /**
338 * Sets the maximum length of the expansion.
339 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000340 * @param max
341 * the maximum length of the expansion
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000342 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000343 public void setMax (int max) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000344 this.max = max;
345 }
346
Nils Diewaldbb33da22015-03-04 16:24:25 +0000347
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000348 /**
349 * Returns the class number associated with the expansion offsets
350 *
351 * @return the class number associated with the expansion offsets
352 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000353 public byte getClassNumber () {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000354 return classNumber;
355 }
356
Nils Diewaldbb33da22015-03-04 16:24:25 +0000357
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000358 /**
359 * Sets the class number associated with the expansion offsets
360 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000361 * @param classNumber
362 * the class number associated with the expansion
363 * offsets
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000364 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000365 public void setClassNumber (byte classNumber) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000366 this.classNumber = classNumber;
367 }
368
Nils Diewaldbb33da22015-03-04 16:24:25 +0000369
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000370 /**
371 * Returns the direction of the expansion
372 *
373 * @return the direction of the expansion
374 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000375 public int getDirection () {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000376 return direction;
377 }
378
Nils Diewaldbb33da22015-03-04 16:24:25 +0000379
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000380 /**
381 * Sets the direction of the expansion
382 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000383 * @param direction
384 * the direction of the expansion
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000385 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000386 public void setDirection (int direction) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000387 this.direction = direction;
388 }
Eliza Margaretha7ee76da2014-08-12 15:32:33 +0000389}