blob: 88c4de1043edf199731cec61ac3b766c412fe836 [file] [log] [blame]
Eliza Margarethaa2603fa2014-01-22 10:59:25 +00001package de.ids_mannheim.korap.query;
2
3import java.io.IOException;
4import java.util.Map;
5
Akron700c1eb2015-09-25 16:57:30 +02006import org.apache.lucene.index.LeafReaderContext;
Eliza Margarethaa2603fa2014-01-22 10:59:25 +00007import org.apache.lucene.index.Term;
8import org.apache.lucene.index.TermContext;
9import org.apache.lucene.search.spans.SpanQuery;
10import org.apache.lucene.search.spans.Spans;
11import org.apache.lucene.util.Bits;
Eliza Margaretha609fcc62014-02-13 14:10:20 +000012import org.apache.lucene.util.ToStringUtils;
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000013
Eliza Margaretha371eab32014-10-29 14:53:37 +000014import de.ids_mannheim.korap.query.spans.DistanceExclusionSpans;
15import de.ids_mannheim.korap.query.spans.ElementDistanceExclusionSpans;
Eliza Margaretha795937c2014-02-06 13:08:28 +000016import de.ids_mannheim.korap.query.spans.ElementDistanceSpans;
17import de.ids_mannheim.korap.query.spans.TokenDistanceSpans;
Eliza Margaretha795937c2014-02-06 13:08:28 +000018import de.ids_mannheim.korap.query.spans.UnorderedElementDistanceSpans;
19import de.ids_mannheim.korap.query.spans.UnorderedTokenDistanceSpans;
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000020
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +000021/**
Nils Diewaldbb33da22015-03-04 16:24:25 +000022 * SpanDistanceQuery calculates the distance between two spans and
23 * compares it
24 * to the distance constraints. The distance constraints are specified
25 * as a {@link DistanceConstraint} instance having various properties:
26 * the distance
27 * unit, the order of the spans (ordered or unordered), co-occurrence
28 * (i.e. the
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +000029 * spans should co-occur or not), minimum and maximum distance. <br/>
30 * <br/>
Nils Diewaldbb33da22015-03-04 16:24:25 +000031 * The distance unit can be a word (token), a sentence or a paragraph.
32 * The
33 * resulting spans typically stretch from the starting position of a
34 * former span
Eliza Margaretha8551e5b2014-12-15 16:46:18 +000035 * to the end position of the latter span. <br/>
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +000036 * <br/>
37 * Query examples:
Eliza Margarethaa2603fa2014-01-22 10:59:25 +000038 *
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +000039 * <ol>
Nils Diewaldbb33da22015-03-04 16:24:25 +000040 * <li>Search two terms x and y which are separated by minimum two and
41 * maximum
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +000042 * three other words. The order of x and y does not matter.
43 *
44 * <pre>
45 * DistanceConstraint dc = new DistanceConstraint(2, 3, false, false);
46 * </pre>
47 *
48 * </li>
Nils Diewaldbb33da22015-03-04 16:24:25 +000049 * <li>Search two terms x and y which are separated by minimum two and
50 * maximum
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +000051 * three other words. X must precede y.
52 *
53 * <pre>
54 * DistanceConstraint dc = new DistanceConstraint(2, 3, true, false);
55 * </pre>
56 *
57 * </li>
58 * <li>
Nils Diewaldbb33da22015-03-04 16:24:25 +000059 * Search term x which do not occur with term y in minimum two and
60 * maximum three
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +000061 * other words. X must precede y.
62 *
63 * <pre>
64 * DistanceConstraint dc = new DistanceConstraint(2, 3, true, true);
65 * </pre>
66 *
67 * </li>
Nils Diewaldbb33da22015-03-04 16:24:25 +000068 * <li>Search two terms x and y separated by minimum one and maximum
69 * two
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +000070 * sentences. X must precede y.
71 *
72 * <pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000073 * SpanElementQuery e = new SpanElementQuery(&quot;tokens&quot;,
74 * &quot;s&quot;);
75 * DistanceConstraint dc = new DistanceConstraint(e, 2, 3, true,
76 * false);
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +000077 * </pre>
78 *
79 * </li>
80 * </ol>
81 *
82 * SpanDistanceQuery examples:
83 *
84 * <ol>
85 * <li>
86 *
87 * <pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000088 * SpanDistanceQuery sq = new SpanDistanceQuery(new SpanTermQuery(new
89 * Term(
90 * &quot;tokens&quot;, x)), new SpanTermQuery(new
91 * Term(&quot;tokens&quot;, y)), dc, true);
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +000092 * </pre>
93 *
94 * </li>
95 * <li>
96 *
97 * <pre>
98 * SpanDistanceQuery sq = new SpanDistanceQuery(
Nils Diewaldbb33da22015-03-04 16:24:25 +000099 * new SpanElementQuery(&quot;tokens&quot;, &quot;s&quot;), new
100 * SpanElementQuery(&quot;tokens&quot;, y),
101 * dc, true);
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000102 * </pre>
103 *
104 * </li>
105 * </ol>
106 *
107 *
108 * @author margaretha
Eliza Margaretha6f989202016-10-14 21:48:29 +0200109 */
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000110public class SpanDistanceQuery extends SimpleSpanQuery {
Eliza Margarethadb292872014-02-03 09:36:43 +0000111
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000112 private boolean exclusion;
113 private boolean isOrdered;
114 private int minDistance, maxDistance;
115 private SpanElementQuery elementQuery; // element distance unit (sentence or
Eliza Margaretha6f989202016-10-14 21:48:29 +0200116 // paragraph)
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000117 private String distanceUnit;
118 private String spanName;
119 private DistanceConstraint constraint;
120
Nils Diewaldbb33da22015-03-04 16:24:25 +0000121
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000122 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000123 * Constructs a SpanDistanceQuery comparing the distance between
124 * the spans
125 * of the two specified spanqueries and based-on the given
126 * distance
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000127 * constraints.
128 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000129 * @param firstClause
130 * a span query
131 * @param secondClause
132 * a span query
133 * @param constraint
134 * a DistanceConstraint containing all the constraints
135 * required for the distance query
136 * @param collectPayloads
137 * a boolean flag representing the value
138 * <code>true</code> if payloads are to be collected,
139 * otherwise
140 * <code>false</code>.
Eliza Margaretha8551e5b2014-12-15 16:46:18 +0000141 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000142 public SpanDistanceQuery (SpanQuery firstClause, SpanQuery secondClause,
143 DistanceConstraint constraint,
144 boolean collectPayloads) {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000145 super(firstClause, secondClause, collectPayloads);
146
147 if (constraint == null) {
148 throw new IllegalArgumentException(
149 "Distance constraint cannot be null.");
150 }
151
152 this.constraint = constraint;
153 this.minDistance = constraint.getMinDistance();
154 this.maxDistance = constraint.getMaxDistance();
155 this.isOrdered = constraint.isOrdered();
156 this.exclusion = constraint.isExclusion();
157 this.distanceUnit = constraint.getUnit();
158
159 if (constraint.getElementQuery() != null) {
160 spanName = "spanElementDistance";
161 this.elementQuery = constraint.getElementQuery();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000162 }
163 else {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000164 spanName = "spanDistance";
165 }
Eliza Margaretha609fcc62014-02-13 14:10:20 +0000166 }
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000167
Nils Diewaldbb33da22015-03-04 16:24:25 +0000168
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000169 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000170 public String toString (String field) {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000171 StringBuilder sb = new StringBuilder();
172 sb.append(this.spanName);
173 sb.append("(");
174 sb.append(firstClause.toString(field));
175 sb.append(", ");
176 sb.append(secondClause.toString(field));
177 sb.append(", ");
178 sb.append("[(");
179 sb.append(distanceUnit);
180 sb.append("[");
181 sb.append(minDistance);
182 sb.append(":");
183 sb.append(maxDistance);
184 sb.append("], ");
185 sb.append(isOrdered ? "ordered, " : "notOrdered, ");
186 sb.append(exclusion ? "excluded)])" : "notExcluded)])");
187 sb.append(ToStringUtils.boost(getBoost()));
188 return sb.toString();
189 }
Eliza Margaretha83b95372014-01-23 09:18:07 +0000190
Nils Diewaldbb33da22015-03-04 16:24:25 +0000191
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000192 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +0000193 public SpanDistanceQuery clone () {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000194 SpanDistanceQuery spanDistanceQuery = new SpanDistanceQuery(
195 (SpanQuery) firstClause.clone(),
196 (SpanQuery) secondClause.clone(), this.constraint,
197 this.collectPayloads);
Eliza Margaretha83b95372014-01-23 09:18:07 +0000198
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000199 if (this.elementQuery != null) {
200 spanDistanceQuery.setElementQuery(this.elementQuery);
201 }
202 spanDistanceQuery.setBoost(getBoost());
203 return spanDistanceQuery;
204 }
Eliza Margaretha83b95372014-01-23 09:18:07 +0000205
Nils Diewaldbb33da22015-03-04 16:24:25 +0000206
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000207 @Override
Akron700c1eb2015-09-25 16:57:30 +0200208 public Spans getSpans (LeafReaderContext context, Bits acceptDocs,
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000209 Map<Term, TermContext> termContexts) throws IOException {
Eliza Margaretha83b95372014-01-23 09:18:07 +0000210
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000211 if (this.elementQuery != null) {
212 if (isExclusion()) {
213 return new ElementDistanceExclusionSpans(this, context,
Eliza Margaretha95449782014-12-16 16:23:37 +0000214 acceptDocs, termContexts);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000215 }
216 else if (isOrdered) {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000217 return new ElementDistanceSpans(this, context, acceptDocs,
218 termContexts);
219 }
220 return new UnorderedElementDistanceSpans(this, context, acceptDocs,
221 termContexts);
Eliza Margaretha8e274e32014-01-28 15:09:30 +0000222
Nils Diewaldbb33da22015-03-04 16:24:25 +0000223 }
224 else if (isExclusion()) {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000225 return new DistanceExclusionSpans(this, context, acceptDocs,
Eliza Margaretha95449782014-12-16 16:23:37 +0000226 termContexts);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000227 }
228 else if (isOrdered) {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000229 return new TokenDistanceSpans(this, context, acceptDocs,
230 termContexts);
231 }
232 return new UnorderedTokenDistanceSpans(this, context, acceptDocs,
233 termContexts);
234 }
Eliza Margaretha609fcc62014-02-13 14:10:20 +0000235
Nils Diewaldbb33da22015-03-04 16:24:25 +0000236
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000237 /**
Eliza Margaretha8551e5b2014-12-15 16:46:18 +0000238 * Returns the minimum distance constraint.
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000239 *
Eliza Margaretha8551e5b2014-12-15 16:46:18 +0000240 * @return the minimum distance constraint
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000241 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000242 public int getMinDistance () {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000243 return minDistance;
244 }
Eliza Margaretha609fcc62014-02-13 14:10:20 +0000245
Nils Diewaldbb33da22015-03-04 16:24:25 +0000246
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000247 /**
Eliza Margaretha8551e5b2014-12-15 16:46:18 +0000248 * Sets the minimum distance constraint.
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000249 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000250 * @param minDistance
251 * the minimum distance constraint
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000252 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000253 public void setMinDistance (int minDistance) {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000254 this.minDistance = minDistance;
255 }
256
Nils Diewaldbb33da22015-03-04 16:24:25 +0000257
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000258 /**
Eliza Margaretha8551e5b2014-12-15 16:46:18 +0000259 * Returns the maximum distance.
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000260 *
Eliza Margaretha8551e5b2014-12-15 16:46:18 +0000261 * @return the maximum distance constraint
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000262 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000263 public int getMaxDistance () {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000264 return maxDistance;
265 }
266
Nils Diewaldbb33da22015-03-04 16:24:25 +0000267
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000268 /**
Eliza Margaretha8551e5b2014-12-15 16:46:18 +0000269 * Sets a maximum distance.
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000270 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000271 * @param maxDistance
272 * the maximum distance
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000273 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000274 public void setMaxDistance (int maxDistance) {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000275 this.maxDistance = maxDistance;
276 }
277
Nils Diewaldbb33da22015-03-04 16:24:25 +0000278
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000279 /**
Eliza Margaretha8551e5b2014-12-15 16:46:18 +0000280 * Returns the element query used as the distance unit.
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000281 *
282 * @return the element distance unit
283 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000284 public SpanElementQuery getElementQuery () {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000285 return elementQuery;
286 }
287
Nils Diewaldbb33da22015-03-04 16:24:25 +0000288
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000289 /**
Eliza Margaretha8551e5b2014-12-15 16:46:18 +0000290 * Sets the specified element query used as the distance unit.
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000291 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000292 * @param elementQuery
293 * the SpanElementQuery used as the distance unit
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000294 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000295 public void setElementQuery (SpanElementQuery elementQuery) {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000296 this.elementQuery = elementQuery;
297 }
298
Nils Diewaldbb33da22015-03-04 16:24:25 +0000299
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000300 /**
301 * Tells weather the second sub-span should co-occur or not.
Eliza Margaretha8551e5b2014-12-15 16:46:18 +0000302 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000303 * @return a boolean with <code>true</code> if the second sub-span
304 * should
Eliza Margaretha8551e5b2014-12-15 16:46:18 +0000305 * <em>not</em> co-occur, <code>false</code> otherwise.
306 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000307 public boolean isExclusion () {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000308 return exclusion;
309 }
310
Nils Diewaldbb33da22015-03-04 16:24:25 +0000311
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000312 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +0000313 * Sets <code>true</code> if the second sub-span should
314 * <em>not</em>
Eliza Margaretha8551e5b2014-12-15 16:46:18 +0000315 * co-occur, <code>false</code> otherwise.
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000316 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000317 * @param exclusion
318 * a boolean with value <code>true</code> if the second
319 * sub-span should <em>not</em> co-occur,
320 * <code>false</code>
321 * otherwise.
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000322 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000323 public void setExclusion (boolean exclusion) {
Eliza Margaretha7ebd6d92014-12-02 11:48:36 +0000324 this.exclusion = exclusion;
325 }
Eliza Margaretha609fcc62014-02-13 14:10:20 +0000326
Nils Diewaldbb33da22015-03-04 16:24:25 +0000327
Eliza Margaretha95449782014-12-16 16:23:37 +0000328 /**
329 * Tells whether the spans must occur in order or not.
330 *
331 * @return <code>true</code> if the spans must occur in order,
332 * <code>false</code> otherwise.
333 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000334 public boolean isOrdered () {
Eliza Margaretha95449782014-12-16 16:23:37 +0000335 return isOrdered;
336 }
337
Nils Diewaldbb33da22015-03-04 16:24:25 +0000338
Eliza Margaretha95449782014-12-16 16:23:37 +0000339 /**
340 * Sets whether the spans must occur in order or not.
341 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000342 * @param isOrdered
343 * <code>true</code> if the spans must occur in order,
344 * <code>false</code> otherwise.
Eliza Margaretha95449782014-12-16 16:23:37 +0000345 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000346 public void setOrder (boolean isOrdered) {
Eliza Margaretha95449782014-12-16 16:23:37 +0000347 this.isOrdered = isOrdered;
348 }
349
Eliza Margarethaa2603fa2014-01-22 10:59:25 +0000350}