blob: fa7f5aab295d89073e12d5dbe3ecbfcfeaa1fd0c [file] [log] [blame]
Eliza Margaretha72f20f02014-03-12 16:10:58 +00001package de.ids_mannheim.korap.query;
2
3import java.io.IOException;
4import java.util.Map;
5
Akron700c1eb2015-09-25 16:57:30 +02006import org.apache.lucene.index.LeafReaderContext;
Eliza Margaretha72f20f02014-03-12 16:10:58 +00007import org.apache.lucene.index.Term;
8import org.apache.lucene.index.TermContext;
9import org.apache.lucene.search.spans.SpanQuery;
10import org.apache.lucene.search.spans.Spans;
11import org.apache.lucene.util.Bits;
12import org.apache.lucene.util.ToStringUtils;
13
Eliza Margarethad4693462014-03-17 13:16:18 +000014import de.ids_mannheim.korap.query.spans.RepetitionSpans;
Eliza Margaretha72f20f02014-03-12 16:10:58 +000015
Eliza Margarethaf0171c52015-01-14 17:38:16 +000016/**
Nils Diewaldbb33da22015-03-04 16:24:25 +000017 * SpanRepetitionQuery means that the given SpanQuery must appears
Eliza Margarethadc98dc12016-11-16 14:33:42 +010018 * multiple times in a sequence. The number of repetition depends on
19 * the minimum and the maximum number parameters. <br />
Eliza Margarethaf0171c52015-01-14 17:38:16 +000020 * <br />
21 *
Nils Diewaldbb33da22015-03-04 16:24:25 +000022 * In the example below, SpanRepetitionQuery retrieves
23 * {@link RepetitionSpans} consisting of the TermSpans "tt:p/ADJ" that
Eliza Margarethadc98dc12016-11-16 14:33:42 +010024 * must appear at least once or consecutively two times. What appears
25 * after the RepetitionSpans is not considered, so it is possible that
26 * it is another "tt:p/ADJ". <br />
Eliza Margarethaf0171c52015-01-14 17:38:16 +000027 * <br />
28 *
29 * <pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000030 * SpanRepetitionQuery sq = new SpanRepetitionQuery(new
31 * SpanTermQuery(new Term(
32 * &quot;tokens&quot;, &quot;tt:p/ADJ&quot;)), 1, 2, true);
Eliza Margarethaf0171c52015-01-14 17:38:16 +000033 * </pre>
34 *
Nils Diewaldbb33da22015-03-04 16:24:25 +000035 * For instance, "a large black leather jacket" contains the following
36 * matches.
Eliza Margarethaf0171c52015-01-14 17:38:16 +000037 *
38 * <pre>
Nils Diewaldbb33da22015-03-04 16:24:25 +000039 * [large]
40 * [large black]
41 * [black]
42 * [black leather]
43 * [leather]
Eliza Margarethaf0171c52015-01-14 17:38:16 +000044 * </pre>
Eliza Margarethafb25cef2014-06-06 14:19:07 +000045 *
46 * @author margaretha
Eliza Margaretha6f989202016-10-14 21:48:29 +020047 */
Eliza Margarethaf0171c52015-01-14 17:38:16 +000048public class SpanRepetitionQuery extends SimpleSpanQuery {
Eliza Margaretha72f20f02014-03-12 16:10:58 +000049
Eliza Margarethaf0171c52015-01-14 17:38:16 +000050 private int min, max;
Eliza Margaretha72f20f02014-03-12 16:10:58 +000051
Eliza Margarethaf0171c52015-01-14 17:38:16 +000052 /**
Nils Diewaldbb33da22015-03-04 16:24:25 +000053 * Constructs a SpanRepetitionQuery for the given
54 * {@link SpanQuery}.
Eliza Margarethaf0171c52015-01-14 17:38:16 +000055 *
Nils Diewaldbb33da22015-03-04 16:24:25 +000056 * @param sq
57 * a SpanQuery
58 * @param min
59 * the minimum number of the required repetition
60 * @param max
61 * the maximum number of the required repetition
62 * @param collectPayloads
63 * a boolean flag representing the value
64 * <code>true</code> if payloads are to be collected,
65 * otherwise
66 * <code>false</code>.
Eliza Margarethaf0171c52015-01-14 17:38:16 +000067 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000068 public SpanRepetitionQuery (SpanQuery sq, int min, int max,
69 boolean collectPayloads) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +000070 super(sq, collectPayloads);
Eliza Margaretha6f989202016-10-14 21:48:29 +020071 if (min < 1) {
72 throw new IllegalArgumentException(
73 "Minimum repetition must not lower than 1.");
74 }
Eliza Margarethaf0171c52015-01-14 17:38:16 +000075 this.min = min;
76 this.max = max;
77 }
Eliza Margaretha72f20f02014-03-12 16:10:58 +000078
Nils Diewaldbb33da22015-03-04 16:24:25 +000079
Eliza Margarethaf0171c52015-01-14 17:38:16 +000080 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +000081 public SimpleSpanQuery clone () {
Eliza Margarethaf0171c52015-01-14 17:38:16 +000082 SpanRepetitionQuery sq = new SpanRepetitionQuery(
83 (SpanQuery) this.firstClause.clone(), this.min, this.max,
84 this.collectPayloads);
85 sq.setBoost(getBoost());
86 return sq;
87 }
Eliza Margaretha72f20f02014-03-12 16:10:58 +000088
Nils Diewaldbb33da22015-03-04 16:24:25 +000089
Eliza Margarethaf0171c52015-01-14 17:38:16 +000090 @Override
Akron700c1eb2015-09-25 16:57:30 +020091 public Spans getSpans (LeafReaderContext context, Bits acceptDocs,
Eliza Margarethaf0171c52015-01-14 17:38:16 +000092 Map<Term, TermContext> termContexts) throws IOException {
93 return new RepetitionSpans(this, context, acceptDocs, termContexts);
94 }
Eliza Margaretha72f20f02014-03-12 16:10:58 +000095
Nils Diewaldbb33da22015-03-04 16:24:25 +000096
Eliza Margarethaf0171c52015-01-14 17:38:16 +000097 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +000098 public String toString (String field) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +000099 StringBuilder sb = new StringBuilder();
100 sb.append("spanRepetition(");
101 sb.append(firstClause.toString(field));
102 sb.append("{");
103 sb.append(min);
104 sb.append(",");
105 sb.append(max);
106 sb.append("})");
107 sb.append(ToStringUtils.boost(getBoost()));
108 return sb.toString();
109 }
Eliza Margaretha72f20f02014-03-12 16:10:58 +0000110
Nils Diewaldbb33da22015-03-04 16:24:25 +0000111
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000112 /**
113 * Returns the minimum number of required repetitions.
114 *
115 * @return the minimum number of required repetitions
116 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000117 public int getMin () {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000118 return min;
119 }
Eliza Margaretha72f20f02014-03-12 16:10:58 +0000120
Nils Diewaldbb33da22015-03-04 16:24:25 +0000121
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000122 /**
123 * Sets the minimum number of required repetitions.
124 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000125 * @param min
126 * the minimum number of required repetitions
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000127 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000128 public void setMin (int min) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000129 this.min = min;
130 }
131
Nils Diewaldbb33da22015-03-04 16:24:25 +0000132
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000133 /**
134 * Returns the maximum number of required repetitions.
135 *
136 * @return the maximum number of required repetitions
137 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000138 public int getMax () {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000139 return max;
140 }
141
Nils Diewaldbb33da22015-03-04 16:24:25 +0000142
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000143 /**
144 * Sets the maximum number of required repetitions.
145 *
Nils Diewaldbb33da22015-03-04 16:24:25 +0000146 * @param max
147 * the maximum number of required repetitions
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000148 */
Nils Diewaldbb33da22015-03-04 16:24:25 +0000149 public void setMax (int max) {
Eliza Margarethaf0171c52015-01-14 17:38:16 +0000150 this.max = max;
151 }
152
Eliza Margaretha72f20f02014-03-12 16:10:58 +0000153}