package de.ids_mannheim.korap.query;

import java.io.IOException;
import java.util.Map;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;

import de.ids_mannheim.korap.query.spans.RepetitionSpans;

| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 16 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 17 | * SpanRepetitionQuery means that the given SpanQuery must appears |
| Eliza Margaretha | dc98dc1 | 2016-11-16 14:33:42 +0100 | [diff] [blame] | 18 | * multiple times in a sequence. The number of repetition depends on |
| 19 | * the minimum and the maximum number parameters. <br /> |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 20 | * <br /> |
| 21 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 22 | * In the example below, SpanRepetitionQuery retrieves |
| 23 | * {@link RepetitionSpans} consisting of the TermSpans "tt:p/ADJ" that |
| Eliza Margaretha | dc98dc1 | 2016-11-16 14:33:42 +0100 | [diff] [blame] | 24 | * must appear at least once or consecutively two times. What appears |
| 25 | * after the RepetitionSpans is not considered, so it is possible that |
| 26 | * it is another "tt:p/ADJ". <br /> |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 27 | * <br /> |
| 28 | * |
| 29 | * <pre> |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 30 | * SpanRepetitionQuery sq = new SpanRepetitionQuery(new |
| 31 | * SpanTermQuery(new Term( |
| 32 | * "tokens", "tt:p/ADJ")), 1, 2, true); |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 33 | * </pre> |
| 34 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 35 | * For instance, "a large black leather jacket" contains the following |
| 36 | * matches. |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 37 | * |
| 38 | * <pre> |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 39 | * [large] |
| 40 | * [large black] |
| 41 | * [black] |
| 42 | * [black leather] |
| 43 | * [leather] |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 44 | * </pre> |
| Eliza Margaretha | fb25cef | 2014-06-06 14:19:07 +0000 | [diff] [blame] | 45 | * |
| 46 | * @author margaretha |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 47 | */ |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 48 | public class SpanRepetitionQuery extends SimpleSpanQuery { |
| Eliza Margaretha | 72f20f0 | 2014-03-12 16:10:58 +0000 | [diff] [blame] | 49 | |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 50 | private int min, max; |
| Eliza Margaretha | 72f20f0 | 2014-03-12 16:10:58 +0000 | [diff] [blame] | 51 | |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 52 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 53 | * Constructs a SpanRepetitionQuery for the given |
| 54 | * {@link SpanQuery}. |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 55 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 56 | * @param sq |
| 57 | * a SpanQuery |
| 58 | * @param min |
| 59 | * the minimum number of the required repetition |
| 60 | * @param max |
| 61 | * the maximum number of the required repetition |
| 62 | * @param collectPayloads |
| 63 | * a boolean flag representing the value |
| 64 | * <code>true</code> if payloads are to be collected, |
| 65 | * otherwise |
| 66 | * <code>false</code>. |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 67 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 68 | public SpanRepetitionQuery (SpanQuery sq, int min, int max, |
| 69 | boolean collectPayloads) { |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 70 | super(sq, collectPayloads); |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 71 | if (min < 1) { |
| 72 | throw new IllegalArgumentException( |
| 73 | "Minimum repetition must not lower than 1."); |
| 74 | } |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 75 | this.min = min; |
| 76 | this.max = max; |
| 77 | } |
| Eliza Margaretha | 72f20f0 | 2014-03-12 16:10:58 +0000 | [diff] [blame] | 78 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 79 | |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 80 | @Override |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 81 | public SimpleSpanQuery clone () { |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 82 | SpanRepetitionQuery sq = new SpanRepetitionQuery( |
| 83 | (SpanQuery) this.firstClause.clone(), this.min, this.max, |
| 84 | this.collectPayloads); |
| 85 | sq.setBoost(getBoost()); |
| 86 | return sq; |
| 87 | } |
| Eliza Margaretha | 72f20f0 | 2014-03-12 16:10:58 +0000 | [diff] [blame] | 88 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 89 | |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 90 | @Override |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 91 | public Spans getSpans (LeafReaderContext context, Bits acceptDocs, |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 92 | Map<Term, TermContext> termContexts) throws IOException { |
| 93 | return new RepetitionSpans(this, context, acceptDocs, termContexts); |
| 94 | } |
| Eliza Margaretha | 72f20f0 | 2014-03-12 16:10:58 +0000 | [diff] [blame] | 95 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 96 | |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 97 | @Override |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 98 | public String toString (String field) { |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 99 | StringBuilder sb = new StringBuilder(); |
| 100 | sb.append("spanRepetition("); |
| 101 | sb.append(firstClause.toString(field)); |
| 102 | sb.append("{"); |
| 103 | sb.append(min); |
| 104 | sb.append(","); |
| 105 | sb.append(max); |
| 106 | sb.append("})"); |
| 107 | sb.append(ToStringUtils.boost(getBoost())); |
| 108 | return sb.toString(); |
| 109 | } |
| Eliza Margaretha | 72f20f0 | 2014-03-12 16:10:58 +0000 | [diff] [blame] | 110 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 111 | |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 112 | /** |
| 113 | * Returns the minimum number of required repetitions. |
| 114 | * |
| 115 | * @return the minimum number of required repetitions |
| 116 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 117 | public int getMin () { |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 118 | return min; |
| 119 | } |
| Eliza Margaretha | 72f20f0 | 2014-03-12 16:10:58 +0000 | [diff] [blame] | 120 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 121 | |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 122 | /** |
| 123 | * Sets the minimum number of required repetitions. |
| 124 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 125 | * @param min |
| 126 | * the minimum number of required repetitions |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 127 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 128 | public void setMin (int min) { |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 129 | this.min = min; |
| 130 | } |
| 131 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 132 | |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 133 | /** |
| 134 | * Returns the maximum number of required repetitions. |
| 135 | * |
| 136 | * @return the maximum number of required repetitions |
| 137 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 138 | public int getMax () { |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 139 | return max; |
| 140 | } |
| 141 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 142 | |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 143 | /** |
| 144 | * Sets the maximum number of required repetitions. |
| 145 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 146 | * @param max |
| 147 | * the maximum number of required repetitions |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 148 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 149 | public void setMax (int max) { |
| Eliza Margaretha | f0171c5 | 2015-01-14 17:38:16 +0000 | [diff] [blame] | 150 | this.max = max; |
| 151 | } |
| 152 | |
| Eliza Margaretha | 72f20f0 | 2014-03-12 16:10:58 +0000 | [diff] [blame] | 153 | } |