| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query; |
| 2 | |
| 3 | import java.io.IOException; |
| 4 | import java.util.Map; |
| 5 | |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 6 | import org.apache.lucene.index.LeafReaderContext; |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 7 | import org.apache.lucene.index.Term; |
| 8 | import org.apache.lucene.index.TermContext; |
| 9 | import org.apache.lucene.search.spans.SpanQuery; |
| 10 | import org.apache.lucene.search.spans.Spans; |
| 11 | import org.apache.lucene.util.Bits; |
| 12 | |
| 13 | import de.ids_mannheim.korap.query.spans.SubSpans; |
| 14 | |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 15 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 16 | * This query extracts a subspan from another span. The subspan starts |
| Eliza Margaretha | dc98dc1 | 2016-11-16 14:33:42 +0100 | [diff] [blame] | 17 | * from a startOffset until startOffset + length. A positive |
| 18 | * startOffset is counted from the start of the span, while a negative |
| 19 | * startOffset is calculated from the end of the span. <br /> |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 20 | * <br /> |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 21 | * SpanSubspanQuery takes a SpanQuery as its input and creates |
| 22 | * subspans from the |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 23 | * resulting spans of the SpanQuery. For instance: |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 24 | * |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 25 | * <pre> |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 26 | * SpanElementQuery seq = new SpanElementQuery(new |
| 27 | * SpanElementQuery("tokens", "s"); |
| Eliza Margaretha | afe9812 | 2015-01-23 17:37:57 +0000 | [diff] [blame] | 28 | * SpanSubspanQuery ssq = new SpanSubspanQuery(seq, 0, 2, true); |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 29 | * </pre> |
| 30 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 31 | * In this example, the SpanSubspanQuery creates subspans, that are |
| 32 | * the first |
| Akron | f796b86 | 2016-04-29 18:51:25 +0200 | [diff] [blame] | 33 | * two tokens of all sentences. |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 34 | * |
| 35 | * @author margaretha |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 36 | */ |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 37 | public class SpanSubspanQuery extends SimpleSpanQuery { |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 38 | |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 39 | private int startOffset, length; |
| margaretha | 3865e52 | 2016-05-02 13:24:51 +0200 | [diff] [blame] | 40 | private int windowSize = 10; |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 41 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 42 | |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 43 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 44 | * Creates a SpanSubspanQuery (subspan) from the given |
| 45 | * {@link SpanQuery} with the specified startOffset and length. |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 46 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 47 | * @param firstClause |
| 48 | * a SpanQuery |
| 49 | * @param startOffset |
| 50 | * the start offset of the subspan relative to the |
| 51 | * original span |
| 52 | * @param length |
| 53 | * the length of the subspan |
| 54 | * @param collectPayloads |
| 55 | * a boolean flag representing the value |
| 56 | * <code>true</code> if payloads are to be collected, |
| 57 | * otherwise |
| 58 | * <code>false</code>. |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 59 | */ |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 60 | public SpanSubspanQuery (SpanQuery firstClause, int startOffset, int length, |
| 61 | boolean collectPayloads) { |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 62 | super(firstClause, collectPayloads); |
| 63 | this.startOffset = startOffset; |
| 64 | this.length = length; |
| 65 | } |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 66 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 67 | |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 68 | @Override |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 69 | public SimpleSpanQuery clone () { |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 70 | SpanSubspanQuery sq = new SpanSubspanQuery(this.getFirstClause(), |
| 71 | this.startOffset, this.length, this.collectPayloads); |
| 72 | sq.setBoost(this.getBoost()); |
| 73 | return sq; |
| 74 | } |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 75 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 76 | |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 77 | @Override |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 78 | public Spans getSpans (LeafReaderContext context, Bits acceptDocs, |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 79 | Map<Term, TermContext> termContexts) throws IOException { |
| 80 | return new SubSpans(this, context, acceptDocs, termContexts); |
| 81 | } |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 82 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 83 | |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 84 | @Override |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 85 | public String toString (String field) { |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 86 | StringBuilder sb = new StringBuilder(); |
| 87 | sb.append("subspan("); |
| 88 | sb.append(this.firstClause.toString()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 89 | sb.append(", "); |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 90 | sb.append(this.startOffset); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 91 | sb.append(", "); |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 92 | sb.append(this.length); |
| 93 | sb.append(")"); |
| 94 | return sb.toString(); |
| 95 | } |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 96 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 97 | |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 98 | /** |
| 99 | * Returns the start offset. |
| 100 | * |
| 101 | * @return the start offset. |
| 102 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 103 | public int getStartOffset () { |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 104 | return startOffset; |
| 105 | } |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 106 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 107 | |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 108 | /** |
| 109 | * Sets the start offset. |
| 110 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 111 | * @param startOffset |
| 112 | * the start offset |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 113 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 114 | public void setStartOffset (int startOffset) { |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 115 | this.startOffset = startOffset; |
| 116 | } |
| 117 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 118 | |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 119 | /** |
| 120 | * Returns the length of the subspan. |
| 121 | * |
| 122 | * @return the length of the subspan |
| 123 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 124 | public int getLength () { |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 125 | return length; |
| 126 | } |
| 127 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 128 | |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 129 | /** |
| 130 | * Sets the length of the subspan. |
| 131 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 132 | * @param length |
| 133 | * the length of the subspan. |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 134 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 135 | public void setLength (int length) { |
| Eliza Margaretha | 7612bde | 2015-01-14 10:28:42 +0000 | [diff] [blame] | 136 | this.length = length; |
| 137 | } |
| margaretha | 3865e52 | 2016-05-02 13:24:51 +0200 | [diff] [blame] | 138 | |
| 139 | |
| Akron | c12567c | 2016-06-03 00:40:52 +0200 | [diff] [blame] | 140 | public int getWindowSize () { |
| margaretha | 3865e52 | 2016-05-02 13:24:51 +0200 | [diff] [blame] | 141 | return windowSize; |
| 142 | } |
| 143 | |
| Akron | c12567c | 2016-06-03 00:40:52 +0200 | [diff] [blame] | 144 | |
| 145 | public void setWindowSize (int windowSize) { |
| margaretha | 3865e52 | 2016-05-02 13:24:51 +0200 | [diff] [blame] | 146 | this.windowSize = windowSize; |
| 147 | } |
| Eliza Margaretha | 9d1ebeb | 2014-08-12 11:42:58 +0000 | [diff] [blame] | 148 | } |