| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query; |
| 2 | |
| 3 | import java.io.IOException; |
| 4 | import java.util.Map; |
| 5 | |
| 6 | import org.apache.lucene.index.AtomicReaderContext; |
| 7 | import org.apache.lucene.index.Term; |
| 8 | import org.apache.lucene.index.TermContext; |
| 9 | import org.apache.lucene.search.spans.SpanQuery; |
| 10 | import org.apache.lucene.search.spans.Spans; |
| 11 | import org.apache.lucene.util.Bits; |
| 12 | |
| Nils Diewald | 5380aa6 | 2014-09-01 13:21:07 +0000 | [diff] [blame] | 13 | // Temporary: |
| Eliza Margaretha | 99c72c2 | 2014-09-17 08:38:25 +0000 | [diff] [blame] | 14 | import de.ids_mannheim.korap.query.spans.ExpandedExclusionSpans; |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 15 | import de.ids_mannheim.korap.query.spans.ExpandedSpans; |
| 16 | |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 17 | /** Query to make a span longer by stretching out the start or the end |
| 18 | * position of the span. The constraints of the expansion, such as how |
| 19 | * large the expansion should be (min and max position) and the |
| 20 | * direction of the expansion with respect to the "main" span, are |
| 21 | * specified in ExpansionConstraint. |
| 22 | * |
| 23 | * The expansion can be specified to not contain any direct/immediate |
| 24 | * /adjacent occurrence(s) of another span. Examples: |
| 25 | * [orth=der][orth!=Baum] "der" cannot be followed by "Baum" |
| 26 | * [pos!=ADJ]{1,2}[orth=Baum] one or two adjectives cannot precedes |
| 27 | * "Baum" |
| 28 | * |
| 29 | * The offsets of the expansion parts can be collected by using a class |
| 30 | * number. |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 31 | * |
| 32 | * @author margaretha |
| 33 | * */ |
| 34 | public class SpanExpansionQuery extends SimpleSpanQuery{ |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 35 | |
| 36 | private int min, max; // min, max expansion position |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 37 | |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 38 | // if > 0, collect expansion offsets using this label |
| 39 | private byte classNumber; |
| 40 | |
| 41 | // expansion direction with regard to the main span: |
| 42 | // < 0 to the left of main span |
| 43 | // >= 0 to the right of main span |
| 44 | private int direction; |
| 45 | |
| 46 | // if true, no occurrence of another span |
| 47 | final boolean isExclusion; |
| 48 | |
| 49 | /** Simple expansion for any/empty token. Use |
| 50 | * {@link #SpanExpansionQuery(SpanQuery, SpanQuery, ExpansionConstraint, |
| 51 | * boolean)} for expansion with exclusions of a specific spanquery. |
| 52 | * */ |
| 53 | public SpanExpansionQuery(SpanQuery firstClause, int min, int max, int direction, |
| 54 | boolean collectPayloads) { |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 55 | super(firstClause, collectPayloads); |
| Eliza Margaretha | 3eaafc6 | 2014-09-17 12:34:26 +0000 | [diff] [blame] | 56 | if (max < min){ |
| 57 | throw new IllegalArgumentException("The max position has to be " + |
| 58 | "bigger than or the same as min position."); |
| 59 | } |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 60 | this.min = min; |
| 61 | this.max = max; |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 62 | this.direction = direction; |
| 63 | this.isExclusion = false; |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 64 | } |
| Eliza Margaretha | 656cb31 | 2014-08-14 12:42:26 +0000 | [diff] [blame] | 65 | |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 66 | public SpanExpansionQuery(SpanQuery firstClause, int min, int max, int direction, |
| 67 | byte classNumber, boolean collectPayloads) { |
| 68 | this(firstClause, min, max, direction, collectPayloads); |
| Eliza Margaretha | 656cb31 | 2014-08-14 12:42:26 +0000 | [diff] [blame] | 69 | this.classNumber = classNumber; |
| 70 | } |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 71 | |
| 72 | /** Expansion with exclusions of the spanquery specified as the second |
| 73 | * parameter. |
| 74 | * */ |
| 75 | public SpanExpansionQuery(SpanQuery firstClause, SpanQuery notClause, int min, |
| 76 | int max, int direction, boolean collectPayloads) { |
| 77 | super(firstClause, notClause, collectPayloads); |
| Eliza Margaretha | 3eaafc6 | 2014-09-17 12:34:26 +0000 | [diff] [blame] | 78 | if (max < min){ |
| 79 | throw new IllegalArgumentException("The max position has to be " + |
| 80 | "bigger than or the same as min position."); |
| 81 | } |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 82 | this.min = min; |
| 83 | this.max = max; |
| 84 | this.direction = direction; |
| 85 | this.isExclusion = true; |
| 86 | } |
| Eliza Margaretha | 99c72c2 | 2014-09-17 08:38:25 +0000 | [diff] [blame] | 87 | |
| 88 | public SpanExpansionQuery(SpanQuery firstClause, SpanQuery notClause, int min, |
| 89 | int max, int direction, byte classNumber, boolean collectPayloads) { |
| 90 | this(firstClause, notClause, min, max, direction, collectPayloads); |
| 91 | this.classNumber = classNumber; |
| 92 | } |
| 93 | |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 94 | |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 95 | @Override |
| 96 | public SimpleSpanQuery clone() { |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 97 | SpanExpansionQuery sq = null; |
| 98 | if (isExclusion){ |
| Eliza Margaretha | 99c72c2 | 2014-09-17 08:38:25 +0000 | [diff] [blame] | 99 | sq = new SpanExpansionQuery(firstClause, secondClause, min, max, direction, |
| 100 | classNumber, collectPayloads); |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 101 | } |
| 102 | else{ |
| 103 | sq = new SpanExpansionQuery(firstClause, min, max, direction, classNumber, |
| 104 | collectPayloads); |
| 105 | } |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 106 | //sq.setBoost(sq.getBoost()); |
| 107 | return sq; |
| 108 | } |
| 109 | |
| 110 | @Override |
| 111 | public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 112 | Map<Term, TermContext> termContexts) throws IOException { |
| Eliza Margaretha | 99c72c2 | 2014-09-17 08:38:25 +0000 | [diff] [blame] | 113 | |
| 114 | // Temporary: |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 115 | if (isExclusion) |
| 116 | return new ExpandedExclusionSpans(this, context, acceptDocs, termContexts); |
| 117 | else |
| Eliza Margaretha | 99c72c2 | 2014-09-17 08:38:25 +0000 | [diff] [blame] | 118 | |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 119 | return new ExpandedSpans(this, context, acceptDocs, termContexts); |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 120 | } |
| 121 | |
| 122 | @Override |
| 123 | public String toString(String field) { |
| 124 | StringBuilder sb = new StringBuilder(); |
| 125 | sb.append("spanExpansion("); |
| 126 | sb.append(firstClause.toString()); |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 127 | if (isExclusion && secondClause != null){ |
| 128 | sb.append(", !"); |
| 129 | sb.append(secondClause.toString()); |
| 130 | } |
| 131 | else{ |
| 132 | sb.append(", []"); |
| 133 | } |
| 134 | sb.append("{"); |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 135 | sb.append(min); |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 136 | sb.append(", "); |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 137 | sb.append(max); |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 138 | sb.append("}, "); |
| 139 | if (direction < 0) |
| 140 | sb.append("left"); |
| 141 | else sb.append("right"); |
| 142 | if (classNumber > 0){ |
| 143 | sb.append(", class:"); |
| 144 | sb.append(classNumber); |
| 145 | } |
| 146 | sb.append(")"); |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 147 | return sb.toString(); |
| 148 | } |
| 149 | |
| 150 | public int getMin() { |
| 151 | return min; |
| 152 | } |
| 153 | |
| 154 | public void setMin(int min) { |
| 155 | this.min = min; |
| 156 | } |
| 157 | |
| 158 | public int getMax() { |
| 159 | return max; |
| 160 | } |
| 161 | |
| 162 | public void setMax(int max) { |
| 163 | this.max = max; |
| 164 | } |
| 165 | |
| Eliza Margaretha | 656cb31 | 2014-08-14 12:42:26 +0000 | [diff] [blame] | 166 | public byte getClassNumber() { |
| 167 | return classNumber; |
| 168 | } |
| 169 | |
| 170 | public void setClassNumber(byte classNumber) { |
| 171 | this.classNumber = classNumber; |
| 172 | } |
| Eliza Margaretha | 7788a98 | 2014-08-29 16:10:52 +0000 | [diff] [blame] | 173 | |
| 174 | public int getDirection() { |
| 175 | return direction; |
| 176 | } |
| 177 | |
| 178 | public void setDirection(int direction) { |
| 179 | this.direction = direction; |
| 180 | } |
| Eliza Margaretha | 7ee76da | 2014-08-12 15:32:33 +0000 | [diff] [blame] | 181 | } |