| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query; |
| 2 | |
| 3 | import java.io.IOException; |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 4 | import java.util.ArrayList; |
| 5 | import java.util.List; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 6 | import java.util.Map; |
| 7 | |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 8 | import org.apache.lucene.index.LeafReaderContext; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 9 | import org.apache.lucene.index.IndexReader; |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 10 | import org.apache.lucene.index.Term; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 11 | import org.apache.lucene.index.TermContext; |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 12 | import org.apache.lucene.search.Query; |
| 13 | import org.apache.lucene.search.spans.SpanQuery; |
| 14 | import org.apache.lucene.search.spans.Spans; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 15 | import org.apache.lucene.util.Bits; |
| 16 | import org.apache.lucene.util.ToStringUtils; |
| 17 | |
| Nils Diewald | f075df0 | 2015-03-03 20:34:00 +0000 | [diff] [blame] | 18 | import de.ids_mannheim.korap.query.spans.FocusSpans; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 19 | |
| 20 | /** |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 21 | * Modify the span of a match to the boundaries of a certain class. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 22 | * |
| 23 | * In case multiple classes are found with the very same number, the |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 24 | * span is maximized to start on the first occurrence from the left |
| 25 | * and end on the last occurrence on the right. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 26 | * |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 27 | * In case the class to modify on is not found in the subquery, the |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 28 | * match is ignored. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 29 | * |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 30 | * @author diewald, margaretha |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 31 | * |
| Nils Diewald | f075df0 | 2015-03-03 20:34:00 +0000 | [diff] [blame] | 32 | * @see FocusSpans |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 33 | */ |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 34 | public class SpanFocusQuery extends SimpleSpanQuery { |
| 35 | |
| 36 | private List<Byte> classNumbers = new ArrayList<Byte>(); |
| 37 | private boolean isSorted = true; |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 38 | private boolean matchTemporaryClass = false; |
| 39 | private boolean removeTemporaryClasses = false; |
| Eliza Margaretha | dc98dc1 | 2016-11-16 14:33:42 +0100 | [diff] [blame] | 40 | private int windowSize = 10; // default |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 41 | |
| Akron | c12567c | 2016-06-03 00:40:52 +0200 | [diff] [blame] | 42 | |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 43 | /** |
| Nils Diewald | cec40f9 | 2015-02-19 22:20:02 +0000 | [diff] [blame] | 44 | * Construct a new SpanFocusQuery. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 45 | * |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 46 | * @param firstClause |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 47 | * The nested {@link SpanQuery}, that contains one or |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 48 | * more |
| 49 | * classed spans. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 50 | * @param number |
| 51 | * The class number to focus on. |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 52 | */ |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 53 | public SpanFocusQuery (SpanQuery sq, byte classNumber) { |
| 54 | super(sq, true); |
| 55 | classNumbers.add(classNumber); |
| 56 | }; |
| 57 | |
| 58 | |
| 59 | public SpanFocusQuery (SpanQuery sq, List<Byte> classNumbers) { |
| 60 | super(sq, true); |
| 61 | this.classNumbers = classNumbers; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 62 | }; |
| 63 | |
| Akron | c12567c | 2016-06-03 00:40:52 +0200 | [diff] [blame] | 64 | |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 65 | /** |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 66 | * Construct a new SpanFocusQuery. The class to focus on defaults |
| 67 | * to |
| 68 | * <tt>1</tt>. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 69 | * |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 70 | * @param firstClause |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 71 | * The nested {@link SpanQuery}, that contains one or |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 72 | * more |
| 73 | * classed spans. |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 74 | */ |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 75 | public SpanFocusQuery (SpanQuery sq) { |
| 76 | super(sq, true); |
| 77 | classNumbers.add((byte) 1); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 78 | }; |
| 79 | |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 80 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 81 | @Override |
| 82 | public String toString (String field) { |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 83 | StringBuffer buffer = new StringBuffer(); |
| 84 | buffer.append("focus("); |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 85 | if (matchTemporaryClass) { |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 86 | buffer.append("#"); |
| 87 | } |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 88 | if (classNumbers.size() > 1) { |
| 89 | buffer.append("["); |
| 90 | for (int i = 0; i < classNumbers.size(); i++) { |
| 91 | buffer.append((short) classNumbers.get(i) & 0xFF); |
| 92 | if (i != classNumbers.size() - 1) { |
| 93 | buffer.append(","); |
| 94 | } |
| 95 | } |
| 96 | buffer.append("]"); |
| 97 | } |
| 98 | else { |
| 99 | buffer.append((short) classNumbers.get(0) & 0xFF).append(": "); |
| 100 | } |
| 101 | buffer.append(this.firstClause.toString()); |
| Akron | a26184e | 2018-12-05 15:37:34 +0100 | [diff] [blame] | 102 | if (!this.isSorted()) { |
| 103 | buffer.append(",sorting"); |
| 104 | } |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 105 | buffer.append(')'); |
| 106 | buffer.append(ToStringUtils.boost(getBoost())); |
| 107 | return buffer.toString(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 108 | }; |
| 109 | |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 110 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 111 | @Override |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 112 | public Spans getSpans (final LeafReaderContext context, Bits acceptDocs, |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 113 | Map<Term, TermContext> termContexts) throws IOException { |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 114 | return new FocusSpans(this, context, acceptDocs, termContexts); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 115 | }; |
| 116 | |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 117 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 118 | @Override |
| 119 | public Query rewrite (IndexReader reader) throws IOException { |
| Nils Diewald | cec40f9 | 2015-02-19 22:20:02 +0000 | [diff] [blame] | 120 | SpanFocusQuery clone = null; |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 121 | SpanQuery query = (SpanQuery) this.firstClause.rewrite(reader); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 122 | |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 123 | if (query != this.firstClause) { |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 124 | if (clone == null) |
| 125 | clone = this.clone(); |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 126 | clone.firstClause = query; |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 127 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 128 | |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 129 | if (clone != null) |
| 130 | return clone; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 131 | |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 132 | return this; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 133 | }; |
| 134 | |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 135 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 136 | @Override |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 137 | public SpanFocusQuery clone () { |
| Nils Diewald | cec40f9 | 2015-02-19 22:20:02 +0000 | [diff] [blame] | 138 | SpanFocusQuery spanFocusQuery = new SpanFocusQuery( |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 139 | (SpanQuery) this.firstClause.clone(), this.getClassNumbers()); |
| Nils Diewald | cec40f9 | 2015-02-19 22:20:02 +0000 | [diff] [blame] | 140 | spanFocusQuery.setBoost(getBoost()); |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 141 | spanFocusQuery.setMatchTemporaryClass(this.matchTemporaryClass); |
| 142 | spanFocusQuery.setSorted(this.isSorted); |
| 143 | spanFocusQuery.setRemoveTemporaryClasses(this.removeTemporaryClasses); |
| Nils Diewald | cec40f9 | 2015-02-19 22:20:02 +0000 | [diff] [blame] | 144 | return spanFocusQuery; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 145 | }; |
| 146 | |
| 147 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 148 | @Override |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 149 | public boolean equals (Object o) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 150 | if (this == o) |
| 151 | return true; |
| 152 | if (!(o instanceof SpanFocusQuery)) |
| 153 | return false; |
| 154 | |
| 155 | final SpanFocusQuery spanFocusQuery = (SpanFocusQuery) o; |
| 156 | |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 157 | if (!this.firstClause.equals(spanFocusQuery.firstClause)) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 158 | return false; |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 159 | if (this.getClassNumbers() != spanFocusQuery.getClassNumbers()) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 160 | return false; |
| Nils Diewald | cec40f9 | 2015-02-19 22:20:02 +0000 | [diff] [blame] | 161 | |
| 162 | // Probably not necessary |
| 163 | return getBoost() == spanFocusQuery.getBoost(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 164 | }; |
| 165 | |
| 166 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 167 | @Override |
| Nils Diewald | 3e3cbf3 | 2015-02-06 21:30:49 +0000 | [diff] [blame] | 168 | public int hashCode () { |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 169 | int result = firstClause.hashCode(); |
| 170 | for (byte number : classNumbers) |
| 171 | result = 31 * result + number; |
| Nils Diewald | 85f9c42 | 2015-02-06 21:09:16 +0000 | [diff] [blame] | 172 | result += Float.floatToRawIntBits(getBoost()); |
| 173 | return result; |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 174 | } |
| 175 | |
| 176 | |
| 177 | public List<Byte> getClassNumbers () { |
| 178 | return classNumbers; |
| 179 | } |
| 180 | |
| 181 | |
| 182 | public void setClassNumbers (List<Byte> classNumbers) { |
| 183 | this.classNumbers = classNumbers; |
| 184 | } |
| 185 | |
| 186 | |
| 187 | public boolean isSorted () { |
| 188 | return isSorted; |
| 189 | } |
| 190 | |
| 191 | |
| 192 | public void setSorted (boolean isSorted) { |
| 193 | this.isSorted = isSorted; |
| 194 | } |
| 195 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 196 | |
| 197 | public boolean matchTemporaryClass () { |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 198 | return matchTemporaryClass; |
| 199 | } |
| 200 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 201 | |
| 202 | public void setMatchTemporaryClass (boolean matchTemporaryClass) { |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 203 | this.matchTemporaryClass = matchTemporaryClass; |
| 204 | } |
| 205 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 206 | |
| 207 | public boolean removeTemporaryClasses () { |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 208 | return removeTemporaryClasses; |
| 209 | } |
| 210 | |
| Akron | bb5d173 | 2015-06-22 01:22:40 +0200 | [diff] [blame] | 211 | |
| 212 | public void setRemoveTemporaryClasses (boolean rem) { |
| margaretha | f70addb | 2015-04-27 13:17:18 +0200 | [diff] [blame] | 213 | this.removeTemporaryClasses = rem; |
| 214 | } |
| 215 | |
| margaretha | 3865e52 | 2016-05-02 13:24:51 +0200 | [diff] [blame] | 216 | |
| Akron | c12567c | 2016-06-03 00:40:52 +0200 | [diff] [blame] | 217 | public int getWindowSize () { |
| margaretha | 3865e52 | 2016-05-02 13:24:51 +0200 | [diff] [blame] | 218 | return windowSize; |
| 219 | } |
| 220 | |
| 221 | |
| Akron | c12567c | 2016-06-03 00:40:52 +0200 | [diff] [blame] | 222 | public void setWindowSize (int windowSize) { |
| margaretha | 3865e52 | 2016-05-02 13:24:51 +0200 | [diff] [blame] | 223 | this.windowSize = windowSize; |
| 224 | } |
| 225 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 226 | }; |