| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.query.spans; |
| 2 | |
| 3 | import java.io.IOException; |
| 4 | import java.util.ArrayList; |
| 5 | import java.util.Iterator; |
| 6 | import java.util.List; |
| 7 | import java.util.Map; |
| 8 | |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 9 | import org.apache.lucene.index.LeafReaderContext; |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 10 | import org.apache.lucene.index.Term; |
| 11 | import org.apache.lucene.index.TermContext; |
| 12 | import org.apache.lucene.search.spans.Spans; |
| 13 | import org.apache.lucene.util.Bits; |
| 14 | |
| 15 | import de.ids_mannheim.korap.query.SpanDistanceQuery; |
| 16 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 17 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 18 | * Span enumeration of spans (firstSpans) which do <em>not</em> occur |
| 19 | * together |
| 20 | * with other spans (secondSpans) on the right side, within a range of |
| 21 | * an |
| 22 | * element-based distance (i.e. a sentence or a paragraph as the |
| 23 | * distance unit). |
| 24 | * If the query requires that the spans are ordered, then the |
| 25 | * firstSpans must |
| 26 | * occur before the secondSpans. In this class, firstSpans are also |
| 27 | * referred to |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 28 | * as target spans and second spans as candidate spans.<br/> |
| 29 | * <br/> |
| 30 | * Note: The element distance unit does not overlap to each other. |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 31 | * |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 32 | * @author margaretha |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 33 | * */ |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 34 | public class ElementDistanceExclusionSpans extends DistanceSpans { |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 35 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 36 | private Spans elements; |
| 37 | private boolean hasMoreElements; |
| 38 | private int elementPosition; |
| Nils Diewald | 34eaa86 | 2014-06-03 10:56:27 +0000 | [diff] [blame] | 39 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 40 | private boolean isOrdered; |
| 41 | private boolean hasMoreSecondSpans; |
| Nils Diewald | 34eaa86 | 2014-06-03 10:56:27 +0000 | [diff] [blame] | 42 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 43 | // other first spans occurred between the current target and the second |
| 44 | // spans |
| 45 | protected List<CandidateSpan> targetList; |
| 46 | // secondSpans occurring near the firstSpans |
| 47 | protected List<CandidateSpan> candidateList; |
| 48 | private int currentDocNum; |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 49 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 50 | private int minDistance, maxDistance; |
| 51 | private int firstSpanPostion; |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 52 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 53 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 54 | /** |
| 55 | * Constructs ElementDistanceExclusionSpans from the specified |
| 56 | * {@link SpanDistanceQuery}. |
| 57 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 58 | * @param query |
| 59 | * a SpanDistanceQuery |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 60 | * @param context |
| 61 | * @param acceptDocs |
| 62 | * @param termContexts |
| 63 | * @throws IOException |
| 64 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 65 | public ElementDistanceExclusionSpans (SpanDistanceQuery query, |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 66 | LeafReaderContext context, |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 67 | Bits acceptDocs, |
| 68 | Map<Term, TermContext> termContexts) |
| 69 | throws IOException { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 70 | super(query, context, acceptDocs, termContexts); |
| Nils Diewald | 34eaa86 | 2014-06-03 10:56:27 +0000 | [diff] [blame] | 71 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 72 | elements = query.getElementQuery().getSpans(context, acceptDocs, |
| 73 | termContexts); |
| 74 | hasMoreElements = elements.next(); |
| 75 | hasMoreSpans = firstSpans.next() && hasMoreElements; |
| 76 | hasMoreSecondSpans = secondSpans.next(); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 77 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 78 | elementPosition = 0; |
| 79 | this.isOrdered = query.isOrdered(); |
| 80 | candidateList = new ArrayList<CandidateSpan>(); |
| 81 | targetList = new ArrayList<CandidateSpan>(); |
| 82 | currentDocNum = firstSpans.doc(); |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 83 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 84 | minDistance = query.getMinDistance(); |
| 85 | maxDistance = query.getMaxDistance(); |
| 86 | } |
| 87 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 88 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 89 | @Override |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 90 | protected boolean advance () throws IOException { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 91 | while (!targetList.isEmpty() |
| 92 | || (hasMoreSpans && ensureSameDoc(firstSpans, elements))) { |
| 93 | if (!targetList.isEmpty()) { |
| 94 | if (isFirstTargetValid()) |
| 95 | return true; |
| 96 | else |
| 97 | continue; |
| 98 | } |
| 99 | if (findMatch()) |
| 100 | return true; |
| 101 | } |
| 102 | return false; |
| 103 | } |
| 104 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 105 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 106 | /** |
| 107 | * Tells if the first target from the target list is a match. |
| 108 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 109 | * @return <code>true</code> if the first target from the target |
| 110 | * list is a |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 111 | * match, <code>false</code> otherwise. |
| 112 | * @throws IOException |
| 113 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 114 | private boolean isFirstTargetValid () throws IOException { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 115 | CandidateSpan target = targetList.get(0); |
| 116 | targetList.remove(0); |
| 117 | firstSpanPostion = target.getPosition(); |
| 118 | filterCandidateList(firstSpanPostion); |
| 119 | collectRightCandidates(); |
| 120 | |
| 121 | if (isWithinDistance()) { |
| 122 | return false; |
| 123 | } |
| 124 | setMatchProperties(target); |
| 125 | return true; |
| 126 | } |
| 127 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 128 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 129 | /** |
| 130 | * Validate if the current firstSpan is a match. |
| 131 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 132 | * @return <code>true</code> if a match is found, |
| 133 | * <code>false</code> |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 134 | * otherwise. |
| 135 | * @throws IOException |
| 136 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 137 | private boolean findMatch () throws IOException { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 138 | if (firstSpans.doc() != currentDocNum) { |
| 139 | currentDocNum = firstSpans.doc(); |
| 140 | candidateList.clear(); |
| 141 | } |
| 142 | |
| 143 | if (hasMoreSecondSpans) { |
| 144 | if (secondSpans.doc() == firstSpans.doc()) { |
| 145 | return (isFirstSpanValid() ? true : false); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 146 | } |
| 147 | else if (secondSpans.doc() < firstSpans.doc()) { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 148 | hasMoreSecondSpans = secondSpans.skipTo(firstSpans.doc()); |
| 149 | return false; |
| 150 | } |
| 151 | } |
| 152 | |
| 153 | // return (isFirstSpanValid() ? true : false); |
| 154 | |
| 155 | if (candidateList.isEmpty()) { |
| 156 | if (isFirstSpanInElement()) { |
| 157 | setMatchProperties(new CandidateSpan(firstSpans, |
| 158 | elementPosition)); |
| 159 | hasMoreSpans = firstSpans.next(); |
| 160 | return true; |
| 161 | } |
| 162 | hasMoreSpans = firstSpans.next(); |
| 163 | return false; |
| 164 | } |
| 165 | return (isFirstSpanValid() ? true : false); |
| 166 | } |
| 167 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 168 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 169 | /** |
| 170 | * Tells if the current firstSpan is a match. |
| 171 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 172 | * @return <code>true</code> if a match is found, |
| 173 | * <code>false</code> |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 174 | * otherwise. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 175 | * @throws IOException |
| 176 | * <pre> |
| 177 | * private boolean isFirstSpanValid() throws |
| 178 | * IOException { |
| 179 | * if (candidateList.isEmpty()) { |
| 180 | * if (isFirstSpanInElement()) { |
| 181 | * setMatchProperties(new CandidateSpan(firstSpans, |
| 182 | * elementPosition)); |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 183 | * hasMoreSpans = firstSpans.next(); |
| 184 | * return true; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 185 | * } |
| 186 | * hasMoreSpans = firstSpans.next(); |
| 187 | * return false; |
| 188 | * } |
| 189 | * return (findMatch() ? true : false); |
| 190 | * } |
| 191 | * </pre> |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 192 | */ |
| 193 | |
| 194 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 195 | * Tells if the given span is in an element distance unit, or not, |
| 196 | * by |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 197 | * advancing the element distance unit to the span position. |
| 198 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 199 | * @param span |
| 200 | * a span |
| 201 | * @return <code>true</code> if the element distance unit can be |
| 202 | * advanced to |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 203 | * contain the given span, <code>false</code> otherwise. |
| 204 | * @throws IOException |
| 205 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 206 | private boolean advanceElementTo (Spans span) throws IOException { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 207 | while (hasMoreElements && elements.doc() == currentDocNum |
| 208 | && elements.start() < span.end()) { |
| 209 | |
| 210 | if (span.start() >= elements.start() |
| 211 | && span.end() <= elements.end()) { |
| 212 | return true; |
| 213 | } |
| 214 | |
| 215 | hasMoreElements = elements.next(); |
| 216 | elementPosition++; |
| 217 | } |
| 218 | return false; |
| 219 | } |
| 220 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 221 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 222 | /** |
| 223 | * Tells if the current firstSpan is a match. |
| 224 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 225 | * @return <code>true</code> if a match is found, |
| 226 | * <code>false</code> |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 227 | * otherwise. |
| 228 | * @throws IOException |
| 229 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 230 | private boolean isFirstSpanValid () throws IOException { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 231 | if (!isOrdered) |
| 232 | collectLeftCandidates(); |
| 233 | |
| 234 | if (isFirstSpanInElement()) { |
| 235 | CandidateSpan target = new CandidateSpan(firstSpans, |
| 236 | elementPosition); |
| 237 | hasMoreSpans = firstSpans.next(); |
| 238 | // Checking if the secondspans in the *left* side are not within the |
| 239 | // distance range |
| 240 | if (!isOrdered && isWithinDistance()) |
| 241 | return false; |
| 242 | // Checking if the secondspans in the *right* side are not within |
| 243 | // the distance range |
| 244 | collectRightCandidates(); |
| 245 | if (isWithinDistance()) |
| 246 | return false; |
| 247 | |
| 248 | setMatchProperties(target); |
| 249 | return true; |
| 250 | } |
| 251 | hasMoreSpans = firstSpans.next(); |
| 252 | return false; |
| 253 | } |
| 254 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 255 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 256 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 257 | * Collects all second spans (candidates) on the right side of the |
| 258 | * current |
| 259 | * first span (target) position. At the same time, also collects |
| 260 | * all other |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 261 | * first spans occurring before the second spans. |
| 262 | * |
| 263 | * @throws IOException |
| 264 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 265 | private void collectRightCandidates () throws IOException { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 266 | while (hasMoreSecondSpans && secondSpans.doc() == currentDocNum) { |
| 267 | |
| 268 | if (elementPosition > firstSpanPostion + maxDistance) { |
| 269 | break; |
| 270 | } |
| 271 | // stores all first spans occurring before the current second span |
| 272 | // in the target list. |
| 273 | if (hasMoreSpans && firstSpans.start() < secondSpans.start() |
| 274 | && firstSpans.doc() == currentDocNum) { |
| 275 | if (advanceElementTo(firstSpans)) { |
| 276 | targetList.add(new CandidateSpan(firstSpans, |
| 277 | elementPosition)); |
| 278 | } |
| 279 | hasMoreSpans = firstSpans.next(); |
| 280 | continue; |
| 281 | } |
| 282 | // collects only second spans occurring inside an element |
| 283 | if (advanceElementTo(secondSpans)) { |
| 284 | candidateList.add(new CandidateSpan(secondSpans, |
| 285 | elementPosition)); |
| 286 | } |
| 287 | hasMoreSecondSpans = secondSpans.next(); |
| 288 | } |
| 289 | } |
| 290 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 291 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 292 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 293 | * Collects all the second spans (candidates) occurring before the |
| 294 | * first |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 295 | * spans, and are within an element distance unit. |
| 296 | * |
| 297 | * @throws IOException |
| 298 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 299 | private void collectLeftCandidates () throws IOException { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 300 | while (hasMoreSecondSpans && secondSpans.doc() == firstSpans.doc() |
| 301 | && secondSpans.start() < firstSpans.end()) { |
| 302 | if (advanceElementTo(secondSpans)) { |
| 303 | candidateList.add(new CandidateSpan(secondSpans, |
| 304 | elementPosition)); |
| 305 | filterCandidateList(elementPosition); |
| 306 | } |
| 307 | hasMoreSecondSpans = secondSpans.next(); |
| 308 | } |
| 309 | } |
| 310 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 311 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 312 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 313 | * Tells if there is a candidate span (second span) occurring |
| 314 | * together with |
| 315 | * the target span (firstspan) within the minimum and maximum |
| 316 | * distance |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 317 | * range. |
| 318 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 319 | * @return <code>true</code> if there is a candidate span (second |
| 320 | * span) |
| 321 | * occurring together with the target span (firstspan) |
| 322 | * within the |
| 323 | * minimum and maximum distance range, <code>false</code> |
| 324 | * otherwise. |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 325 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 326 | private boolean isWithinDistance () { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 327 | int actualDistance; |
| 328 | for (CandidateSpan cs : candidateList) { |
| 329 | actualDistance = cs.getPosition() - firstSpanPostion; |
| 330 | if (!isOrdered) |
| 331 | actualDistance = Math.abs(actualDistance); |
| 332 | |
| 333 | if (minDistance <= actualDistance && actualDistance <= maxDistance) |
| 334 | return true; |
| 335 | } |
| 336 | return false; |
| 337 | } |
| 338 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 339 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 340 | /** |
| 341 | * Tells if the current firstSpans is in an element. |
| 342 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 343 | * @return <code>true</code> if the current firstSpans in is an |
| 344 | * element, |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 345 | * <code>false</code> otherwise. |
| 346 | * @throws IOException |
| 347 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 348 | private boolean isFirstSpanInElement () throws IOException { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 349 | if (advanceElementTo(firstSpans)) { |
| 350 | firstSpanPostion = elementPosition; |
| 351 | filterCandidateList(firstSpanPostion); |
| 352 | return true; |
| 353 | } |
| 354 | return false; |
| 355 | } |
| 356 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 357 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 358 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 359 | * From the candidateList, removes all candidate spans that are |
| 360 | * too far from |
| 361 | * the given target position, and have exactly the same position |
| 362 | * as the |
| 363 | * target position. Only candidate spans occurring within a range |
| 364 | * of |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 365 | * distance from the target position, are retained. |
| 366 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 367 | * @param position |
| 368 | * target/firstSpan position |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 369 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 370 | private void filterCandidateList (int position) { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 371 | |
| 372 | Iterator<CandidateSpan> i = candidateList.iterator(); |
| 373 | CandidateSpan cs; |
| 374 | while (i.hasNext()) { |
| 375 | cs = i.next(); |
| 376 | if (cs.getPosition() == position |
| 377 | || cs.getPosition() + maxDistance >= position) { |
| 378 | break; |
| 379 | } |
| 380 | i.remove(); |
| 381 | } |
| 382 | } |
| 383 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 384 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 385 | /** |
| 386 | * Sets the given target/match CandidateSpan as the current match. |
| 387 | * |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 388 | * @param match |
| 389 | * a target/firstSpan wrapped as a CandidateSpan |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 390 | * @throws IOException |
| 391 | */ |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 392 | private void setMatchProperties (CandidateSpan match) throws IOException { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 393 | matchDocNumber = match.getDoc(); |
| 394 | matchStartPosition = match.getStart(); |
| 395 | matchEndPosition = match.getEnd(); |
| 396 | |
| 397 | if (collectPayloads && match.getPayloads() != null) |
| 398 | matchPayload.addAll(match.getPayloads()); |
| 399 | |
| 400 | setMatchFirstSpan(match); |
| 401 | } |
| 402 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 403 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 404 | @Override |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 405 | public boolean skipTo (int target) throws IOException { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 406 | if (hasMoreSpans && firstSpans.doc() < target) { |
| 407 | if (!firstSpans.skipTo(target)) { |
| 408 | hasMoreSpans = false; |
| 409 | return false; |
| 410 | } |
| 411 | } |
| 412 | return advance(); |
| 413 | } |
| 414 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 415 | |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 416 | @Override |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 417 | public long cost () { |
| Eliza Margaretha | c8d5920 | 2014-12-16 16:21:16 +0000 | [diff] [blame] | 418 | return elements.cost() + firstSpans.cost() + secondSpans.cost(); |
| 419 | } |
| Eliza Margaretha | e335beb | 2014-02-27 12:56:14 +0000 | [diff] [blame] | 420 | |
| 421 | } |