blob: 52405809430ed592bc9314b73f441997d7cda412 [file] [log] [blame]
Nils Diewaldf399a672013-11-18 17:55:22 +00001package de.ids_mannheim.korap.query.spans;
2
Eliza Margaretha22898982014-11-04 17:10:21 +00003import java.io.IOException;
4import java.util.ArrayList;
5import java.util.Collection;
6import java.util.LinkedList;
7import java.util.Map;
8
Nils Diewaldf399a672013-11-18 17:55:22 +00009import org.apache.lucene.index.AtomicReaderContext;
10import org.apache.lucene.index.Term;
11import org.apache.lucene.index.TermContext;
Nils Diewaldf399a672013-11-18 17:55:22 +000012import org.apache.lucene.search.DocIdSetIterator;
Eliza Margaretha22898982014-11-04 17:10:21 +000013import org.apache.lucene.search.spans.Spans;
14import org.apache.lucene.util.Bits;
Nils Diewaldf399a672013-11-18 17:55:22 +000015import org.slf4j.Logger;
16import org.slf4j.LoggerFactory;
17
Eliza Margaretha22898982014-11-04 17:10:21 +000018import de.ids_mannheim.korap.query.SpanWithinQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +000019
Nils Diewald83c9b162015-02-03 21:05:07 +000020
21/**
22 * SpanWithinQuery is DEPRECATED and will
23 * be replaced by SpanPositionQuery in the near future
Nils Diewaldcb8afb32015-02-04 21:12:37 +000024 *
25 * TODO: Support exclusivity
26 * TODO: Use the term "queue" and implement it similar to SpanOrQuery
Nils Diewald83c9b162015-02-03 21:05:07 +000027 */
28
Nils Diewald82a4b862014-02-20 21:17:41 +000029/**
Nils Diewald1455e1e2014-08-01 16:12:43 +000030 * Compare two spans and check how they relate positionally.
Nils Diewald6802acd2014-03-18 18:29:30 +000031 *
Nils Diewald44d5fa12015-01-15 21:31:52 +000032 * @author diewald
Nils Diewald82a4b862014-02-20 21:17:41 +000033 */
Nils Diewaldf399a672013-11-18 17:55:22 +000034public class WithinSpans extends Spans {
Nils Diewald82a4b862014-02-20 21:17:41 +000035
Nils Diewald6802acd2014-03-18 18:29:30 +000036 // Logger
37 private final Logger log = LoggerFactory.getLogger(WithinSpans.class);
Nils Diewald1455e1e2014-08-01 16:12:43 +000038
Nils Diewald82a4b862014-02-20 21:17:41 +000039 // This advices the java compiler to ignore all loggings
40 public static final boolean DEBUG = false;
Nils Diewald6802acd2014-03-18 18:29:30 +000041
Nils Diewaldf399a672013-11-18 17:55:22 +000042 private boolean more = false;
43
Nils Diewald6802acd2014-03-18 18:29:30 +000044 // Boolean value indicating if span B
45 // should be forwarded next (true)
46 // or span A (false);
47 boolean nextSpanB = true;
Nils Diewald82a4b862014-02-20 21:17:41 +000048
49 private int
Nils Diewald83c9b162015-02-03 21:05:07 +000050 wrapStart = -1,
51 wrapEnd = -1,
52 embeddedStart = -1,
53 embeddedEnd = -1,
54 wrapDoc = -1,
55 embeddedDoc = -1,
56 matchDoc = -1,
57 matchStart = -1,
58 matchEnd = -1;
Nils Diewald6802acd2014-03-18 18:29:30 +000059
60 private Collection<byte[]> matchPayload;
Nils Diewaldf399a672013-11-18 17:55:22 +000061 private Collection<byte[]> embeddedPayload;
Nils Diewald6802acd2014-03-18 18:29:30 +000062
63 // Indicates that the wrap and the embedded spans are in the same doc
64 private boolean inSameDoc = false;
Nils Diewaldf399a672013-11-18 17:55:22 +000065
Nils Diewald6802acd2014-03-18 18:29:30 +000066 /*
67 Supported flags are currently:
Nils Diewaldcb8afb32015-02-04 21:12:37 +000068 ov -> 0 | overlap: A & B != empty
69 rov -> 2 | real overlap: A & B != empty and
70 ((A | B) != A or
Nils Diewald6802acd2014-03-18 18:29:30 +000071 (A | B) != B)
Nils Diewaldcb8afb32015-02-04 21:12:37 +000072 in -> 4 | within: A | B = A
73 rin -> 6 | real within: A | B = A and
74 A & B != A
75 ew -> 8 | endswith: A | B = A and
76 A.start = B.start
77 sw -> 10 | startswith: A | B = A and
78 A.end = B.end
79 m -> 12 | A = B
80 */
Nils Diewald6802acd2014-03-18 18:29:30 +000081 public static final byte
Nils Diewald83c9b162015-02-03 21:05:07 +000082 OVERLAP = (byte) 0,
83 REAL_OVERLAP = (byte) 2,
84 WITHIN = (byte) 4,
85 REAL_WITHIN = (byte) 6,
86 ENDSWITH = (byte) 8,
87 STARTSWITH = (byte) 10,
88 MATCH = (byte) 12;
Nils Diewaldf399a672013-11-18 17:55:22 +000089
Nils Diewald6802acd2014-03-18 18:29:30 +000090 private byte flag;
Nils Diewaldf399a672013-11-18 17:55:22 +000091
Nils Diewald1455e1e2014-08-01 16:12:43 +000092 // Contains the query
Nils Diewaldf399a672013-11-18 17:55:22 +000093 private SpanWithinQuery query;
94
Nils Diewaldcb8afb32015-02-04 21:12:37 +000095 // Representing the first operand
96 private final Spans wrapSpans;
Nils Diewaldf399a672013-11-18 17:55:22 +000097
Nils Diewaldcb8afb32015-02-04 21:12:37 +000098 // Representing the second operand
99 private final Spans embeddedSpans;
100
101 // Check flag if the current constellation
102 // was checked yet
Nils Diewald6802acd2014-03-18 18:29:30 +0000103 private boolean tryMatch = true;
Nils Diewaldf399a672013-11-18 17:55:22 +0000104
Nils Diewald01ff7af2015-02-04 22:54:26 +0000105 // Two buffers for storing candidates
Nils Diewald41750bf2015-02-06 17:45:20 +0000106 private LinkedList<WithinSpan>
Nils Diewald83c9b162015-02-03 21:05:07 +0000107 spanStore1,
108 spanStore2;
Nils Diewaldf399a672013-11-18 17:55:22 +0000109
Nils Diewald01ff7af2015-02-04 22:54:26 +0000110 /**
111 * Construct a new WithinSpans object.
112 *
113 * @param spanWithinQuery The parental {@link SpanWithinQuery}.
114 * @param context The {@link AtomicReaderContext}.
115 * @param acceptDocs Bit vector representing the documents
116 * to be searched in.
Nils Diewald41750bf2015-02-06 17:45:20 +0000117 * @param termContexts A map managing {@link TermState TermStates}.
118 * @param flag A byte flag indicating the positional condition of the sub spans.
Nils Diewald01ff7af2015-02-04 22:54:26 +0000119 */
Nils Diewaldf399a672013-11-18 17:55:22 +0000120 public WithinSpans (SpanWithinQuery spanWithinQuery,
Nils Diewald83c9b162015-02-03 21:05:07 +0000121 AtomicReaderContext context,
122 Bits acceptDocs,
123 Map<Term,TermContext> termContexts,
124 byte flag) throws IOException {
Nils Diewaldf399a672013-11-18 17:55:22 +0000125
Nils Diewald83c9b162015-02-03 21:05:07 +0000126 if (DEBUG)
127 log.trace("Construct WithinSpans");
Nils Diewaldf399a672013-11-18 17:55:22 +0000128
Nils Diewald83c9b162015-02-03 21:05:07 +0000129 // Init copies
130 this.matchPayload = new LinkedList<byte[]>();
Nils Diewaldf399a672013-11-18 17:55:22 +0000131
Nils Diewald83c9b162015-02-03 21:05:07 +0000132 // Get spans
133 this.wrapSpans = spanWithinQuery.wrap().getSpans(
Nils Diewald6802acd2014-03-18 18:29:30 +0000134 context,
Nils Diewald83c9b162015-02-03 21:05:07 +0000135 acceptDocs,
136 termContexts
137 );
138 this.embeddedSpans = spanWithinQuery.embedded().getSpans(
139 context,
140 acceptDocs,
141 termContexts
142 );
Nils Diewaldf399a672013-11-18 17:55:22 +0000143
Nils Diewald83c9b162015-02-03 21:05:07 +0000144 this.flag = flag;
Nils Diewaldf399a672013-11-18 17:55:22 +0000145
Nils Diewald83c9b162015-02-03 21:05:07 +0000146 // SpanStores for backtracking
Nils Diewald41750bf2015-02-06 17:45:20 +0000147 this.spanStore1 = new LinkedList<WithinSpan>();
148 this.spanStore2 = new LinkedList<WithinSpan>();
Nils Diewald6802acd2014-03-18 18:29:30 +0000149
Nils Diewald83c9b162015-02-03 21:05:07 +0000150 // kept for toString() only.
151 this.query = spanWithinQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +0000152 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000153
Nils Diewaldf399a672013-11-18 17:55:22 +0000154
Nils Diewald6802acd2014-03-18 18:29:30 +0000155 // Move to next match, returning true iff any such exists.
Nils Diewaldf399a672013-11-18 17:55:22 +0000156 @Override
157 public boolean next () throws IOException {
Nils Diewald6802acd2014-03-18 18:29:30 +0000158
Nils Diewald83c9b162015-02-03 21:05:07 +0000159 if (DEBUG)
160 log.trace("Next with docs {}, {}", wrapDoc, embeddedDoc);
Nils Diewaldf399a672013-11-18 17:55:22 +0000161
Nils Diewald83c9b162015-02-03 21:05:07 +0000162 // Initialize spans
163 if (!this.init()) {
164 this.more = false;
165 this.inSameDoc = false;
166 this.wrapDoc = DocIdSetIterator.NO_MORE_DOCS;
167 this.embeddedDoc = DocIdSetIterator.NO_MORE_DOCS;
168 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
169 return false;
170 };
Nils Diewald82a4b862014-02-20 21:17:41 +0000171
Nils Diewald83c9b162015-02-03 21:05:07 +0000172 // There are more spans and they are in the same document
Nils Diewald6802acd2014-03-18 18:29:30 +0000173
Nils Diewald83c9b162015-02-03 21:05:07 +0000174 while (this.more && (wrapDoc == embeddedDoc ||
175 // this.inSameDoc ||
176 this.toSameDoc())) {
177 if (DEBUG)
178 log.trace("We are in the same doc: {}, {}", wrapDoc, embeddedDoc);
Nils Diewald6802acd2014-03-18 18:29:30 +0000179
Nils Diewald83c9b162015-02-03 21:05:07 +0000180 // Both spans match according to the flag
181 // Silently the next operations are prepared
182 if (this.tryMatch && this.doesMatch()) {
Nils Diewaldcd226862015-02-11 22:27:45 +0000183
Nils Diewald83c9b162015-02-03 21:05:07 +0000184 if (this.wrapEnd == -1)
185 this.wrapEnd = this.wrapSpans.end();
Nils Diewald82a4b862014-02-20 21:17:41 +0000186
Nils Diewald83c9b162015-02-03 21:05:07 +0000187 this.matchStart = embeddedStart < wrapStart ? embeddedStart : wrapStart;
188 this.matchEnd = embeddedEnd > wrapEnd ? embeddedEnd : wrapEnd;
189 this.matchDoc = embeddedDoc;
190 this.matchPayload.clear();
Nils Diewald82a4b862014-02-20 21:17:41 +0000191
Nils Diewald83c9b162015-02-03 21:05:07 +0000192 if (this.embeddedPayload != null)
193 matchPayload.addAll(embeddedPayload);
Nils Diewaldf399a672013-11-18 17:55:22 +0000194
Nils Diewald83c9b162015-02-03 21:05:07 +0000195 if (this.wrapSpans.isPayloadAvailable())
196 this.matchPayload.addAll(wrapSpans.getPayload());
Nils Diewaldf399a672013-11-18 17:55:22 +0000197
Nils Diewald83c9b162015-02-03 21:05:07 +0000198 if (DEBUG)
199 log.trace(
Nils Diewaldcd226862015-02-11 22:27:45 +0000200 " ---- MATCH ---- {}-{} ({})",
201 matchStart,
202 matchEnd,
203 matchDoc
204 );
Nils Diewald6802acd2014-03-18 18:29:30 +0000205
Nils Diewald83c9b162015-02-03 21:05:07 +0000206 this.tryMatch = false;
207 return true;
208 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000209
Nils Diewald83c9b162015-02-03 21:05:07 +0000210 // Get next embedded
211 else if (this.nextSpanB) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000212
Nils Diewald83c9b162015-02-03 21:05:07 +0000213 // Next time try the match
214 this.tryMatch = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000215
Nils Diewald83c9b162015-02-03 21:05:07 +0000216 if (DEBUG)
217 log.trace("In the next embedded branch");
Nils Diewald6802acd2014-03-18 18:29:30 +0000218
Nils Diewald41750bf2015-02-06 17:45:20 +0000219 WithinSpan current = null;
Nils Diewald6802acd2014-03-18 18:29:30 +0000220
Nils Diewald83c9b162015-02-03 21:05:07 +0000221 // New - fetch until theres a span in the correct doc or bigger
222 while (!this.spanStore2.isEmpty()) {
223 current = spanStore2.removeFirst();
224 if (current.doc >= this.wrapDoc)
225 break;
226 };
227
228
229 // There is nothing in the second store
230 if (current == null) {
231 if (DEBUG)
232 log.trace("SpanStore 2 is empty");
233
234 // Forward with embedding
235 if (!this.embeddedSpans.next()) {
236 this.nextSpanA();
237 continue;
238 }
239
240 else if (DEBUG) {
241 log.trace("Fetch next embedded span");
242 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000243
Nils Diewaldcd226862015-02-11 22:27:45 +0000244 this.embeddedStart = this.embeddedSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000245 this.embeddedEnd = -1;
246 this.embeddedPayload = null;
247 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewald6802acd2014-03-18 18:29:30 +0000248
Nils Diewald83c9b162015-02-03 21:05:07 +0000249 if (this.embeddedDoc != this.wrapDoc) {
250
251 if (DEBUG) {
252 log.trace("Embedded span is in a new document {}",
253 _currentEmbedded().toString());
254 log.trace("Reset current embedded doc");
255 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000256
Nils Diewald83c9b162015-02-03 21:05:07 +0000257 /*
258 if (DEBUG)
259 log.trace("Clear all span stores");
260 this.spanStore1.clear();
261 this.spanStore2.clear();
262 */
Nils Diewald6802acd2014-03-18 18:29:30 +0000263
Nils Diewald83c9b162015-02-03 21:05:07 +0000264 this.storeEmbedded();
265
266 // That is necessary to backtrack to the last document!
267 this.inSameDoc = true;
268 this.embeddedDoc = wrapDoc;
269 // this.tryMatch = false; // already covered in nextSpanA
270
271 this.nextSpanA();
272 continue;
273 };
274
275 if (DEBUG)
276 log.trace(
277 " Forward embedded span to {}",
278 _currentEmbedded().toString()
279 );
280
281 if (this.embeddedDoc != this.wrapDoc) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000282
283 // Is this always a good idea?
284 /*
285 this.spanStore1.clear();
286 this.spanStore2.clear();
287 */
288
289 this.embeddedStart = -1;
290 this.embeddedEnd = -1;
291 this.embeddedPayload = null;
Nils Diewald6802acd2014-03-18 18:29:30 +0000292
Nils Diewald83c9b162015-02-03 21:05:07 +0000293 if (!this.toSameDoc()) {
294 this.more = false;
295 this.inSameDoc = false;
296 return false;
297 };
298 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000299
Nils Diewald83c9b162015-02-03 21:05:07 +0000300 this.more = true;
301 this.inSameDoc = true;
302 this.tryMatch = true;
Nils Diewaldcd226862015-02-11 22:27:45 +0000303
Nils Diewald83c9b162015-02-03 21:05:07 +0000304 this.nextSpanB();
305 continue;
306 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000307
Nils Diewald83c9b162015-02-03 21:05:07 +0000308 // Fetch from second store?
309 else {
310 /** TODO: Change this to a single embedded object! */
311 this.embeddedStart = current.start;
312 this.embeddedEnd = current.end;
313 this.embeddedDoc = current.doc;
Nils Diewald6802acd2014-03-18 18:29:30 +0000314
Nils Diewald83c9b162015-02-03 21:05:07 +0000315 if (current.payload != null) {
316 this.embeddedPayload = new ArrayList<byte[]>(current.payload.size());
317 this.embeddedPayload.addAll(current.payload);
318 }
319 else {
320 this.embeddedPayload = null;
321 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000322
Nils Diewald83c9b162015-02-03 21:05:07 +0000323 if (DEBUG)
324 log.trace("Fetch current from SpanStore 2: {}", current.toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000325
Nils Diewald83c9b162015-02-03 21:05:07 +0000326 this.tryMatch = true;
327 };
328 continue;
329 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000330
Nils Diewald83c9b162015-02-03 21:05:07 +0000331 // get next wrap
332 if (DEBUG)
333 log.trace("In the next wrap branch");
Nils Diewald6802acd2014-03-18 18:29:30 +0000334
Nils Diewald83c9b162015-02-03 21:05:07 +0000335 this.tryMatch = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000336
Nils Diewald83c9b162015-02-03 21:05:07 +0000337 if (DEBUG)
338 log.trace("Try next wrap");
Nils Diewald6802acd2014-03-18 18:29:30 +0000339
Nils Diewald83c9b162015-02-03 21:05:07 +0000340 // shift the stored spans
341 if (!this.spanStore1.isEmpty()) {
342 if (DEBUG) {
343 log.trace("Move everything from SpanStore 1 to SpanStore 2:");
Nils Diewald41750bf2015-02-06 17:45:20 +0000344 for (WithinSpan i : this.spanStore1) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000345 log.trace(" | {}", i.toString());
346 };
347 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000348
Nils Diewald83c9b162015-02-03 21:05:07 +0000349 // Move everything to spanStore2
350 this.spanStore2.addAll(
351 0,
Nils Diewald41750bf2015-02-06 17:45:20 +0000352 (LinkedList<WithinSpan>) this.spanStore1.clone()
Nils Diewald83c9b162015-02-03 21:05:07 +0000353 );
354 this.spanStore1.clear();
Nils Diewald6802acd2014-03-18 18:29:30 +0000355
Nils Diewald83c9b162015-02-03 21:05:07 +0000356 if (DEBUG) {
357 log.trace("SpanStore 2 now is:");
Nils Diewald41750bf2015-02-06 17:45:20 +0000358 for (WithinSpan i : this.spanStore2) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000359 log.trace(" | {}", i.toString());
360 };
361 };
362
363 }
364 else if (DEBUG) {
365 log.trace("spanStore 1 is empty");
366 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000367
Nils Diewald83c9b162015-02-03 21:05:07 +0000368 // Get next wrap
369 if (this.wrapSpans.next()) {
Nils Diewald82a4b862014-02-20 21:17:41 +0000370
Nils Diewald83c9b162015-02-03 21:05:07 +0000371 // Reset wrapping information
Nils Diewaldcd226862015-02-11 22:27:45 +0000372 this.wrapStart = this.wrapSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000373 this.wrapEnd = -1;
Nils Diewald82a4b862014-02-20 21:17:41 +0000374
Nils Diewald83c9b162015-02-03 21:05:07 +0000375 // Retrieve doc information
376 this.wrapDoc = this.wrapSpans.doc();
Nils Diewald20607ab2014-03-20 23:28:36 +0000377
Nils Diewald83c9b162015-02-03 21:05:07 +0000378 if (DEBUG)
379 log.trace(
380 " Forward wrap span to {}",
381 _currentWrap().toString()
382 );
Nils Diewald6802acd2014-03-18 18:29:30 +0000383
Nils Diewald83c9b162015-02-03 21:05:07 +0000384 if (this.embeddedDoc != this.wrapDoc) {
385 if (DEBUG)
386 log.trace("Delete all span stores");
387 this.spanStore1.clear();
388 this.spanStore2.clear();
Nils Diewald6802acd2014-03-18 18:29:30 +0000389
Nils Diewald83c9b162015-02-03 21:05:07 +0000390 // Reset embedded:
391 this.embeddedStart = -1;
392 this.embeddedEnd = -1;
393 this.embeddedPayload = null;
Nils Diewald6802acd2014-03-18 18:29:30 +0000394
Nils Diewald83c9b162015-02-03 21:05:07 +0000395 if (!this.toSameDoc()) {
396 this.inSameDoc = false;
397 this.more = false;
398 return false;
399 };
400 }
401 else {
402 this.inSameDoc = true;
403 // Do not match with the current state
404 this.tryMatch = false;
405 };
406
407 this.nextSpanB();
408 continue;
409 }
410 this.more = false;
411 this.inSameDoc = false;
412 this.spanStore1.clear();
413 this.spanStore2.clear();
414 return false;
415 };
Nils Diewald82a4b862014-02-20 21:17:41 +0000416
Nils Diewald83c9b162015-02-03 21:05:07 +0000417 // No more matches
418 return false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000419 };
420
421
422 /**
423 * Skip to the next document
424 */
425 private boolean toSameDoc () throws IOException {
Nils Diewald82a4b862014-02-20 21:17:41 +0000426
Nils Diewald83c9b162015-02-03 21:05:07 +0000427 if (DEBUG)
428 log.trace("Forward to find same docs");
Nils Diewald6802acd2014-03-18 18:29:30 +0000429
Nils Diewald83c9b162015-02-03 21:05:07 +0000430 /*
431 if (this.embeddedSpans == null) {
432 this.more = false;
433 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
434 this.inSameDoc = false;
435 return false;
436 };
437 */
Nils Diewald6802acd2014-03-18 18:29:30 +0000438
Nils Diewald83c9b162015-02-03 21:05:07 +0000439 this.more = true;
440 this.inSameDoc = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000441
Nils Diewald83c9b162015-02-03 21:05:07 +0000442 this.wrapDoc = this.wrapSpans.doc();
443 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewald6802acd2014-03-18 18:29:30 +0000444
Nils Diewald83c9b162015-02-03 21:05:07 +0000445 // Clear all spanStores
446 if (this.wrapDoc != this.embeddedDoc) {
447 /*
448 if (DEBUG)
449 log.trace("Clear all spanStores when moving forward");
450 // Why??
451 this.spanStore1.clear();
452 this.spanStore2.clear();
453 */
454 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000455
Nils Diewald83c9b162015-02-03 21:05:07 +0000456 // Last doc was reached
457 else if (this.wrapDoc == DocIdSetIterator.NO_MORE_DOCS) {
458 this.more = false;
459 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
460 this.inSameDoc = false;
461 return false;
462 }
463 else {
464 if (DEBUG) {
465 log.trace("Current position already is in the same doc");
466 log.trace("Embedded: {}", _currentEmbedded().toString());
467 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000468 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000469 return true;
470 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000471
Nils Diewaldcd226862015-02-11 22:27:45 +0000472
Nils Diewald83c9b162015-02-03 21:05:07 +0000473 // Forward till match
474 while (this.wrapDoc != this.embeddedDoc) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000475
Nils Diewald83c9b162015-02-03 21:05:07 +0000476 // Forward wrapInfo
477 if (this.wrapDoc < this.embeddedDoc) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000478
Nils Diewald83c9b162015-02-03 21:05:07 +0000479 // Set document information
480 if (!wrapSpans.skipTo(this.embeddedDoc)) {
481 this.more = false;
482 this.inSameDoc = false;
Nils Diewaldcd226862015-02-11 22:27:45 +0000483 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000484 return false;
485 };
486
487 if (DEBUG)
488 log.trace("Skip wrap to doc {}", this.embeddedDoc);
Nils Diewald6802acd2014-03-18 18:29:30 +0000489
Nils Diewald83c9b162015-02-03 21:05:07 +0000490 this.wrapDoc = this.wrapSpans.doc();
491
492 if (wrapDoc == DocIdSetIterator.NO_MORE_DOCS) {
493 this.more = false;
494 this.inSameDoc = false;
495 this.embeddedDoc = DocIdSetIterator.NO_MORE_DOCS;
496 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
497 return false;
498 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000499
500 /*
501 Remove stored information
502 */
503 if (DEBUG)
504 log.trace("Delete all span stores");
505
506 this.spanStore1.clear();
507 this.spanStore2.clear();
508
509 if (wrapDoc == embeddedDoc) {
510 this.wrapStart = this.wrapSpans.start();
511 this.embeddedStart = this.embeddedSpans.start();
512 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000513 return true;
Nils Diewaldcd226862015-02-11 22:27:45 +0000514 };
Nils Diewald83c9b162015-02-03 21:05:07 +0000515
Nils Diewaldcd226862015-02-11 22:27:45 +0000516 this.wrapStart = -1;
517 this.embeddedStart = -1;
Nils Diewald83c9b162015-02-03 21:05:07 +0000518 }
Nils Diewald82a4b862014-02-20 21:17:41 +0000519
Nils Diewald83c9b162015-02-03 21:05:07 +0000520 // Forward embedInfo
521 else if (this.wrapDoc > this.embeddedDoc) {
522
523 // Set document information
524 if (!this.embeddedSpans.skipTo(this.wrapDoc)) {
525 this.more = false;
526 this.inSameDoc = false;
Nils Diewaldcd226862015-02-11 22:27:45 +0000527 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000528 return false;
529 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000530
Nils Diewald83c9b162015-02-03 21:05:07 +0000531 this.embeddedDoc = this.embeddedSpans.doc();
532
533 if (this.embeddedDoc == DocIdSetIterator.NO_MORE_DOCS) {
534 this.more = false;
535 this.inSameDoc = false;
536 this.wrapDoc = DocIdSetIterator.NO_MORE_DOCS;
537 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
538 return false;
539 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000540
541 if (DEBUG)
542 log.trace("Skip embedded to doc {}", this.embeddedDoc);
Nils Diewald83c9b162015-02-03 21:05:07 +0000543
Nils Diewaldcd226862015-02-11 22:27:45 +0000544 this.embeddedStart = this.embeddedSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000545 this.embeddedEnd = -1;
546 this.embeddedPayload = null;
547
Nils Diewaldcd226862015-02-11 22:27:45 +0000548 if (this.wrapDoc == this.embeddedDoc) {
549 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000550 return true;
Nils Diewaldcd226862015-02-11 22:27:45 +0000551 };
Nils Diewald83c9b162015-02-03 21:05:07 +0000552 }
553 else {
Nils Diewaldcd226862015-02-11 22:27:45 +0000554 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000555 return false;
556 };
557 };
558
Nils Diewaldcd226862015-02-11 22:27:45 +0000559 this.matchDoc = this.wrapDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000560 return true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000561 };
562
563
564 // Initialize spans
565 private boolean init () throws IOException {
566
Nils Diewald83c9b162015-02-03 21:05:07 +0000567 // There is a missing span
568 if (this.embeddedDoc >= 0)
569 return true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000570
Nils Diewald83c9b162015-02-03 21:05:07 +0000571 if (DEBUG)
572 log.trace("Initialize spans");
Nils Diewald6802acd2014-03-18 18:29:30 +0000573
Nils Diewald83c9b162015-02-03 21:05:07 +0000574 // First tick for both spans
575 if (!(this.embeddedSpans.next() && this.wrapSpans.next())) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000576
Nils Diewald83c9b162015-02-03 21:05:07 +0000577 if (DEBUG)
578 log.trace("No spans initialized");
Nils Diewald6802acd2014-03-18 18:29:30 +0000579
Nils Diewald83c9b162015-02-03 21:05:07 +0000580 this.embeddedDoc = -1;
581 this.more = false;
582 return false;
583 };
584 this.more = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000585
Nils Diewald83c9b162015-02-03 21:05:07 +0000586 // Store current positions for wrapping and embedded spans
587 this.wrapDoc = this.wrapSpans.doc();
588 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewald6802acd2014-03-18 18:29:30 +0000589
Nils Diewald83c9b162015-02-03 21:05:07 +0000590 // Set inSameDoc to true, if it is true
591 if (this.embeddedDoc == this.wrapDoc)
592 this.inSameDoc = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000593
Nils Diewald83c9b162015-02-03 21:05:07 +0000594 return true;
Nils Diewaldf399a672013-11-18 17:55:22 +0000595 };
596
597
598 /** Skips to the first match beyond the current, whose document number is
599 * greater than or equal to <i>target</i>. <p>Returns true iff there is such
600 * a match. <p>Behaves as if written: <pre class="prettyprint">
601 * boolean skipTo(int target) {
602 * do {
603 * if (!next())
604 * return false;
605 * } while (target > doc());
606 * return true;
607 * }
608 * </pre>
609 * Most implementations are considerably more efficient than that.
610 */
611 public boolean skipTo (int target) throws IOException {
Nils Diewald6802acd2014-03-18 18:29:30 +0000612
Nils Diewald83c9b162015-02-03 21:05:07 +0000613 if (DEBUG)
Nils Diewaldcd226862015-02-11 22:27:45 +0000614 log.trace(
615 "skipTo document {}/{} -> {}",
616 this.embeddedDoc,
617 this.wrapDoc,
618 target
619 );
Nils Diewaldf399a672013-11-18 17:55:22 +0000620
Nils Diewald83c9b162015-02-03 21:05:07 +0000621 // Initialize spans
622 if (!this.init())
623 return false;
Nils Diewald82a4b862014-02-20 21:17:41 +0000624
Nils Diewaldcd226862015-02-11 22:27:45 +0000625 assert target > this.embeddedDoc;
Nils Diewald82a4b862014-02-20 21:17:41 +0000626
Nils Diewald83c9b162015-02-03 21:05:07 +0000627 // Only forward embedded spans
628 if (this.more && (this.embeddedDoc < target)) {
629 if (this.embeddedSpans.skipTo(target)) {
630 this.inSameDoc = false;
631 this.embeddedStart = -1;
632 this.embeddedEnd = -1;
633 this.embeddedPayload = null;
634 this.embeddedDoc = this.embeddedSpans.doc();
635 }
Nils Diewaldf399a672013-11-18 17:55:22 +0000636
Nils Diewald83c9b162015-02-03 21:05:07 +0000637 // Can't be skipped to target
638 else {
639 this.inSameDoc = false;
640 this.more = false;
641 return false;
642 };
643 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000644
Nils Diewald83c9b162015-02-03 21:05:07 +0000645 // Move to same doc
646 return this.toSameDoc();
Nils Diewaldf399a672013-11-18 17:55:22 +0000647 };
648
Nils Diewald6802acd2014-03-18 18:29:30 +0000649 private void nextSpanA () {
Nils Diewald83c9b162015-02-03 21:05:07 +0000650 if (DEBUG)
651 log.trace("Try wrap next time");
652 this.tryMatch = false;
653 this.nextSpanB = false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000654 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000655
Nils Diewald6802acd2014-03-18 18:29:30 +0000656 private void nextSpanB () {
Nils Diewald83c9b162015-02-03 21:05:07 +0000657 if (DEBUG)
658 log.trace("Try embedded next time");
659 this.nextSpanB = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000660 };
661
662
663 // Check if the current span constellation does match
664 // Store backtracking relevant data and say, how to proceed
665 private boolean doesMatch () {
Nils Diewaldcd226862015-02-11 22:27:45 +0000666 if (DEBUG)
667 log.trace("In the match test branch");
Nils Diewald6802acd2014-03-18 18:29:30 +0000668
Nils Diewaldcd226862015-02-11 22:27:45 +0000669 if (this.wrapStart == -1)
670 this.wrapStart = this.wrapSpans.start();
Nils Diewald6802acd2014-03-18 18:29:30 +0000671
Nils Diewaldcd226862015-02-11 22:27:45 +0000672 if (this.embeddedStart == -1) {
673 this.embeddedStart = this.embeddedSpans.start();
674 this.embeddedEnd = this.embeddedSpans.end();
675 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000676
Nils Diewaldcd226862015-02-11 22:27:45 +0000677 this.wrapEnd = -1;
Nils Diewald6802acd2014-03-18 18:29:30 +0000678
Nils Diewaldcd226862015-02-11 22:27:45 +0000679 // Shortcut to prevent lazyloading of .end()
680 if (this.wrapStart > this.embeddedStart) {
681 // Can't match for in, rin, ew, sw, and m
682 // and will always lead to next_b
683 if (flag >= WITHIN) {
684 this.nextSpanB();
685 if (DEBUG)
686 _logCurrentCase((byte) 16);
687 return false;
688 };
689 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000690
Nils Diewaldcd226862015-02-11 22:27:45 +0000691 else if (this.wrapStart < this.embeddedStart) {
692 // Can't match for sw and m and will always
693 // lead to next_a
694 if (flag >= STARTSWITH) {
695 this.nextSpanA();
696 if (DEBUG)
697 _logCurrentCase((byte) 15);
698 return false;
699 };
700 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000701
Nils Diewaldcd226862015-02-11 22:27:45 +0000702 // Now check correctly
703 byte currentCase = this.withinCase();
Nils Diewald6802acd2014-03-18 18:29:30 +0000704
Nils Diewaldcd226862015-02-11 22:27:45 +0000705 if (DEBUG)
706 _logCurrentCase(currentCase);
Nils Diewald6802acd2014-03-18 18:29:30 +0000707
Nils Diewaldcd226862015-02-11 22:27:45 +0000708 boolean match = false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000709
Nils Diewaldcd226862015-02-11 22:27:45 +0000710 // Test case
711 if (currentCase >= (byte) 3 && currentCase <= (byte) 11) {
712 switch (flag) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000713
Nils Diewaldcd226862015-02-11 22:27:45 +0000714 case WITHIN:
715 if (currentCase >= 6 && currentCase <= 10 && currentCase != 8)
716 match = true;
717 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000718
Nils Diewaldcd226862015-02-11 22:27:45 +0000719 case REAL_WITHIN:
720 if (currentCase == 6 ||
721 currentCase == 9 ||
722 currentCase == 10)
723 match = true;
724 break;
725
726 case MATCH:
727 if (currentCase == 7)
728 match = true;
729 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000730
Nils Diewaldcd226862015-02-11 22:27:45 +0000731 case STARTSWITH:
732 if (currentCase == 7 ||
733 currentCase == 6)
734 match = true;
735 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000736
Nils Diewaldcd226862015-02-11 22:27:45 +0000737 case ENDSWITH:
738 if (currentCase == 7 ||
739 currentCase == 10)
740 match = true;
741 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000742
Nils Diewaldcd226862015-02-11 22:27:45 +0000743 case OVERLAP:
744 match = true;
745 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000746
Nils Diewaldcd226862015-02-11 22:27:45 +0000747 case REAL_OVERLAP:
748 if (currentCase == 3 ||
749 currentCase == 11)
750 match = true;
751 break;
752 };
753 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000754
Nils Diewaldcd226862015-02-11 22:27:45 +0000755 try {
756 this.todo(currentCase);
757 }
758 catch (IOException e) {
759 return false;
760 }
761 return match;
Nils Diewald6802acd2014-03-18 18:29:30 +0000762 };
763
764
765 private void _logCurrentCase (byte currentCase) {
Nils Diewaldcd226862015-02-11 22:27:45 +0000766 log.trace("Current Case is {}", currentCase);
Nils Diewald6802acd2014-03-18 18:29:30 +0000767
Nils Diewaldcd226862015-02-11 22:27:45 +0000768 String _e = _currentEmbedded().toString();
Nils Diewald6802acd2014-03-18 18:29:30 +0000769
Nils Diewaldcd226862015-02-11 22:27:45 +0000770 log.trace(" |---| {}", _currentWrap().toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000771
Nils Diewaldcd226862015-02-11 22:27:45 +0000772 switch (currentCase) {
773 case 1:
774 log.trace("|-| {}", _e);
775 break;
776 case 2:
777 log.trace("|---| {}", _e);
Nils Diewald6802acd2014-03-18 18:29:30 +0000778 break;
Nils Diewaldcd226862015-02-11 22:27:45 +0000779 case 3:
780 log.trace(" |---| {}", _e);
781 break;
782 case 4:
783 log.trace(" |-----| {}", _e);
784 break;
785 case 5:
786 log.trace(" |-------| {}", _e);
787 break;
788 case 6:
789 log.trace(" |-| {}", _e);
790 break;
791 case 7:
792 log.trace(" |---| {}", _e);
793 break;
794 case 8:
795 log.trace(" |-----| {}", _e);
796 break;
797 case 9:
798 log.trace(" |-| {}", _e);
799 break;
800 case 10:
801 log.trace(" |-| {}", _e);
802 break;
803 case 11:
804 log.trace(" |---| {}", _e);
805 break;
806 case 12:
807 log.trace(" |-| {}", _e);
808 break;
809 case 13:
810 log.trace(" |-| {}", _e);
811 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000812
Nils Diewaldcd226862015-02-11 22:27:45 +0000813 case 15:
814 // Fake case
815 log.trace(" |---? {}", _e);
816 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000817
Nils Diewaldcd226862015-02-11 22:27:45 +0000818 case 16:
819 // Fake case
820 log.trace(" |---? {}", _e);
821 break;
822 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000823 };
824
825
Nils Diewald41750bf2015-02-06 17:45:20 +0000826 private WithinSpan _currentWrap () {
827 WithinSpan _wrap = new WithinSpan();
Nils Diewald83c9b162015-02-03 21:05:07 +0000828 _wrap.start = this.wrapStart != -1 ? this.wrapStart : this.wrapSpans.start();
829 _wrap.end = this.wrapEnd != -1 ? this.wrapEnd : this.wrapSpans.end();
830 _wrap.doc = this.wrapDoc != -1 ? this.wrapDoc : this.wrapSpans.doc();
831 return _wrap;
Nils Diewald6802acd2014-03-18 18:29:30 +0000832 };
833
Nils Diewald41750bf2015-02-06 17:45:20 +0000834 private WithinSpan _currentEmbedded () {
835 WithinSpan _embedded = new WithinSpan();
Nils Diewald83c9b162015-02-03 21:05:07 +0000836 _embedded.start = this.embeddedStart != -1 ?
837 this.embeddedStart : this.embeddedSpans.start();
838 _embedded.end = this.embeddedEnd != -1 ?
839 this.embeddedEnd : this.embeddedSpans.end();
840 _embedded.doc = this.embeddedDoc != -1 ?
841 this.embeddedDoc : this.embeddedSpans.doc();
842 return _embedded;
Nils Diewald6802acd2014-03-18 18:29:30 +0000843 };
844
845
846 private void todo (byte currentCase) throws IOException {
847 /*
848 Check what to do next with the spans.
849
850 The different follow up steps are:
851 - storeEmbedded -> store span B for later checks
852 - nextSpanA -> forward a
853 - nextSpanB -> forward b
854
855 These rules were automatically generated
856 */
857
858 // Case 1, 2
859 if (currentCase <= (byte) 2) {
860 this.nextSpanB();
861 }
862
863 // Case 12, 13
864 else if (currentCase >= (byte) 12) {
865 this.storeEmbedded();
866 this.nextSpanA();
867 }
868
869 // Case 3, 4, 5, 8
870 else if (currentCase <= (byte) 5 ||
Nils Diewald83c9b162015-02-03 21:05:07 +0000871 currentCase == (byte) 8) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000872 if (flag <= 2)
Nils Diewald83c9b162015-02-03 21:05:07 +0000873 this.storeEmbedded();
Nils Diewald6802acd2014-03-18 18:29:30 +0000874 this.nextSpanB();
875 }
876
877 // Case 11
878 else if (currentCase == (byte) 11) {
879 if (this.flag == REAL_WITHIN) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000880 this.nextSpanB();
Nils Diewald6802acd2014-03-18 18:29:30 +0000881 }
882 else if (this.flag >= STARTSWITH) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000883 this.nextSpanA();
Nils Diewald6802acd2014-03-18 18:29:30 +0000884 }
885 else {
Nils Diewald83c9b162015-02-03 21:05:07 +0000886 this.storeEmbedded();
887 this.nextSpanB();
Nils Diewald6802acd2014-03-18 18:29:30 +0000888 };
889 }
890
891
892 // Case 6, 7, 9, 10
893 else {
894
895 if (
896 // Case 6
897 (currentCase == (byte) 6 && this.flag == MATCH) ||
898
899 // Case 7
900 (currentCase == (byte) 7 && this.flag == REAL_WITHIN) ||
901
902 // Case 9, 10
903 (currentCase >= (byte) 9 && this.flag >= STARTSWITH)) {
904
905 this.nextSpanA();
906 }
907 else {
908 this.storeEmbedded();
909 this.nextSpanB();
910 };
911 };
912 };
913
Nils Diewald83c9b162015-02-03 21:05:07 +0000914 // Store the current embedded span in the first spanStore
Nils Diewald6802acd2014-03-18 18:29:30 +0000915 private void storeEmbedded () throws IOException {
916
Nils Diewald83c9b162015-02-03 21:05:07 +0000917 // Create a current copy
Nils Diewald41750bf2015-02-06 17:45:20 +0000918 WithinSpan embedded = new WithinSpan();
Nils Diewald83c9b162015-02-03 21:05:07 +0000919 embedded.start = this.embeddedStart != -1 ?
920 this.embeddedStart : this.embeddedSpans.start();
921 embedded.end = this.embeddedEnd != -1 ?
922 this.embeddedEnd : this.embeddedSpans.end();
923 embedded.doc = this.embeddedDoc;
Nils Diewald6802acd2014-03-18 18:29:30 +0000924
Nils Diewald83c9b162015-02-03 21:05:07 +0000925 // Copy payloads
926 if (this.embeddedPayload != null) {
927 embedded.payload = new ArrayList<byte[]>(this.embeddedPayload.size());
928 embedded.payload.addAll(this.embeddedPayload);
929 }
930 else if (this.embeddedSpans.isPayloadAvailable()) {
931 embedded.payload = new ArrayList<byte[]>(3);
932 Collection<byte[]> payload = this.embeddedSpans.getPayload();
933
934 this.embeddedPayload = new ArrayList<byte[]>(payload.size());
935 this.embeddedPayload.addAll(payload);
936 embedded.payload.addAll(payload);
937 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000938
Nils Diewald83c9b162015-02-03 21:05:07 +0000939 this.spanStore1.add(embedded);
Nils Diewald6802acd2014-03-18 18:29:30 +0000940
Nils Diewald83c9b162015-02-03 21:05:07 +0000941 if (DEBUG)
942 log.trace("Pushed to spanStore 1 {} (in storeEmbedded)", embedded.toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000943 };
944
945
946 // Return case number
947 private byte withinCase () {
948
Nils Diewaldcd226862015-02-11 22:27:45 +0000949 // case 1-5
950 if (this.wrapStart > this.embeddedStart) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000951
Nils Diewaldcd226862015-02-11 22:27:45 +0000952 // Case 1
953 // |-|
954 // |-|
955 if (this.wrapStart > this.embeddedEnd) {
956 return (byte) 1;
957 }
958
959 // Case 2
960 // |-|
961 // |-|
962 else if (this.wrapStart == this.embeddedEnd) {
963 return (byte) 2;
964 };
965
966 // Load wrapEnd
967 this.wrapEnd = this.wrapSpans.end();
968
969 // Case 3
970 // |---|
971 // |---|
972 if (this.wrapEnd > this.embeddedEnd) {
973 return (byte) 3;
974 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000975
Nils Diewaldcd226862015-02-11 22:27:45 +0000976 // Case 4
977 // |-|
978 // |---|
979 else if (this.wrapEnd == this.embeddedEnd) {
980 return (byte) 4;
981 };
982
983 // Case 5
984 // |-|
985 // |---|
986 return (byte) 5;
987 }
988
989 // case 6-8
990 else if (this.wrapStart == this.embeddedStart) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000991
Nils Diewaldcd226862015-02-11 22:27:45 +0000992 // Load wrapEnd
993 this.wrapEnd = this.wrapSpans.end();
Nils Diewald6802acd2014-03-18 18:29:30 +0000994
Nils Diewaldcd226862015-02-11 22:27:45 +0000995 // Case 6
996 // |---|
997 // |-|
998 if (this.wrapEnd > this.embeddedEnd) {
999 return (byte) 6;
1000 }
Nils Diewald6802acd2014-03-18 18:29:30 +00001001
Nils Diewaldcd226862015-02-11 22:27:45 +00001002 // Case 7
1003 // |---|
1004 // |---|
1005 else if (this.wrapEnd == this.embeddedEnd) {
1006 return (byte) 7;
1007 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001008
Nils Diewaldcd226862015-02-11 22:27:45 +00001009 // Case 8
1010 // |-|
1011 // |---|
1012 return (byte) 8;
1013 }
1014
1015 // wrapStart < embeddedStart
Nils Diewaldf399a672013-11-18 17:55:22 +00001016
Nils Diewaldcd226862015-02-11 22:27:45 +00001017 // Load wrapEnd
1018 this.wrapEnd = this.wrapSpans.end();
Nils Diewald6802acd2014-03-18 18:29:30 +00001019
Nils Diewaldcd226862015-02-11 22:27:45 +00001020 // Case 13
1021 // |-|
1022 // |-|
1023 if (this.wrapEnd < this.embeddedStart) {
1024 return (byte) 13;
1025 }
1026
1027 // Case 9
1028 // |---|
1029 // |-|
1030 else if (this.wrapEnd > this.embeddedEnd) {
1031 return (byte) 9;
1032 }
1033
1034 // Case 10
1035 // |---|
1036 // |-|
1037 else if (this.wrapEnd == this.embeddedEnd) {
1038 return (byte) 10;
1039 }
1040
1041 // Case 11
1042 // |---|
1043 // |---|
1044 else if (this.wrapEnd > this.embeddedStart) {
1045 return (byte) 11;
1046 }
Nils Diewald82a4b862014-02-20 21:17:41 +00001047
Nils Diewaldcd226862015-02-11 22:27:45 +00001048 // case 12
1049 // |-|
1050 // |-|
1051 return (byte) 12;
Nils Diewald6802acd2014-03-18 18:29:30 +00001052 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001053
1054
1055 /** Returns the document number of the current match. Initially invalid. */
1056 @Override
1057 public int doc () {
Nils Diewaldcd226862015-02-11 22:27:45 +00001058 return matchDoc;
Nils Diewaldf399a672013-11-18 17:55:22 +00001059 };
1060
Nils Diewald6802acd2014-03-18 18:29:30 +00001061
Nils Diewaldf399a672013-11-18 17:55:22 +00001062 /** Returns the start position of the embedding wrap. Initially invalid. */
1063 @Override
1064 public int start () {
Nils Diewaldcd226862015-02-11 22:27:45 +00001065 return matchStart;
Nils Diewaldf399a672013-11-18 17:55:22 +00001066 };
1067
Nils Diewald6802acd2014-03-18 18:29:30 +00001068
Nils Diewaldf399a672013-11-18 17:55:22 +00001069 /** Returns the end position of the embedding wrap. Initially invalid. */
1070 @Override
1071 public int end () {
1072 return matchEnd;
1073 };
1074
Nils Diewald6802acd2014-03-18 18:29:30 +00001075
Nils Diewaldf399a672013-11-18 17:55:22 +00001076 /**
1077 * Returns the payload data for the current span.
1078 * This is invalid until {@link #next()} is called for
1079 * the first time.
1080 * This method must not be called more than once after each call
1081 * of {@link #next()}. However, most payloads are loaded lazily,
1082 * so if the payload data for the current position is not needed,
1083 * this method may not be called at all for performance reasons. An ordered
1084 * SpanQuery does not lazy load, so if you have payloads in your index and
1085 * you do not want ordered SpanNearQuerys to collect payloads, you can
Nils Diewalde0725012014-09-25 19:32:52 +00001086 * disable collection with a constructor option.<br>
Nils Diewaldf399a672013-11-18 17:55:22 +00001087 * <br>
Nils Diewalde0725012014-09-25 19:32:52 +00001088 * Note that the return type is a collection, thus the ordering should not be relied upon.
Nils Diewaldf399a672013-11-18 17:55:22 +00001089 * <br/>
1090 * @lucene.experimental
1091 *
1092 * @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false
1093 * @throws IOException if there is a low-level I/O error
1094 */
1095 // public abstract Collection<byte[]> getPayload() throws IOException;
1096 @Override
1097 public Collection<byte[]> getPayload() throws IOException {
1098 return matchPayload;
1099 };
1100
1101
1102 /**
1103 * Checks if a payload can be loaded at this position.
1104 * <p/>
1105 * Payloads can only be loaded once per call to
1106 * {@link #next()}.
1107 *
1108 * @return true if there is a payload available at this position that can be loaded
1109 */
1110 @Override
1111 public boolean isPayloadAvailable() {
1112 return matchPayload.isEmpty() == false;
1113 };
1114
Nils Diewald6802acd2014-03-18 18:29:30 +00001115
Nils Diewaldf399a672013-11-18 17:55:22 +00001116 // Todo: This may be in the wrong version
1117 @Override
1118 public long cost() {
Nils Diewald6802acd2014-03-18 18:29:30 +00001119 return wrapSpans.cost() + embeddedSpans.cost();
Nils Diewaldf399a672013-11-18 17:55:22 +00001120 };
1121
Nils Diewald6802acd2014-03-18 18:29:30 +00001122
Nils Diewaldf399a672013-11-18 17:55:22 +00001123 @Override
1124 public String toString() {
1125 return getClass().getName() + "("+query.toString()+")@"+
Nils Diewald6802acd2014-03-18 18:29:30 +00001126 (embeddedDoc <= 0?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
Nils Diewaldf399a672013-11-18 17:55:22 +00001127 };
Nils Diewald41750bf2015-02-06 17:45:20 +00001128
1129
1130 // This was formerly the default candidate span class,
1131 // before it was refactored out
1132 private class WithinSpan implements Comparable<WithinSpan>, Cloneable {
1133 public int
1134 start = -1,
1135 end = -1,
1136 doc = -1;
1137
1138 public Collection<byte[]> payload;
1139
1140 public short elementRef = -1;
1141
1142 public void clear () {
1143 this.start = -1;
1144 this.end = -1;
1145 this.doc = -1;
1146 clearPayload();
1147 };
1148
1149 @Override
1150 public int compareTo (WithinSpan o) {
1151 /* optimizable for short numbers to return o.end - this.end */
1152 if (this.doc < o.doc) {
1153 return -1;
1154 }
1155 else if (this.doc == o.doc) {
1156 if (this.start < o.start) {
1157 return -1;
1158 }
1159 else if (this.start == o.start) {
1160 if (this.end < o.end)
1161 return -1;
1162 };
1163 };
1164 return 1;
1165 };
1166
1167 public short getElementRef() {
1168 return elementRef;
1169 }
1170
1171 public void setElementRef(short elementRef) {
1172 this.elementRef = elementRef;
1173 };
1174
1175 @Override
1176 public Object clone() {
1177 WithinSpan span = new WithinSpan();
1178 span.start = this.start;
1179 span.end = this.end;
1180 span.doc = this.doc;
1181 span.payload.addAll(this.payload);
1182 return span;
1183 };
1184
1185 public WithinSpan copyFrom (WithinSpan o) {
1186 this.start = o.start;
1187 this.end = o.end;
1188 this.doc = o.doc;
1189 // this.clearPayload();
1190 this.payload.addAll(o.payload);
1191 return this;
1192 };
1193
1194 public void clearPayload () {
1195 if (this.payload != null)
1196 this.payload.clear();
1197 };
1198
1199 public String toString () {
1200 StringBuilder sb = new StringBuilder("[");
1201 return sb.append(this.start).append('-')
1202 .append(this.end)
1203 .append('(').append(this.doc).append(')')
1204 .append(']')
1205 .toString();
1206 };
1207 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001208};