blob: 0cfb938b2daa62e77803eeed4238eb9fa304ceb6 [file] [log] [blame]
Nils Diewaldf399a672013-11-18 17:55:22 +00001package de.ids_mannheim.korap.query.spans;
2
Eliza Margaretha22898982014-11-04 17:10:21 +00003import java.io.IOException;
4import java.util.ArrayList;
5import java.util.Collection;
6import java.util.LinkedList;
7import java.util.Map;
8
Akron700c1eb2015-09-25 16:57:30 +02009import org.apache.lucene.index.LeafReaderContext;
Nils Diewaldf399a672013-11-18 17:55:22 +000010import org.apache.lucene.index.Term;
11import org.apache.lucene.index.TermContext;
Nils Diewaldf399a672013-11-18 17:55:22 +000012import org.apache.lucene.search.DocIdSetIterator;
Eliza Margaretha22898982014-11-04 17:10:21 +000013import org.apache.lucene.search.spans.Spans;
14import org.apache.lucene.util.Bits;
Nils Diewaldf399a672013-11-18 17:55:22 +000015import org.slf4j.Logger;
16import org.slf4j.LoggerFactory;
17
Eliza Margaretha22898982014-11-04 17:10:21 +000018import de.ids_mannheim.korap.query.SpanWithinQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +000019
Nils Diewald83c9b162015-02-03 21:05:07 +000020
/**
 * SpanWithinQuery is DEPRECATED and will
 * be replaced by SpanPositionQuery in the near future.
 *
 * TODO: Support exclusivity
 * TODO: Use the term "queue" and implement it similar to SpanOrQuery
 * TODO: Implement an incrStartPos() method to forward an embedded span
 * until the start position is higher than the current start position.
 */
30
Nils Diewald82a4b862014-02-20 21:17:41 +000031/**
Nils Diewald1455e1e2014-08-01 16:12:43 +000032 * Compare two spans and check how they relate positionally.
Nils Diewaldbb33da22015-03-04 16:24:25 +000033 *
Nils Diewald44d5fa12015-01-15 21:31:52 +000034 * @author diewald
Nils Diewald82a4b862014-02-20 21:17:41 +000035 */
Nils Diewaldf399a672013-11-18 17:55:22 +000036public class WithinSpans extends Spans {
Nils Diewald82a4b862014-02-20 21:17:41 +000037
Nils Diewald6802acd2014-03-18 18:29:30 +000038 // Logger
39 private final Logger log = LoggerFactory.getLogger(WithinSpans.class);
Nils Diewald1455e1e2014-08-01 16:12:43 +000040
Nils Diewald82a4b862014-02-20 21:17:41 +000041 // This advices the java compiler to ignore all loggings
42 public static final boolean DEBUG = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +000043
Nils Diewaldf399a672013-11-18 17:55:22 +000044 private boolean more = false;
45
Nils Diewald6802acd2014-03-18 18:29:30 +000046 // Boolean value indicating if span B
47 // should be forwarded next (true)
48 // or span A (false);
49 boolean nextSpanB = true;
Nils Diewald82a4b862014-02-20 21:17:41 +000050
Nils Diewaldbb33da22015-03-04 16:24:25 +000051 private int wrapStart = -1, wrapEnd = -1, embeddedStart = -1,
52 embeddedEnd = -1, wrapDoc = -1, embeddedDoc = -1, matchDoc = -1,
53 matchStart = -1, matchEnd = -1;
54
Nils Diewald6802acd2014-03-18 18:29:30 +000055 private Collection<byte[]> matchPayload;
Nils Diewaldf399a672013-11-18 17:55:22 +000056 private Collection<byte[]> embeddedPayload;
Nils Diewaldbb33da22015-03-04 16:24:25 +000057
Nils Diewald6802acd2014-03-18 18:29:30 +000058 // Indicates that the wrap and the embedded spans are in the same doc
59 private boolean inSameDoc = false;
Nils Diewaldf399a672013-11-18 17:55:22 +000060
Nils Diewald6802acd2014-03-18 18:29:30 +000061 /*
62 Supported flags are currently:
Nils Diewaldcb8afb32015-02-04 21:12:37 +000063 ov -> 0 | overlap: A & B != empty
64 rov -> 2 | real overlap: A & B != empty and
65 ((A | B) != A or
Nils Diewald6802acd2014-03-18 18:29:30 +000066 (A | B) != B)
Nils Diewaldcb8afb32015-02-04 21:12:37 +000067 in -> 4 | within: A | B = A
68 rin -> 6 | real within: A | B = A and
69 A & B != A
70 ew -> 8 | endswith: A | B = A and
71 A.start = B.start
72 sw -> 10 | startswith: A | B = A and
73 A.end = B.end
74 m -> 12 | A = B
75 */
Nils Diewaldbb33da22015-03-04 16:24:25 +000076 public static final byte OVERLAP = (byte) 0, REAL_OVERLAP = (byte) 2,
77 WITHIN = (byte) 4, REAL_WITHIN = (byte) 6, ENDSWITH = (byte) 8,
78 STARTSWITH = (byte) 10, MATCH = (byte) 12;
Nils Diewaldf399a672013-11-18 17:55:22 +000079
Nils Diewald6802acd2014-03-18 18:29:30 +000080 private byte flag;
Nils Diewaldf399a672013-11-18 17:55:22 +000081
Nils Diewald1455e1e2014-08-01 16:12:43 +000082 // Contains the query
Nils Diewaldf399a672013-11-18 17:55:22 +000083 private SpanWithinQuery query;
84
Nils Diewaldcb8afb32015-02-04 21:12:37 +000085 // Representing the first operand
86 private final Spans wrapSpans;
Nils Diewaldf399a672013-11-18 17:55:22 +000087
Nils Diewaldcb8afb32015-02-04 21:12:37 +000088 // Representing the second operand
89 private final Spans embeddedSpans;
90
91 // Check flag if the current constellation
92 // was checked yet
Nils Diewald6802acd2014-03-18 18:29:30 +000093 private boolean tryMatch = true;
Nils Diewaldf399a672013-11-18 17:55:22 +000094
Nils Diewald01ff7af2015-02-04 22:54:26 +000095 // Two buffers for storing candidates
Nils Diewaldbb33da22015-03-04 16:24:25 +000096 private LinkedList<WithinSpan> spanStore1, spanStore2;
97
Nils Diewaldf399a672013-11-18 17:55:22 +000098
Nils Diewald01ff7af2015-02-04 22:54:26 +000099 /**
100 * Construct a new WithinSpans object.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000101 *
102 * @param spanWithinQuery
103 * The parental {@link SpanWithinQuery}.
104 * @param context
Akron700c1eb2015-09-25 16:57:30 +0200105 * The {@link LeafReaderContext}.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000106 * @param acceptDocs
107 * Bit vector representing the documents
108 * to be searched in.
109 * @param termContexts
110 * A map managing {@link TermState TermStates}.
111 * @param flag
112 * A byte flag indicating the positional condition of
113 * the sub spans.
Nils Diewald01ff7af2015-02-04 22:54:26 +0000114 */
Nils Diewaldf399a672013-11-18 17:55:22 +0000115 public WithinSpans (SpanWithinQuery spanWithinQuery,
Akron700c1eb2015-09-25 16:57:30 +0200116 LeafReaderContext context, Bits acceptDocs,
Nils Diewaldbb33da22015-03-04 16:24:25 +0000117 Map<Term, TermContext> termContexts, byte flag)
118 throws IOException {
Nils Diewaldf399a672013-11-18 17:55:22 +0000119
Nils Diewald83c9b162015-02-03 21:05:07 +0000120 if (DEBUG)
121 log.trace("Construct WithinSpans");
Nils Diewaldf399a672013-11-18 17:55:22 +0000122
Nils Diewald83c9b162015-02-03 21:05:07 +0000123 // Init copies
124 this.matchPayload = new LinkedList<byte[]>();
Nils Diewaldf399a672013-11-18 17:55:22 +0000125
Nils Diewald83c9b162015-02-03 21:05:07 +0000126 // Get spans
Nils Diewaldbb33da22015-03-04 16:24:25 +0000127 this.wrapSpans = spanWithinQuery.wrap().getSpans(context, acceptDocs,
128 termContexts);
129 this.embeddedSpans = spanWithinQuery.embedded().getSpans(context,
130 acceptDocs, termContexts);
Nils Diewaldf399a672013-11-18 17:55:22 +0000131
Nils Diewald83c9b162015-02-03 21:05:07 +0000132 this.flag = flag;
Nils Diewaldf399a672013-11-18 17:55:22 +0000133
Nils Diewald83c9b162015-02-03 21:05:07 +0000134 // SpanStores for backtracking
Nils Diewald41750bf2015-02-06 17:45:20 +0000135 this.spanStore1 = new LinkedList<WithinSpan>();
136 this.spanStore2 = new LinkedList<WithinSpan>();
Nils Diewald6802acd2014-03-18 18:29:30 +0000137
Nils Diewald83c9b162015-02-03 21:05:07 +0000138 // kept for toString() only.
139 this.query = spanWithinQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +0000140 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000141
Nils Diewaldf399a672013-11-18 17:55:22 +0000142
Nils Diewald6802acd2014-03-18 18:29:30 +0000143 // Move to next match, returning true iff any such exists.
Nils Diewaldf399a672013-11-18 17:55:22 +0000144 @Override
145 public boolean next () throws IOException {
Nils Diewald6802acd2014-03-18 18:29:30 +0000146
Nils Diewald83c9b162015-02-03 21:05:07 +0000147 if (DEBUG)
148 log.trace("Next with docs {}, {}", wrapDoc, embeddedDoc);
Nils Diewaldf399a672013-11-18 17:55:22 +0000149
Nils Diewald83c9b162015-02-03 21:05:07 +0000150 // Initialize spans
151 if (!this.init()) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000152 this.more = false;
153 this.inSameDoc = false;
154 this.wrapDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000155 this.embeddedDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000156 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000157 return false;
158 };
Nils Diewald82a4b862014-02-20 21:17:41 +0000159
Nils Diewald83c9b162015-02-03 21:05:07 +0000160 // There are more spans and they are in the same document
Nils Diewald6802acd2014-03-18 18:29:30 +0000161
Nils Diewald83c9b162015-02-03 21:05:07 +0000162 while (this.more && (wrapDoc == embeddedDoc ||
Nils Diewaldbb33da22015-03-04 16:24:25 +0000163 // this.inSameDoc ||
164 this.toSameDoc())) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000165 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000166 log.trace("We are in the same doc: {}, {}", wrapDoc,
167 embeddedDoc);
Nils Diewald6802acd2014-03-18 18:29:30 +0000168
Nils Diewald83c9b162015-02-03 21:05:07 +0000169 // Both spans match according to the flag
170 // Silently the next operations are prepared
171 if (this.tryMatch && this.doesMatch()) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000172
Nils Diewald83c9b162015-02-03 21:05:07 +0000173 if (this.wrapEnd == -1)
174 this.wrapEnd = this.wrapSpans.end();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000175
176 this.matchStart = embeddedStart < wrapStart ? embeddedStart
177 : wrapStart;
178 this.matchEnd = embeddedEnd > wrapEnd ? embeddedEnd : wrapEnd;
179 this.matchDoc = embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000180 this.matchPayload.clear();
Nils Diewald82a4b862014-02-20 21:17:41 +0000181
Nils Diewald83c9b162015-02-03 21:05:07 +0000182 if (this.embeddedPayload != null)
183 matchPayload.addAll(embeddedPayload);
Nils Diewaldf399a672013-11-18 17:55:22 +0000184
Nils Diewald83c9b162015-02-03 21:05:07 +0000185 if (this.wrapSpans.isPayloadAvailable())
186 this.matchPayload.addAll(wrapSpans.getPayload());
Nils Diewaldf399a672013-11-18 17:55:22 +0000187
Nils Diewald83c9b162015-02-03 21:05:07 +0000188 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000189 log.trace(" ---- MATCH ---- {}-{} ({})", matchStart,
190 matchEnd, matchDoc);
Nils Diewald6802acd2014-03-18 18:29:30 +0000191
Nils Diewald83c9b162015-02-03 21:05:07 +0000192 this.tryMatch = false;
193 return true;
194 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000195
Nils Diewald83c9b162015-02-03 21:05:07 +0000196 // Get next embedded
197 else if (this.nextSpanB) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000198
Nils Diewald83c9b162015-02-03 21:05:07 +0000199 // Next time try the match
200 this.tryMatch = true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000201
Nils Diewald83c9b162015-02-03 21:05:07 +0000202 if (DEBUG)
203 log.trace("In the next embedded branch");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000204
Nils Diewald41750bf2015-02-06 17:45:20 +0000205 WithinSpan current = null;
Nils Diewald6802acd2014-03-18 18:29:30 +0000206
Nils Diewald83c9b162015-02-03 21:05:07 +0000207 // New - fetch until theres a span in the correct doc or bigger
208 while (!this.spanStore2.isEmpty()) {
209 current = spanStore2.removeFirst();
210 if (current.doc >= this.wrapDoc)
211 break;
212 };
213
214
215 // There is nothing in the second store
216 if (current == null) {
217 if (DEBUG)
218 log.trace("SpanStore 2 is empty");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000219
Nils Diewald83c9b162015-02-03 21:05:07 +0000220 // Forward with embedding
221 if (!this.embeddedSpans.next()) {
222 this.nextSpanA();
223 continue;
224 }
225
226 else if (DEBUG) {
227 log.trace("Fetch next embedded span");
228 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000229
Nils Diewaldcd226862015-02-11 22:27:45 +0000230 this.embeddedStart = this.embeddedSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000231 this.embeddedEnd = -1;
232 this.embeddedPayload = null;
233 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewald6802acd2014-03-18 18:29:30 +0000234
Nils Diewald83c9b162015-02-03 21:05:07 +0000235 if (this.embeddedDoc != this.wrapDoc) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000236
Nils Diewald83c9b162015-02-03 21:05:07 +0000237 if (DEBUG) {
238 log.trace("Embedded span is in a new document {}",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000239 _currentEmbedded().toString());
Nils Diewald83c9b162015-02-03 21:05:07 +0000240 log.trace("Reset current embedded doc");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000241 };
242
Nils Diewald83c9b162015-02-03 21:05:07 +0000243 /*
244 if (DEBUG)
245 log.trace("Clear all span stores");
246 this.spanStore1.clear();
247 this.spanStore2.clear();
248 */
Nils Diewald6802acd2014-03-18 18:29:30 +0000249
Nils Diewald83c9b162015-02-03 21:05:07 +0000250 this.storeEmbedded();
251
252 // That is necessary to backtrack to the last document!
253 this.inSameDoc = true;
254 this.embeddedDoc = wrapDoc;
255 // this.tryMatch = false; // already covered in nextSpanA
256
257 this.nextSpanA();
258 continue;
259 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000260
Nils Diewald83c9b162015-02-03 21:05:07 +0000261 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000262 log.trace(" Forward embedded span to {}",
263 _currentEmbedded().toString());
264
Nils Diewald83c9b162015-02-03 21:05:07 +0000265 if (this.embeddedDoc != this.wrapDoc) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000266
Nils Diewald83c9b162015-02-03 21:05:07 +0000267 // Is this always a good idea?
268 /*
269 this.spanStore1.clear();
270 this.spanStore2.clear();
271 */
272
273 this.embeddedStart = -1;
274 this.embeddedEnd = -1;
275 this.embeddedPayload = null;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000276
Nils Diewald83c9b162015-02-03 21:05:07 +0000277 if (!this.toSameDoc()) {
278 this.more = false;
279 this.inSameDoc = false;
280 return false;
281 };
282 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000283
Nils Diewald83c9b162015-02-03 21:05:07 +0000284 this.more = true;
285 this.inSameDoc = true;
286 this.tryMatch = true;
Akrona7b936d2016-03-04 13:40:54 +0100287
Nils Diewald83c9b162015-02-03 21:05:07 +0000288 this.nextSpanB();
289 continue;
290 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000291
Nils Diewald83c9b162015-02-03 21:05:07 +0000292 // Fetch from second store?
293 else {
Akrona7b936d2016-03-04 13:40:54 +0100294
Nils Diewald83c9b162015-02-03 21:05:07 +0000295 /** TODO: Change this to a single embedded object! */
296 this.embeddedStart = current.start;
297 this.embeddedEnd = current.end;
298 this.embeddedDoc = current.doc;
Nils Diewald6802acd2014-03-18 18:29:30 +0000299
Nils Diewald83c9b162015-02-03 21:05:07 +0000300 if (current.payload != null) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000301 this.embeddedPayload = new ArrayList<byte[]>(
302 current.payload.size());
Nils Diewald83c9b162015-02-03 21:05:07 +0000303 this.embeddedPayload.addAll(current.payload);
304 }
305 else {
306 this.embeddedPayload = null;
307 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000308
Nils Diewald83c9b162015-02-03 21:05:07 +0000309 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000310 log.trace("Fetch current from SpanStore 2: {}",
311 current.toString());
312
Nils Diewald83c9b162015-02-03 21:05:07 +0000313 this.tryMatch = true;
314 };
315 continue;
316 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000317
Nils Diewald83c9b162015-02-03 21:05:07 +0000318 // get next wrap
319 if (DEBUG)
320 log.trace("In the next wrap branch");
Nils Diewald6802acd2014-03-18 18:29:30 +0000321
Nils Diewald83c9b162015-02-03 21:05:07 +0000322 this.tryMatch = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000323
Nils Diewald83c9b162015-02-03 21:05:07 +0000324 if (DEBUG)
325 log.trace("Try next wrap");
Nils Diewald6802acd2014-03-18 18:29:30 +0000326
Nils Diewald83c9b162015-02-03 21:05:07 +0000327 // shift the stored spans
328 if (!this.spanStore1.isEmpty()) {
329 if (DEBUG) {
330 log.trace("Move everything from SpanStore 1 to SpanStore 2:");
Nils Diewald41750bf2015-02-06 17:45:20 +0000331 for (WithinSpan i : this.spanStore1) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000332 log.trace(" | {}", i.toString());
333 };
334 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000335
Nils Diewald83c9b162015-02-03 21:05:07 +0000336 // Move everything to spanStore2
Nils Diewaldbb33da22015-03-04 16:24:25 +0000337 this.spanStore2.addAll(0,
338 (LinkedList<WithinSpan>) this.spanStore1.clone());
Nils Diewald83c9b162015-02-03 21:05:07 +0000339 this.spanStore1.clear();
Nils Diewald6802acd2014-03-18 18:29:30 +0000340
Nils Diewald83c9b162015-02-03 21:05:07 +0000341 if (DEBUG) {
342 log.trace("SpanStore 2 now is:");
Nils Diewald41750bf2015-02-06 17:45:20 +0000343 for (WithinSpan i : this.spanStore2) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000344 log.trace(" | {}", i.toString());
345 };
346 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000347
Nils Diewald83c9b162015-02-03 21:05:07 +0000348 }
349 else if (DEBUG) {
350 log.trace("spanStore 1 is empty");
351 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000352
Nils Diewald83c9b162015-02-03 21:05:07 +0000353 // Get next wrap
354 if (this.wrapSpans.next()) {
Nils Diewald82a4b862014-02-20 21:17:41 +0000355
Nils Diewald83c9b162015-02-03 21:05:07 +0000356 // Reset wrapping information
Nils Diewaldcd226862015-02-11 22:27:45 +0000357 this.wrapStart = this.wrapSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000358 this.wrapEnd = -1;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000359
Nils Diewald83c9b162015-02-03 21:05:07 +0000360 // Retrieve doc information
361 this.wrapDoc = this.wrapSpans.doc();
Nils Diewald20607ab2014-03-20 23:28:36 +0000362
Nils Diewald83c9b162015-02-03 21:05:07 +0000363 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000364 log.trace(" Forward wrap span to {}", _currentWrap()
365 .toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000366
Nils Diewald83c9b162015-02-03 21:05:07 +0000367 if (this.embeddedDoc != this.wrapDoc) {
368 if (DEBUG)
369 log.trace("Delete all span stores");
370 this.spanStore1.clear();
371 this.spanStore2.clear();
Nils Diewald6802acd2014-03-18 18:29:30 +0000372
Nils Diewald83c9b162015-02-03 21:05:07 +0000373 // Reset embedded:
374 this.embeddedStart = -1;
375 this.embeddedEnd = -1;
376 this.embeddedPayload = null;
Nils Diewald6802acd2014-03-18 18:29:30 +0000377
Nils Diewald83c9b162015-02-03 21:05:07 +0000378 if (!this.toSameDoc()) {
379 this.inSameDoc = false;
380 this.more = false;
381 return false;
382 };
383 }
384 else {
385 this.inSameDoc = true;
386 // Do not match with the current state
387 this.tryMatch = false;
388 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000389
Nils Diewald83c9b162015-02-03 21:05:07 +0000390 this.nextSpanB();
391 continue;
392 }
393 this.more = false;
394 this.inSameDoc = false;
395 this.spanStore1.clear();
396 this.spanStore2.clear();
397 return false;
398 };
Nils Diewald82a4b862014-02-20 21:17:41 +0000399
Nils Diewald83c9b162015-02-03 21:05:07 +0000400 // No more matches
401 return false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000402 };
403
404
405 /**
406 * Skip to the next document
407 */
408 private boolean toSameDoc () throws IOException {
Nils Diewald82a4b862014-02-20 21:17:41 +0000409
Nils Diewald83c9b162015-02-03 21:05:07 +0000410 if (DEBUG)
411 log.trace("Forward to find same docs");
Nils Diewald6802acd2014-03-18 18:29:30 +0000412
Nils Diewald83c9b162015-02-03 21:05:07 +0000413 /*
414 if (this.embeddedSpans == null) {
415 this.more = false;
416 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
417 this.inSameDoc = false;
418 return false;
419 };
Akron40f51ee2016-04-22 17:55:14 +0200420*/
Nils Diewald6802acd2014-03-18 18:29:30 +0000421
Nils Diewald83c9b162015-02-03 21:05:07 +0000422 this.more = true;
423 this.inSameDoc = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000424
Nils Diewaldbb33da22015-03-04 16:24:25 +0000425 this.wrapDoc = this.wrapSpans.doc();
Akron40f51ee2016-04-22 17:55:14 +0200426
427 // Last doc was reached
428 if (this.wrapDoc == DocIdSetIterator.NO_MORE_DOCS) {
429 this.more = false;
430 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
431 this.inSameDoc = false;
432 return false;
433 };
434
435 // This is just a workaround for an issue that seems to be a bug in Lucene's core code.
436 try {
437 this.embeddedDoc = this.embeddedSpans.doc();
438 }
439 catch (NullPointerException e) {
440 this.more = false;
441 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
442 this.inSameDoc = false;
443 return false;
444 };
445
Nils Diewald6802acd2014-03-18 18:29:30 +0000446
Nils Diewald83c9b162015-02-03 21:05:07 +0000447 // Clear all spanStores
448 if (this.wrapDoc != this.embeddedDoc) {
449 /*
450 if (DEBUG)
451 log.trace("Clear all spanStores when moving forward");
452 // Why??
453 this.spanStore1.clear();
454 this.spanStore2.clear();
455 */
456 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000457
Nils Diewald83c9b162015-02-03 21:05:07 +0000458 else {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000459 if (DEBUG) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000460 log.trace("Current position already is in the same doc");
461 log.trace("Embedded: {}", _currentEmbedded().toString());
462 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000463 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000464 return true;
465 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000466
Nils Diewaldcd226862015-02-11 22:27:45 +0000467
Nils Diewald83c9b162015-02-03 21:05:07 +0000468 // Forward till match
469 while (this.wrapDoc != this.embeddedDoc) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000470
Nils Diewald83c9b162015-02-03 21:05:07 +0000471 // Forward wrapInfo
472 if (this.wrapDoc < this.embeddedDoc) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000473
Nils Diewald83c9b162015-02-03 21:05:07 +0000474 // Set document information
475 if (!wrapSpans.skipTo(this.embeddedDoc)) {
476 this.more = false;
477 this.inSameDoc = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000478 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000479 return false;
480 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000481
Nils Diewald83c9b162015-02-03 21:05:07 +0000482 if (DEBUG)
483 log.trace("Skip wrap to doc {}", this.embeddedDoc);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000484
Nils Diewald83c9b162015-02-03 21:05:07 +0000485 this.wrapDoc = this.wrapSpans.doc();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000486
Nils Diewald83c9b162015-02-03 21:05:07 +0000487 if (wrapDoc == DocIdSetIterator.NO_MORE_DOCS) {
488 this.more = false;
489 this.inSameDoc = false;
490 this.embeddedDoc = DocIdSetIterator.NO_MORE_DOCS;
491 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
492 return false;
493 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000494
495 /*
496 Remove stored information
497 */
498 if (DEBUG)
499 log.trace("Delete all span stores");
500
501 this.spanStore1.clear();
502 this.spanStore2.clear();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000503
Nils Diewaldcd226862015-02-11 22:27:45 +0000504 if (wrapDoc == embeddedDoc) {
505 this.wrapStart = this.wrapSpans.start();
506 this.embeddedStart = this.embeddedSpans.start();
507 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000508 return true;
Nils Diewaldcd226862015-02-11 22:27:45 +0000509 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000510
Nils Diewaldcd226862015-02-11 22:27:45 +0000511 this.wrapStart = -1;
512 this.embeddedStart = -1;
Nils Diewald83c9b162015-02-03 21:05:07 +0000513 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000514
Nils Diewald83c9b162015-02-03 21:05:07 +0000515 // Forward embedInfo
516 else if (this.wrapDoc > this.embeddedDoc) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000517
Nils Diewald83c9b162015-02-03 21:05:07 +0000518 // Set document information
519 if (!this.embeddedSpans.skipTo(this.wrapDoc)) {
520 this.more = false;
521 this.inSameDoc = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000522 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000523 return false;
524 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000525
Nils Diewald83c9b162015-02-03 21:05:07 +0000526 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000527
Nils Diewald83c9b162015-02-03 21:05:07 +0000528 if (this.embeddedDoc == DocIdSetIterator.NO_MORE_DOCS) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000529 this.more = false;
Nils Diewald83c9b162015-02-03 21:05:07 +0000530 this.inSameDoc = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000531 this.wrapDoc = DocIdSetIterator.NO_MORE_DOCS;
532 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000533 return false;
534 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000535
536 if (DEBUG)
537 log.trace("Skip embedded to doc {}", this.embeddedDoc);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000538
Nils Diewaldcd226862015-02-11 22:27:45 +0000539 this.embeddedStart = this.embeddedSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000540 this.embeddedEnd = -1;
541 this.embeddedPayload = null;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000542
Nils Diewaldcd226862015-02-11 22:27:45 +0000543 if (this.wrapDoc == this.embeddedDoc) {
544 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000545 return true;
Nils Diewaldcd226862015-02-11 22:27:45 +0000546 };
Nils Diewald83c9b162015-02-03 21:05:07 +0000547 }
548 else {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000549 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000550 return false;
551 };
552 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000553
Nils Diewaldcd226862015-02-11 22:27:45 +0000554 this.matchDoc = this.wrapDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000555 return true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000556 };
557
558
559 // Initialize spans
560 private boolean init () throws IOException {
561
Nils Diewald83c9b162015-02-03 21:05:07 +0000562 // There is a missing span
563 if (this.embeddedDoc >= 0)
564 return true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000565
Nils Diewald83c9b162015-02-03 21:05:07 +0000566 if (DEBUG)
567 log.trace("Initialize spans");
Nils Diewald6802acd2014-03-18 18:29:30 +0000568
Nils Diewald83c9b162015-02-03 21:05:07 +0000569 // First tick for both spans
570 if (!(this.embeddedSpans.next() && this.wrapSpans.next())) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000571
Nils Diewald83c9b162015-02-03 21:05:07 +0000572 if (DEBUG)
573 log.trace("No spans initialized");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000574
Nils Diewald83c9b162015-02-03 21:05:07 +0000575 this.embeddedDoc = -1;
576 this.more = false;
577 return false;
578 };
579 this.more = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000580
Nils Diewald83c9b162015-02-03 21:05:07 +0000581 // Store current positions for wrapping and embedded spans
Nils Diewaldbb33da22015-03-04 16:24:25 +0000582 this.wrapDoc = this.wrapSpans.doc();
Nils Diewald83c9b162015-02-03 21:05:07 +0000583 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewald6802acd2014-03-18 18:29:30 +0000584
Nils Diewald83c9b162015-02-03 21:05:07 +0000585 // Set inSameDoc to true, if it is true
586 if (this.embeddedDoc == this.wrapDoc)
587 this.inSameDoc = true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000588
Nils Diewald83c9b162015-02-03 21:05:07 +0000589 return true;
Nils Diewaldf399a672013-11-18 17:55:22 +0000590 };
591
592
Nils Diewaldbb33da22015-03-04 16:24:25 +0000593 /**
594 * Skips to the first match beyond the current, whose document
595 * number is
596 * greater than or equal to <i>target</i>. <p>Returns true iff
597 * there is such
598 * a match. <p>Behaves as if written: <pre class="prettyprint">
599 * boolean skipTo(int target) {
600 * do {
601 * if (!next())
602 * return false;
603 * } while (target > doc());
604 * return true;
605 * }
Nils Diewaldf399a672013-11-18 17:55:22 +0000606 * </pre>
607 * Most implementations are considerably more efficient than that.
608 */
609 public boolean skipTo (int target) throws IOException {
Nils Diewald6802acd2014-03-18 18:29:30 +0000610
Nils Diewald83c9b162015-02-03 21:05:07 +0000611 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000612 log.trace("skipTo document {}/{} -> {}", this.embeddedDoc,
613 this.wrapDoc, target);
Nils Diewaldf399a672013-11-18 17:55:22 +0000614
Nils Diewald83c9b162015-02-03 21:05:07 +0000615 // Initialize spans
616 if (!this.init())
617 return false;
Nils Diewald82a4b862014-02-20 21:17:41 +0000618
Nils Diewaldcd226862015-02-11 22:27:45 +0000619 assert target > this.embeddedDoc;
Nils Diewald82a4b862014-02-20 21:17:41 +0000620
Nils Diewald83c9b162015-02-03 21:05:07 +0000621 // Only forward embedded spans
622 if (this.more && (this.embeddedDoc < target)) {
623 if (this.embeddedSpans.skipTo(target)) {
624 this.inSameDoc = false;
625 this.embeddedStart = -1;
626 this.embeddedEnd = -1;
627 this.embeddedPayload = null;
628 this.embeddedDoc = this.embeddedSpans.doc();
629 }
Nils Diewaldf399a672013-11-18 17:55:22 +0000630
Nils Diewald83c9b162015-02-03 21:05:07 +0000631 // Can't be skipped to target
632 else {
633 this.inSameDoc = false;
634 this.more = false;
635 return false;
636 };
637 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000638
Nils Diewald83c9b162015-02-03 21:05:07 +0000639 // Move to same doc
640 return this.toSameDoc();
Nils Diewaldf399a672013-11-18 17:55:22 +0000641 };
642
Nils Diewaldbb33da22015-03-04 16:24:25 +0000643
Nils Diewald6802acd2014-03-18 18:29:30 +0000644 private void nextSpanA () {
Nils Diewald83c9b162015-02-03 21:05:07 +0000645 if (DEBUG)
646 log.trace("Try wrap next time");
647 this.tryMatch = false;
648 this.nextSpanB = false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000649 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000650
Nils Diewaldbb33da22015-03-04 16:24:25 +0000651
Nils Diewald6802acd2014-03-18 18:29:30 +0000652 private void nextSpanB () {
Nils Diewald83c9b162015-02-03 21:05:07 +0000653 if (DEBUG)
654 log.trace("Try embedded next time");
655 this.nextSpanB = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000656 };
657
658
659 // Check if the current span constellation does match
660 // Store backtracking relevant data and say, how to proceed
661 private boolean doesMatch () {
Nils Diewaldcd226862015-02-11 22:27:45 +0000662 if (DEBUG)
663 log.trace("In the match test branch");
Nils Diewald6802acd2014-03-18 18:29:30 +0000664
Nils Diewaldcd226862015-02-11 22:27:45 +0000665 if (this.wrapStart == -1)
666 this.wrapStart = this.wrapSpans.start();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000667
Nils Diewaldcd226862015-02-11 22:27:45 +0000668 if (this.embeddedStart == -1) {
669 this.embeddedStart = this.embeddedSpans.start();
Nils Diewaldcd226862015-02-11 22:27:45 +0000670 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000671
Nils Diewaldcd226862015-02-11 22:27:45 +0000672 this.wrapEnd = -1;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000673
Nils Diewaldcd226862015-02-11 22:27:45 +0000674 // Shortcut to prevent lazyloading of .end()
Akrona7b936d2016-03-04 13:40:54 +0100675 // [---
676 // [---
Nils Diewaldcd226862015-02-11 22:27:45 +0000677 if (this.wrapStart > this.embeddedStart) {
678 // Can't match for in, rin, ew, sw, and m
679 // and will always lead to next_b
680 if (flag >= WITHIN) {
681 this.nextSpanB();
682 if (DEBUG)
683 _logCurrentCase((byte) 16);
684 return false;
685 };
686 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000687
Akrona7b936d2016-03-04 13:40:54 +0100688 // [---
689 // [---
Nils Diewaldcd226862015-02-11 22:27:45 +0000690 else if (this.wrapStart < this.embeddedStart) {
691 // Can't match for sw and m and will always
692 // lead to next_a
693 if (flag >= STARTSWITH) {
694 this.nextSpanA();
695 if (DEBUG)
696 _logCurrentCase((byte) 15);
697 return false;
698 };
699 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000700
Akron63cd32f2016-04-21 17:56:06 +0200701 if (this.embeddedEnd == -1) {
702 this.embeddedEnd = this.embeddedSpans.end();
703 };
704
Nils Diewaldcd226862015-02-11 22:27:45 +0000705 // Now check correctly
706 byte currentCase = this.withinCase();
Nils Diewald6802acd2014-03-18 18:29:30 +0000707
Nils Diewaldcd226862015-02-11 22:27:45 +0000708 if (DEBUG)
709 _logCurrentCase(currentCase);
Nils Diewald6802acd2014-03-18 18:29:30 +0000710
Nils Diewaldcd226862015-02-11 22:27:45 +0000711 boolean match = false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000712
Nils Diewaldcd226862015-02-11 22:27:45 +0000713 // Test case
714 if (currentCase >= (byte) 3 && currentCase <= (byte) 11) {
715 switch (flag) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000716
Nils Diewaldbb33da22015-03-04 16:24:25 +0000717 case WITHIN:
718 if (currentCase >= 6 && currentCase <= 10
719 && currentCase != 8)
720 match = true;
721 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000722
Nils Diewaldbb33da22015-03-04 16:24:25 +0000723 case REAL_WITHIN:
724 if (currentCase == 6 || currentCase == 9
725 || currentCase == 10)
726 match = true;
727 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000728
Nils Diewaldbb33da22015-03-04 16:24:25 +0000729 case MATCH:
730 if (currentCase == 7)
731 match = true;
732 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000733
Nils Diewaldbb33da22015-03-04 16:24:25 +0000734 case STARTSWITH:
735 if (currentCase == 7 || currentCase == 6)
736 match = true;
737 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000738
Nils Diewaldbb33da22015-03-04 16:24:25 +0000739 case ENDSWITH:
740 if (currentCase == 7 || currentCase == 10)
741 match = true;
742 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000743
Nils Diewaldbb33da22015-03-04 16:24:25 +0000744 case OVERLAP:
Nils Diewaldcd226862015-02-11 22:27:45 +0000745 match = true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000746 break;
747
748 case REAL_OVERLAP:
749 if (currentCase == 3 || currentCase == 11)
750 match = true;
751 break;
Nils Diewaldcd226862015-02-11 22:27:45 +0000752 };
753 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000754
Nils Diewaldcd226862015-02-11 22:27:45 +0000755 try {
756 this.todo(currentCase);
757 }
758 catch (IOException e) {
759 return false;
760 }
761 return match;
Nils Diewald6802acd2014-03-18 18:29:30 +0000762 };
763
764
765 private void _logCurrentCase (byte currentCase) {
Nils Diewaldcd226862015-02-11 22:27:45 +0000766 log.trace("Current Case is {}", currentCase);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000767
Nils Diewaldcd226862015-02-11 22:27:45 +0000768 String _e = _currentEmbedded().toString();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000769
Nils Diewaldcd226862015-02-11 22:27:45 +0000770 log.trace(" |---| {}", _currentWrap().toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000771
Nils Diewaldcd226862015-02-11 22:27:45 +0000772 switch (currentCase) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000773 case 1:
774 log.trace("|-| {}", _e);
775 break;
776 case 2:
777 log.trace("|---| {}", _e);
778 break;
779 case 3:
780 log.trace(" |---| {}", _e);
781 break;
782 case 4:
783 log.trace(" |-----| {}", _e);
784 break;
785 case 5:
786 log.trace(" |-------| {}", _e);
787 break;
788 case 6:
789 log.trace(" |-| {}", _e);
790 break;
791 case 7:
792 log.trace(" |---| {}", _e);
793 break;
794 case 8:
795 log.trace(" |-----| {}", _e);
796 break;
797 case 9:
798 log.trace(" |-| {}", _e);
799 break;
800 case 10:
801 log.trace(" |-| {}", _e);
802 break;
803 case 11:
804 log.trace(" |---| {}", _e);
805 break;
806 case 12:
807 log.trace(" |-| {}", _e);
808 break;
809 case 13:
810 log.trace(" |-| {}", _e);
811 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000812
Nils Diewaldbb33da22015-03-04 16:24:25 +0000813 case 15:
814 // Fake case
815 log.trace(" |---? {}", _e);
816 break;
817
818 case 16:
819 // Fake case
820 log.trace(" |---? {}", _e);
821 break;
Nils Diewaldcd226862015-02-11 22:27:45 +0000822 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000823 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000824
Nils Diewald6802acd2014-03-18 18:29:30 +0000825
Nils Diewald41750bf2015-02-06 17:45:20 +0000826 private WithinSpan _currentWrap () {
827 WithinSpan _wrap = new WithinSpan();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000828 _wrap.start = this.wrapStart != -1 ? this.wrapStart : this.wrapSpans
829 .start();
830 _wrap.end = this.wrapEnd != -1 ? this.wrapEnd : this.wrapSpans.end();
831 _wrap.doc = this.wrapDoc != -1 ? this.wrapDoc : this.wrapSpans.doc();
Nils Diewald83c9b162015-02-03 21:05:07 +0000832 return _wrap;
Nils Diewald6802acd2014-03-18 18:29:30 +0000833 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000834
835
Nils Diewald41750bf2015-02-06 17:45:20 +0000836 private WithinSpan _currentEmbedded () {
837 WithinSpan _embedded = new WithinSpan();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000838 _embedded.start = this.embeddedStart != -1 ? this.embeddedStart
839 : this.embeddedSpans.start();
840 _embedded.end = this.embeddedEnd != -1 ? this.embeddedEnd
841 : this.embeddedSpans.end();
842 _embedded.doc = this.embeddedDoc != -1 ? this.embeddedDoc
843 : this.embeddedSpans.doc();
Nils Diewald83c9b162015-02-03 21:05:07 +0000844 return _embedded;
Nils Diewald6802acd2014-03-18 18:29:30 +0000845 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000846
Nils Diewald6802acd2014-03-18 18:29:30 +0000847
848 private void todo (byte currentCase) throws IOException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000849 /*
850 Check what to do next with the spans.
851
852 The different follow up steps are:
853 - storeEmbedded -> store span B for later checks
854 - nextSpanA -> forward a
855 - nextSpanB -> forward b
Nils Diewald6802acd2014-03-18 18:29:30 +0000856
Nils Diewaldbb33da22015-03-04 16:24:25 +0000857 These rules were automatically generated
858 */
Nils Diewald6802acd2014-03-18 18:29:30 +0000859
Nils Diewaldbb33da22015-03-04 16:24:25 +0000860 // Case 1, 2
861 if (currentCase <= (byte) 2) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000862 this.nextSpanB();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000863 }
864
865 // Case 12, 13
866 else if (currentCase >= (byte) 12) {
867 this.storeEmbedded();
Nils Diewald83c9b162015-02-03 21:05:07 +0000868 this.nextSpanA();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000869 }
870
871 // Case 3, 4, 5, 8
872 else if (currentCase <= (byte) 5 || currentCase == (byte) 8) {
873 if (flag <= 2)
874 this.storeEmbedded();
Nils Diewald83c9b162015-02-03 21:05:07 +0000875 this.nextSpanB();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000876 }
877
878 // Case 11
879 else if (currentCase == (byte) 11) {
880 if (this.flag == REAL_WITHIN) {
881 this.nextSpanB();
882 }
883 else if (this.flag >= STARTSWITH) {
884 this.nextSpanA();
885 }
886 else {
887 this.storeEmbedded();
888 this.nextSpanB();
889 };
890 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000891
892
Nils Diewaldbb33da22015-03-04 16:24:25 +0000893 // Case 6, 7, 9, 10
894 else {
Nils Diewald6802acd2014-03-18 18:29:30 +0000895
Nils Diewaldbb33da22015-03-04 16:24:25 +0000896 if (
897 // Case 6
898 (currentCase == (byte) 6 && this.flag == MATCH) ||
Nils Diewald6802acd2014-03-18 18:29:30 +0000899
Nils Diewaldbb33da22015-03-04 16:24:25 +0000900 // Case 7
901 (currentCase == (byte) 7 && this.flag == REAL_WITHIN) ||
902
903 // Case 9, 10
904 (currentCase >= (byte) 9 && this.flag >= STARTSWITH)) {
905
906 this.nextSpanA();
907 }
908 else {
909 this.storeEmbedded();
910 this.nextSpanB();
911 };
912 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000913 };
914
Nils Diewaldbb33da22015-03-04 16:24:25 +0000915
Nils Diewald83c9b162015-02-03 21:05:07 +0000916 // Store the current embedded span in the first spanStore
Nils Diewald6802acd2014-03-18 18:29:30 +0000917 private void storeEmbedded () throws IOException {
918
Nils Diewald83c9b162015-02-03 21:05:07 +0000919 // Create a current copy
Nils Diewald41750bf2015-02-06 17:45:20 +0000920 WithinSpan embedded = new WithinSpan();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000921 embedded.start = this.embeddedStart != -1 ? this.embeddedStart
922 : this.embeddedSpans.start();
923 embedded.end = this.embeddedEnd != -1 ? this.embeddedEnd
924 : this.embeddedSpans.end();
925 embedded.doc = this.embeddedDoc;
Nils Diewald6802acd2014-03-18 18:29:30 +0000926
Nils Diewald83c9b162015-02-03 21:05:07 +0000927 // Copy payloads
928 if (this.embeddedPayload != null) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000929 embedded.payload = new ArrayList<byte[]>(
930 this.embeddedPayload.size());
Nils Diewald83c9b162015-02-03 21:05:07 +0000931 embedded.payload.addAll(this.embeddedPayload);
932 }
933 else if (this.embeddedSpans.isPayloadAvailable()) {
934 embedded.payload = new ArrayList<byte[]>(3);
935 Collection<byte[]> payload = this.embeddedSpans.getPayload();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000936
Nils Diewald83c9b162015-02-03 21:05:07 +0000937 this.embeddedPayload = new ArrayList<byte[]>(payload.size());
938 this.embeddedPayload.addAll(payload);
939 embedded.payload.addAll(payload);
940 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000941
Nils Diewald83c9b162015-02-03 21:05:07 +0000942 this.spanStore1.add(embedded);
Nils Diewald6802acd2014-03-18 18:29:30 +0000943
Nils Diewald83c9b162015-02-03 21:05:07 +0000944 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000945 log.trace("Pushed to spanStore 1 {} (in storeEmbedded)",
946 embedded.toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000947 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000948
Nils Diewald6802acd2014-03-18 18:29:30 +0000949
950 // Return case number
951 private byte withinCase () {
952
Akron63cd32f2016-04-21 17:56:06 +0200953 if (DEBUG) {
954 log.trace(">>>>>>>>>>>>>> {}-{}|{}-{}",
955 this.wrapStart,
956 this.wrapSpans.end(),
957 this.embeddedStart,
958 this.embeddedSpans.end()
959 );
960 };
961
Nils Diewaldcd226862015-02-11 22:27:45 +0000962 // case 1-5
963 if (this.wrapStart > this.embeddedStart) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000964
Nils Diewaldcd226862015-02-11 22:27:45 +0000965 // Case 1
966 // |-|
967 // |-|
968 if (this.wrapStart > this.embeddedEnd) {
969 return (byte) 1;
970 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000971
Nils Diewaldcd226862015-02-11 22:27:45 +0000972 // Case 2
973 // |-|
974 // |-|
975 else if (this.wrapStart == this.embeddedEnd) {
976 return (byte) 2;
977 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000978
Nils Diewaldcd226862015-02-11 22:27:45 +0000979 // Load wrapEnd
980 this.wrapEnd = this.wrapSpans.end();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000981
Nils Diewaldcd226862015-02-11 22:27:45 +0000982 // Case 3
983 // |---|
984 // |---|
985 if (this.wrapEnd > this.embeddedEnd) {
986 return (byte) 3;
987 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000988
Nils Diewaldcd226862015-02-11 22:27:45 +0000989 // Case 4
990 // |-|
991 // |---|
992 else if (this.wrapEnd == this.embeddedEnd) {
993 return (byte) 4;
994 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000995
Nils Diewaldcd226862015-02-11 22:27:45 +0000996 // Case 5
997 // |-|
998 // |---|
999 return (byte) 5;
1000 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001001
Nils Diewaldcd226862015-02-11 22:27:45 +00001002 // case 6-8
1003 else if (this.wrapStart == this.embeddedStart) {
Nils Diewald6802acd2014-03-18 18:29:30 +00001004
Nils Diewaldcd226862015-02-11 22:27:45 +00001005 // Load wrapEnd
1006 this.wrapEnd = this.wrapSpans.end();
Akrona7b936d2016-03-04 13:40:54 +01001007 // this.embeddedEnd = this.embeddedSpans.end();
Nils Diewald6802acd2014-03-18 18:29:30 +00001008
Nils Diewaldcd226862015-02-11 22:27:45 +00001009 // Case 6
1010 // |---|
1011 // |-|
1012 if (this.wrapEnd > this.embeddedEnd) {
1013 return (byte) 6;
1014 }
Nils Diewald6802acd2014-03-18 18:29:30 +00001015
Nils Diewaldcd226862015-02-11 22:27:45 +00001016 // Case 7
1017 // |---|
1018 // |---|
1019 else if (this.wrapEnd == this.embeddedEnd) {
1020 return (byte) 7;
1021 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001022
Nils Diewaldcd226862015-02-11 22:27:45 +00001023 // Case 8
1024 // |-|
1025 // |---|
1026 return (byte) 8;
1027 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001028
Nils Diewaldcd226862015-02-11 22:27:45 +00001029 // wrapStart < embeddedStart
Nils Diewaldf399a672013-11-18 17:55:22 +00001030
Nils Diewaldcd226862015-02-11 22:27:45 +00001031 // Load wrapEnd
1032 this.wrapEnd = this.wrapSpans.end();
Nils Diewald6802acd2014-03-18 18:29:30 +00001033
Nils Diewaldcd226862015-02-11 22:27:45 +00001034 // Case 13
1035 // |-|
1036 // |-|
1037 if (this.wrapEnd < this.embeddedStart) {
1038 return (byte) 13;
1039 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001040
Nils Diewaldcd226862015-02-11 22:27:45 +00001041 // Case 9
1042 // |---|
1043 // |-|
1044 else if (this.wrapEnd > this.embeddedEnd) {
1045 return (byte) 9;
1046 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001047
Nils Diewaldcd226862015-02-11 22:27:45 +00001048 // Case 10
1049 // |---|
1050 // |-|
1051 else if (this.wrapEnd == this.embeddedEnd) {
1052 return (byte) 10;
1053 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001054
Nils Diewaldcd226862015-02-11 22:27:45 +00001055 // Case 11
1056 // |---|
1057 // |---|
1058 else if (this.wrapEnd > this.embeddedStart) {
1059 return (byte) 11;
1060 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001061
Nils Diewaldcd226862015-02-11 22:27:45 +00001062 // case 12
1063 // |-|
1064 // |-|
1065 return (byte) 12;
Nils Diewaldbb33da22015-03-04 16:24:25 +00001066 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001067
1068
Nils Diewaldbb33da22015-03-04 16:24:25 +00001069 /**
1070 * Returns the document number of the current match. Initially
1071 * invalid.
1072 */
Nils Diewaldf399a672013-11-18 17:55:22 +00001073 @Override
1074 public int doc () {
Nils Diewaldcd226862015-02-11 22:27:45 +00001075 return matchDoc;
Nils Diewaldf399a672013-11-18 17:55:22 +00001076 };
1077
Nils Diewaldbb33da22015-03-04 16:24:25 +00001078
1079 /**
1080 * Returns the start position of the embedding wrap. Initially
1081 * invalid.
1082 */
Nils Diewaldf399a672013-11-18 17:55:22 +00001083 @Override
1084 public int start () {
Nils Diewaldcd226862015-02-11 22:27:45 +00001085 return matchStart;
Nils Diewaldf399a672013-11-18 17:55:22 +00001086 };
1087
Nils Diewaldbb33da22015-03-04 16:24:25 +00001088
1089 /**
1090 * Returns the end position of the embedding wrap. Initially
1091 * invalid.
1092 */
Nils Diewaldf399a672013-11-18 17:55:22 +00001093 @Override
1094 public int end () {
Nils Diewaldbb33da22015-03-04 16:24:25 +00001095 return matchEnd;
Nils Diewaldf399a672013-11-18 17:55:22 +00001096 };
1097
Nils Diewaldbb33da22015-03-04 16:24:25 +00001098
Nils Diewaldf399a672013-11-18 17:55:22 +00001099 /**
1100 * Returns the payload data for the current span.
1101 * This is invalid until {@link #next()} is called for
1102 * the first time.
1103 * This method must not be called more than once after each call
1104 * of {@link #next()}. However, most payloads are loaded lazily,
1105 * so if the payload data for the current position is not needed,
Nils Diewaldbb33da22015-03-04 16:24:25 +00001106 * this method may not be called at all for performance reasons.
1107 * An ordered
1108 * SpanQuery does not lazy load, so if you have payloads in your
1109 * index and
1110 * you do not want ordered SpanNearQuerys to collect payloads, you
1111 * can
Nils Diewalde0725012014-09-25 19:32:52 +00001112 * disable collection with a constructor option.<br>
Nils Diewaldf399a672013-11-18 17:55:22 +00001113 * <br>
Nils Diewaldbb33da22015-03-04 16:24:25 +00001114 * Note that the return type is a collection, thus the ordering
1115 * should not be relied upon.
Nils Diewaldf399a672013-11-18 17:55:22 +00001116 * <br/>
Nils Diewaldbb33da22015-03-04 16:24:25 +00001117 *
Nils Diewaldf399a672013-11-18 17:55:22 +00001118 * @lucene.experimental
Nils Diewaldbb33da22015-03-04 16:24:25 +00001119 *
1120 * @return a List of byte arrays containing the data of this
1121 * payload, otherwise null if isPayloadAvailable is false
1122 * @throws IOException
1123 * if there is a low-level I/O error
Nils Diewaldf399a672013-11-18 17:55:22 +00001124 */
1125 // public abstract Collection<byte[]> getPayload() throws IOException;
1126 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001127 public Collection<byte[]> getPayload () throws IOException {
1128 return matchPayload;
Nils Diewaldf399a672013-11-18 17:55:22 +00001129 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001130
Nils Diewaldf399a672013-11-18 17:55:22 +00001131
1132 /**
1133 * Checks if a payload can be loaded at this position.
1134 * <p/>
Nils Diewaldbb33da22015-03-04 16:24:25 +00001135 * Payloads can only be loaded once per call to {@link #next()}.
1136 *
1137 * @return true if there is a payload available at this position
1138 * that can be loaded
Nils Diewaldf399a672013-11-18 17:55:22 +00001139 */
1140 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001141 public boolean isPayloadAvailable () {
1142 return matchPayload.isEmpty() == false;
Nils Diewaldf399a672013-11-18 17:55:22 +00001143 };
1144
Nils Diewaldbb33da22015-03-04 16:24:25 +00001145
Nils Diewaldf399a672013-11-18 17:55:22 +00001146 // Todo: This may be in the wrong version
1147 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001148 public long cost () {
1149 return wrapSpans.cost() + embeddedSpans.cost();
Nils Diewaldf399a672013-11-18 17:55:22 +00001150 };
1151
Nils Diewaldbb33da22015-03-04 16:24:25 +00001152
Nils Diewaldf399a672013-11-18 17:55:22 +00001153 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001154 public String toString () {
1155 return getClass().getName()
1156 + "("
1157 + query.toString()
1158 + ")@"
1159 + (embeddedDoc <= 0 ? "START" : (more ? (doc() + ":" + start()
1160 + "-" + end()) : "END"));
Nils Diewaldf399a672013-11-18 17:55:22 +00001161 };
Nils Diewald41750bf2015-02-06 17:45:20 +00001162
1163
1164 // This was formerly the default candidate span class,
1165 // before it was refactored out
Nils Diewaldbb33da22015-03-04 16:24:25 +00001166 private class WithinSpan implements Comparable<WithinSpan>, Cloneable {
1167 public int start = -1, end = -1, doc = -1;
Nils Diewald41750bf2015-02-06 17:45:20 +00001168
1169 public Collection<byte[]> payload;
1170
1171 public short elementRef = -1;
Nils Diewaldbb33da22015-03-04 16:24:25 +00001172
Nils Diewald41750bf2015-02-06 17:45:20 +00001173 public void clear () {
1174 this.start = -1;
1175 this.end = -1;
1176 this.doc = -1;
1177 clearPayload();
1178 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001179
1180
Nils Diewald41750bf2015-02-06 17:45:20 +00001181 @Override
1182 public int compareTo (WithinSpan o) {
1183 /* optimizable for short numbers to return o.end - this.end */
1184 if (this.doc < o.doc) {
1185 return -1;
1186 }
1187 else if (this.doc == o.doc) {
1188 if (this.start < o.start) {
1189 return -1;
1190 }
1191 else if (this.start == o.start) {
1192 if (this.end < o.end)
1193 return -1;
1194 };
1195 };
1196 return 1;
1197 };
1198
Nils Diewaldbb33da22015-03-04 16:24:25 +00001199
1200 public short getElementRef () {
Nils Diewald41750bf2015-02-06 17:45:20 +00001201 return elementRef;
1202 }
1203
Nils Diewaldbb33da22015-03-04 16:24:25 +00001204
1205 public void setElementRef (short elementRef) {
Nils Diewald41750bf2015-02-06 17:45:20 +00001206 this.elementRef = elementRef;
1207 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001208
1209
Nils Diewald41750bf2015-02-06 17:45:20 +00001210 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001211 public Object clone () {
Nils Diewald41750bf2015-02-06 17:45:20 +00001212 WithinSpan span = new WithinSpan();
1213 span.start = this.start;
1214 span.end = this.end;
1215 span.doc = this.doc;
1216 span.payload.addAll(this.payload);
1217 return span;
1218 };
1219
Nils Diewaldbb33da22015-03-04 16:24:25 +00001220
Nils Diewald41750bf2015-02-06 17:45:20 +00001221 public WithinSpan copyFrom (WithinSpan o) {
1222 this.start = o.start;
1223 this.end = o.end;
1224 this.doc = o.doc;
1225 // this.clearPayload();
1226 this.payload.addAll(o.payload);
1227 return this;
1228 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001229
1230
Nils Diewald41750bf2015-02-06 17:45:20 +00001231 public void clearPayload () {
1232 if (this.payload != null)
1233 this.payload.clear();
1234 };
1235
Nils Diewaldbb33da22015-03-04 16:24:25 +00001236
Nils Diewald41750bf2015-02-06 17:45:20 +00001237 public String toString () {
1238 StringBuilder sb = new StringBuilder("[");
Nils Diewaldbb33da22015-03-04 16:24:25 +00001239 return sb.append(this.start).append('-').append(this.end)
1240 .append('(').append(this.doc).append(')').append(']')
1241 .toString();
Nils Diewald41750bf2015-02-06 17:45:20 +00001242 };
1243 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001244};