blob: fe507d915281a50285cc2ea7e07f7615619c40dc [file] [log] [blame]
Nils Diewaldf399a672013-11-18 17:55:22 +00001package de.ids_mannheim.korap.query.spans;
2
Eliza Margaretha22898982014-11-04 17:10:21 +00003import java.io.IOException;
4import java.util.ArrayList;
5import java.util.Collection;
6import java.util.LinkedList;
7import java.util.Map;
8
Akron700c1eb2015-09-25 16:57:30 +02009import org.apache.lucene.index.LeafReaderContext;
Nils Diewaldf399a672013-11-18 17:55:22 +000010import org.apache.lucene.index.Term;
11import org.apache.lucene.index.TermContext;
Nils Diewaldf399a672013-11-18 17:55:22 +000012import org.apache.lucene.search.DocIdSetIterator;
Eliza Margaretha22898982014-11-04 17:10:21 +000013import org.apache.lucene.search.spans.Spans;
14import org.apache.lucene.util.Bits;
Nils Diewaldf399a672013-11-18 17:55:22 +000015import org.slf4j.Logger;
16import org.slf4j.LoggerFactory;
17
Eliza Margaretha22898982014-11-04 17:10:21 +000018import de.ids_mannheim.korap.query.SpanWithinQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +000019
Nils Diewald83c9b162015-02-03 21:05:07 +000020
/**
 * SpanWithinQuery is DEPRECATED and will
 * be replaced by SpanPositionQuery in the near future.
 *
 * TODO: Support exclusivity
 * TODO: Use the term "queue" and implement it similar to SpanOrQuery
 * TODO: Implement an incrStartPos() method to forward an embedded span
 * until the start position is higher than the current start position.
 */
30
Nils Diewald82a4b862014-02-20 21:17:41 +000031/**
Nils Diewald1455e1e2014-08-01 16:12:43 +000032 * Compare two spans and check how they relate positionally.
Nils Diewaldbb33da22015-03-04 16:24:25 +000033 *
Nils Diewald44d5fa12015-01-15 21:31:52 +000034 * @author diewald
Nils Diewald82a4b862014-02-20 21:17:41 +000035 */
Nils Diewaldf399a672013-11-18 17:55:22 +000036public class WithinSpans extends Spans {
Nils Diewald82a4b862014-02-20 21:17:41 +000037
// Logger. Bound to the class, so a single shared instance is enough;
// there is no need to look it up again for every WithinSpans object.
private static final Logger log = LoggerFactory.getLogger(WithinSpans.class);

// Compile-time switch: while this constant is false, every
// "if (DEBUG)" guarded logging statement is skipped.
public static final boolean DEBUG = false;

// False as soon as one of the sub spans is exhausted
private boolean more = false;

// Boolean value indicating if span B (the embedded span)
// should be forwarded next (true)
// or span A (the wrapping span) (false).
boolean nextSpanB = true;

// Cached positions of the current wrap span, embedded span and match.
// -1 means "not loaded yet".
private int wrapStart = -1, wrapEnd = -1, embeddedStart = -1,
        embeddedEnd = -1, wrapDoc = -1, embeddedDoc = -1, matchDoc = -1,
        matchStart = -1, matchEnd = -1;

// Payload of the current match
private Collection<byte[]> matchPayload;

// Payload of the current embedded span (null if none was loaded)
private Collection<byte[]> embeddedPayload;

// Indicates that the wrap and the embedded spans are in the same doc
private boolean inSameDoc = false;

/*
  Supported flags are currently:
  ov  ->  0 | overlap:      A & B != empty
  rov ->  2 | real overlap: A & B != empty and
                            ((A | B) != A or
                             (A | B) != B)
  in  ->  4 | within:       A | B = A
  rin ->  6 | real within:  A | B = A and
                            A & B != A
  ew  ->  8 | endswith:     A | B = A and
                            A.end = B.end
  sw  -> 10 | startswith:   A | B = A and
                            A.start = B.start
  m   -> 12 | A = B
*/
public static final byte OVERLAP = (byte) 0, REAL_OVERLAP = (byte) 2,
        WITHIN = (byte) 4, REAL_WITHIN = (byte) 6, ENDSWITH = (byte) 8,
        STARTSWITH = (byte) 10, MATCH = (byte) 12;

// The positional condition to check (one of the constants above)
private byte flag;

// Contains the query
private SpanWithinQuery query;

// Representing the first operand
private final Spans wrapSpans;

// Representing the second operand
private final Spans embeddedSpans;

// Indicates whether the current constellation
// still has to be checked for a match
private boolean tryMatch = true;

// Two buffers for storing candidates
private LinkedList<WithinSpan> spanStore1, spanStore2;
97
Nils Diewaldf399a672013-11-18 17:55:22 +000098
/**
 * Construct a new WithinSpans object.
 *
 * Both sub spans are requested from the operands of the query, wrap
 * first and embedded second; all position caches start out unset.
 *
 * @param spanWithinQuery
 *            The parental {@link SpanWithinQuery}.
 * @param context
 *            The {@link LeafReaderContext}.
 * @param acceptDocs
 *            Bit vector representing the documents
 *            to be searched in.
 * @param termContexts
 *            A map from {@link Term} to {@link TermContext},
 *            handed on unchanged to both sub queries.
 * @param flag
 *            A byte flag indicating the positional condition of
 *            the sub spans.
 * @throws IOException
 *             if one of the sub queries fails to create its spans.
 */
public WithinSpans (SpanWithinQuery spanWithinQuery,
                    LeafReaderContext context, Bits acceptDocs,
                    Map<Term, TermContext> termContexts, byte flag)
        throws IOException {

    if (DEBUG)
        log.trace("Construct WithinSpans");

    // Plain configuration first.
    // The query is kept for toString() only.
    this.query = spanWithinQuery;
    this.flag = flag;

    // Sub spans of both operands
    this.wrapSpans = spanWithinQuery.wrap().getSpans(context, acceptDocs,
            termContexts);
    this.embeddedSpans = spanWithinQuery.embedded().getSpans(context,
            acceptDocs, termContexts);

    // Payload buffer of the current match
    this.matchPayload = new LinkedList<byte[]>();

    // SpanStores for backtracking
    this.spanStore1 = new LinkedList<WithinSpan>();
    this.spanStore2 = new LinkedList<WithinSpan>();
}
Nils Diewaldbb33da22015-03-04 16:24:25 +0000141
Nils Diewaldf399a672013-11-18 17:55:22 +0000142
Nils Diewald6802acd2014-03-18 18:29:30 +0000143 // Move to next match, returning true iff any such exists.
Nils Diewaldf399a672013-11-18 17:55:22 +0000144 @Override
145 public boolean next () throws IOException {
Nils Diewald6802acd2014-03-18 18:29:30 +0000146
Nils Diewald83c9b162015-02-03 21:05:07 +0000147 if (DEBUG)
148 log.trace("Next with docs {}, {}", wrapDoc, embeddedDoc);
Nils Diewaldf399a672013-11-18 17:55:22 +0000149
Nils Diewald83c9b162015-02-03 21:05:07 +0000150 // Initialize spans
151 if (!this.init()) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000152 this.more = false;
153 this.inSameDoc = false;
154 this.wrapDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000155 this.embeddedDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000156 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000157 return false;
158 };
Nils Diewald82a4b862014-02-20 21:17:41 +0000159
Nils Diewald83c9b162015-02-03 21:05:07 +0000160 // There are more spans and they are in the same document
Nils Diewald6802acd2014-03-18 18:29:30 +0000161
Nils Diewald83c9b162015-02-03 21:05:07 +0000162 while (this.more && (wrapDoc == embeddedDoc ||
Nils Diewaldbb33da22015-03-04 16:24:25 +0000163 // this.inSameDoc ||
164 this.toSameDoc())) {
Akronc3a5df82016-04-29 16:56:53 +0200165
Nils Diewald83c9b162015-02-03 21:05:07 +0000166 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000167 log.trace("We are in the same doc: {}, {}", wrapDoc,
168 embeddedDoc);
Nils Diewald6802acd2014-03-18 18:29:30 +0000169
Nils Diewald83c9b162015-02-03 21:05:07 +0000170 // Both spans match according to the flag
171 // Silently the next operations are prepared
172 if (this.tryMatch && this.doesMatch()) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000173
Nils Diewald83c9b162015-02-03 21:05:07 +0000174 if (this.wrapEnd == -1)
175 this.wrapEnd = this.wrapSpans.end();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000176
177 this.matchStart = embeddedStart < wrapStart ? embeddedStart
178 : wrapStart;
179 this.matchEnd = embeddedEnd > wrapEnd ? embeddedEnd : wrapEnd;
180 this.matchDoc = embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000181 this.matchPayload.clear();
Nils Diewald82a4b862014-02-20 21:17:41 +0000182
Nils Diewald83c9b162015-02-03 21:05:07 +0000183 if (this.embeddedPayload != null)
184 matchPayload.addAll(embeddedPayload);
Nils Diewaldf399a672013-11-18 17:55:22 +0000185
Nils Diewald83c9b162015-02-03 21:05:07 +0000186 if (this.wrapSpans.isPayloadAvailable())
187 this.matchPayload.addAll(wrapSpans.getPayload());
Nils Diewaldf399a672013-11-18 17:55:22 +0000188
Nils Diewald83c9b162015-02-03 21:05:07 +0000189 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000190 log.trace(" ---- MATCH ---- {}-{} ({})", matchStart,
191 matchEnd, matchDoc);
Nils Diewald6802acd2014-03-18 18:29:30 +0000192
Nils Diewald83c9b162015-02-03 21:05:07 +0000193 this.tryMatch = false;
194 return true;
195 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000196
Nils Diewald83c9b162015-02-03 21:05:07 +0000197 // Get next embedded
198 else if (this.nextSpanB) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000199
Nils Diewald83c9b162015-02-03 21:05:07 +0000200 // Next time try the match
201 this.tryMatch = true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000202
Nils Diewald83c9b162015-02-03 21:05:07 +0000203 if (DEBUG)
204 log.trace("In the next embedded branch");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000205
Nils Diewald41750bf2015-02-06 17:45:20 +0000206 WithinSpan current = null;
Nils Diewald6802acd2014-03-18 18:29:30 +0000207
Nils Diewald83c9b162015-02-03 21:05:07 +0000208 // New - fetch until theres a span in the correct doc or bigger
209 while (!this.spanStore2.isEmpty()) {
210 current = spanStore2.removeFirst();
211 if (current.doc >= this.wrapDoc)
212 break;
213 };
214
215
216 // There is nothing in the second store
217 if (current == null) {
218 if (DEBUG)
219 log.trace("SpanStore 2 is empty");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000220
Nils Diewald83c9b162015-02-03 21:05:07 +0000221 // Forward with embedding
222 if (!this.embeddedSpans.next()) {
Akronc3a5df82016-04-29 16:56:53 +0200223
224 // TODO: May need storeEmpdedded
Nils Diewald83c9b162015-02-03 21:05:07 +0000225 this.nextSpanA();
226 continue;
227 }
228
229 else if (DEBUG) {
230 log.trace("Fetch next embedded span");
231 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000232
Nils Diewaldcd226862015-02-11 22:27:45 +0000233 this.embeddedStart = this.embeddedSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000234 this.embeddedEnd = -1;
235 this.embeddedPayload = null;
236 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewald6802acd2014-03-18 18:29:30 +0000237
Nils Diewald83c9b162015-02-03 21:05:07 +0000238 if (this.embeddedDoc != this.wrapDoc) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000239
Nils Diewald83c9b162015-02-03 21:05:07 +0000240 if (DEBUG) {
Akronc12567c2016-06-03 00:40:52 +0200241 log.trace(
242 "(A) Embedded span is in a new document {}",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000243 _currentEmbedded().toString());
Nils Diewald83c9b162015-02-03 21:05:07 +0000244 log.trace("Reset current embedded doc");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000245 };
246
Nils Diewald83c9b162015-02-03 21:05:07 +0000247 /*
248 if (DEBUG)
249 log.trace("Clear all span stores");
250 this.spanStore1.clear();
251 this.spanStore2.clear();
252 */
Nils Diewald6802acd2014-03-18 18:29:30 +0000253
Nils Diewald83c9b162015-02-03 21:05:07 +0000254 this.storeEmbedded();
255
256 // That is necessary to backtrack to the last document!
257 this.inSameDoc = true;
258 this.embeddedDoc = wrapDoc;
259 // this.tryMatch = false; // already covered in nextSpanA
260
261 this.nextSpanA();
262 continue;
263 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000264
Nils Diewald83c9b162015-02-03 21:05:07 +0000265 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000266 log.trace(" Forward embedded span to {}",
267 _currentEmbedded().toString());
268
Nils Diewald83c9b162015-02-03 21:05:07 +0000269 if (this.embeddedDoc != this.wrapDoc) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000270
Akronc3a5df82016-04-29 16:56:53 +0200271 if (DEBUG) {
Akronc12567c2016-06-03 00:40:52 +0200272 log.trace(
273 "(B) Embedded span is in a new document {}",
Akronc3a5df82016-04-29 16:56:53 +0200274 _currentEmbedded().toString());
275 log.trace("Reset current embedded doc");
276 };
277
Nils Diewald83c9b162015-02-03 21:05:07 +0000278 // Is this always a good idea?
279 /*
280 this.spanStore1.clear();
281 this.spanStore2.clear();
282 */
283
284 this.embeddedStart = -1;
285 this.embeddedEnd = -1;
286 this.embeddedPayload = null;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000287
Nils Diewald83c9b162015-02-03 21:05:07 +0000288 if (!this.toSameDoc()) {
289 this.more = false;
290 this.inSameDoc = false;
291 return false;
292 };
293 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000294
Nils Diewald83c9b162015-02-03 21:05:07 +0000295 this.more = true;
296 this.inSameDoc = true;
297 this.tryMatch = true;
Akron6759b042016-04-28 01:25:00 +0200298
Nils Diewald83c9b162015-02-03 21:05:07 +0000299 this.nextSpanB();
300 continue;
301 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000302
Nils Diewald83c9b162015-02-03 21:05:07 +0000303 // Fetch from second store?
304 else {
Akrona7b936d2016-03-04 13:40:54 +0100305
Eliza Margaretha6f989202016-10-14 21:48:29 +0200306 /**
307 * TODO: Change this to a single embedded object!
308 */
Nils Diewald83c9b162015-02-03 21:05:07 +0000309 this.embeddedStart = current.start;
310 this.embeddedEnd = current.end;
311 this.embeddedDoc = current.doc;
Nils Diewald6802acd2014-03-18 18:29:30 +0000312
Nils Diewald83c9b162015-02-03 21:05:07 +0000313 if (current.payload != null) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000314 this.embeddedPayload = new ArrayList<byte[]>(
315 current.payload.size());
Nils Diewald83c9b162015-02-03 21:05:07 +0000316 this.embeddedPayload.addAll(current.payload);
317 }
318 else {
319 this.embeddedPayload = null;
320 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000321
Nils Diewald83c9b162015-02-03 21:05:07 +0000322 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000323 log.trace("Fetch current from SpanStore 2: {}",
324 current.toString());
325
Nils Diewald83c9b162015-02-03 21:05:07 +0000326 this.tryMatch = true;
327 };
328 continue;
329 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000330
Nils Diewald83c9b162015-02-03 21:05:07 +0000331 // get next wrap
332 if (DEBUG)
333 log.trace("In the next wrap branch");
Nils Diewald6802acd2014-03-18 18:29:30 +0000334
Nils Diewald83c9b162015-02-03 21:05:07 +0000335 this.tryMatch = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000336
Nils Diewald83c9b162015-02-03 21:05:07 +0000337 if (DEBUG)
338 log.trace("Try next wrap");
Nils Diewald6802acd2014-03-18 18:29:30 +0000339
Nils Diewald83c9b162015-02-03 21:05:07 +0000340 // shift the stored spans
341 if (!this.spanStore1.isEmpty()) {
342 if (DEBUG) {
Eliza Margaretha6f989202016-10-14 21:48:29 +0200343 log.trace(
344 "Move everything from SpanStore 1 to SpanStore 2:");
Nils Diewald41750bf2015-02-06 17:45:20 +0000345 for (WithinSpan i : this.spanStore1) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000346 log.trace(" | {}", i.toString());
347 };
348 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000349
Nils Diewald83c9b162015-02-03 21:05:07 +0000350 // Move everything to spanStore2
Nils Diewaldbb33da22015-03-04 16:24:25 +0000351 this.spanStore2.addAll(0,
352 (LinkedList<WithinSpan>) this.spanStore1.clone());
Nils Diewald83c9b162015-02-03 21:05:07 +0000353 this.spanStore1.clear();
Nils Diewald6802acd2014-03-18 18:29:30 +0000354
Nils Diewald83c9b162015-02-03 21:05:07 +0000355 if (DEBUG) {
356 log.trace("SpanStore 2 now is:");
Nils Diewald41750bf2015-02-06 17:45:20 +0000357 for (WithinSpan i : this.spanStore2) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000358 log.trace(" | {}", i.toString());
359 };
360 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000361
Nils Diewald83c9b162015-02-03 21:05:07 +0000362 }
363 else if (DEBUG) {
364 log.trace("spanStore 1 is empty");
365 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000366
Nils Diewald83c9b162015-02-03 21:05:07 +0000367 // Get next wrap
368 if (this.wrapSpans.next()) {
Nils Diewald82a4b862014-02-20 21:17:41 +0000369
Nils Diewald83c9b162015-02-03 21:05:07 +0000370 // Reset wrapping information
Nils Diewaldcd226862015-02-11 22:27:45 +0000371 this.wrapStart = this.wrapSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000372 this.wrapEnd = -1;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000373
Nils Diewald83c9b162015-02-03 21:05:07 +0000374 // Retrieve doc information
375 this.wrapDoc = this.wrapSpans.doc();
Nils Diewald20607ab2014-03-20 23:28:36 +0000376
Nils Diewald83c9b162015-02-03 21:05:07 +0000377 if (DEBUG)
Eliza Margaretha6f989202016-10-14 21:48:29 +0200378 log.trace(" Forward wrap span to {}",
379 _currentWrap().toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000380
Nils Diewald83c9b162015-02-03 21:05:07 +0000381 if (this.embeddedDoc != this.wrapDoc) {
382 if (DEBUG)
383 log.trace("Delete all span stores");
384 this.spanStore1.clear();
385 this.spanStore2.clear();
Nils Diewald6802acd2014-03-18 18:29:30 +0000386
Nils Diewald83c9b162015-02-03 21:05:07 +0000387 // Reset embedded:
388 this.embeddedStart = -1;
389 this.embeddedEnd = -1;
390 this.embeddedPayload = null;
Nils Diewald6802acd2014-03-18 18:29:30 +0000391
Nils Diewald83c9b162015-02-03 21:05:07 +0000392 if (!this.toSameDoc()) {
393 this.inSameDoc = false;
394 this.more = false;
395 return false;
396 };
397 }
398 else {
399 this.inSameDoc = true;
400 // Do not match with the current state
401 this.tryMatch = false;
402 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000403
Nils Diewald83c9b162015-02-03 21:05:07 +0000404 this.nextSpanB();
405 continue;
406 }
407 this.more = false;
408 this.inSameDoc = false;
409 this.spanStore1.clear();
410 this.spanStore2.clear();
411 return false;
412 };
Nils Diewald82a4b862014-02-20 21:17:41 +0000413
Nils Diewald83c9b162015-02-03 21:05:07 +0000414 // No more matches
415 return false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000416 };
417
418
419 /**
420 * Skip to the next document
421 */
422 private boolean toSameDoc () throws IOException {
Nils Diewald82a4b862014-02-20 21:17:41 +0000423
Nils Diewald83c9b162015-02-03 21:05:07 +0000424 if (DEBUG)
425 log.trace("Forward to find same docs");
Nils Diewald6802acd2014-03-18 18:29:30 +0000426
Nils Diewald83c9b162015-02-03 21:05:07 +0000427 /*
428 if (this.embeddedSpans == null) {
429 this.more = false;
430 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
431 this.inSameDoc = false;
432 return false;
433 };
Akron6759b042016-04-28 01:25:00 +0200434 */
Nils Diewald6802acd2014-03-18 18:29:30 +0000435
Nils Diewald83c9b162015-02-03 21:05:07 +0000436 this.more = true;
437 this.inSameDoc = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000438
Nils Diewaldbb33da22015-03-04 16:24:25 +0000439 this.wrapDoc = this.wrapSpans.doc();
Akron40f51ee2016-04-22 17:55:14 +0200440
441 // Last doc was reached
442 if (this.wrapDoc == DocIdSetIterator.NO_MORE_DOCS) {
443 this.more = false;
444 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
445 this.inSameDoc = false;
446 return false;
447 };
448
449 // This is just a workaround for an issue that seems to be a bug in Lucene's core code.
450 try {
451 this.embeddedDoc = this.embeddedSpans.doc();
452 }
453 catch (NullPointerException e) {
454 this.more = false;
455 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
456 this.inSameDoc = false;
457 return false;
458 };
Akron6759b042016-04-28 01:25:00 +0200459
Nils Diewald6802acd2014-03-18 18:29:30 +0000460
Nils Diewald83c9b162015-02-03 21:05:07 +0000461 // Clear all spanStores
462 if (this.wrapDoc != this.embeddedDoc) {
463 /*
464 if (DEBUG)
465 log.trace("Clear all spanStores when moving forward");
466 // Why??
467 this.spanStore1.clear();
468 this.spanStore2.clear();
469 */
470 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000471
Nils Diewald83c9b162015-02-03 21:05:07 +0000472 else {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000473 if (DEBUG) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000474 log.trace("Current position already is in the same doc");
475 log.trace("Embedded: {}", _currentEmbedded().toString());
476 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000477 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000478 return true;
479 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000480
Nils Diewaldcd226862015-02-11 22:27:45 +0000481
Nils Diewald83c9b162015-02-03 21:05:07 +0000482 // Forward till match
483 while (this.wrapDoc != this.embeddedDoc) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000484
Nils Diewald83c9b162015-02-03 21:05:07 +0000485 // Forward wrapInfo
486 if (this.wrapDoc < this.embeddedDoc) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000487
Nils Diewald83c9b162015-02-03 21:05:07 +0000488 // Set document information
489 if (!wrapSpans.skipTo(this.embeddedDoc)) {
490 this.more = false;
491 this.inSameDoc = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000492 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000493 return false;
494 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000495
Nils Diewald83c9b162015-02-03 21:05:07 +0000496 if (DEBUG)
497 log.trace("Skip wrap to doc {}", this.embeddedDoc);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000498
Nils Diewald83c9b162015-02-03 21:05:07 +0000499 this.wrapDoc = this.wrapSpans.doc();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000500
Nils Diewald83c9b162015-02-03 21:05:07 +0000501 if (wrapDoc == DocIdSetIterator.NO_MORE_DOCS) {
502 this.more = false;
503 this.inSameDoc = false;
504 this.embeddedDoc = DocIdSetIterator.NO_MORE_DOCS;
505 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
506 return false;
507 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000508
509 /*
510 Remove stored information
511 */
512 if (DEBUG)
513 log.trace("Delete all span stores");
514
515 this.spanStore1.clear();
516 this.spanStore2.clear();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000517
Nils Diewaldcd226862015-02-11 22:27:45 +0000518 if (wrapDoc == embeddedDoc) {
519 this.wrapStart = this.wrapSpans.start();
520 this.embeddedStart = this.embeddedSpans.start();
521 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000522 return true;
Nils Diewaldcd226862015-02-11 22:27:45 +0000523 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000524
Nils Diewaldcd226862015-02-11 22:27:45 +0000525 this.wrapStart = -1;
526 this.embeddedStart = -1;
Nils Diewald83c9b162015-02-03 21:05:07 +0000527 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000528
Nils Diewald83c9b162015-02-03 21:05:07 +0000529 // Forward embedInfo
530 else if (this.wrapDoc > this.embeddedDoc) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000531
Nils Diewald83c9b162015-02-03 21:05:07 +0000532 // Set document information
533 if (!this.embeddedSpans.skipTo(this.wrapDoc)) {
534 this.more = false;
535 this.inSameDoc = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000536 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000537 return false;
538 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000539
Nils Diewald83c9b162015-02-03 21:05:07 +0000540 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000541
Nils Diewald83c9b162015-02-03 21:05:07 +0000542 if (this.embeddedDoc == DocIdSetIterator.NO_MORE_DOCS) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000543 this.more = false;
Nils Diewald83c9b162015-02-03 21:05:07 +0000544 this.inSameDoc = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000545 this.wrapDoc = DocIdSetIterator.NO_MORE_DOCS;
546 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000547 return false;
548 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000549
550 if (DEBUG)
551 log.trace("Skip embedded to doc {}", this.embeddedDoc);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000552
Nils Diewaldcd226862015-02-11 22:27:45 +0000553 this.embeddedStart = this.embeddedSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000554 this.embeddedEnd = -1;
555 this.embeddedPayload = null;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000556
Nils Diewaldcd226862015-02-11 22:27:45 +0000557 if (this.wrapDoc == this.embeddedDoc) {
558 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000559 return true;
Nils Diewaldcd226862015-02-11 22:27:45 +0000560 };
Nils Diewald83c9b162015-02-03 21:05:07 +0000561 }
562 else {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000563 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000564 return false;
565 };
566 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000567
Nils Diewaldcd226862015-02-11 22:27:45 +0000568 this.matchDoc = this.wrapDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000569 return true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000570 };
571
572
573 // Initialize spans
574 private boolean init () throws IOException {
575
Nils Diewald83c9b162015-02-03 21:05:07 +0000576 // There is a missing span
577 if (this.embeddedDoc >= 0)
578 return true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000579
Nils Diewald83c9b162015-02-03 21:05:07 +0000580 if (DEBUG)
581 log.trace("Initialize spans");
Nils Diewald6802acd2014-03-18 18:29:30 +0000582
Nils Diewald83c9b162015-02-03 21:05:07 +0000583 // First tick for both spans
584 if (!(this.embeddedSpans.next() && this.wrapSpans.next())) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000585
Nils Diewald83c9b162015-02-03 21:05:07 +0000586 if (DEBUG)
587 log.trace("No spans initialized");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000588
Nils Diewald83c9b162015-02-03 21:05:07 +0000589 this.embeddedDoc = -1;
590 this.more = false;
591 return false;
592 };
593 this.more = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000594
Nils Diewald83c9b162015-02-03 21:05:07 +0000595 // Store current positions for wrapping and embedded spans
Nils Diewaldbb33da22015-03-04 16:24:25 +0000596 this.wrapDoc = this.wrapSpans.doc();
Nils Diewald83c9b162015-02-03 21:05:07 +0000597 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewald6802acd2014-03-18 18:29:30 +0000598
Nils Diewald83c9b162015-02-03 21:05:07 +0000599 // Set inSameDoc to true, if it is true
600 if (this.embeddedDoc == this.wrapDoc)
601 this.inSameDoc = true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000602
Nils Diewald83c9b162015-02-03 21:05:07 +0000603 return true;
Nils Diewaldf399a672013-11-18 17:55:22 +0000604 };
605
606
Nils Diewaldbb33da22015-03-04 16:24:25 +0000607 /**
608 * Skips to the first match beyond the current, whose document
609 * number is
610 * greater than or equal to <i>target</i>. <p>Returns true iff
611 * there is such
612 * a match. <p>Behaves as if written: <pre class="prettyprint">
613 * boolean skipTo(int target) {
614 * do {
615 * if (!next())
616 * return false;
617 * } while (target > doc());
618 * return true;
619 * }
Nils Diewaldf399a672013-11-18 17:55:22 +0000620 * </pre>
621 * Most implementations are considerably more efficient than that.
622 */
623 public boolean skipTo (int target) throws IOException {
Nils Diewald6802acd2014-03-18 18:29:30 +0000624
Nils Diewald83c9b162015-02-03 21:05:07 +0000625 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000626 log.trace("skipTo document {}/{} -> {}", this.embeddedDoc,
627 this.wrapDoc, target);
Nils Diewaldf399a672013-11-18 17:55:22 +0000628
Nils Diewald83c9b162015-02-03 21:05:07 +0000629 // Initialize spans
630 if (!this.init())
631 return false;
Nils Diewald82a4b862014-02-20 21:17:41 +0000632
Nils Diewaldcd226862015-02-11 22:27:45 +0000633 assert target > this.embeddedDoc;
Nils Diewald82a4b862014-02-20 21:17:41 +0000634
Nils Diewald83c9b162015-02-03 21:05:07 +0000635 // Only forward embedded spans
636 if (this.more && (this.embeddedDoc < target)) {
637 if (this.embeddedSpans.skipTo(target)) {
638 this.inSameDoc = false;
639 this.embeddedStart = -1;
640 this.embeddedEnd = -1;
641 this.embeddedPayload = null;
642 this.embeddedDoc = this.embeddedSpans.doc();
643 }
Nils Diewaldf399a672013-11-18 17:55:22 +0000644
Nils Diewald83c9b162015-02-03 21:05:07 +0000645 // Can't be skipped to target
646 else {
647 this.inSameDoc = false;
648 this.more = false;
649 return false;
650 };
651 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000652
Nils Diewald83c9b162015-02-03 21:05:07 +0000653 // Move to same doc
654 return this.toSameDoc();
Nils Diewaldf399a672013-11-18 17:55:22 +0000655 };
656
Nils Diewaldbb33da22015-03-04 16:24:25 +0000657
Nils Diewald6802acd2014-03-18 18:29:30 +0000658 private void nextSpanA () {
Nils Diewald83c9b162015-02-03 21:05:07 +0000659 if (DEBUG)
660 log.trace("Try wrap next time");
661 this.tryMatch = false;
662 this.nextSpanB = false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000663 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000664
Nils Diewaldbb33da22015-03-04 16:24:25 +0000665
Nils Diewald6802acd2014-03-18 18:29:30 +0000666 private void nextSpanB () {
Nils Diewald83c9b162015-02-03 21:05:07 +0000667 if (DEBUG)
668 log.trace("Try embedded next time");
669 this.nextSpanB = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000670 };
671
672
673 // Check if the current span constellation does match
674 // Store backtracking relevant data and say, how to proceed
Akronc3a5df82016-04-29 16:56:53 +0200675 private boolean doesMatch () throws IOException {
Nils Diewaldcd226862015-02-11 22:27:45 +0000676 if (DEBUG)
677 log.trace("In the match test branch");
Nils Diewald6802acd2014-03-18 18:29:30 +0000678
Nils Diewaldcd226862015-02-11 22:27:45 +0000679 if (this.wrapStart == -1)
680 this.wrapStart = this.wrapSpans.start();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000681
Nils Diewaldcd226862015-02-11 22:27:45 +0000682 if (this.embeddedStart == -1) {
683 this.embeddedStart = this.embeddedSpans.start();
Nils Diewaldcd226862015-02-11 22:27:45 +0000684 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000685
Nils Diewaldcd226862015-02-11 22:27:45 +0000686 this.wrapEnd = -1;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000687
Nils Diewaldcd226862015-02-11 22:27:45 +0000688 // Shortcut to prevent lazyloading of .end()
Akrona7b936d2016-03-04 13:40:54 +0100689 // [---
690 // [---
Nils Diewaldcd226862015-02-11 22:27:45 +0000691 if (this.wrapStart > this.embeddedStart) {
692 // Can't match for in, rin, ew, sw, and m
693 // and will always lead to next_b
694 if (flag >= WITHIN) {
695 this.nextSpanB();
696 if (DEBUG)
697 _logCurrentCase((byte) 16);
698 return false;
699 };
700 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000701
Akrona7b936d2016-03-04 13:40:54 +0100702 // [---
703 // [---
Nils Diewaldcd226862015-02-11 22:27:45 +0000704 else if (this.wrapStart < this.embeddedStart) {
705 // Can't match for sw and m and will always
706 // lead to next_a
Akronc3a5df82016-04-29 16:56:53 +0200707
Nils Diewaldcd226862015-02-11 22:27:45 +0000708 if (flag >= STARTSWITH) {
Akronc3a5df82016-04-29 16:56:53 +0200709 if (DEBUG)
710 log.trace("Shortcut for lazy loading");
711
712 this.storeEmbedded();
Nils Diewaldcd226862015-02-11 22:27:45 +0000713 this.nextSpanA();
Akronc12567c2016-06-03 00:40:52 +0200714
Nils Diewaldcd226862015-02-11 22:27:45 +0000715 if (DEBUG)
716 _logCurrentCase((byte) 15);
717 return false;
718 };
719 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000720
Akron63cd32f2016-04-21 17:56:06 +0200721 if (this.embeddedEnd == -1) {
722 this.embeddedEnd = this.embeddedSpans.end();
723 };
724
Nils Diewaldcd226862015-02-11 22:27:45 +0000725 // Now check correctly
726 byte currentCase = this.withinCase();
Nils Diewald6802acd2014-03-18 18:29:30 +0000727
Nils Diewaldcd226862015-02-11 22:27:45 +0000728 if (DEBUG)
729 _logCurrentCase(currentCase);
Nils Diewald6802acd2014-03-18 18:29:30 +0000730
Nils Diewaldcd226862015-02-11 22:27:45 +0000731 boolean match = false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000732
Nils Diewaldcd226862015-02-11 22:27:45 +0000733 // Test case
734 if (currentCase >= (byte) 3 && currentCase <= (byte) 11) {
735 switch (flag) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000736
Nils Diewaldbb33da22015-03-04 16:24:25 +0000737 case WITHIN:
738 if (currentCase >= 6 && currentCase <= 10
739 && currentCase != 8)
740 match = true;
741 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000742
Nils Diewaldbb33da22015-03-04 16:24:25 +0000743 case REAL_WITHIN:
744 if (currentCase == 6 || currentCase == 9
745 || currentCase == 10)
746 match = true;
747 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000748
Nils Diewaldbb33da22015-03-04 16:24:25 +0000749 case MATCH:
750 if (currentCase == 7)
751 match = true;
752 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000753
Nils Diewaldbb33da22015-03-04 16:24:25 +0000754 case STARTSWITH:
755 if (currentCase == 7 || currentCase == 6)
756 match = true;
757 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000758
Nils Diewaldbb33da22015-03-04 16:24:25 +0000759 case ENDSWITH:
760 if (currentCase == 7 || currentCase == 10)
761 match = true;
762 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000763
Nils Diewaldbb33da22015-03-04 16:24:25 +0000764 case OVERLAP:
Nils Diewaldcd226862015-02-11 22:27:45 +0000765 match = true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000766 break;
767
768 case REAL_OVERLAP:
769 if (currentCase == 3 || currentCase == 11)
770 match = true;
771 break;
Nils Diewaldcd226862015-02-11 22:27:45 +0000772 };
773 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000774
Nils Diewaldcd226862015-02-11 22:27:45 +0000775 try {
776 this.todo(currentCase);
777 }
778 catch (IOException e) {
779 return false;
780 }
781 return match;
Nils Diewald6802acd2014-03-18 18:29:30 +0000782 };
783
784
785 private void _logCurrentCase (byte currentCase) {
Nils Diewaldcd226862015-02-11 22:27:45 +0000786 log.trace("Current Case is {}", currentCase);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000787
Nils Diewaldcd226862015-02-11 22:27:45 +0000788 String _e = _currentEmbedded().toString();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000789
Nils Diewaldcd226862015-02-11 22:27:45 +0000790 log.trace(" |---| {}", _currentWrap().toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000791
Nils Diewaldcd226862015-02-11 22:27:45 +0000792 switch (currentCase) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000793 case 1:
794 log.trace("|-| {}", _e);
795 break;
796 case 2:
797 log.trace("|---| {}", _e);
798 break;
799 case 3:
800 log.trace(" |---| {}", _e);
801 break;
802 case 4:
803 log.trace(" |-----| {}", _e);
804 break;
805 case 5:
806 log.trace(" |-------| {}", _e);
807 break;
808 case 6:
809 log.trace(" |-| {}", _e);
810 break;
811 case 7:
812 log.trace(" |---| {}", _e);
813 break;
814 case 8:
815 log.trace(" |-----| {}", _e);
816 break;
817 case 9:
818 log.trace(" |-| {}", _e);
819 break;
820 case 10:
821 log.trace(" |-| {}", _e);
822 break;
823 case 11:
824 log.trace(" |---| {}", _e);
825 break;
826 case 12:
827 log.trace(" |-| {}", _e);
828 break;
829 case 13:
830 log.trace(" |-| {}", _e);
831 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000832
Nils Diewaldbb33da22015-03-04 16:24:25 +0000833 case 15:
834 // Fake case
835 log.trace(" |---? {}", _e);
836 break;
837
838 case 16:
839 // Fake case
840 log.trace(" |---? {}", _e);
841 break;
Nils Diewaldcd226862015-02-11 22:27:45 +0000842 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000843 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000844
Nils Diewald6802acd2014-03-18 18:29:30 +0000845
Nils Diewald41750bf2015-02-06 17:45:20 +0000846 private WithinSpan _currentWrap () {
847 WithinSpan _wrap = new WithinSpan();
Eliza Margaretha6f989202016-10-14 21:48:29 +0200848 _wrap.start = this.wrapStart != -1 ? this.wrapStart
849 : this.wrapSpans.start();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000850 _wrap.end = this.wrapEnd != -1 ? this.wrapEnd : this.wrapSpans.end();
851 _wrap.doc = this.wrapDoc != -1 ? this.wrapDoc : this.wrapSpans.doc();
Nils Diewald83c9b162015-02-03 21:05:07 +0000852 return _wrap;
Nils Diewald6802acd2014-03-18 18:29:30 +0000853 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000854
855
Nils Diewald41750bf2015-02-06 17:45:20 +0000856 private WithinSpan _currentEmbedded () {
857 WithinSpan _embedded = new WithinSpan();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000858 _embedded.start = this.embeddedStart != -1 ? this.embeddedStart
859 : this.embeddedSpans.start();
860 _embedded.end = this.embeddedEnd != -1 ? this.embeddedEnd
861 : this.embeddedSpans.end();
862 _embedded.doc = this.embeddedDoc != -1 ? this.embeddedDoc
863 : this.embeddedSpans.doc();
Nils Diewald83c9b162015-02-03 21:05:07 +0000864 return _embedded;
Nils Diewald6802acd2014-03-18 18:29:30 +0000865 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000866
Nils Diewald6802acd2014-03-18 18:29:30 +0000867
868 private void todo (byte currentCase) throws IOException {
Akronc3a5df82016-04-29 16:56:53 +0200869 if (DEBUG) {
870 log.trace("Check what to do next ...");
871 };
872
Nils Diewaldbb33da22015-03-04 16:24:25 +0000873 /*
874 Check what to do next with the spans.
875
876 The different follow up steps are:
877 - storeEmbedded -> store span B for later checks
878 - nextSpanA -> forward a
879 - nextSpanB -> forward b
Eliza Margaretha6f989202016-10-14 21:48:29 +0200880
Nils Diewaldbb33da22015-03-04 16:24:25 +0000881 These rules were automatically generated
882 */
Nils Diewald6802acd2014-03-18 18:29:30 +0000883
Nils Diewaldbb33da22015-03-04 16:24:25 +0000884 // Case 1, 2
885 if (currentCase <= (byte) 2) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000886 this.nextSpanB();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000887 }
888
889 // Case 12, 13
890 else if (currentCase >= (byte) 12) {
891 this.storeEmbedded();
Nils Diewald83c9b162015-02-03 21:05:07 +0000892 this.nextSpanA();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000893 }
894
895 // Case 3, 4, 5, 8
896 else if (currentCase <= (byte) 5 || currentCase == (byte) 8) {
897 if (flag <= 2)
898 this.storeEmbedded();
Nils Diewald83c9b162015-02-03 21:05:07 +0000899 this.nextSpanB();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000900 }
901
902 // Case 11
903 else if (currentCase == (byte) 11) {
904 if (this.flag == REAL_WITHIN) {
905 this.nextSpanB();
906 }
907 else if (this.flag >= STARTSWITH) {
Akronc3a5df82016-04-29 16:56:53 +0200908
909 // TODO: May need storeEmbedded
910
Nils Diewaldbb33da22015-03-04 16:24:25 +0000911 this.nextSpanA();
912 }
913 else {
914 this.storeEmbedded();
915 this.nextSpanB();
916 };
917 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000918
919
Nils Diewaldbb33da22015-03-04 16:24:25 +0000920 // Case 6, 7, 9, 10
921 else {
Nils Diewald6802acd2014-03-18 18:29:30 +0000922
Nils Diewaldbb33da22015-03-04 16:24:25 +0000923 if (
924 // Case 6
925 (currentCase == (byte) 6 && this.flag == MATCH) ||
Nils Diewald6802acd2014-03-18 18:29:30 +0000926
Nils Diewaldbb33da22015-03-04 16:24:25 +0000927 // Case 7
928 (currentCase == (byte) 7 && this.flag == REAL_WITHIN) ||
929
930 // Case 9, 10
931 (currentCase >= (byte) 9 && this.flag >= STARTSWITH)) {
932
Akronc3a5df82016-04-29 16:56:53 +0200933 // TODO: May need storeEmbedded
Nils Diewaldbb33da22015-03-04 16:24:25 +0000934 this.nextSpanA();
935 }
936 else {
937 this.storeEmbedded();
938 this.nextSpanB();
939 };
940 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000941 };
942
Nils Diewaldbb33da22015-03-04 16:24:25 +0000943
Nils Diewald83c9b162015-02-03 21:05:07 +0000944 // Store the current embedded span in the first spanStore
Nils Diewald6802acd2014-03-18 18:29:30 +0000945 private void storeEmbedded () throws IOException {
946
Nils Diewald83c9b162015-02-03 21:05:07 +0000947 // Create a current copy
Nils Diewald41750bf2015-02-06 17:45:20 +0000948 WithinSpan embedded = new WithinSpan();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000949 embedded.start = this.embeddedStart != -1 ? this.embeddedStart
950 : this.embeddedSpans.start();
951 embedded.end = this.embeddedEnd != -1 ? this.embeddedEnd
952 : this.embeddedSpans.end();
953 embedded.doc = this.embeddedDoc;
Nils Diewald6802acd2014-03-18 18:29:30 +0000954
Nils Diewald83c9b162015-02-03 21:05:07 +0000955 // Copy payloads
956 if (this.embeddedPayload != null) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000957 embedded.payload = new ArrayList<byte[]>(
958 this.embeddedPayload.size());
Nils Diewald83c9b162015-02-03 21:05:07 +0000959 embedded.payload.addAll(this.embeddedPayload);
960 }
961 else if (this.embeddedSpans.isPayloadAvailable()) {
962 embedded.payload = new ArrayList<byte[]>(3);
963 Collection<byte[]> payload = this.embeddedSpans.getPayload();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000964
Nils Diewald83c9b162015-02-03 21:05:07 +0000965 this.embeddedPayload = new ArrayList<byte[]>(payload.size());
966 this.embeddedPayload.addAll(payload);
967 embedded.payload.addAll(payload);
968 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000969
Nils Diewald83c9b162015-02-03 21:05:07 +0000970 this.spanStore1.add(embedded);
Nils Diewald6802acd2014-03-18 18:29:30 +0000971
Nils Diewald83c9b162015-02-03 21:05:07 +0000972 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000973 log.trace("Pushed to spanStore 1 {} (in storeEmbedded)",
974 embedded.toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000975 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000976
Nils Diewald6802acd2014-03-18 18:29:30 +0000977
978 // Return case number
979 private byte withinCase () {
980
Akron63cd32f2016-04-21 17:56:06 +0200981 if (DEBUG) {
Akron6759b042016-04-28 01:25:00 +0200982 log.trace(">>>>>>>>>>>>>> {}-{}|{}-{}", this.wrapStart,
983 this.wrapSpans.end(), this.embeddedStart,
984 this.embeddedSpans.end());
Akron63cd32f2016-04-21 17:56:06 +0200985 };
986
Nils Diewaldcd226862015-02-11 22:27:45 +0000987 // case 1-5
988 if (this.wrapStart > this.embeddedStart) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000989
Nils Diewaldcd226862015-02-11 22:27:45 +0000990 // Case 1
991 // |-|
992 // |-|
993 if (this.wrapStart > this.embeddedEnd) {
994 return (byte) 1;
995 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000996
Nils Diewaldcd226862015-02-11 22:27:45 +0000997 // Case 2
998 // |-|
999 // |-|
1000 else if (this.wrapStart == this.embeddedEnd) {
1001 return (byte) 2;
1002 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001003
Nils Diewaldcd226862015-02-11 22:27:45 +00001004 // Load wrapEnd
1005 this.wrapEnd = this.wrapSpans.end();
Nils Diewaldbb33da22015-03-04 16:24:25 +00001006
Nils Diewaldcd226862015-02-11 22:27:45 +00001007 // Case 3
1008 // |---|
1009 // |---|
1010 if (this.wrapEnd > this.embeddedEnd) {
1011 return (byte) 3;
1012 }
Nils Diewald6802acd2014-03-18 18:29:30 +00001013
Nils Diewaldcd226862015-02-11 22:27:45 +00001014 // Case 4
1015 // |-|
1016 // |---|
1017 else if (this.wrapEnd == this.embeddedEnd) {
1018 return (byte) 4;
1019 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001020
Nils Diewaldcd226862015-02-11 22:27:45 +00001021 // Case 5
1022 // |-|
1023 // |---|
1024 return (byte) 5;
1025 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001026
Nils Diewaldcd226862015-02-11 22:27:45 +00001027 // case 6-8
1028 else if (this.wrapStart == this.embeddedStart) {
Nils Diewald6802acd2014-03-18 18:29:30 +00001029
Nils Diewaldcd226862015-02-11 22:27:45 +00001030 // Load wrapEnd
1031 this.wrapEnd = this.wrapSpans.end();
Akrona7b936d2016-03-04 13:40:54 +01001032 // this.embeddedEnd = this.embeddedSpans.end();
Nils Diewald6802acd2014-03-18 18:29:30 +00001033
Nils Diewaldcd226862015-02-11 22:27:45 +00001034 // Case 6
1035 // |---|
1036 // |-|
1037 if (this.wrapEnd > this.embeddedEnd) {
1038 return (byte) 6;
1039 }
Nils Diewald6802acd2014-03-18 18:29:30 +00001040
Nils Diewaldcd226862015-02-11 22:27:45 +00001041 // Case 7
1042 // |---|
1043 // |---|
1044 else if (this.wrapEnd == this.embeddedEnd) {
1045 return (byte) 7;
1046 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001047
Nils Diewaldcd226862015-02-11 22:27:45 +00001048 // Case 8
1049 // |-|
1050 // |---|
1051 return (byte) 8;
1052 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001053
Nils Diewaldcd226862015-02-11 22:27:45 +00001054 // wrapStart < embeddedStart
Nils Diewaldf399a672013-11-18 17:55:22 +00001055
Nils Diewaldcd226862015-02-11 22:27:45 +00001056 // Load wrapEnd
1057 this.wrapEnd = this.wrapSpans.end();
Nils Diewald6802acd2014-03-18 18:29:30 +00001058
Nils Diewaldcd226862015-02-11 22:27:45 +00001059 // Case 13
1060 // |-|
1061 // |-|
1062 if (this.wrapEnd < this.embeddedStart) {
1063 return (byte) 13;
1064 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001065
Nils Diewaldcd226862015-02-11 22:27:45 +00001066 // Case 9
1067 // |---|
1068 // |-|
1069 else if (this.wrapEnd > this.embeddedEnd) {
1070 return (byte) 9;
1071 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001072
Nils Diewaldcd226862015-02-11 22:27:45 +00001073 // Case 10
1074 // |---|
1075 // |-|
1076 else if (this.wrapEnd == this.embeddedEnd) {
1077 return (byte) 10;
1078 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001079
Nils Diewaldcd226862015-02-11 22:27:45 +00001080 // Case 11
1081 // |---|
1082 // |---|
1083 else if (this.wrapEnd > this.embeddedStart) {
1084 return (byte) 11;
1085 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001086
Nils Diewaldcd226862015-02-11 22:27:45 +00001087 // case 12
1088 // |-|
1089 // |-|
1090 return (byte) 12;
Nils Diewaldbb33da22015-03-04 16:24:25 +00001091 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001092
1093
Nils Diewaldbb33da22015-03-04 16:24:25 +00001094 /**
1095 * Returns the document number of the current match. Initially
1096 * invalid.
1097 */
Nils Diewaldf399a672013-11-18 17:55:22 +00001098 @Override
1099 public int doc () {
Nils Diewaldcd226862015-02-11 22:27:45 +00001100 return matchDoc;
Nils Diewaldf399a672013-11-18 17:55:22 +00001101 };
1102
Nils Diewaldbb33da22015-03-04 16:24:25 +00001103
1104 /**
1105 * Returns the start position of the embedding wrap. Initially
1106 * invalid.
1107 */
Nils Diewaldf399a672013-11-18 17:55:22 +00001108 @Override
1109 public int start () {
Nils Diewaldcd226862015-02-11 22:27:45 +00001110 return matchStart;
Nils Diewaldf399a672013-11-18 17:55:22 +00001111 };
1112
Nils Diewaldbb33da22015-03-04 16:24:25 +00001113
1114 /**
1115 * Returns the end position of the embedding wrap. Initially
1116 * invalid.
1117 */
Nils Diewaldf399a672013-11-18 17:55:22 +00001118 @Override
1119 public int end () {
Nils Diewaldbb33da22015-03-04 16:24:25 +00001120 return matchEnd;
Nils Diewaldf399a672013-11-18 17:55:22 +00001121 };
1122
Nils Diewaldbb33da22015-03-04 16:24:25 +00001123
Nils Diewaldf399a672013-11-18 17:55:22 +00001124 /**
1125 * Returns the payload data for the current span.
1126 * This is invalid until {@link #next()} is called for
1127 * the first time.
1128 * This method must not be called more than once after each call
1129 * of {@link #next()}. However, most payloads are loaded lazily,
1130 * so if the payload data for the current position is not needed,
Nils Diewaldbb33da22015-03-04 16:24:25 +00001131 * this method may not be called at all for performance reasons.
1132 * An ordered
1133 * SpanQuery does not lazy load, so if you have payloads in your
1134 * index and
1135 * you do not want ordered SpanNearQuerys to collect payloads, you
1136 * can
Nils Diewalde0725012014-09-25 19:32:52 +00001137 * disable collection with a constructor option.<br>
Nils Diewaldf399a672013-11-18 17:55:22 +00001138 * <br>
Nils Diewaldbb33da22015-03-04 16:24:25 +00001139 * Note that the return type is a collection, thus the ordering
1140 * should not be relied upon.
Nils Diewaldf399a672013-11-18 17:55:22 +00001141 * <br/>
Nils Diewaldbb33da22015-03-04 16:24:25 +00001142 *
Nils Diewaldf399a672013-11-18 17:55:22 +00001143 * @lucene.experimental
Nils Diewaldbb33da22015-03-04 16:24:25 +00001144 *
1145 * @return a List of byte arrays containing the data of this
1146 * payload, otherwise null if isPayloadAvailable is false
1147 * @throws IOException
1148 * if there is a low-level I/O error
Nils Diewaldf399a672013-11-18 17:55:22 +00001149 */
1150 // public abstract Collection<byte[]> getPayload() throws IOException;
1151 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001152 public Collection<byte[]> getPayload () throws IOException {
1153 return matchPayload;
Nils Diewaldf399a672013-11-18 17:55:22 +00001154 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001155
Nils Diewaldf399a672013-11-18 17:55:22 +00001156
1157 /**
1158 * Checks if a payload can be loaded at this position.
1159 * <p/>
Nils Diewaldbb33da22015-03-04 16:24:25 +00001160 * Payloads can only be loaded once per call to {@link #next()}.
1161 *
1162 * @return true if there is a payload available at this position
1163 * that can be loaded
Nils Diewaldf399a672013-11-18 17:55:22 +00001164 */
1165 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001166 public boolean isPayloadAvailable () {
1167 return matchPayload.isEmpty() == false;
Nils Diewaldf399a672013-11-18 17:55:22 +00001168 };
1169
Nils Diewaldbb33da22015-03-04 16:24:25 +00001170
Nils Diewaldf399a672013-11-18 17:55:22 +00001171 // Todo: This may be in the wrong version
1172 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001173 public long cost () {
1174 return wrapSpans.cost() + embeddedSpans.cost();
Nils Diewaldf399a672013-11-18 17:55:22 +00001175 };
1176
Nils Diewaldbb33da22015-03-04 16:24:25 +00001177
Nils Diewaldf399a672013-11-18 17:55:22 +00001178 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001179 public String toString () {
Eliza Margaretha6f989202016-10-14 21:48:29 +02001180 return getClass().getName() + "(" + query.toString() + ")@"
1181 + (embeddedDoc <= 0 ? "START"
1182 : (more ? (doc() + ":" + start() + "-" + end())
1183 : "END"));
Nils Diewaldf399a672013-11-18 17:55:22 +00001184 };
Nils Diewald41750bf2015-02-06 17:45:20 +00001185
1186
1187 // This was formerly the default candidate span class,
1188 // before it was refactored out
Nils Diewaldbb33da22015-03-04 16:24:25 +00001189 private class WithinSpan implements Comparable<WithinSpan>, Cloneable {
1190 public int start = -1, end = -1, doc = -1;
Nils Diewald41750bf2015-02-06 17:45:20 +00001191
1192 public Collection<byte[]> payload;
1193
1194 public short elementRef = -1;
Nils Diewaldbb33da22015-03-04 16:24:25 +00001195
Akron6759b042016-04-28 01:25:00 +02001196
Nils Diewald41750bf2015-02-06 17:45:20 +00001197 public void clear () {
1198 this.start = -1;
1199 this.end = -1;
1200 this.doc = -1;
1201 clearPayload();
1202 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001203
1204
Nils Diewald41750bf2015-02-06 17:45:20 +00001205 @Override
1206 public int compareTo (WithinSpan o) {
1207 /* optimizable for short numbers to return o.end - this.end */
1208 if (this.doc < o.doc) {
1209 return -1;
1210 }
1211 else if (this.doc == o.doc) {
1212 if (this.start < o.start) {
1213 return -1;
1214 }
1215 else if (this.start == o.start) {
1216 if (this.end < o.end)
1217 return -1;
1218 };
1219 };
1220 return 1;
1221 };
1222
Nils Diewaldbb33da22015-03-04 16:24:25 +00001223
1224 public short getElementRef () {
Nils Diewald41750bf2015-02-06 17:45:20 +00001225 return elementRef;
1226 }
1227
Nils Diewaldbb33da22015-03-04 16:24:25 +00001228
1229 public void setElementRef (short elementRef) {
Nils Diewald41750bf2015-02-06 17:45:20 +00001230 this.elementRef = elementRef;
1231 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001232
1233
Nils Diewald41750bf2015-02-06 17:45:20 +00001234 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001235 public Object clone () {
Nils Diewald41750bf2015-02-06 17:45:20 +00001236 WithinSpan span = new WithinSpan();
1237 span.start = this.start;
1238 span.end = this.end;
1239 span.doc = this.doc;
1240 span.payload.addAll(this.payload);
1241 return span;
1242 };
1243
Nils Diewaldbb33da22015-03-04 16:24:25 +00001244
Nils Diewald41750bf2015-02-06 17:45:20 +00001245 public WithinSpan copyFrom (WithinSpan o) {
1246 this.start = o.start;
1247 this.end = o.end;
1248 this.doc = o.doc;
1249 // this.clearPayload();
1250 this.payload.addAll(o.payload);
1251 return this;
1252 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001253
1254
Nils Diewald41750bf2015-02-06 17:45:20 +00001255 public void clearPayload () {
1256 if (this.payload != null)
1257 this.payload.clear();
1258 };
1259
Nils Diewaldbb33da22015-03-04 16:24:25 +00001260
Nils Diewald41750bf2015-02-06 17:45:20 +00001261 public String toString () {
1262 StringBuilder sb = new StringBuilder("[");
Nils Diewaldbb33da22015-03-04 16:24:25 +00001263 return sb.append(this.start).append('-').append(this.end)
1264 .append('(').append(this.doc).append(')').append(']')
1265 .toString();
Nils Diewald41750bf2015-02-06 17:45:20 +00001266 };
1267 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001268};