blob: 92347163c36a0e2b7a530a39009acfc515d6e6a2 [file] [log] [blame]
Nils Diewaldf399a672013-11-18 17:55:22 +00001package de.ids_mannheim.korap.query.spans;
2
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.Map;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.ids_mannheim.korap.query.SpanWithinQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +000019
Nils Diewald83c9b162015-02-03 21:05:07 +000020
/*
 * SpanWithinQuery is DEPRECATED and will
 * be replaced by SpanPositionQuery in the near future.
 *
 * TODO: Support exclusivity
 * TODO: Use the term "queue" and implement it similar to SpanOrQuery
 * TODO: Implement an incrStartPos() method to forward an embedded span
 * until the start position is higher than the current start position.
 */
30
/**
 * Compare two spans and check how they relate positionally.
 *
 * @author diewald
 */
Nils Diewaldf399a672013-11-18 17:55:22 +000036public class WithinSpans extends Spans {
Nils Diewald82a4b862014-02-20 21:17:41 +000037
// Logger (shared by all instances)
private static final Logger log = LoggerFactory.getLogger(WithinSpans.class);

// This advises the java compiler to ignore all loggings
public static final boolean DEBUG = false;

// Set to false as soon as one of the sub spans is exhausted
private boolean more = false;

// Boolean value indicating if span B
// should be forwarded next (true)
// or span A (false);
boolean nextSpanB = true;

// Cached document and position information of the wrapping span,
// the embedded span and the current match.
// A value of -1 means "not yet retrieved".
private int wrapStart = -1, wrapEnd = -1, embeddedStart = -1,
        embeddedEnd = -1, wrapDoc = -1, embeddedDoc = -1, matchDoc = -1,
        matchStart = -1, matchEnd = -1;

// Payloads of the current match and of the current embedded span
private Collection<byte[]> matchPayload;
private Collection<byte[]> embeddedPayload;

// Indicates that the wrap and the embedded spans are in the same doc
private boolean inSameDoc = false;

/*
  Supported flags are currently (A is the wrapping span,
  B is the embedded span):
  ov  -> 0  | overlap:      A & B != empty
  rov -> 2  | real overlap: A & B != empty and
            |               ((A | B) != A or
            |               (A | B) != B)
  in  -> 4  | within:       A | B = A
  rin -> 6  | real within:  A | B = A and
            |               A & B != A
  ew  -> 8  | endswith:     A | B = A and
            |               A.end = B.end
  sw  -> 10 | startswith:   A | B = A and
            |               A.start = B.start
  m   -> 12 | A = B
*/
public static final byte OVERLAP = (byte) 0, REAL_OVERLAP = (byte) 2,
        WITHIN = (byte) 4, REAL_WITHIN = (byte) 6, ENDSWITH = (byte) 8,
        STARTSWITH = (byte) 10, MATCH = (byte) 12;

// The positional condition to check (one of the constants above)
private byte flag;

// Contains the query
private SpanWithinQuery query;

// Representing the first operand
private final Spans wrapSpans;

// Representing the second operand
private final Spans embeddedSpans;

// Check flag if the current constellation
// was checked yet
private boolean tryMatch = true;

// Two buffers for storing candidates
private LinkedList<WithinSpan> spanStore1, spanStore2;
97
Nils Diewaldf399a672013-11-18 17:55:22 +000098
/**
 * Construct a new WithinSpans object.
 *
 * @param spanWithinQuery
 *            The parental {@link SpanWithinQuery}.
 * @param context
 *            The {@link LeafReaderContext}.
 * @param acceptDocs
 *            Bit vector representing the documents
 *            to be searched in.
 * @param termContexts
 *            A map managing {@link TermContext TermContexts}.
 * @param flag
 *            A byte flag indicating the positional condition of
 *            the sub spans.
 * @throws IOException
 *             if retrieving the spans of one of the operands fails.
 */
public WithinSpans (SpanWithinQuery spanWithinQuery,
                    LeafReaderContext context, Bits acceptDocs,
                    Map<Term, TermContext> termContexts, byte flag)
        throws IOException {

    if (DEBUG)
        log.trace("Construct WithinSpans");

    // Init copies
    this.matchPayload = new LinkedList<byte[]>();

    // Get spans of both operands
    this.wrapSpans = spanWithinQuery.wrap().getSpans(context, acceptDocs,
            termContexts);
    this.embeddedSpans = spanWithinQuery.embedded().getSpans(context,
            acceptDocs, termContexts);

    this.flag = flag;

    // SpanStores for backtracking
    this.spanStore1 = new LinkedList<WithinSpan>();
    this.spanStore2 = new LinkedList<WithinSpan>();

    // kept for toString() only.
    this.query = spanWithinQuery;
}
Nils Diewaldbb33da22015-03-04 16:24:25 +0000141
Nils Diewaldf399a672013-11-18 17:55:22 +0000142
Nils Diewald6802acd2014-03-18 18:29:30 +0000143 // Move to next match, returning true iff any such exists.
Nils Diewaldf399a672013-11-18 17:55:22 +0000144 @Override
145 public boolean next () throws IOException {
Nils Diewald6802acd2014-03-18 18:29:30 +0000146
Nils Diewald83c9b162015-02-03 21:05:07 +0000147 if (DEBUG)
148 log.trace("Next with docs {}, {}", wrapDoc, embeddedDoc);
Nils Diewaldf399a672013-11-18 17:55:22 +0000149
Nils Diewald83c9b162015-02-03 21:05:07 +0000150 // Initialize spans
151 if (!this.init()) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000152 this.more = false;
153 this.inSameDoc = false;
154 this.wrapDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000155 this.embeddedDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000156 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000157 return false;
158 };
Nils Diewald82a4b862014-02-20 21:17:41 +0000159
Nils Diewald83c9b162015-02-03 21:05:07 +0000160 // There are more spans and they are in the same document
Nils Diewald6802acd2014-03-18 18:29:30 +0000161
Nils Diewald83c9b162015-02-03 21:05:07 +0000162 while (this.more && (wrapDoc == embeddedDoc ||
Nils Diewaldbb33da22015-03-04 16:24:25 +0000163 // this.inSameDoc ||
164 this.toSameDoc())) {
Akronc3a5df82016-04-29 16:56:53 +0200165
Nils Diewald83c9b162015-02-03 21:05:07 +0000166 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000167 log.trace("We are in the same doc: {}, {}", wrapDoc,
168 embeddedDoc);
Nils Diewald6802acd2014-03-18 18:29:30 +0000169
Nils Diewald83c9b162015-02-03 21:05:07 +0000170 // Both spans match according to the flag
171 // Silently the next operations are prepared
172 if (this.tryMatch && this.doesMatch()) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000173
Nils Diewald83c9b162015-02-03 21:05:07 +0000174 if (this.wrapEnd == -1)
175 this.wrapEnd = this.wrapSpans.end();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000176
177 this.matchStart = embeddedStart < wrapStart ? embeddedStart
178 : wrapStart;
179 this.matchEnd = embeddedEnd > wrapEnd ? embeddedEnd : wrapEnd;
180 this.matchDoc = embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000181 this.matchPayload.clear();
Nils Diewald82a4b862014-02-20 21:17:41 +0000182
Nils Diewald83c9b162015-02-03 21:05:07 +0000183 if (this.embeddedPayload != null)
184 matchPayload.addAll(embeddedPayload);
Nils Diewaldf399a672013-11-18 17:55:22 +0000185
Nils Diewald83c9b162015-02-03 21:05:07 +0000186 if (this.wrapSpans.isPayloadAvailable())
187 this.matchPayload.addAll(wrapSpans.getPayload());
Nils Diewaldf399a672013-11-18 17:55:22 +0000188
Nils Diewald83c9b162015-02-03 21:05:07 +0000189 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000190 log.trace(" ---- MATCH ---- {}-{} ({})", matchStart,
191 matchEnd, matchDoc);
Nils Diewald6802acd2014-03-18 18:29:30 +0000192
Nils Diewald83c9b162015-02-03 21:05:07 +0000193 this.tryMatch = false;
194 return true;
195 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000196
Nils Diewald83c9b162015-02-03 21:05:07 +0000197 // Get next embedded
198 else if (this.nextSpanB) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000199
Nils Diewald83c9b162015-02-03 21:05:07 +0000200 // Next time try the match
201 this.tryMatch = true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000202
Nils Diewald83c9b162015-02-03 21:05:07 +0000203 if (DEBUG)
204 log.trace("In the next embedded branch");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000205
Nils Diewald41750bf2015-02-06 17:45:20 +0000206 WithinSpan current = null;
Nils Diewald6802acd2014-03-18 18:29:30 +0000207
Nils Diewald83c9b162015-02-03 21:05:07 +0000208 // New - fetch until theres a span in the correct doc or bigger
209 while (!this.spanStore2.isEmpty()) {
210 current = spanStore2.removeFirst();
211 if (current.doc >= this.wrapDoc)
212 break;
213 };
214
215
216 // There is nothing in the second store
217 if (current == null) {
218 if (DEBUG)
219 log.trace("SpanStore 2 is empty");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000220
Nils Diewald83c9b162015-02-03 21:05:07 +0000221 // Forward with embedding
222 if (!this.embeddedSpans.next()) {
Akronc3a5df82016-04-29 16:56:53 +0200223
224 // TODO: May need storeEmpdedded
Nils Diewald83c9b162015-02-03 21:05:07 +0000225 this.nextSpanA();
226 continue;
227 }
228
229 else if (DEBUG) {
230 log.trace("Fetch next embedded span");
231 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000232
Nils Diewaldcd226862015-02-11 22:27:45 +0000233 this.embeddedStart = this.embeddedSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000234 this.embeddedEnd = -1;
235 this.embeddedPayload = null;
236 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewald6802acd2014-03-18 18:29:30 +0000237
Nils Diewald83c9b162015-02-03 21:05:07 +0000238 if (this.embeddedDoc != this.wrapDoc) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000239
Nils Diewald83c9b162015-02-03 21:05:07 +0000240 if (DEBUG) {
Akronc12567c2016-06-03 00:40:52 +0200241 log.trace(
242 "(A) Embedded span is in a new document {}",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000243 _currentEmbedded().toString());
Nils Diewald83c9b162015-02-03 21:05:07 +0000244 log.trace("Reset current embedded doc");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000245 };
246
Nils Diewald83c9b162015-02-03 21:05:07 +0000247 /*
248 if (DEBUG)
249 log.trace("Clear all span stores");
250 this.spanStore1.clear();
251 this.spanStore2.clear();
252 */
Nils Diewald6802acd2014-03-18 18:29:30 +0000253
Nils Diewald83c9b162015-02-03 21:05:07 +0000254 this.storeEmbedded();
255
256 // That is necessary to backtrack to the last document!
257 this.inSameDoc = true;
258 this.embeddedDoc = wrapDoc;
259 // this.tryMatch = false; // already covered in nextSpanA
260
261 this.nextSpanA();
262 continue;
263 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000264
Nils Diewald83c9b162015-02-03 21:05:07 +0000265 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000266 log.trace(" Forward embedded span to {}",
267 _currentEmbedded().toString());
268
Nils Diewald83c9b162015-02-03 21:05:07 +0000269 if (this.embeddedDoc != this.wrapDoc) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000270
Akronc3a5df82016-04-29 16:56:53 +0200271 if (DEBUG) {
Akronc12567c2016-06-03 00:40:52 +0200272 log.trace(
273 "(B) Embedded span is in a new document {}",
Akronc3a5df82016-04-29 16:56:53 +0200274 _currentEmbedded().toString());
275 log.trace("Reset current embedded doc");
276 };
277
Nils Diewald83c9b162015-02-03 21:05:07 +0000278 // Is this always a good idea?
279 /*
280 this.spanStore1.clear();
281 this.spanStore2.clear();
282 */
283
284 this.embeddedStart = -1;
285 this.embeddedEnd = -1;
286 this.embeddedPayload = null;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000287
Nils Diewald83c9b162015-02-03 21:05:07 +0000288 if (!this.toSameDoc()) {
289 this.more = false;
290 this.inSameDoc = false;
291 return false;
292 };
293 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000294
Nils Diewald83c9b162015-02-03 21:05:07 +0000295 this.more = true;
296 this.inSameDoc = true;
297 this.tryMatch = true;
Akron6759b042016-04-28 01:25:00 +0200298
Nils Diewald83c9b162015-02-03 21:05:07 +0000299 this.nextSpanB();
300 continue;
301 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000302
Nils Diewald83c9b162015-02-03 21:05:07 +0000303 // Fetch from second store?
304 else {
Akrona7b936d2016-03-04 13:40:54 +0100305
Nils Diewald83c9b162015-02-03 21:05:07 +0000306 /** TODO: Change this to a single embedded object! */
307 this.embeddedStart = current.start;
308 this.embeddedEnd = current.end;
309 this.embeddedDoc = current.doc;
Nils Diewald6802acd2014-03-18 18:29:30 +0000310
Nils Diewald83c9b162015-02-03 21:05:07 +0000311 if (current.payload != null) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000312 this.embeddedPayload = new ArrayList<byte[]>(
313 current.payload.size());
Nils Diewald83c9b162015-02-03 21:05:07 +0000314 this.embeddedPayload.addAll(current.payload);
315 }
316 else {
317 this.embeddedPayload = null;
318 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000319
Nils Diewald83c9b162015-02-03 21:05:07 +0000320 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000321 log.trace("Fetch current from SpanStore 2: {}",
322 current.toString());
323
Nils Diewald83c9b162015-02-03 21:05:07 +0000324 this.tryMatch = true;
325 };
326 continue;
327 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000328
Nils Diewald83c9b162015-02-03 21:05:07 +0000329 // get next wrap
330 if (DEBUG)
331 log.trace("In the next wrap branch");
Nils Diewald6802acd2014-03-18 18:29:30 +0000332
Nils Diewald83c9b162015-02-03 21:05:07 +0000333 this.tryMatch = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000334
Nils Diewald83c9b162015-02-03 21:05:07 +0000335 if (DEBUG)
336 log.trace("Try next wrap");
Nils Diewald6802acd2014-03-18 18:29:30 +0000337
Nils Diewald83c9b162015-02-03 21:05:07 +0000338 // shift the stored spans
339 if (!this.spanStore1.isEmpty()) {
340 if (DEBUG) {
341 log.trace("Move everything from SpanStore 1 to SpanStore 2:");
Nils Diewald41750bf2015-02-06 17:45:20 +0000342 for (WithinSpan i : this.spanStore1) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000343 log.trace(" | {}", i.toString());
344 };
345 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000346
Nils Diewald83c9b162015-02-03 21:05:07 +0000347 // Move everything to spanStore2
Nils Diewaldbb33da22015-03-04 16:24:25 +0000348 this.spanStore2.addAll(0,
349 (LinkedList<WithinSpan>) this.spanStore1.clone());
Nils Diewald83c9b162015-02-03 21:05:07 +0000350 this.spanStore1.clear();
Nils Diewald6802acd2014-03-18 18:29:30 +0000351
Nils Diewald83c9b162015-02-03 21:05:07 +0000352 if (DEBUG) {
353 log.trace("SpanStore 2 now is:");
Nils Diewald41750bf2015-02-06 17:45:20 +0000354 for (WithinSpan i : this.spanStore2) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000355 log.trace(" | {}", i.toString());
356 };
357 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000358
Nils Diewald83c9b162015-02-03 21:05:07 +0000359 }
360 else if (DEBUG) {
361 log.trace("spanStore 1 is empty");
362 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000363
Nils Diewald83c9b162015-02-03 21:05:07 +0000364 // Get next wrap
365 if (this.wrapSpans.next()) {
Nils Diewald82a4b862014-02-20 21:17:41 +0000366
Nils Diewald83c9b162015-02-03 21:05:07 +0000367 // Reset wrapping information
Nils Diewaldcd226862015-02-11 22:27:45 +0000368 this.wrapStart = this.wrapSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000369 this.wrapEnd = -1;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000370
Nils Diewald83c9b162015-02-03 21:05:07 +0000371 // Retrieve doc information
372 this.wrapDoc = this.wrapSpans.doc();
Nils Diewald20607ab2014-03-20 23:28:36 +0000373
Nils Diewald83c9b162015-02-03 21:05:07 +0000374 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000375 log.trace(" Forward wrap span to {}", _currentWrap()
376 .toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000377
Nils Diewald83c9b162015-02-03 21:05:07 +0000378 if (this.embeddedDoc != this.wrapDoc) {
379 if (DEBUG)
380 log.trace("Delete all span stores");
381 this.spanStore1.clear();
382 this.spanStore2.clear();
Nils Diewald6802acd2014-03-18 18:29:30 +0000383
Nils Diewald83c9b162015-02-03 21:05:07 +0000384 // Reset embedded:
385 this.embeddedStart = -1;
386 this.embeddedEnd = -1;
387 this.embeddedPayload = null;
Nils Diewald6802acd2014-03-18 18:29:30 +0000388
Nils Diewald83c9b162015-02-03 21:05:07 +0000389 if (!this.toSameDoc()) {
390 this.inSameDoc = false;
391 this.more = false;
392 return false;
393 };
394 }
395 else {
396 this.inSameDoc = true;
397 // Do not match with the current state
398 this.tryMatch = false;
399 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000400
Nils Diewald83c9b162015-02-03 21:05:07 +0000401 this.nextSpanB();
402 continue;
403 }
404 this.more = false;
405 this.inSameDoc = false;
406 this.spanStore1.clear();
407 this.spanStore2.clear();
408 return false;
409 };
Nils Diewald82a4b862014-02-20 21:17:41 +0000410
Nils Diewald83c9b162015-02-03 21:05:07 +0000411 // No more matches
412 return false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000413 };
414
415
416 /**
417 * Skip to the next document
418 */
419 private boolean toSameDoc () throws IOException {
Nils Diewald82a4b862014-02-20 21:17:41 +0000420
Nils Diewald83c9b162015-02-03 21:05:07 +0000421 if (DEBUG)
422 log.trace("Forward to find same docs");
Nils Diewald6802acd2014-03-18 18:29:30 +0000423
Nils Diewald83c9b162015-02-03 21:05:07 +0000424 /*
425 if (this.embeddedSpans == null) {
426 this.more = false;
427 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
428 this.inSameDoc = false;
429 return false;
430 };
Akron6759b042016-04-28 01:25:00 +0200431 */
Nils Diewald6802acd2014-03-18 18:29:30 +0000432
Nils Diewald83c9b162015-02-03 21:05:07 +0000433 this.more = true;
434 this.inSameDoc = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000435
Nils Diewaldbb33da22015-03-04 16:24:25 +0000436 this.wrapDoc = this.wrapSpans.doc();
Akron40f51ee2016-04-22 17:55:14 +0200437
438 // Last doc was reached
439 if (this.wrapDoc == DocIdSetIterator.NO_MORE_DOCS) {
440 this.more = false;
441 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
442 this.inSameDoc = false;
443 return false;
444 };
445
446 // This is just a workaround for an issue that seems to be a bug in Lucene's core code.
447 try {
448 this.embeddedDoc = this.embeddedSpans.doc();
449 }
450 catch (NullPointerException e) {
451 this.more = false;
452 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
453 this.inSameDoc = false;
454 return false;
455 };
Akron6759b042016-04-28 01:25:00 +0200456
Nils Diewald6802acd2014-03-18 18:29:30 +0000457
Nils Diewald83c9b162015-02-03 21:05:07 +0000458 // Clear all spanStores
459 if (this.wrapDoc != this.embeddedDoc) {
460 /*
461 if (DEBUG)
462 log.trace("Clear all spanStores when moving forward");
463 // Why??
464 this.spanStore1.clear();
465 this.spanStore2.clear();
466 */
467 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000468
Nils Diewald83c9b162015-02-03 21:05:07 +0000469 else {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000470 if (DEBUG) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000471 log.trace("Current position already is in the same doc");
472 log.trace("Embedded: {}", _currentEmbedded().toString());
473 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000474 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000475 return true;
476 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000477
Nils Diewaldcd226862015-02-11 22:27:45 +0000478
Nils Diewald83c9b162015-02-03 21:05:07 +0000479 // Forward till match
480 while (this.wrapDoc != this.embeddedDoc) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000481
Nils Diewald83c9b162015-02-03 21:05:07 +0000482 // Forward wrapInfo
483 if (this.wrapDoc < this.embeddedDoc) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000484
Nils Diewald83c9b162015-02-03 21:05:07 +0000485 // Set document information
486 if (!wrapSpans.skipTo(this.embeddedDoc)) {
487 this.more = false;
488 this.inSameDoc = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000489 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000490 return false;
491 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000492
Nils Diewald83c9b162015-02-03 21:05:07 +0000493 if (DEBUG)
494 log.trace("Skip wrap to doc {}", this.embeddedDoc);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000495
Nils Diewald83c9b162015-02-03 21:05:07 +0000496 this.wrapDoc = this.wrapSpans.doc();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000497
Nils Diewald83c9b162015-02-03 21:05:07 +0000498 if (wrapDoc == DocIdSetIterator.NO_MORE_DOCS) {
499 this.more = false;
500 this.inSameDoc = false;
501 this.embeddedDoc = DocIdSetIterator.NO_MORE_DOCS;
502 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
503 return false;
504 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000505
506 /*
507 Remove stored information
508 */
509 if (DEBUG)
510 log.trace("Delete all span stores");
511
512 this.spanStore1.clear();
513 this.spanStore2.clear();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000514
Nils Diewaldcd226862015-02-11 22:27:45 +0000515 if (wrapDoc == embeddedDoc) {
516 this.wrapStart = this.wrapSpans.start();
517 this.embeddedStart = this.embeddedSpans.start();
518 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000519 return true;
Nils Diewaldcd226862015-02-11 22:27:45 +0000520 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000521
Nils Diewaldcd226862015-02-11 22:27:45 +0000522 this.wrapStart = -1;
523 this.embeddedStart = -1;
Nils Diewald83c9b162015-02-03 21:05:07 +0000524 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000525
Nils Diewald83c9b162015-02-03 21:05:07 +0000526 // Forward embedInfo
527 else if (this.wrapDoc > this.embeddedDoc) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000528
Nils Diewald83c9b162015-02-03 21:05:07 +0000529 // Set document information
530 if (!this.embeddedSpans.skipTo(this.wrapDoc)) {
531 this.more = false;
532 this.inSameDoc = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000533 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000534 return false;
535 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000536
Nils Diewald83c9b162015-02-03 21:05:07 +0000537 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000538
Nils Diewald83c9b162015-02-03 21:05:07 +0000539 if (this.embeddedDoc == DocIdSetIterator.NO_MORE_DOCS) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000540 this.more = false;
Nils Diewald83c9b162015-02-03 21:05:07 +0000541 this.inSameDoc = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000542 this.wrapDoc = DocIdSetIterator.NO_MORE_DOCS;
543 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000544 return false;
545 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000546
547 if (DEBUG)
548 log.trace("Skip embedded to doc {}", this.embeddedDoc);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000549
Nils Diewaldcd226862015-02-11 22:27:45 +0000550 this.embeddedStart = this.embeddedSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000551 this.embeddedEnd = -1;
552 this.embeddedPayload = null;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000553
Nils Diewaldcd226862015-02-11 22:27:45 +0000554 if (this.wrapDoc == this.embeddedDoc) {
555 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000556 return true;
Nils Diewaldcd226862015-02-11 22:27:45 +0000557 };
Nils Diewald83c9b162015-02-03 21:05:07 +0000558 }
559 else {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000560 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000561 return false;
562 };
563 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000564
Nils Diewaldcd226862015-02-11 22:27:45 +0000565 this.matchDoc = this.wrapDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000566 return true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000567 };
568
569
570 // Initialize spans
571 private boolean init () throws IOException {
572
Nils Diewald83c9b162015-02-03 21:05:07 +0000573 // There is a missing span
574 if (this.embeddedDoc >= 0)
575 return true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000576
Nils Diewald83c9b162015-02-03 21:05:07 +0000577 if (DEBUG)
578 log.trace("Initialize spans");
Nils Diewald6802acd2014-03-18 18:29:30 +0000579
Nils Diewald83c9b162015-02-03 21:05:07 +0000580 // First tick for both spans
581 if (!(this.embeddedSpans.next() && this.wrapSpans.next())) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000582
Nils Diewald83c9b162015-02-03 21:05:07 +0000583 if (DEBUG)
584 log.trace("No spans initialized");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000585
Nils Diewald83c9b162015-02-03 21:05:07 +0000586 this.embeddedDoc = -1;
587 this.more = false;
588 return false;
589 };
590 this.more = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000591
Nils Diewald83c9b162015-02-03 21:05:07 +0000592 // Store current positions for wrapping and embedded spans
Nils Diewaldbb33da22015-03-04 16:24:25 +0000593 this.wrapDoc = this.wrapSpans.doc();
Nils Diewald83c9b162015-02-03 21:05:07 +0000594 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewald6802acd2014-03-18 18:29:30 +0000595
Nils Diewald83c9b162015-02-03 21:05:07 +0000596 // Set inSameDoc to true, if it is true
597 if (this.embeddedDoc == this.wrapDoc)
598 this.inSameDoc = true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000599
Nils Diewald83c9b162015-02-03 21:05:07 +0000600 return true;
Nils Diewaldf399a672013-11-18 17:55:22 +0000601 };
602
603
Nils Diewaldbb33da22015-03-04 16:24:25 +0000604 /**
605 * Skips to the first match beyond the current, whose document
606 * number is
607 * greater than or equal to <i>target</i>. <p>Returns true iff
608 * there is such
609 * a match. <p>Behaves as if written: <pre class="prettyprint">
610 * boolean skipTo(int target) {
611 * do {
612 * if (!next())
613 * return false;
614 * } while (target > doc());
615 * return true;
616 * }
Nils Diewaldf399a672013-11-18 17:55:22 +0000617 * </pre>
618 * Most implementations are considerably more efficient than that.
619 */
620 public boolean skipTo (int target) throws IOException {
Nils Diewald6802acd2014-03-18 18:29:30 +0000621
Nils Diewald83c9b162015-02-03 21:05:07 +0000622 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000623 log.trace("skipTo document {}/{} -> {}", this.embeddedDoc,
624 this.wrapDoc, target);
Nils Diewaldf399a672013-11-18 17:55:22 +0000625
Nils Diewald83c9b162015-02-03 21:05:07 +0000626 // Initialize spans
627 if (!this.init())
628 return false;
Nils Diewald82a4b862014-02-20 21:17:41 +0000629
Nils Diewaldcd226862015-02-11 22:27:45 +0000630 assert target > this.embeddedDoc;
Nils Diewald82a4b862014-02-20 21:17:41 +0000631
Nils Diewald83c9b162015-02-03 21:05:07 +0000632 // Only forward embedded spans
633 if (this.more && (this.embeddedDoc < target)) {
634 if (this.embeddedSpans.skipTo(target)) {
635 this.inSameDoc = false;
636 this.embeddedStart = -1;
637 this.embeddedEnd = -1;
638 this.embeddedPayload = null;
639 this.embeddedDoc = this.embeddedSpans.doc();
640 }
Nils Diewaldf399a672013-11-18 17:55:22 +0000641
Nils Diewald83c9b162015-02-03 21:05:07 +0000642 // Can't be skipped to target
643 else {
644 this.inSameDoc = false;
645 this.more = false;
646 return false;
647 };
648 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000649
Nils Diewald83c9b162015-02-03 21:05:07 +0000650 // Move to same doc
651 return this.toSameDoc();
Nils Diewaldf399a672013-11-18 17:55:22 +0000652 };
653
Nils Diewaldbb33da22015-03-04 16:24:25 +0000654
Nils Diewald6802acd2014-03-18 18:29:30 +0000655 private void nextSpanA () {
Nils Diewald83c9b162015-02-03 21:05:07 +0000656 if (DEBUG)
657 log.trace("Try wrap next time");
658 this.tryMatch = false;
659 this.nextSpanB = false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000660 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000661
Nils Diewaldbb33da22015-03-04 16:24:25 +0000662
Nils Diewald6802acd2014-03-18 18:29:30 +0000663 private void nextSpanB () {
Nils Diewald83c9b162015-02-03 21:05:07 +0000664 if (DEBUG)
665 log.trace("Try embedded next time");
666 this.nextSpanB = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000667 };
668
669
670 // Check if the current span constellation does match
671 // Store backtracking relevant data and say, how to proceed
Akronc3a5df82016-04-29 16:56:53 +0200672 private boolean doesMatch () throws IOException {
Nils Diewaldcd226862015-02-11 22:27:45 +0000673 if (DEBUG)
674 log.trace("In the match test branch");
Nils Diewald6802acd2014-03-18 18:29:30 +0000675
Nils Diewaldcd226862015-02-11 22:27:45 +0000676 if (this.wrapStart == -1)
677 this.wrapStart = this.wrapSpans.start();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000678
Nils Diewaldcd226862015-02-11 22:27:45 +0000679 if (this.embeddedStart == -1) {
680 this.embeddedStart = this.embeddedSpans.start();
Nils Diewaldcd226862015-02-11 22:27:45 +0000681 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000682
Nils Diewaldcd226862015-02-11 22:27:45 +0000683 this.wrapEnd = -1;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000684
Nils Diewaldcd226862015-02-11 22:27:45 +0000685 // Shortcut to prevent lazyloading of .end()
Akrona7b936d2016-03-04 13:40:54 +0100686 // [---
687 // [---
Nils Diewaldcd226862015-02-11 22:27:45 +0000688 if (this.wrapStart > this.embeddedStart) {
689 // Can't match for in, rin, ew, sw, and m
690 // and will always lead to next_b
691 if (flag >= WITHIN) {
692 this.nextSpanB();
693 if (DEBUG)
694 _logCurrentCase((byte) 16);
695 return false;
696 };
697 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000698
Akrona7b936d2016-03-04 13:40:54 +0100699 // [---
700 // [---
Nils Diewaldcd226862015-02-11 22:27:45 +0000701 else if (this.wrapStart < this.embeddedStart) {
702 // Can't match for sw and m and will always
703 // lead to next_a
Akronc3a5df82016-04-29 16:56:53 +0200704
Nils Diewaldcd226862015-02-11 22:27:45 +0000705 if (flag >= STARTSWITH) {
Akronc3a5df82016-04-29 16:56:53 +0200706 if (DEBUG)
707 log.trace("Shortcut for lazy loading");
708
709 this.storeEmbedded();
Nils Diewaldcd226862015-02-11 22:27:45 +0000710 this.nextSpanA();
Akronc12567c2016-06-03 00:40:52 +0200711
Nils Diewaldcd226862015-02-11 22:27:45 +0000712 if (DEBUG)
713 _logCurrentCase((byte) 15);
714 return false;
715 };
716 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000717
Akron63cd32f2016-04-21 17:56:06 +0200718 if (this.embeddedEnd == -1) {
719 this.embeddedEnd = this.embeddedSpans.end();
720 };
721
Nils Diewaldcd226862015-02-11 22:27:45 +0000722 // Now check correctly
723 byte currentCase = this.withinCase();
Nils Diewald6802acd2014-03-18 18:29:30 +0000724
Nils Diewaldcd226862015-02-11 22:27:45 +0000725 if (DEBUG)
726 _logCurrentCase(currentCase);
Nils Diewald6802acd2014-03-18 18:29:30 +0000727
Nils Diewaldcd226862015-02-11 22:27:45 +0000728 boolean match = false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000729
Nils Diewaldcd226862015-02-11 22:27:45 +0000730 // Test case
731 if (currentCase >= (byte) 3 && currentCase <= (byte) 11) {
732 switch (flag) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000733
Nils Diewaldbb33da22015-03-04 16:24:25 +0000734 case WITHIN:
735 if (currentCase >= 6 && currentCase <= 10
736 && currentCase != 8)
737 match = true;
738 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000739
Nils Diewaldbb33da22015-03-04 16:24:25 +0000740 case REAL_WITHIN:
741 if (currentCase == 6 || currentCase == 9
742 || currentCase == 10)
743 match = true;
744 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000745
Nils Diewaldbb33da22015-03-04 16:24:25 +0000746 case MATCH:
747 if (currentCase == 7)
748 match = true;
749 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000750
Nils Diewaldbb33da22015-03-04 16:24:25 +0000751 case STARTSWITH:
752 if (currentCase == 7 || currentCase == 6)
753 match = true;
754 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000755
Nils Diewaldbb33da22015-03-04 16:24:25 +0000756 case ENDSWITH:
757 if (currentCase == 7 || currentCase == 10)
758 match = true;
759 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000760
Nils Diewaldbb33da22015-03-04 16:24:25 +0000761 case OVERLAP:
Nils Diewaldcd226862015-02-11 22:27:45 +0000762 match = true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000763 break;
764
765 case REAL_OVERLAP:
766 if (currentCase == 3 || currentCase == 11)
767 match = true;
768 break;
Nils Diewaldcd226862015-02-11 22:27:45 +0000769 };
770 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000771
Nils Diewaldcd226862015-02-11 22:27:45 +0000772 try {
773 this.todo(currentCase);
774 }
775 catch (IOException e) {
776 return false;
777 }
778 return match;
Nils Diewald6802acd2014-03-18 18:29:30 +0000779 };
780
781
782 private void _logCurrentCase (byte currentCase) {
Nils Diewaldcd226862015-02-11 22:27:45 +0000783 log.trace("Current Case is {}", currentCase);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000784
Nils Diewaldcd226862015-02-11 22:27:45 +0000785 String _e = _currentEmbedded().toString();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000786
Nils Diewaldcd226862015-02-11 22:27:45 +0000787 log.trace(" |---| {}", _currentWrap().toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000788
Nils Diewaldcd226862015-02-11 22:27:45 +0000789 switch (currentCase) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000790 case 1:
791 log.trace("|-| {}", _e);
792 break;
793 case 2:
794 log.trace("|---| {}", _e);
795 break;
796 case 3:
797 log.trace(" |---| {}", _e);
798 break;
799 case 4:
800 log.trace(" |-----| {}", _e);
801 break;
802 case 5:
803 log.trace(" |-------| {}", _e);
804 break;
805 case 6:
806 log.trace(" |-| {}", _e);
807 break;
808 case 7:
809 log.trace(" |---| {}", _e);
810 break;
811 case 8:
812 log.trace(" |-----| {}", _e);
813 break;
814 case 9:
815 log.trace(" |-| {}", _e);
816 break;
817 case 10:
818 log.trace(" |-| {}", _e);
819 break;
820 case 11:
821 log.trace(" |---| {}", _e);
822 break;
823 case 12:
824 log.trace(" |-| {}", _e);
825 break;
826 case 13:
827 log.trace(" |-| {}", _e);
828 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000829
Nils Diewaldbb33da22015-03-04 16:24:25 +0000830 case 15:
831 // Fake case
832 log.trace(" |---? {}", _e);
833 break;
834
835 case 16:
836 // Fake case
837 log.trace(" |---? {}", _e);
838 break;
Nils Diewaldcd226862015-02-11 22:27:45 +0000839 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000840 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000841
Nils Diewald6802acd2014-03-18 18:29:30 +0000842
Nils Diewald41750bf2015-02-06 17:45:20 +0000843 private WithinSpan _currentWrap () {
844 WithinSpan _wrap = new WithinSpan();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000845 _wrap.start = this.wrapStart != -1 ? this.wrapStart : this.wrapSpans
846 .start();
847 _wrap.end = this.wrapEnd != -1 ? this.wrapEnd : this.wrapSpans.end();
848 _wrap.doc = this.wrapDoc != -1 ? this.wrapDoc : this.wrapSpans.doc();
Nils Diewald83c9b162015-02-03 21:05:07 +0000849 return _wrap;
Nils Diewald6802acd2014-03-18 18:29:30 +0000850 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000851
852
Nils Diewald41750bf2015-02-06 17:45:20 +0000853 private WithinSpan _currentEmbedded () {
854 WithinSpan _embedded = new WithinSpan();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000855 _embedded.start = this.embeddedStart != -1 ? this.embeddedStart
856 : this.embeddedSpans.start();
857 _embedded.end = this.embeddedEnd != -1 ? this.embeddedEnd
858 : this.embeddedSpans.end();
859 _embedded.doc = this.embeddedDoc != -1 ? this.embeddedDoc
860 : this.embeddedSpans.doc();
Nils Diewald83c9b162015-02-03 21:05:07 +0000861 return _embedded;
Nils Diewald6802acd2014-03-18 18:29:30 +0000862 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000863
Nils Diewald6802acd2014-03-18 18:29:30 +0000864
865 private void todo (byte currentCase) throws IOException {
Akronc3a5df82016-04-29 16:56:53 +0200866 if (DEBUG) {
867 log.trace("Check what to do next ...");
868 };
869
Nils Diewaldbb33da22015-03-04 16:24:25 +0000870 /*
871 Check what to do next with the spans.
872
873 The different follow up steps are:
874 - storeEmbedded -> store span B for later checks
875 - nextSpanA -> forward a
876 - nextSpanB -> forward b
Nils Diewald6802acd2014-03-18 18:29:30 +0000877
Nils Diewaldbb33da22015-03-04 16:24:25 +0000878 These rules were automatically generated
879 */
Nils Diewald6802acd2014-03-18 18:29:30 +0000880
Nils Diewaldbb33da22015-03-04 16:24:25 +0000881 // Case 1, 2
882 if (currentCase <= (byte) 2) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000883 this.nextSpanB();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000884 }
885
886 // Case 12, 13
887 else if (currentCase >= (byte) 12) {
888 this.storeEmbedded();
Nils Diewald83c9b162015-02-03 21:05:07 +0000889 this.nextSpanA();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000890 }
891
892 // Case 3, 4, 5, 8
893 else if (currentCase <= (byte) 5 || currentCase == (byte) 8) {
894 if (flag <= 2)
895 this.storeEmbedded();
Nils Diewald83c9b162015-02-03 21:05:07 +0000896 this.nextSpanB();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000897 }
898
899 // Case 11
900 else if (currentCase == (byte) 11) {
901 if (this.flag == REAL_WITHIN) {
902 this.nextSpanB();
903 }
904 else if (this.flag >= STARTSWITH) {
Akronc3a5df82016-04-29 16:56:53 +0200905
906 // TODO: May need storeEmbedded
907
Nils Diewaldbb33da22015-03-04 16:24:25 +0000908 this.nextSpanA();
909 }
910 else {
911 this.storeEmbedded();
912 this.nextSpanB();
913 };
914 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000915
916
Nils Diewaldbb33da22015-03-04 16:24:25 +0000917 // Case 6, 7, 9, 10
918 else {
Nils Diewald6802acd2014-03-18 18:29:30 +0000919
Nils Diewaldbb33da22015-03-04 16:24:25 +0000920 if (
921 // Case 6
922 (currentCase == (byte) 6 && this.flag == MATCH) ||
Nils Diewald6802acd2014-03-18 18:29:30 +0000923
Nils Diewaldbb33da22015-03-04 16:24:25 +0000924 // Case 7
925 (currentCase == (byte) 7 && this.flag == REAL_WITHIN) ||
926
927 // Case 9, 10
928 (currentCase >= (byte) 9 && this.flag >= STARTSWITH)) {
929
Akronc3a5df82016-04-29 16:56:53 +0200930 // TODO: May need storeEmbedded
Nils Diewaldbb33da22015-03-04 16:24:25 +0000931 this.nextSpanA();
932 }
933 else {
934 this.storeEmbedded();
935 this.nextSpanB();
936 };
937 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000938 };
939
Nils Diewaldbb33da22015-03-04 16:24:25 +0000940
Nils Diewald83c9b162015-02-03 21:05:07 +0000941 // Store the current embedded span in the first spanStore
Nils Diewald6802acd2014-03-18 18:29:30 +0000942 private void storeEmbedded () throws IOException {
943
Nils Diewald83c9b162015-02-03 21:05:07 +0000944 // Create a current copy
Nils Diewald41750bf2015-02-06 17:45:20 +0000945 WithinSpan embedded = new WithinSpan();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000946 embedded.start = this.embeddedStart != -1 ? this.embeddedStart
947 : this.embeddedSpans.start();
948 embedded.end = this.embeddedEnd != -1 ? this.embeddedEnd
949 : this.embeddedSpans.end();
950 embedded.doc = this.embeddedDoc;
Nils Diewald6802acd2014-03-18 18:29:30 +0000951
Nils Diewald83c9b162015-02-03 21:05:07 +0000952 // Copy payloads
953 if (this.embeddedPayload != null) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000954 embedded.payload = new ArrayList<byte[]>(
955 this.embeddedPayload.size());
Nils Diewald83c9b162015-02-03 21:05:07 +0000956 embedded.payload.addAll(this.embeddedPayload);
957 }
958 else if (this.embeddedSpans.isPayloadAvailable()) {
959 embedded.payload = new ArrayList<byte[]>(3);
960 Collection<byte[]> payload = this.embeddedSpans.getPayload();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000961
Nils Diewald83c9b162015-02-03 21:05:07 +0000962 this.embeddedPayload = new ArrayList<byte[]>(payload.size());
963 this.embeddedPayload.addAll(payload);
964 embedded.payload.addAll(payload);
965 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000966
Nils Diewald83c9b162015-02-03 21:05:07 +0000967 this.spanStore1.add(embedded);
Nils Diewald6802acd2014-03-18 18:29:30 +0000968
Nils Diewald83c9b162015-02-03 21:05:07 +0000969 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000970 log.trace("Pushed to spanStore 1 {} (in storeEmbedded)",
971 embedded.toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000972 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000973
Nils Diewald6802acd2014-03-18 18:29:30 +0000974
975 // Return case number
976 private byte withinCase () {
977
Akron63cd32f2016-04-21 17:56:06 +0200978 if (DEBUG) {
Akron6759b042016-04-28 01:25:00 +0200979 log.trace(">>>>>>>>>>>>>> {}-{}|{}-{}", this.wrapStart,
980 this.wrapSpans.end(), this.embeddedStart,
981 this.embeddedSpans.end());
Akron63cd32f2016-04-21 17:56:06 +0200982 };
983
Nils Diewaldcd226862015-02-11 22:27:45 +0000984 // case 1-5
985 if (this.wrapStart > this.embeddedStart) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000986
Nils Diewaldcd226862015-02-11 22:27:45 +0000987 // Case 1
988 // |-|
989 // |-|
990 if (this.wrapStart > this.embeddedEnd) {
991 return (byte) 1;
992 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000993
Nils Diewaldcd226862015-02-11 22:27:45 +0000994 // Case 2
995 // |-|
996 // |-|
997 else if (this.wrapStart == this.embeddedEnd) {
998 return (byte) 2;
999 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001000
Nils Diewaldcd226862015-02-11 22:27:45 +00001001 // Load wrapEnd
1002 this.wrapEnd = this.wrapSpans.end();
Nils Diewaldbb33da22015-03-04 16:24:25 +00001003
Nils Diewaldcd226862015-02-11 22:27:45 +00001004 // Case 3
1005 // |---|
1006 // |---|
1007 if (this.wrapEnd > this.embeddedEnd) {
1008 return (byte) 3;
1009 }
Nils Diewald6802acd2014-03-18 18:29:30 +00001010
Nils Diewaldcd226862015-02-11 22:27:45 +00001011 // Case 4
1012 // |-|
1013 // |---|
1014 else if (this.wrapEnd == this.embeddedEnd) {
1015 return (byte) 4;
1016 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001017
Nils Diewaldcd226862015-02-11 22:27:45 +00001018 // Case 5
1019 // |-|
1020 // |---|
1021 return (byte) 5;
1022 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001023
Nils Diewaldcd226862015-02-11 22:27:45 +00001024 // case 6-8
1025 else if (this.wrapStart == this.embeddedStart) {
Nils Diewald6802acd2014-03-18 18:29:30 +00001026
Nils Diewaldcd226862015-02-11 22:27:45 +00001027 // Load wrapEnd
1028 this.wrapEnd = this.wrapSpans.end();
Akrona7b936d2016-03-04 13:40:54 +01001029 // this.embeddedEnd = this.embeddedSpans.end();
Nils Diewald6802acd2014-03-18 18:29:30 +00001030
Nils Diewaldcd226862015-02-11 22:27:45 +00001031 // Case 6
1032 // |---|
1033 // |-|
1034 if (this.wrapEnd > this.embeddedEnd) {
1035 return (byte) 6;
1036 }
Nils Diewald6802acd2014-03-18 18:29:30 +00001037
Nils Diewaldcd226862015-02-11 22:27:45 +00001038 // Case 7
1039 // |---|
1040 // |---|
1041 else if (this.wrapEnd == this.embeddedEnd) {
1042 return (byte) 7;
1043 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001044
Nils Diewaldcd226862015-02-11 22:27:45 +00001045 // Case 8
1046 // |-|
1047 // |---|
1048 return (byte) 8;
1049 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001050
Nils Diewaldcd226862015-02-11 22:27:45 +00001051 // wrapStart < embeddedStart
Nils Diewaldf399a672013-11-18 17:55:22 +00001052
Nils Diewaldcd226862015-02-11 22:27:45 +00001053 // Load wrapEnd
1054 this.wrapEnd = this.wrapSpans.end();
Nils Diewald6802acd2014-03-18 18:29:30 +00001055
Nils Diewaldcd226862015-02-11 22:27:45 +00001056 // Case 13
1057 // |-|
1058 // |-|
1059 if (this.wrapEnd < this.embeddedStart) {
1060 return (byte) 13;
1061 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001062
Nils Diewaldcd226862015-02-11 22:27:45 +00001063 // Case 9
1064 // |---|
1065 // |-|
1066 else if (this.wrapEnd > this.embeddedEnd) {
1067 return (byte) 9;
1068 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001069
Nils Diewaldcd226862015-02-11 22:27:45 +00001070 // Case 10
1071 // |---|
1072 // |-|
1073 else if (this.wrapEnd == this.embeddedEnd) {
1074 return (byte) 10;
1075 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001076
Nils Diewaldcd226862015-02-11 22:27:45 +00001077 // Case 11
1078 // |---|
1079 // |---|
1080 else if (this.wrapEnd > this.embeddedStart) {
1081 return (byte) 11;
1082 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001083
Nils Diewaldcd226862015-02-11 22:27:45 +00001084 // case 12
1085 // |-|
1086 // |-|
1087 return (byte) 12;
Nils Diewaldbb33da22015-03-04 16:24:25 +00001088 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001089
1090
Nils Diewaldbb33da22015-03-04 16:24:25 +00001091 /**
1092 * Returns the document number of the current match. Initially
1093 * invalid.
1094 */
Nils Diewaldf399a672013-11-18 17:55:22 +00001095 @Override
1096 public int doc () {
Nils Diewaldcd226862015-02-11 22:27:45 +00001097 return matchDoc;
Nils Diewaldf399a672013-11-18 17:55:22 +00001098 };
1099
Nils Diewaldbb33da22015-03-04 16:24:25 +00001100
1101 /**
1102 * Returns the start position of the embedding wrap. Initially
1103 * invalid.
1104 */
Nils Diewaldf399a672013-11-18 17:55:22 +00001105 @Override
1106 public int start () {
Nils Diewaldcd226862015-02-11 22:27:45 +00001107 return matchStart;
Nils Diewaldf399a672013-11-18 17:55:22 +00001108 };
1109
Nils Diewaldbb33da22015-03-04 16:24:25 +00001110
1111 /**
1112 * Returns the end position of the embedding wrap. Initially
1113 * invalid.
1114 */
Nils Diewaldf399a672013-11-18 17:55:22 +00001115 @Override
1116 public int end () {
Nils Diewaldbb33da22015-03-04 16:24:25 +00001117 return matchEnd;
Nils Diewaldf399a672013-11-18 17:55:22 +00001118 };
1119
Nils Diewaldbb33da22015-03-04 16:24:25 +00001120
Nils Diewaldf399a672013-11-18 17:55:22 +00001121 /**
1122 * Returns the payload data for the current span.
1123 * This is invalid until {@link #next()} is called for
1124 * the first time.
1125 * This method must not be called more than once after each call
1126 * of {@link #next()}. However, most payloads are loaded lazily,
1127 * so if the payload data for the current position is not needed,
Nils Diewaldbb33da22015-03-04 16:24:25 +00001128 * this method may not be called at all for performance reasons.
1129 * An ordered
1130 * SpanQuery does not lazy load, so if you have payloads in your
1131 * index and
1132 * you do not want ordered SpanNearQuerys to collect payloads, you
1133 * can
Nils Diewalde0725012014-09-25 19:32:52 +00001134 * disable collection with a constructor option.<br>
Nils Diewaldf399a672013-11-18 17:55:22 +00001135 * <br>
Nils Diewaldbb33da22015-03-04 16:24:25 +00001136 * Note that the return type is a collection, thus the ordering
1137 * should not be relied upon.
Nils Diewaldf399a672013-11-18 17:55:22 +00001138 * <br/>
Nils Diewaldbb33da22015-03-04 16:24:25 +00001139 *
Nils Diewaldf399a672013-11-18 17:55:22 +00001140 * @lucene.experimental
Nils Diewaldbb33da22015-03-04 16:24:25 +00001141 *
1142 * @return a List of byte arrays containing the data of this
1143 * payload, otherwise null if isPayloadAvailable is false
1144 * @throws IOException
1145 * if there is a low-level I/O error
Nils Diewaldf399a672013-11-18 17:55:22 +00001146 */
1147 // public abstract Collection<byte[]> getPayload() throws IOException;
1148 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001149 public Collection<byte[]> getPayload () throws IOException {
1150 return matchPayload;
Nils Diewaldf399a672013-11-18 17:55:22 +00001151 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001152
Nils Diewaldf399a672013-11-18 17:55:22 +00001153
1154 /**
1155 * Checks if a payload can be loaded at this position.
1156 * <p/>
Nils Diewaldbb33da22015-03-04 16:24:25 +00001157 * Payloads can only be loaded once per call to {@link #next()}.
1158 *
1159 * @return true if there is a payload available at this position
1160 * that can be loaded
Nils Diewaldf399a672013-11-18 17:55:22 +00001161 */
1162 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001163 public boolean isPayloadAvailable () {
1164 return matchPayload.isEmpty() == false;
Nils Diewaldf399a672013-11-18 17:55:22 +00001165 };
1166
Nils Diewaldbb33da22015-03-04 16:24:25 +00001167
Nils Diewaldf399a672013-11-18 17:55:22 +00001168 // Todo: This may be in the wrong version
1169 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001170 public long cost () {
1171 return wrapSpans.cost() + embeddedSpans.cost();
Nils Diewaldf399a672013-11-18 17:55:22 +00001172 };
1173
Nils Diewaldbb33da22015-03-04 16:24:25 +00001174
Nils Diewaldf399a672013-11-18 17:55:22 +00001175 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001176 public String toString () {
1177 return getClass().getName()
1178 + "("
1179 + query.toString()
1180 + ")@"
1181 + (embeddedDoc <= 0 ? "START" : (more ? (doc() + ":" + start()
1182 + "-" + end()) : "END"));
Nils Diewaldf399a672013-11-18 17:55:22 +00001183 };
Nils Diewald41750bf2015-02-06 17:45:20 +00001184
1185
1186 // This was formerly the default candidate span class,
1187 // before it was refactored out
Nils Diewaldbb33da22015-03-04 16:24:25 +00001188 private class WithinSpan implements Comparable<WithinSpan>, Cloneable {
1189 public int start = -1, end = -1, doc = -1;
Nils Diewald41750bf2015-02-06 17:45:20 +00001190
1191 public Collection<byte[]> payload;
1192
1193 public short elementRef = -1;
Nils Diewaldbb33da22015-03-04 16:24:25 +00001194
Akron6759b042016-04-28 01:25:00 +02001195
Nils Diewald41750bf2015-02-06 17:45:20 +00001196 public void clear () {
1197 this.start = -1;
1198 this.end = -1;
1199 this.doc = -1;
1200 clearPayload();
1201 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001202
1203
Nils Diewald41750bf2015-02-06 17:45:20 +00001204 @Override
1205 public int compareTo (WithinSpan o) {
1206 /* optimizable for short numbers to return o.end - this.end */
1207 if (this.doc < o.doc) {
1208 return -1;
1209 }
1210 else if (this.doc == o.doc) {
1211 if (this.start < o.start) {
1212 return -1;
1213 }
1214 else if (this.start == o.start) {
1215 if (this.end < o.end)
1216 return -1;
1217 };
1218 };
1219 return 1;
1220 };
1221
Nils Diewaldbb33da22015-03-04 16:24:25 +00001222
1223 public short getElementRef () {
Nils Diewald41750bf2015-02-06 17:45:20 +00001224 return elementRef;
1225 }
1226
Nils Diewaldbb33da22015-03-04 16:24:25 +00001227
1228 public void setElementRef (short elementRef) {
Nils Diewald41750bf2015-02-06 17:45:20 +00001229 this.elementRef = elementRef;
1230 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001231
1232
Nils Diewald41750bf2015-02-06 17:45:20 +00001233 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001234 public Object clone () {
Nils Diewald41750bf2015-02-06 17:45:20 +00001235 WithinSpan span = new WithinSpan();
1236 span.start = this.start;
1237 span.end = this.end;
1238 span.doc = this.doc;
1239 span.payload.addAll(this.payload);
1240 return span;
1241 };
1242
Nils Diewaldbb33da22015-03-04 16:24:25 +00001243
Nils Diewald41750bf2015-02-06 17:45:20 +00001244 public WithinSpan copyFrom (WithinSpan o) {
1245 this.start = o.start;
1246 this.end = o.end;
1247 this.doc = o.doc;
1248 // this.clearPayload();
1249 this.payload.addAll(o.payload);
1250 return this;
1251 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001252
1253
Nils Diewald41750bf2015-02-06 17:45:20 +00001254 public void clearPayload () {
1255 if (this.payload != null)
1256 this.payload.clear();
1257 };
1258
Nils Diewaldbb33da22015-03-04 16:24:25 +00001259
Nils Diewald41750bf2015-02-06 17:45:20 +00001260 public String toString () {
1261 StringBuilder sb = new StringBuilder("[");
Nils Diewaldbb33da22015-03-04 16:24:25 +00001262 return sb.append(this.start).append('-').append(this.end)
1263 .append('(').append(this.doc).append(')').append(']')
1264 .toString();
Nils Diewald41750bf2015-02-06 17:45:20 +00001265 };
1266 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001267};