blob: 2afcc4218f492f5c238d3d6a869c28202521c9f4 [file] [log] [blame]
Nils Diewaldf399a672013-11-18 17:55:22 +00001package de.ids_mannheim.korap.query.spans;
2
Eliza Margaretha22898982014-11-04 17:10:21 +00003import java.io.IOException;
4import java.util.ArrayList;
5import java.util.Collection;
6import java.util.LinkedList;
7import java.util.Map;
8
Akron700c1eb2015-09-25 16:57:30 +02009import org.apache.lucene.index.LeafReaderContext;
Nils Diewaldf399a672013-11-18 17:55:22 +000010import org.apache.lucene.index.Term;
11import org.apache.lucene.index.TermContext;
Nils Diewaldf399a672013-11-18 17:55:22 +000012import org.apache.lucene.search.DocIdSetIterator;
Eliza Margaretha22898982014-11-04 17:10:21 +000013import org.apache.lucene.search.spans.Spans;
14import org.apache.lucene.util.Bits;
Nils Diewaldf399a672013-11-18 17:55:22 +000015import org.slf4j.Logger;
16import org.slf4j.LoggerFactory;
17
Eliza Margaretha22898982014-11-04 17:10:21 +000018import de.ids_mannheim.korap.query.SpanWithinQuery;
Nils Diewaldf399a672013-11-18 17:55:22 +000019
Nils Diewald83c9b162015-02-03 21:05:07 +000020
21/**
22 * SpanWithinQuery is DEPRECATED and will
23 * be replaced by SpanPositionQuery in the near future
Nils Diewaldcb8afb32015-02-04 21:12:37 +000024 *
25 * TODO: Support exclusivity
26 * TODO: Use the term "queue" and implement it similar to SpanOrQuery
Nils Diewald83c9b162015-02-03 21:05:07 +000027 */
28
Nils Diewald82a4b862014-02-20 21:17:41 +000029/**
Nils Diewald1455e1e2014-08-01 16:12:43 +000030 * Compare two spans and check how they relate positionally.
Nils Diewaldbb33da22015-03-04 16:24:25 +000031 *
Nils Diewald44d5fa12015-01-15 21:31:52 +000032 * @author diewald
Nils Diewald82a4b862014-02-20 21:17:41 +000033 */
Nils Diewaldf399a672013-11-18 17:55:22 +000034public class WithinSpans extends Spans {
Nils Diewald82a4b862014-02-20 21:17:41 +000035
// Shared logger; a single static instance avoids a logger lookup
// for every WithinSpans object that is constructed.
private static final Logger log = LoggerFactory.getLogger(WithinSpans.class);

// Compile-time switch: all logging in this class is guarded by
// "if (DEBUG)", so with a constant false the guarded statements
// are never executed.
public static final boolean DEBUG = false;

// Set to false as soon as one of the sub spans is exhausted
private boolean more = false;

// Boolean value indicating if span B (embedded)
// should be forwarded next (true)
// or span A (wrap) (false).
boolean nextSpanB = true;

// Cached positions of the current wrap, embedded and match spans.
// -1 means "not loaded yet".
private int wrapStart = -1, wrapEnd = -1, embeddedStart = -1,
        embeddedEnd = -1, wrapDoc = -1, embeddedDoc = -1, matchDoc = -1,
        matchStart = -1, matchEnd = -1;

// Payloads of the current match and of the current embedded span
private Collection<byte[]> matchPayload;
private Collection<byte[]> embeddedPayload;

// Indicates that the wrap and the embedded spans are in the same doc
private boolean inSameDoc = false;

/*
  Supported flags are currently
  (A is the wrapping span, B is the embedded span):
  ov  ->  0 | overlap:      A & B != empty
  rov ->  2 | real overlap: A & B != empty and
                            ((A | B) != A or
                             (A | B) != B)
  in  ->  4 | within:       A | B = A
  rin ->  6 | real within:  A | B = A and
                            A & B != A
  ew  ->  8 | endswith:     A | B = A and
                            A.end = B.end
  sw  -> 10 | startswith:   A | B = A and
                            A.start = B.start
  m   -> 12 | A = B
*/
public static final byte OVERLAP = (byte) 0, REAL_OVERLAP = (byte) 2,
        WITHIN = (byte) 4, REAL_WITHIN = (byte) 6, ENDSWITH = (byte) 8,
        STARTSWITH = (byte) 10, MATCH = (byte) 12;

// The positional condition (one of the constants above)
private byte flag;

// Contains the query
private SpanWithinQuery query;

// Representing the first operand
private final Spans wrapSpans;

// Representing the second operand
private final Spans embeddedSpans;

// Indicates whether the current constellation of wrap and
// embedded span still has to be tested (true) or was
// already checked (false)
private boolean tryMatch = true;

// Two buffers for storing candidates
private LinkedList<WithinSpan> spanStore1, spanStore2;
95
Nils Diewaldf399a672013-11-18 17:55:22 +000096
Nils Diewald01ff7af2015-02-04 22:54:26 +000097 /**
98 * Construct a new WithinSpans object.
Nils Diewaldbb33da22015-03-04 16:24:25 +000099 *
100 * @param spanWithinQuery
101 * The parental {@link SpanWithinQuery}.
102 * @param context
Akron700c1eb2015-09-25 16:57:30 +0200103 * The {@link LeafReaderContext}.
Nils Diewaldbb33da22015-03-04 16:24:25 +0000104 * @param acceptDocs
105 * Bit vector representing the documents
106 * to be searched in.
107 * @param termContexts
108 * A map managing {@link TermState TermStates}.
109 * @param flag
110 * A byte flag indicating the positional condition of
111 * the sub spans.
Nils Diewald01ff7af2015-02-04 22:54:26 +0000112 */
public WithinSpans (SpanWithinQuery spanWithinQuery,
                    LeafReaderContext context, Bits acceptDocs,
                    Map<Term, TermContext> termContexts, byte flag)
        throws IOException {

    if (DEBUG) {
        log.trace("Construct WithinSpans");
    }

    // The query reference is kept for toString() only
    query = spanWithinQuery;

    // Positional condition the two operands have to fulfil
    this.flag = flag;

    // Collects the payloads of the current match
    matchPayload = new LinkedList<byte[]>();

    // Candidate buffers used for backtracking
    spanStore1 = new LinkedList<WithinSpan>();
    spanStore2 = new LinkedList<WithinSpan>();

    // Resolve both operands against the given segment
    wrapSpans = spanWithinQuery.wrap().getSpans(context, acceptDocs,
            termContexts);
    embeddedSpans = spanWithinQuery.embedded().getSpans(context,
            acceptDocs, termContexts);
}
Nils Diewaldbb33da22015-03-04 16:24:25 +0000139
Nils Diewaldf399a672013-11-18 17:55:22 +0000140
Nils Diewald6802acd2014-03-18 18:29:30 +0000141 // Move to next match, returning true iff any such exists.
Nils Diewaldf399a672013-11-18 17:55:22 +0000142 @Override
143 public boolean next () throws IOException {
Nils Diewald6802acd2014-03-18 18:29:30 +0000144
Nils Diewald83c9b162015-02-03 21:05:07 +0000145 if (DEBUG)
146 log.trace("Next with docs {}, {}", wrapDoc, embeddedDoc);
Nils Diewaldf399a672013-11-18 17:55:22 +0000147
Nils Diewald83c9b162015-02-03 21:05:07 +0000148 // Initialize spans
149 if (!this.init()) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000150 this.more = false;
151 this.inSameDoc = false;
152 this.wrapDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000153 this.embeddedDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000154 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000155 return false;
156 };
Nils Diewald82a4b862014-02-20 21:17:41 +0000157
Nils Diewald83c9b162015-02-03 21:05:07 +0000158 // There are more spans and they are in the same document
Nils Diewald6802acd2014-03-18 18:29:30 +0000159
Nils Diewald83c9b162015-02-03 21:05:07 +0000160 while (this.more && (wrapDoc == embeddedDoc ||
Nils Diewaldbb33da22015-03-04 16:24:25 +0000161 // this.inSameDoc ||
162 this.toSameDoc())) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000163 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000164 log.trace("We are in the same doc: {}, {}", wrapDoc,
165 embeddedDoc);
Nils Diewald6802acd2014-03-18 18:29:30 +0000166
Nils Diewald83c9b162015-02-03 21:05:07 +0000167 // Both spans match according to the flag
168 // Silently the next operations are prepared
169 if (this.tryMatch && this.doesMatch()) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000170
Nils Diewald83c9b162015-02-03 21:05:07 +0000171 if (this.wrapEnd == -1)
172 this.wrapEnd = this.wrapSpans.end();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000173
174 this.matchStart = embeddedStart < wrapStart ? embeddedStart
175 : wrapStart;
176 this.matchEnd = embeddedEnd > wrapEnd ? embeddedEnd : wrapEnd;
177 this.matchDoc = embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000178 this.matchPayload.clear();
Nils Diewald82a4b862014-02-20 21:17:41 +0000179
Nils Diewald83c9b162015-02-03 21:05:07 +0000180 if (this.embeddedPayload != null)
181 matchPayload.addAll(embeddedPayload);
Nils Diewaldf399a672013-11-18 17:55:22 +0000182
Nils Diewald83c9b162015-02-03 21:05:07 +0000183 if (this.wrapSpans.isPayloadAvailable())
184 this.matchPayload.addAll(wrapSpans.getPayload());
Nils Diewaldf399a672013-11-18 17:55:22 +0000185
Nils Diewald83c9b162015-02-03 21:05:07 +0000186 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000187 log.trace(" ---- MATCH ---- {}-{} ({})", matchStart,
188 matchEnd, matchDoc);
Nils Diewald6802acd2014-03-18 18:29:30 +0000189
Nils Diewald83c9b162015-02-03 21:05:07 +0000190 this.tryMatch = false;
191 return true;
192 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000193
Nils Diewald83c9b162015-02-03 21:05:07 +0000194 // Get next embedded
195 else if (this.nextSpanB) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000196
Nils Diewald83c9b162015-02-03 21:05:07 +0000197 // Next time try the match
198 this.tryMatch = true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000199
Nils Diewald83c9b162015-02-03 21:05:07 +0000200 if (DEBUG)
201 log.trace("In the next embedded branch");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000202
Nils Diewald41750bf2015-02-06 17:45:20 +0000203 WithinSpan current = null;
Nils Diewald6802acd2014-03-18 18:29:30 +0000204
Nils Diewald83c9b162015-02-03 21:05:07 +0000205 // New - fetch until theres a span in the correct doc or bigger
206 while (!this.spanStore2.isEmpty()) {
207 current = spanStore2.removeFirst();
208 if (current.doc >= this.wrapDoc)
209 break;
210 };
211
212
213 // There is nothing in the second store
214 if (current == null) {
215 if (DEBUG)
216 log.trace("SpanStore 2 is empty");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000217
Nils Diewald83c9b162015-02-03 21:05:07 +0000218 // Forward with embedding
219 if (!this.embeddedSpans.next()) {
220 this.nextSpanA();
221 continue;
222 }
223
224 else if (DEBUG) {
225 log.trace("Fetch next embedded span");
226 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000227
Nils Diewaldcd226862015-02-11 22:27:45 +0000228 this.embeddedStart = this.embeddedSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000229 this.embeddedEnd = -1;
230 this.embeddedPayload = null;
231 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewald6802acd2014-03-18 18:29:30 +0000232
Nils Diewald83c9b162015-02-03 21:05:07 +0000233 if (this.embeddedDoc != this.wrapDoc) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000234
Nils Diewald83c9b162015-02-03 21:05:07 +0000235 if (DEBUG) {
236 log.trace("Embedded span is in a new document {}",
Nils Diewaldbb33da22015-03-04 16:24:25 +0000237 _currentEmbedded().toString());
Nils Diewald83c9b162015-02-03 21:05:07 +0000238 log.trace("Reset current embedded doc");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000239 };
240
Nils Diewald83c9b162015-02-03 21:05:07 +0000241 /*
242 if (DEBUG)
243 log.trace("Clear all span stores");
244 this.spanStore1.clear();
245 this.spanStore2.clear();
246 */
Nils Diewald6802acd2014-03-18 18:29:30 +0000247
Nils Diewald83c9b162015-02-03 21:05:07 +0000248 this.storeEmbedded();
249
250 // That is necessary to backtrack to the last document!
251 this.inSameDoc = true;
252 this.embeddedDoc = wrapDoc;
253 // this.tryMatch = false; // already covered in nextSpanA
254
255 this.nextSpanA();
256 continue;
257 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000258
Nils Diewald83c9b162015-02-03 21:05:07 +0000259 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000260 log.trace(" Forward embedded span to {}",
261 _currentEmbedded().toString());
262
Nils Diewald83c9b162015-02-03 21:05:07 +0000263 if (this.embeddedDoc != this.wrapDoc) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000264
Nils Diewald83c9b162015-02-03 21:05:07 +0000265 // Is this always a good idea?
266 /*
267 this.spanStore1.clear();
268 this.spanStore2.clear();
269 */
270
271 this.embeddedStart = -1;
272 this.embeddedEnd = -1;
273 this.embeddedPayload = null;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000274
Nils Diewald83c9b162015-02-03 21:05:07 +0000275 if (!this.toSameDoc()) {
276 this.more = false;
277 this.inSameDoc = false;
278 return false;
279 };
280 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000281
Nils Diewald83c9b162015-02-03 21:05:07 +0000282 this.more = true;
283 this.inSameDoc = true;
284 this.tryMatch = true;
Nils Diewaldcd226862015-02-11 22:27:45 +0000285
Nils Diewald83c9b162015-02-03 21:05:07 +0000286 this.nextSpanB();
287 continue;
288 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000289
Nils Diewald83c9b162015-02-03 21:05:07 +0000290 // Fetch from second store?
291 else {
292 /** TODO: Change this to a single embedded object! */
293 this.embeddedStart = current.start;
294 this.embeddedEnd = current.end;
295 this.embeddedDoc = current.doc;
Nils Diewald6802acd2014-03-18 18:29:30 +0000296
Nils Diewald83c9b162015-02-03 21:05:07 +0000297 if (current.payload != null) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000298 this.embeddedPayload = new ArrayList<byte[]>(
299 current.payload.size());
Nils Diewald83c9b162015-02-03 21:05:07 +0000300 this.embeddedPayload.addAll(current.payload);
301 }
302 else {
303 this.embeddedPayload = null;
304 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000305
Nils Diewald83c9b162015-02-03 21:05:07 +0000306 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000307 log.trace("Fetch current from SpanStore 2: {}",
308 current.toString());
309
Nils Diewald83c9b162015-02-03 21:05:07 +0000310 this.tryMatch = true;
311 };
312 continue;
313 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000314
Nils Diewald83c9b162015-02-03 21:05:07 +0000315 // get next wrap
316 if (DEBUG)
317 log.trace("In the next wrap branch");
Nils Diewald6802acd2014-03-18 18:29:30 +0000318
Nils Diewald83c9b162015-02-03 21:05:07 +0000319 this.tryMatch = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000320
Nils Diewald83c9b162015-02-03 21:05:07 +0000321 if (DEBUG)
322 log.trace("Try next wrap");
Nils Diewald6802acd2014-03-18 18:29:30 +0000323
Nils Diewald83c9b162015-02-03 21:05:07 +0000324 // shift the stored spans
325 if (!this.spanStore1.isEmpty()) {
326 if (DEBUG) {
327 log.trace("Move everything from SpanStore 1 to SpanStore 2:");
Nils Diewald41750bf2015-02-06 17:45:20 +0000328 for (WithinSpan i : this.spanStore1) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000329 log.trace(" | {}", i.toString());
330 };
331 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000332
Nils Diewald83c9b162015-02-03 21:05:07 +0000333 // Move everything to spanStore2
Nils Diewaldbb33da22015-03-04 16:24:25 +0000334 this.spanStore2.addAll(0,
335 (LinkedList<WithinSpan>) this.spanStore1.clone());
Nils Diewald83c9b162015-02-03 21:05:07 +0000336 this.spanStore1.clear();
Nils Diewald6802acd2014-03-18 18:29:30 +0000337
Nils Diewald83c9b162015-02-03 21:05:07 +0000338 if (DEBUG) {
339 log.trace("SpanStore 2 now is:");
Nils Diewald41750bf2015-02-06 17:45:20 +0000340 for (WithinSpan i : this.spanStore2) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000341 log.trace(" | {}", i.toString());
342 };
343 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000344
Nils Diewald83c9b162015-02-03 21:05:07 +0000345 }
346 else if (DEBUG) {
347 log.trace("spanStore 1 is empty");
348 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000349
Nils Diewald83c9b162015-02-03 21:05:07 +0000350 // Get next wrap
351 if (this.wrapSpans.next()) {
Nils Diewald82a4b862014-02-20 21:17:41 +0000352
Nils Diewald83c9b162015-02-03 21:05:07 +0000353 // Reset wrapping information
Nils Diewaldcd226862015-02-11 22:27:45 +0000354 this.wrapStart = this.wrapSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000355 this.wrapEnd = -1;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000356
Nils Diewald83c9b162015-02-03 21:05:07 +0000357 // Retrieve doc information
358 this.wrapDoc = this.wrapSpans.doc();
Nils Diewald20607ab2014-03-20 23:28:36 +0000359
Nils Diewald83c9b162015-02-03 21:05:07 +0000360 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000361 log.trace(" Forward wrap span to {}", _currentWrap()
362 .toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000363
Nils Diewald83c9b162015-02-03 21:05:07 +0000364 if (this.embeddedDoc != this.wrapDoc) {
365 if (DEBUG)
366 log.trace("Delete all span stores");
367 this.spanStore1.clear();
368 this.spanStore2.clear();
Nils Diewald6802acd2014-03-18 18:29:30 +0000369
Nils Diewald83c9b162015-02-03 21:05:07 +0000370 // Reset embedded:
371 this.embeddedStart = -1;
372 this.embeddedEnd = -1;
373 this.embeddedPayload = null;
Nils Diewald6802acd2014-03-18 18:29:30 +0000374
Nils Diewald83c9b162015-02-03 21:05:07 +0000375 if (!this.toSameDoc()) {
376 this.inSameDoc = false;
377 this.more = false;
378 return false;
379 };
380 }
381 else {
382 this.inSameDoc = true;
383 // Do not match with the current state
384 this.tryMatch = false;
385 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000386
Nils Diewald83c9b162015-02-03 21:05:07 +0000387 this.nextSpanB();
388 continue;
389 }
390 this.more = false;
391 this.inSameDoc = false;
392 this.spanStore1.clear();
393 this.spanStore2.clear();
394 return false;
395 };
Nils Diewald82a4b862014-02-20 21:17:41 +0000396
Nils Diewald83c9b162015-02-03 21:05:07 +0000397 // No more matches
398 return false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000399 };
400
401
402 /**
403 * Skip to the next document
404 */
405 private boolean toSameDoc () throws IOException {
Nils Diewald82a4b862014-02-20 21:17:41 +0000406
Nils Diewald83c9b162015-02-03 21:05:07 +0000407 if (DEBUG)
408 log.trace("Forward to find same docs");
Nils Diewald6802acd2014-03-18 18:29:30 +0000409
Nils Diewald83c9b162015-02-03 21:05:07 +0000410 /*
411 if (this.embeddedSpans == null) {
412 this.more = false;
413 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
414 this.inSameDoc = false;
415 return false;
416 };
417 */
Nils Diewald6802acd2014-03-18 18:29:30 +0000418
Nils Diewald83c9b162015-02-03 21:05:07 +0000419 this.more = true;
420 this.inSameDoc = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000421
Nils Diewaldbb33da22015-03-04 16:24:25 +0000422 this.wrapDoc = this.wrapSpans.doc();
Nils Diewald83c9b162015-02-03 21:05:07 +0000423 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewald6802acd2014-03-18 18:29:30 +0000424
Nils Diewald83c9b162015-02-03 21:05:07 +0000425 // Clear all spanStores
426 if (this.wrapDoc != this.embeddedDoc) {
427 /*
428 if (DEBUG)
429 log.trace("Clear all spanStores when moving forward");
430 // Why??
431 this.spanStore1.clear();
432 this.spanStore2.clear();
433 */
434 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000435
Nils Diewald83c9b162015-02-03 21:05:07 +0000436 // Last doc was reached
437 else if (this.wrapDoc == DocIdSetIterator.NO_MORE_DOCS) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000438 this.more = false;
439 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000440 this.inSameDoc = false;
441 return false;
442 }
443 else {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000444 if (DEBUG) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000445 log.trace("Current position already is in the same doc");
446 log.trace("Embedded: {}", _currentEmbedded().toString());
447 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000448 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000449 return true;
450 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000451
Nils Diewaldcd226862015-02-11 22:27:45 +0000452
Nils Diewald83c9b162015-02-03 21:05:07 +0000453 // Forward till match
454 while (this.wrapDoc != this.embeddedDoc) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000455
Nils Diewald83c9b162015-02-03 21:05:07 +0000456 // Forward wrapInfo
457 if (this.wrapDoc < this.embeddedDoc) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000458
Nils Diewald83c9b162015-02-03 21:05:07 +0000459 // Set document information
460 if (!wrapSpans.skipTo(this.embeddedDoc)) {
461 this.more = false;
462 this.inSameDoc = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000463 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000464 return false;
465 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000466
Nils Diewald83c9b162015-02-03 21:05:07 +0000467 if (DEBUG)
468 log.trace("Skip wrap to doc {}", this.embeddedDoc);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000469
Nils Diewald83c9b162015-02-03 21:05:07 +0000470 this.wrapDoc = this.wrapSpans.doc();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000471
Nils Diewald83c9b162015-02-03 21:05:07 +0000472 if (wrapDoc == DocIdSetIterator.NO_MORE_DOCS) {
473 this.more = false;
474 this.inSameDoc = false;
475 this.embeddedDoc = DocIdSetIterator.NO_MORE_DOCS;
476 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
477 return false;
478 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000479
480 /*
481 Remove stored information
482 */
483 if (DEBUG)
484 log.trace("Delete all span stores");
485
486 this.spanStore1.clear();
487 this.spanStore2.clear();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000488
Nils Diewaldcd226862015-02-11 22:27:45 +0000489 if (wrapDoc == embeddedDoc) {
490 this.wrapStart = this.wrapSpans.start();
491 this.embeddedStart = this.embeddedSpans.start();
492 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000493 return true;
Nils Diewaldcd226862015-02-11 22:27:45 +0000494 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000495
Nils Diewaldcd226862015-02-11 22:27:45 +0000496 this.wrapStart = -1;
497 this.embeddedStart = -1;
Nils Diewald83c9b162015-02-03 21:05:07 +0000498 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000499
Nils Diewald83c9b162015-02-03 21:05:07 +0000500 // Forward embedInfo
501 else if (this.wrapDoc > this.embeddedDoc) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000502
Nils Diewald83c9b162015-02-03 21:05:07 +0000503 // Set document information
504 if (!this.embeddedSpans.skipTo(this.wrapDoc)) {
505 this.more = false;
506 this.inSameDoc = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000507 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000508 return false;
509 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000510
Nils Diewald83c9b162015-02-03 21:05:07 +0000511 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000512
Nils Diewald83c9b162015-02-03 21:05:07 +0000513 if (this.embeddedDoc == DocIdSetIterator.NO_MORE_DOCS) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000514 this.more = false;
Nils Diewald83c9b162015-02-03 21:05:07 +0000515 this.inSameDoc = false;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000516 this.wrapDoc = DocIdSetIterator.NO_MORE_DOCS;
517 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000518 return false;
519 };
Nils Diewaldcd226862015-02-11 22:27:45 +0000520
521 if (DEBUG)
522 log.trace("Skip embedded to doc {}", this.embeddedDoc);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000523
Nils Diewaldcd226862015-02-11 22:27:45 +0000524 this.embeddedStart = this.embeddedSpans.start();
Nils Diewald83c9b162015-02-03 21:05:07 +0000525 this.embeddedEnd = -1;
526 this.embeddedPayload = null;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000527
Nils Diewaldcd226862015-02-11 22:27:45 +0000528 if (this.wrapDoc == this.embeddedDoc) {
529 this.matchDoc = this.embeddedDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000530 return true;
Nils Diewaldcd226862015-02-11 22:27:45 +0000531 };
Nils Diewald83c9b162015-02-03 21:05:07 +0000532 }
533 else {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000534 this.matchDoc = DocIdSetIterator.NO_MORE_DOCS;
Nils Diewald83c9b162015-02-03 21:05:07 +0000535 return false;
536 };
537 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000538
Nils Diewaldcd226862015-02-11 22:27:45 +0000539 this.matchDoc = this.wrapDoc;
Nils Diewald83c9b162015-02-03 21:05:07 +0000540 return true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000541 };
542
543
544 // Initialize spans
545 private boolean init () throws IOException {
546
Nils Diewald83c9b162015-02-03 21:05:07 +0000547 // There is a missing span
548 if (this.embeddedDoc >= 0)
549 return true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000550
Nils Diewald83c9b162015-02-03 21:05:07 +0000551 if (DEBUG)
552 log.trace("Initialize spans");
Nils Diewald6802acd2014-03-18 18:29:30 +0000553
Nils Diewald83c9b162015-02-03 21:05:07 +0000554 // First tick for both spans
555 if (!(this.embeddedSpans.next() && this.wrapSpans.next())) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000556
Nils Diewald83c9b162015-02-03 21:05:07 +0000557 if (DEBUG)
558 log.trace("No spans initialized");
Nils Diewaldbb33da22015-03-04 16:24:25 +0000559
Nils Diewald83c9b162015-02-03 21:05:07 +0000560 this.embeddedDoc = -1;
561 this.more = false;
562 return false;
563 };
564 this.more = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000565
Nils Diewald83c9b162015-02-03 21:05:07 +0000566 // Store current positions for wrapping and embedded spans
Nils Diewaldbb33da22015-03-04 16:24:25 +0000567 this.wrapDoc = this.wrapSpans.doc();
Nils Diewald83c9b162015-02-03 21:05:07 +0000568 this.embeddedDoc = this.embeddedSpans.doc();
Nils Diewald6802acd2014-03-18 18:29:30 +0000569
Nils Diewald83c9b162015-02-03 21:05:07 +0000570 // Set inSameDoc to true, if it is true
571 if (this.embeddedDoc == this.wrapDoc)
572 this.inSameDoc = true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000573
Nils Diewald83c9b162015-02-03 21:05:07 +0000574 return true;
Nils Diewaldf399a672013-11-18 17:55:22 +0000575 };
576
577
Nils Diewaldbb33da22015-03-04 16:24:25 +0000578 /**
579 * Skips to the first match beyond the current, whose document
580 * number is
581 * greater than or equal to <i>target</i>. <p>Returns true iff
582 * there is such
583 * a match. <p>Behaves as if written: <pre class="prettyprint">
584 * boolean skipTo(int target) {
585 * do {
586 * if (!next())
587 * return false;
588 * } while (target > doc());
589 * return true;
590 * }
Nils Diewaldf399a672013-11-18 17:55:22 +0000591 * </pre>
592 * Most implementations are considerably more efficient than that.
593 */
594 public boolean skipTo (int target) throws IOException {
Nils Diewald6802acd2014-03-18 18:29:30 +0000595
Nils Diewald83c9b162015-02-03 21:05:07 +0000596 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000597 log.trace("skipTo document {}/{} -> {}", this.embeddedDoc,
598 this.wrapDoc, target);
Nils Diewaldf399a672013-11-18 17:55:22 +0000599
Nils Diewald83c9b162015-02-03 21:05:07 +0000600 // Initialize spans
601 if (!this.init())
602 return false;
Nils Diewald82a4b862014-02-20 21:17:41 +0000603
Nils Diewaldcd226862015-02-11 22:27:45 +0000604 assert target > this.embeddedDoc;
Nils Diewald82a4b862014-02-20 21:17:41 +0000605
Nils Diewald83c9b162015-02-03 21:05:07 +0000606 // Only forward embedded spans
607 if (this.more && (this.embeddedDoc < target)) {
608 if (this.embeddedSpans.skipTo(target)) {
609 this.inSameDoc = false;
610 this.embeddedStart = -1;
611 this.embeddedEnd = -1;
612 this.embeddedPayload = null;
613 this.embeddedDoc = this.embeddedSpans.doc();
614 }
Nils Diewaldf399a672013-11-18 17:55:22 +0000615
Nils Diewald83c9b162015-02-03 21:05:07 +0000616 // Can't be skipped to target
617 else {
618 this.inSameDoc = false;
619 this.more = false;
620 return false;
621 };
622 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000623
Nils Diewald83c9b162015-02-03 21:05:07 +0000624 // Move to same doc
625 return this.toSameDoc();
Nils Diewaldf399a672013-11-18 17:55:22 +0000626 };
627
Nils Diewaldbb33da22015-03-04 16:24:25 +0000628
Nils Diewald6802acd2014-03-18 18:29:30 +0000629 private void nextSpanA () {
Nils Diewald83c9b162015-02-03 21:05:07 +0000630 if (DEBUG)
631 log.trace("Try wrap next time");
632 this.tryMatch = false;
633 this.nextSpanB = false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000634 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000635
Nils Diewaldbb33da22015-03-04 16:24:25 +0000636
Nils Diewald6802acd2014-03-18 18:29:30 +0000637 private void nextSpanB () {
Nils Diewald83c9b162015-02-03 21:05:07 +0000638 if (DEBUG)
639 log.trace("Try embedded next time");
640 this.nextSpanB = true;
Nils Diewald6802acd2014-03-18 18:29:30 +0000641 };
642
643
644 // Check if the current span constellation does match
645 // Store backtracking relevant data and say, how to proceed
646 private boolean doesMatch () {
Nils Diewaldcd226862015-02-11 22:27:45 +0000647 if (DEBUG)
648 log.trace("In the match test branch");
Nils Diewald6802acd2014-03-18 18:29:30 +0000649
Nils Diewaldcd226862015-02-11 22:27:45 +0000650 if (this.wrapStart == -1)
651 this.wrapStart = this.wrapSpans.start();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000652
Nils Diewaldcd226862015-02-11 22:27:45 +0000653 if (this.embeddedStart == -1) {
654 this.embeddedStart = this.embeddedSpans.start();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000655 this.embeddedEnd = this.embeddedSpans.end();
Nils Diewaldcd226862015-02-11 22:27:45 +0000656 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000657
Nils Diewaldcd226862015-02-11 22:27:45 +0000658 this.wrapEnd = -1;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000659
Nils Diewaldcd226862015-02-11 22:27:45 +0000660 // Shortcut to prevent lazyloading of .end()
661 if (this.wrapStart > this.embeddedStart) {
662 // Can't match for in, rin, ew, sw, and m
663 // and will always lead to next_b
664 if (flag >= WITHIN) {
665 this.nextSpanB();
666 if (DEBUG)
667 _logCurrentCase((byte) 16);
668 return false;
669 };
670 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000671
Nils Diewaldcd226862015-02-11 22:27:45 +0000672 else if (this.wrapStart < this.embeddedStart) {
673 // Can't match for sw and m and will always
674 // lead to next_a
675 if (flag >= STARTSWITH) {
676 this.nextSpanA();
677 if (DEBUG)
678 _logCurrentCase((byte) 15);
679 return false;
680 };
681 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000682
Nils Diewaldcd226862015-02-11 22:27:45 +0000683 // Now check correctly
684 byte currentCase = this.withinCase();
Nils Diewald6802acd2014-03-18 18:29:30 +0000685
Nils Diewaldcd226862015-02-11 22:27:45 +0000686 if (DEBUG)
687 _logCurrentCase(currentCase);
Nils Diewald6802acd2014-03-18 18:29:30 +0000688
Nils Diewaldcd226862015-02-11 22:27:45 +0000689 boolean match = false;
Nils Diewald6802acd2014-03-18 18:29:30 +0000690
Nils Diewaldcd226862015-02-11 22:27:45 +0000691 // Test case
692 if (currentCase >= (byte) 3 && currentCase <= (byte) 11) {
693 switch (flag) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000694
Nils Diewaldbb33da22015-03-04 16:24:25 +0000695 case WITHIN:
696 if (currentCase >= 6 && currentCase <= 10
697 && currentCase != 8)
698 match = true;
699 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000700
Nils Diewaldbb33da22015-03-04 16:24:25 +0000701 case REAL_WITHIN:
702 if (currentCase == 6 || currentCase == 9
703 || currentCase == 10)
704 match = true;
705 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000706
Nils Diewaldbb33da22015-03-04 16:24:25 +0000707 case MATCH:
708 if (currentCase == 7)
709 match = true;
710 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000711
Nils Diewaldbb33da22015-03-04 16:24:25 +0000712 case STARTSWITH:
713 if (currentCase == 7 || currentCase == 6)
714 match = true;
715 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000716
Nils Diewaldbb33da22015-03-04 16:24:25 +0000717 case ENDSWITH:
718 if (currentCase == 7 || currentCase == 10)
719 match = true;
720 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000721
Nils Diewaldbb33da22015-03-04 16:24:25 +0000722 case OVERLAP:
Nils Diewaldcd226862015-02-11 22:27:45 +0000723 match = true;
Nils Diewaldbb33da22015-03-04 16:24:25 +0000724 break;
725
726 case REAL_OVERLAP:
727 if (currentCase == 3 || currentCase == 11)
728 match = true;
729 break;
Nils Diewaldcd226862015-02-11 22:27:45 +0000730 };
731 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000732
Nils Diewaldcd226862015-02-11 22:27:45 +0000733 try {
734 this.todo(currentCase);
735 }
736 catch (IOException e) {
737 return false;
738 }
739 return match;
Nils Diewald6802acd2014-03-18 18:29:30 +0000740 };
741
742
743 private void _logCurrentCase (byte currentCase) {
Nils Diewaldcd226862015-02-11 22:27:45 +0000744 log.trace("Current Case is {}", currentCase);
Nils Diewaldbb33da22015-03-04 16:24:25 +0000745
Nils Diewaldcd226862015-02-11 22:27:45 +0000746 String _e = _currentEmbedded().toString();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000747
Nils Diewaldcd226862015-02-11 22:27:45 +0000748 log.trace(" |---| {}", _currentWrap().toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000749
Nils Diewaldcd226862015-02-11 22:27:45 +0000750 switch (currentCase) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000751 case 1:
752 log.trace("|-| {}", _e);
753 break;
754 case 2:
755 log.trace("|---| {}", _e);
756 break;
757 case 3:
758 log.trace(" |---| {}", _e);
759 break;
760 case 4:
761 log.trace(" |-----| {}", _e);
762 break;
763 case 5:
764 log.trace(" |-------| {}", _e);
765 break;
766 case 6:
767 log.trace(" |-| {}", _e);
768 break;
769 case 7:
770 log.trace(" |---| {}", _e);
771 break;
772 case 8:
773 log.trace(" |-----| {}", _e);
774 break;
775 case 9:
776 log.trace(" |-| {}", _e);
777 break;
778 case 10:
779 log.trace(" |-| {}", _e);
780 break;
781 case 11:
782 log.trace(" |---| {}", _e);
783 break;
784 case 12:
785 log.trace(" |-| {}", _e);
786 break;
787 case 13:
788 log.trace(" |-| {}", _e);
789 break;
Nils Diewald6802acd2014-03-18 18:29:30 +0000790
Nils Diewaldbb33da22015-03-04 16:24:25 +0000791 case 15:
792 // Fake case
793 log.trace(" |---? {}", _e);
794 break;
795
796 case 16:
797 // Fake case
798 log.trace(" |---? {}", _e);
799 break;
Nils Diewaldcd226862015-02-11 22:27:45 +0000800 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000801 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000802
Nils Diewald6802acd2014-03-18 18:29:30 +0000803
Nils Diewald41750bf2015-02-06 17:45:20 +0000804 private WithinSpan _currentWrap () {
805 WithinSpan _wrap = new WithinSpan();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000806 _wrap.start = this.wrapStart != -1 ? this.wrapStart : this.wrapSpans
807 .start();
808 _wrap.end = this.wrapEnd != -1 ? this.wrapEnd : this.wrapSpans.end();
809 _wrap.doc = this.wrapDoc != -1 ? this.wrapDoc : this.wrapSpans.doc();
Nils Diewald83c9b162015-02-03 21:05:07 +0000810 return _wrap;
Nils Diewald6802acd2014-03-18 18:29:30 +0000811 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000812
813
Nils Diewald41750bf2015-02-06 17:45:20 +0000814 private WithinSpan _currentEmbedded () {
815 WithinSpan _embedded = new WithinSpan();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000816 _embedded.start = this.embeddedStart != -1 ? this.embeddedStart
817 : this.embeddedSpans.start();
818 _embedded.end = this.embeddedEnd != -1 ? this.embeddedEnd
819 : this.embeddedSpans.end();
820 _embedded.doc = this.embeddedDoc != -1 ? this.embeddedDoc
821 : this.embeddedSpans.doc();
Nils Diewald83c9b162015-02-03 21:05:07 +0000822 return _embedded;
Nils Diewald6802acd2014-03-18 18:29:30 +0000823 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000824
Nils Diewald6802acd2014-03-18 18:29:30 +0000825
826 private void todo (byte currentCase) throws IOException {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000827 /*
828 Check what to do next with the spans.
829
830 The different follow up steps are:
831 - storeEmbedded -> store span B for later checks
832 - nextSpanA -> forward a
833 - nextSpanB -> forward b
Nils Diewald6802acd2014-03-18 18:29:30 +0000834
Nils Diewaldbb33da22015-03-04 16:24:25 +0000835 These rules were automatically generated
836 */
Nils Diewald6802acd2014-03-18 18:29:30 +0000837
Nils Diewaldbb33da22015-03-04 16:24:25 +0000838 // Case 1, 2
839 if (currentCase <= (byte) 2) {
Nils Diewald83c9b162015-02-03 21:05:07 +0000840 this.nextSpanB();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000841 }
842
843 // Case 12, 13
844 else if (currentCase >= (byte) 12) {
845 this.storeEmbedded();
Nils Diewald83c9b162015-02-03 21:05:07 +0000846 this.nextSpanA();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000847 }
848
849 // Case 3, 4, 5, 8
850 else if (currentCase <= (byte) 5 || currentCase == (byte) 8) {
851 if (flag <= 2)
852 this.storeEmbedded();
Nils Diewald83c9b162015-02-03 21:05:07 +0000853 this.nextSpanB();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000854 }
855
856 // Case 11
857 else if (currentCase == (byte) 11) {
858 if (this.flag == REAL_WITHIN) {
859 this.nextSpanB();
860 }
861 else if (this.flag >= STARTSWITH) {
862 this.nextSpanA();
863 }
864 else {
865 this.storeEmbedded();
866 this.nextSpanB();
867 };
868 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000869
870
Nils Diewaldbb33da22015-03-04 16:24:25 +0000871 // Case 6, 7, 9, 10
872 else {
Nils Diewald6802acd2014-03-18 18:29:30 +0000873
Nils Diewaldbb33da22015-03-04 16:24:25 +0000874 if (
875 // Case 6
876 (currentCase == (byte) 6 && this.flag == MATCH) ||
Nils Diewald6802acd2014-03-18 18:29:30 +0000877
Nils Diewaldbb33da22015-03-04 16:24:25 +0000878 // Case 7
879 (currentCase == (byte) 7 && this.flag == REAL_WITHIN) ||
880
881 // Case 9, 10
882 (currentCase >= (byte) 9 && this.flag >= STARTSWITH)) {
883
884 this.nextSpanA();
885 }
886 else {
887 this.storeEmbedded();
888 this.nextSpanB();
889 };
890 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000891 };
892
Nils Diewaldbb33da22015-03-04 16:24:25 +0000893
Nils Diewald83c9b162015-02-03 21:05:07 +0000894 // Store the current embedded span in the first spanStore
Nils Diewald6802acd2014-03-18 18:29:30 +0000895 private void storeEmbedded () throws IOException {
896
Nils Diewald83c9b162015-02-03 21:05:07 +0000897 // Create a current copy
Nils Diewald41750bf2015-02-06 17:45:20 +0000898 WithinSpan embedded = new WithinSpan();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000899 embedded.start = this.embeddedStart != -1 ? this.embeddedStart
900 : this.embeddedSpans.start();
901 embedded.end = this.embeddedEnd != -1 ? this.embeddedEnd
902 : this.embeddedSpans.end();
903 embedded.doc = this.embeddedDoc;
Nils Diewald6802acd2014-03-18 18:29:30 +0000904
Nils Diewald83c9b162015-02-03 21:05:07 +0000905 // Copy payloads
906 if (this.embeddedPayload != null) {
Nils Diewaldbb33da22015-03-04 16:24:25 +0000907 embedded.payload = new ArrayList<byte[]>(
908 this.embeddedPayload.size());
Nils Diewald83c9b162015-02-03 21:05:07 +0000909 embedded.payload.addAll(this.embeddedPayload);
910 }
911 else if (this.embeddedSpans.isPayloadAvailable()) {
912 embedded.payload = new ArrayList<byte[]>(3);
913 Collection<byte[]> payload = this.embeddedSpans.getPayload();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000914
Nils Diewald83c9b162015-02-03 21:05:07 +0000915 this.embeddedPayload = new ArrayList<byte[]>(payload.size());
916 this.embeddedPayload.addAll(payload);
917 embedded.payload.addAll(payload);
918 };
Nils Diewald6802acd2014-03-18 18:29:30 +0000919
Nils Diewald83c9b162015-02-03 21:05:07 +0000920 this.spanStore1.add(embedded);
Nils Diewald6802acd2014-03-18 18:29:30 +0000921
Nils Diewald83c9b162015-02-03 21:05:07 +0000922 if (DEBUG)
Nils Diewaldbb33da22015-03-04 16:24:25 +0000923 log.trace("Pushed to spanStore 1 {} (in storeEmbedded)",
924 embedded.toString());
Nils Diewald6802acd2014-03-18 18:29:30 +0000925 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000926
Nils Diewald6802acd2014-03-18 18:29:30 +0000927
928 // Return case number
929 private byte withinCase () {
930
Nils Diewaldcd226862015-02-11 22:27:45 +0000931 // case 1-5
932 if (this.wrapStart > this.embeddedStart) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000933
Nils Diewaldcd226862015-02-11 22:27:45 +0000934 // Case 1
935 // |-|
936 // |-|
937 if (this.wrapStart > this.embeddedEnd) {
938 return (byte) 1;
939 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000940
Nils Diewaldcd226862015-02-11 22:27:45 +0000941 // Case 2
942 // |-|
943 // |-|
944 else if (this.wrapStart == this.embeddedEnd) {
945 return (byte) 2;
946 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000947
Nils Diewaldcd226862015-02-11 22:27:45 +0000948 // Load wrapEnd
949 this.wrapEnd = this.wrapSpans.end();
Nils Diewaldbb33da22015-03-04 16:24:25 +0000950
Nils Diewaldcd226862015-02-11 22:27:45 +0000951 // Case 3
952 // |---|
953 // |---|
954 if (this.wrapEnd > this.embeddedEnd) {
955 return (byte) 3;
956 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000957
Nils Diewaldcd226862015-02-11 22:27:45 +0000958 // Case 4
959 // |-|
960 // |---|
961 else if (this.wrapEnd == this.embeddedEnd) {
962 return (byte) 4;
963 };
Nils Diewaldbb33da22015-03-04 16:24:25 +0000964
Nils Diewaldcd226862015-02-11 22:27:45 +0000965 // Case 5
966 // |-|
967 // |---|
968 return (byte) 5;
969 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000970
Nils Diewaldcd226862015-02-11 22:27:45 +0000971 // case 6-8
972 else if (this.wrapStart == this.embeddedStart) {
Nils Diewald6802acd2014-03-18 18:29:30 +0000973
Nils Diewaldcd226862015-02-11 22:27:45 +0000974 // Load wrapEnd
975 this.wrapEnd = this.wrapSpans.end();
Nils Diewald6802acd2014-03-18 18:29:30 +0000976
Nils Diewaldcd226862015-02-11 22:27:45 +0000977 // Case 6
978 // |---|
979 // |-|
980 if (this.wrapEnd > this.embeddedEnd) {
981 return (byte) 6;
982 }
Nils Diewald6802acd2014-03-18 18:29:30 +0000983
Nils Diewaldcd226862015-02-11 22:27:45 +0000984 // Case 7
985 // |---|
986 // |---|
987 else if (this.wrapEnd == this.embeddedEnd) {
988 return (byte) 7;
989 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000990
Nils Diewaldcd226862015-02-11 22:27:45 +0000991 // Case 8
992 // |-|
993 // |---|
994 return (byte) 8;
995 }
Nils Diewaldbb33da22015-03-04 16:24:25 +0000996
Nils Diewaldcd226862015-02-11 22:27:45 +0000997 // wrapStart < embeddedStart
Nils Diewaldf399a672013-11-18 17:55:22 +0000998
Nils Diewaldcd226862015-02-11 22:27:45 +0000999 // Load wrapEnd
1000 this.wrapEnd = this.wrapSpans.end();
Nils Diewald6802acd2014-03-18 18:29:30 +00001001
Nils Diewaldcd226862015-02-11 22:27:45 +00001002 // Case 13
1003 // |-|
1004 // |-|
1005 if (this.wrapEnd < this.embeddedStart) {
1006 return (byte) 13;
1007 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001008
Nils Diewaldcd226862015-02-11 22:27:45 +00001009 // Case 9
1010 // |---|
1011 // |-|
1012 else if (this.wrapEnd > this.embeddedEnd) {
1013 return (byte) 9;
1014 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001015
Nils Diewaldcd226862015-02-11 22:27:45 +00001016 // Case 10
1017 // |---|
1018 // |-|
1019 else if (this.wrapEnd == this.embeddedEnd) {
1020 return (byte) 10;
1021 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001022
Nils Diewaldcd226862015-02-11 22:27:45 +00001023 // Case 11
1024 // |---|
1025 // |---|
1026 else if (this.wrapEnd > this.embeddedStart) {
1027 return (byte) 11;
1028 }
Nils Diewaldbb33da22015-03-04 16:24:25 +00001029
Nils Diewaldcd226862015-02-11 22:27:45 +00001030 // case 12
1031 // |-|
1032 // |-|
1033 return (byte) 12;
Nils Diewaldbb33da22015-03-04 16:24:25 +00001034 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001035
1036
Nils Diewaldbb33da22015-03-04 16:24:25 +00001037 /**
1038 * Returns the document number of the current match. Initially
1039 * invalid.
1040 */
Nils Diewaldf399a672013-11-18 17:55:22 +00001041 @Override
1042 public int doc () {
Nils Diewaldcd226862015-02-11 22:27:45 +00001043 return matchDoc;
Nils Diewaldf399a672013-11-18 17:55:22 +00001044 };
1045
Nils Diewaldbb33da22015-03-04 16:24:25 +00001046
1047 /**
1048 * Returns the start position of the embedding wrap. Initially
1049 * invalid.
1050 */
Nils Diewaldf399a672013-11-18 17:55:22 +00001051 @Override
1052 public int start () {
Nils Diewaldcd226862015-02-11 22:27:45 +00001053 return matchStart;
Nils Diewaldf399a672013-11-18 17:55:22 +00001054 };
1055
Nils Diewaldbb33da22015-03-04 16:24:25 +00001056
1057 /**
1058 * Returns the end position of the embedding wrap. Initially
1059 * invalid.
1060 */
Nils Diewaldf399a672013-11-18 17:55:22 +00001061 @Override
1062 public int end () {
Nils Diewaldbb33da22015-03-04 16:24:25 +00001063 return matchEnd;
Nils Diewaldf399a672013-11-18 17:55:22 +00001064 };
1065
Nils Diewaldbb33da22015-03-04 16:24:25 +00001066
Nils Diewaldf399a672013-11-18 17:55:22 +00001067 /**
1068 * Returns the payload data for the current span.
1069 * This is invalid until {@link #next()} is called for
1070 * the first time.
1071 * This method must not be called more than once after each call
1072 * of {@link #next()}. However, most payloads are loaded lazily,
1073 * so if the payload data for the current position is not needed,
Nils Diewaldbb33da22015-03-04 16:24:25 +00001074 * this method may not be called at all for performance reasons.
1075 * An ordered
1076 * SpanQuery does not lazy load, so if you have payloads in your
1077 * index and
1078 * you do not want ordered SpanNearQuerys to collect payloads, you
1079 * can
Nils Diewalde0725012014-09-25 19:32:52 +00001080 * disable collection with a constructor option.<br>
Nils Diewaldf399a672013-11-18 17:55:22 +00001081 * <br>
Nils Diewaldbb33da22015-03-04 16:24:25 +00001082 * Note that the return type is a collection, thus the ordering
1083 * should not be relied upon.
Nils Diewaldf399a672013-11-18 17:55:22 +00001084 * <br/>
Nils Diewaldbb33da22015-03-04 16:24:25 +00001085 *
Nils Diewaldf399a672013-11-18 17:55:22 +00001086 * @lucene.experimental
Nils Diewaldbb33da22015-03-04 16:24:25 +00001087 *
1088 * @return a List of byte arrays containing the data of this
1089 * payload, otherwise null if isPayloadAvailable is false
1090 * @throws IOException
1091 * if there is a low-level I/O error
Nils Diewaldf399a672013-11-18 17:55:22 +00001092 */
1093 // public abstract Collection<byte[]> getPayload() throws IOException;
1094 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001095 public Collection<byte[]> getPayload () throws IOException {
1096 return matchPayload;
Nils Diewaldf399a672013-11-18 17:55:22 +00001097 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001098
Nils Diewaldf399a672013-11-18 17:55:22 +00001099
1100 /**
1101 * Checks if a payload can be loaded at this position.
1102 * <p/>
Nils Diewaldbb33da22015-03-04 16:24:25 +00001103 * Payloads can only be loaded once per call to {@link #next()}.
1104 *
1105 * @return true if there is a payload available at this position
1106 * that can be loaded
Nils Diewaldf399a672013-11-18 17:55:22 +00001107 */
1108 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001109 public boolean isPayloadAvailable () {
1110 return matchPayload.isEmpty() == false;
Nils Diewaldf399a672013-11-18 17:55:22 +00001111 };
1112
Nils Diewaldbb33da22015-03-04 16:24:25 +00001113
Nils Diewaldf399a672013-11-18 17:55:22 +00001114 // Todo: This may be in the wrong version
1115 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001116 public long cost () {
1117 return wrapSpans.cost() + embeddedSpans.cost();
Nils Diewaldf399a672013-11-18 17:55:22 +00001118 };
1119
Nils Diewaldbb33da22015-03-04 16:24:25 +00001120
Nils Diewaldf399a672013-11-18 17:55:22 +00001121 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001122 public String toString () {
1123 return getClass().getName()
1124 + "("
1125 + query.toString()
1126 + ")@"
1127 + (embeddedDoc <= 0 ? "START" : (more ? (doc() + ":" + start()
1128 + "-" + end()) : "END"));
Nils Diewaldf399a672013-11-18 17:55:22 +00001129 };
Nils Diewald41750bf2015-02-06 17:45:20 +00001130
1131
1132 // This was formerly the default candidate span class,
1133 // before it was refactored out
Nils Diewaldbb33da22015-03-04 16:24:25 +00001134 private class WithinSpan implements Comparable<WithinSpan>, Cloneable {
1135 public int start = -1, end = -1, doc = -1;
Nils Diewald41750bf2015-02-06 17:45:20 +00001136
1137 public Collection<byte[]> payload;
1138
1139 public short elementRef = -1;
Nils Diewaldbb33da22015-03-04 16:24:25 +00001140
1141
Nils Diewald41750bf2015-02-06 17:45:20 +00001142 public void clear () {
1143 this.start = -1;
1144 this.end = -1;
1145 this.doc = -1;
1146 clearPayload();
1147 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001148
1149
Nils Diewald41750bf2015-02-06 17:45:20 +00001150 @Override
1151 public int compareTo (WithinSpan o) {
1152 /* optimizable for short numbers to return o.end - this.end */
1153 if (this.doc < o.doc) {
1154 return -1;
1155 }
1156 else if (this.doc == o.doc) {
1157 if (this.start < o.start) {
1158 return -1;
1159 }
1160 else if (this.start == o.start) {
1161 if (this.end < o.end)
1162 return -1;
1163 };
1164 };
1165 return 1;
1166 };
1167
Nils Diewaldbb33da22015-03-04 16:24:25 +00001168
1169 public short getElementRef () {
Nils Diewald41750bf2015-02-06 17:45:20 +00001170 return elementRef;
1171 }
1172
Nils Diewaldbb33da22015-03-04 16:24:25 +00001173
1174 public void setElementRef (short elementRef) {
Nils Diewald41750bf2015-02-06 17:45:20 +00001175 this.elementRef = elementRef;
1176 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001177
1178
Nils Diewald41750bf2015-02-06 17:45:20 +00001179 @Override
Nils Diewaldbb33da22015-03-04 16:24:25 +00001180 public Object clone () {
Nils Diewald41750bf2015-02-06 17:45:20 +00001181 WithinSpan span = new WithinSpan();
1182 span.start = this.start;
1183 span.end = this.end;
1184 span.doc = this.doc;
1185 span.payload.addAll(this.payload);
1186 return span;
1187 };
1188
Nils Diewaldbb33da22015-03-04 16:24:25 +00001189
Nils Diewald41750bf2015-02-06 17:45:20 +00001190 public WithinSpan copyFrom (WithinSpan o) {
1191 this.start = o.start;
1192 this.end = o.end;
1193 this.doc = o.doc;
1194 // this.clearPayload();
1195 this.payload.addAll(o.payload);
1196 return this;
1197 };
Nils Diewaldbb33da22015-03-04 16:24:25 +00001198
1199
Nils Diewald41750bf2015-02-06 17:45:20 +00001200 public void clearPayload () {
1201 if (this.payload != null)
1202 this.payload.clear();
1203 };
1204
Nils Diewaldbb33da22015-03-04 16:24:25 +00001205
Nils Diewald41750bf2015-02-06 17:45:20 +00001206 public String toString () {
1207 StringBuilder sb = new StringBuilder("[");
Nils Diewaldbb33da22015-03-04 16:24:25 +00001208 return sb.append(this.start).append('-').append(this.end)
1209 .append('(').append(this.doc).append(')').append(']')
1210 .toString();
Nils Diewald41750bf2015-02-06 17:45:20 +00001211 };
1212 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001213};