blob: 6a1c3d87bbaae98e4a9381cb4b0161398742aaa4 [file] [log] [blame]
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
import java.util.Collection;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.query.SimpleSpanQuery;
/**
 * An abstract class for Span enumeration whose two child spans are matched by
 * their positions and do not have a partial overlap.
 *
 * <p>Subclasses define the actual matching rule in {@link #findMatch()}; this
 * class moves the first and second child spans forward according to the value
 * returned by that method and gathers the payloads of both child spans whenever
 * a match is reported.
 *
 * @author margaretha
 */
public abstract class NonPartialOverlappingSpans extends SimpleSpans {

    // One logger for the class: it carries no per-instance state, so there is
    // no reason to look it up again for every Spans object that is created.
    private static final Logger log =
            LoggerFactory.getLogger(NonPartialOverlappingSpans.class);

    // Compile-time switch: while this is false, the compiler can drop all
    // logging statements guarded by it.
    public static final boolean DEBUG = false;

    /**
     * Creates the span enumeration and moves the second child spans to their
     * first position; the result of that move initialises
     * {@code hasMoreSpans}.
     *
     * @param simpleSpanQuery the query this enumeration is created for
     * @param context the reader context, passed on to the superclass
     * @param acceptDocs the accepted documents, passed on to the superclass
     * @param termContexts the term contexts, passed on to the superclass
     * @throws IOException if the superclass constructor or the initial move of
     *         the second child spans fails
     */
    public NonPartialOverlappingSpans(SimpleSpanQuery simpleSpanQuery,
            AtomicReaderContext context, Bits acceptDocs,
            Map<Term, TermContext> termContexts) throws IOException {
        super(simpleSpanQuery, context, acceptDocs, termContexts);

        // Warning: not implemented, results in errors for SpanNextQuery
        // this.collectPayloads = simpleSpanQuery.isCollectPayloads()
        collectPayloads = true;
        hasMoreSpans = secondSpans.next();
    }

    /**
     * Moves the first child spans one step forward and then searches for the
     * next match.
     *
     * @return {@code true} iff a match has been found
     * @throws IOException if one of the child spans fails to move forward
     */
    @Override
    public boolean next() throws IOException {
        // Warning: this does not work for overlapping spans
        // e.g. get multiple second spans in a firstspan

        // The non-short-circuit '&=' is intentional: firstSpans.next() is
        // called even when hasMoreSpans is already false.
        hasMoreSpans &= firstSpans.next();
        isStartEnumeration = false;
        matchPayload.clear();
        return advance();
    }

    /**
     * Advance is a lucene terminology to search for the next match.
     *
     * <p>As long as both child spans are available and
     * {@code ensureSameDoc(firstSpans, secondSpans)} holds, {@link #findMatch()}
     * decides whether the current pair is a match or which child spans have to
     * move forward.
     *
     * @return {@code true} iff a match has been found
     * @throws IOException if one of the child spans fails to move forward
     */
    protected boolean advance() throws IOException {
        // The complexity is linear for searching in a document.
        // It's better if we can skip to >= position in a document.
        while (hasMoreSpans && ensureSameDoc(firstSpans, secondSpans)) {
            int matchCase = findMatch();
            if (matchCase == 0) {
                if (DEBUG) {
                    log.trace("Match doc#: {}", matchDocNumber);
                    log.trace("Match positions: {}-{}", matchStartPosition,
                            matchEndPosition);
                }
                doCollectPayloads();
                return true;
            }
            else if (matchCase == 1) {
                hasMoreSpans = secondSpans.next();
            }
            else {
                // Every value other than 0 and 1 moves the first spans.
                hasMoreSpans = firstSpans.next();
            }
        }
        return false;
    }

    /**
     * Specify the condition for a match.
     *
     * @return 0 iff match is found,
     *         -1 to advance the firstspan,
     *         1 to advance the secondspan
     */
    protected abstract int findMatch();

    /**
     * Collecting available payloads from the current first and second spans.
     * Nothing is collected when {@code collectPayloads} is false.
     *
     * @throws IOException if reading a payload from a child span fails
     */
    private void doCollectPayloads() throws IOException {
        if (collectPayloads) {
            if (DEBUG) {
                log.trace("Collect payloads");
            }
            if (firstSpans.isPayloadAvailable()) {
                Collection<byte[]> payload = firstSpans.getPayload();
                if (DEBUG) {
                    log.trace("Found {} payloads in firstSpans", payload.size());
                }
                matchPayload.addAll(payload);
            }
            if (secondSpans.isPayloadAvailable()) {
                Collection<byte[]> payload = secondSpans.getPayload();
                if (DEBUG) {
                    log.trace("Found {} payloads in secondSpans", payload.size());
                }
                matchPayload.addAll(payload);
            }
        }
    }

    /**
     * Skips the first child spans to {@code target} when they are still
     * positioned in a document before it, and then searches for the next
     * match.
     *
     * @param target the document number to skip to
     * @return {@code true} iff a match has been found
     * @throws IOException if one of the child spans fails to move forward
     */
    @Override
    public boolean skipTo(int target) throws IOException {
        if (hasMoreSpans && (firstSpans.doc() < target)) {
            if (!firstSpans.skipTo(target)) {
                hasMoreSpans = false;
                return false;
            }
        }
        matchPayload.clear();
        return advance();
    }

    /**
     * @return the sum of the costs of the first and the second child spans
     */
    @Override
    public long cost() {
        return firstSpans.cost() + secondSpans.cost();
    }
}