blob: cb555c429dd6d2faa9bd0790aba1024a89e1ebf6 [file] [log] [blame]
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.query.SpanElementQuery;
/**
* Enumeration of spans representing elements such as phrases,
* sentences and paragraphs. Span length is stored as a payload.
*
* Depth and certainty value payloads have not been loaded and handled
* yet.
*
* @author margaretha
* @author diewald
*/
public final class ElementSpans extends SimpleSpans {

    private static final Logger log = LoggerFactory
            .getLogger(ElementSpans.class);

    // This advises the java compiler to ignore all loggings
    public static final boolean DEBUG = false;

    // Byte offset of the end position (int) inside an element payload
    private static final int END_POSITION_OFFSET = 9;
    // Byte offset of the span id (short) inside an element payload
    private static final int SPAN_ID_OFFSET = 14;

    // The same object as firstSpans, cast once in the constructor
    private final TermSpans termSpans;

    // True once the payload of the current span has been decoded
    private boolean isPayloadLoaded;

    /**
     * Type markers found in the first byte of a payload.
     */
    public static enum PayloadTypeIdentifier {
        ELEMENT(64),
        // ELEMENT_WITH_TUI(65),
        // ELEMENT_WITH_CERTAINTY_VALUE (66),
        // ELEMENT_WITH_TUI_AND_CERTAINTY_VALUE (67),
        MILESTONE(65);

        private final byte value;

        private PayloadTypeIdentifier (int value) {
            this.value = (byte) value;
        }
    }


    /**
     * Constructs ElementSpans for the given {@link SpanElementQuery}.
     *
     * @param spanElementQuery
     *            A {@link SpanElementQuery}.
     * @param context
     *            The {@link LeafReaderContext}.
     * @param acceptDocs
     *            Bit vector representing the documents
     *            to be searched in.
     * @param termContexts
     *            A map managing {@link TermState TermStates}.
     * @throws IOException
     */
    public ElementSpans (SpanElementQuery spanElementQuery,
                         LeafReaderContext context, Bits acceptDocs,
                         Map<Term, TermContext> termContexts)
            throws IOException {
        super(spanElementQuery, context, acceptDocs, termContexts);
        termSpans = (TermSpans) this.firstSpans;
        hasMoreSpans = true;
        if (DEBUG)
            log.trace("Create ElementSpan");
    }


    /**
     * Moves to the next element span.
     *
     * @return <code>true</code> if a further span exists,
     *         <code>false</code> otherwise.
     * @throws IOException
     */
    @Override
    public boolean next () throws IOException {
        if (DEBUG)
            log.trace("Next element span for element");

        if (!hasMoreSpans || !(hasMoreSpans = termSpans.next()))
            return false;

        isStartEnumeration = false;
        return advance();
    }


    /*
     * Takes over document number and start position of the
     * current term span and resets everything that is derived
     * from the payload, so that no state of a previous span
     * leaks into the current one - no matter whether the span
     * was reached by next() or by skipTo().
     */
    private boolean advance () throws IOException {
        this.matchStartPosition = termSpans.start();
        this.matchDocNumber = termSpans.doc();
        this.matchEndPosition = -1;
        this.matchPayload = null;
        this.isPayloadLoaded = false;
        return true;
    }


    /*
     * Process payload lazily.
     * This may have a little impact on queries like
     * position queries, where spans can be rejected
     * solely based on their starting and doc position.
     */
    private void loadPayload () {
        if (this.isPayloadLoaded)
            return;
        this.isPayloadLoaded = true;

        final Collection<byte[]> payload;
        try {
            payload = termSpans.getPayload();
        }
        catch (IOException e) {
            // The accessors calling this method can't throw
            // IOException, so fall back to an empty element,
            // but leave a trace of the failure.
            log.warn("Unable to load element payload", e);
            this.setEmptyElement();
            return;
        }

        // Only the first payload entry is evaluated
        final byte[] bytes = (payload == null || payload.isEmpty()) ? null
                : payload.iterator().next();

        if (bytes == null || bytes.length == 0) {
            this.setEmptyElement();
            return;
        }

        final int length = bytes.length;
        final ByteBuffer bb = ByteBuffer.allocate(length);
        bb.put(bytes);

        // The first byte holds the payload type
        this.payloadTypeIdentifier = bb.get(0);

        // NOTE(review): for milestones the end position is not
        // taken from the payload and stays at the -1 set by
        // advance() - confirm that this is intended.
        if (payloadTypeIdentifier != PayloadTypeIdentifier.MILESTONE.value) {
            if (length >= END_POSITION_OFFSET + 4) {
                this.matchEndPosition = bb.getInt(END_POSITION_OFFSET);
            }
            else {
                // Payload too short to contain an end position
                this.matchEndPosition = this.matchStartPosition;
            }
        }

        if (payloadTypeIdentifier == PayloadTypeIdentifier.ELEMENT.value
                && length >= SPAN_ID_OFFSET + 2) {
            this.setSpanId(bb.getShort(SPAN_ID_OFFSET));
            this.hasSpanId = true;
        }
        else {
            this.setSpanId((short) -1);
            this.hasSpanId = false;
        }

        // FIX ME
        // Copy the start and end character offsets
        // b = Arrays.copyOfRange(bb.array(), 1, 9);
        // this.matchPayload = Collections.singletonList(b);
        this.matchPayload = Collections.singletonList(bb.array());
    }


    /*
     * Sets the state for a span without usable payload:
     * The end equals the start, there is no span id
     * and no payload.
     */
    private void setEmptyElement () {
        this.matchEndPosition = this.matchStartPosition;
        this.setSpanId((short) -1);
        this.hasSpanId = false;
        this.matchPayload = null;
    }


    /**
     * Returns the end position of the current span.
     * Triggers the lazy decoding of the payload.
     */
    @Override
    public int end () {
        this.loadPayload();
        return this.matchEndPosition;
    }


    /**
     * Returns the payload of the current span, which is
     * <code>null</code> if no payload could be loaded.
     * Triggers the lazy decoding of the payload.
     */
    @Override
    public Collection<byte[]> getPayload () {
        this.loadPayload();
        return this.matchPayload;
    }


    /**
     * Checks if the current span has a payload.
     * Triggers the lazy decoding of the payload.
     *
     * @return <code>true</code> if a non-empty payload was loaded,
     *         <code>false</code> otherwise.
     */
    @Override
    public boolean isPayloadAvailable () {
        this.loadPayload();
        // matchPayload is null whenever no payload could be loaded
        return this.matchPayload != null && !this.matchPayload.isEmpty();
    }


    /**
     * Returns the span id of the current span, which is -1
     * if the payload did not provide one.
     * Triggers the lazy decoding of the payload.
     */
    @Override
    public short getSpanId () {
        this.loadPayload();
        return spanId;
    }


    /**
     * Skips to the given target document. The enumeration is
     * marked as exhausted if there are no more spans, if the
     * current document is not before the target, or if the
     * underlying spans can't skip to the target.
     *
     * @param target
     *            The document number to skip to.
     * @return <code>true</code> if a span was found,
     *         <code>false</code> otherwise.
     * @throws IOException
     */
    @Override
    public boolean skipTo (int target) throws IOException {
        if (DEBUG)
            log.trace("Skip ElementSpans {} -> {}", firstSpans.doc(), target);

        if (hasMoreSpans && firstSpans.doc() < target
                && firstSpans.skipTo(target)) {
            return this.advance();
        }

        hasMoreSpans = false;
        this.matchPayload = null;
        return false;
    }


    /**
     * Returns the cost reported by the underlying term spans.
     */
    @Override
    public long cost () {
        return termSpans.cost();
    }
}