blob: 201e9df6c4d44239372c3b496c5421d3167e909c [file] [log] [blame]
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.query.SpanAttributeQuery;
/**
* Span enumeration of attributes which are term spans with special payload
* assignments referring to another span (e.g. element/relation span) to which
* an attribute span belongs. The class is basically a wrapper of Lucene
* {@link TermSpans} with additional functionality regarding element/relation
* reference. Element/relation id is annotated ascendingly starting from the
* left side. <br/>
* <br/>
* The enumeration is ordered firstly by the start position of the attribute and
* secondly by the element/relation id descendingly. This order helps to match
* element and attributes faster.
*
* AttributeSpans contain information about the elements they belongs to, thus
* querying them alone is sufficient to get
* "any element having a specific attribute".
*
* @author margaretha
* */
public class AttributeSpans extends SimpleSpans {

    /**
     * Attribute spans sharing one document and start position, already
     * sorted by descending element/relation id. The head of the list is the
     * next match to be reported. The list only ever holds spans of a single
     * document/start-position group.
     */
    private List<CandidateAttributeSpan> candidateList;

    /** Document and start position of the group that is collected next. */
    private int currentDoc, currentPosition;

    /** Id of the element/relation the current match belongs to. */
    private short referentId;

    /** Flag maintained by callers through {@link #setFinish(boolean)}. */
    private boolean isFinish;

    /** End position of the element the current match belongs to, or -1. */
    private int elementEnd;

    protected Logger logger = LoggerFactory.getLogger(AttributeSpans.class);

    /**
     * Constructs AttributeSpans based on the specified SpanAttributeQuery and
     * positions the underlying spans on their first occurrence (if any).
     *
     * @param spanAttributeQuery a spanAttributeQuery
     * @param context
     * @param acceptDocs
     * @param termContexts
     * @throws IOException
     */
    public AttributeSpans(SpanAttributeQuery spanAttributeQuery,
            AtomicReaderContext context, Bits acceptDocs,
            Map<Term, TermContext> termContexts) throws IOException {
        super(spanAttributeQuery, context, acceptDocs, termContexts);
        candidateList = new ArrayList<>();
        hasMoreSpans = firstSpans.next();
        if (hasMoreSpans) {
            currentDoc = firstSpans.doc();
            currentPosition = firstSpans.start();
        }
    }

    /**
     * Moves to the next attribute span.
     *
     * @return <code>true</code> if a match is found, <code>false</code>
     *         otherwise.
     * @throws IOException
     */
    @Override
    public boolean next() throws IOException {
        isStartEnumeration = false;
        matchPayload.clear();
        return advance();
    }

    /**
     * Moves to the next match by taking the head of the candidate list,
     * collecting the next group of candidates first when the list is empty.
     *
     * @return true if a match is found
     * @throws IOException
     */
    private boolean advance() throws IOException {
        while (hasMoreSpans || !candidateList.isEmpty()) {
            if (!candidateList.isEmpty()) {
                // the head of the list becomes the current match
                CandidateAttributeSpan head = candidateList.remove(0);
                this.matchDocNumber = head.getDoc();
                this.matchStartPosition = head.getStart();
                this.matchEndPosition = head.getEnd();
                this.setReferentId(head.getSpanId());
                this.setElementEnd(head.getElementEnd());
                return true;
            }
            setCandidateList();
            // Remember where the following group starts. The underlying
            // spans are only queried while they are still positioned on an
            // occurrence.
            if (hasMoreSpans) {
                currentDoc = firstSpans.doc();
                currentPosition = firstSpans.start();
            }
        }
        return false;
    }

    /**
     * Collects all the attributes located at the current document and start
     * position and sorts them by element/relation id in reverse order (the
     * ones with the bigger element/relation id first).
     *
     * @throws IOException
     */
    private void setCandidateList() throws IOException {
        while (hasMoreSpans && firstSpans.doc() == currentDoc
                && firstSpans.start() == currentPosition) {
            candidateList.add(createCandidateSpan());
            hasMoreSpans = firstSpans.next();
        }
        // ascending natural order, then reversed => descending span id
        Collections.sort(candidateList);
        Collections.reverse(candidateList);
    }

    /**
     * Creates a CandidateAttributeSpan from the current state of the
     * underlying spans. The span id is read as a short from the first two
     * bytes of the first payload. When that payload is exactly 6 bytes long,
     * the element end is read as an int from bytes 2-5; otherwise it is set
     * to -1.
     *
     * @return a CandidateAttributeSpan
     * @throws IOException
     */
    private CandidateAttributeSpan createCandidateSpan() throws IOException {
        // only the first payload is relevant; no List view is required
        byte[] firstPayload = firstSpans.getPayload().iterator().next();
        ByteBuffer wrapper = ByteBuffer.wrap(firstPayload);
        short spanId = wrapper.getShort(0);
        int endOfElement = -1;
        if (firstPayload.length == 6) {
            endOfElement = wrapper.getInt(2);
        }
        return new CandidateAttributeSpan(firstSpans, spanId, endOfElement);
    }

    /**
     * Return the span id to which an attribute span belongs, for instance a
     * relation id or an element id.
     *
     * @return a span id, for instance a relation id or an element id
     */
    public short getReferentId() {
        return this.referentId;
    }

    /**
     * Sets the span id to which an attribute span belongs, for instance a
     * relation id or an element id.
     *
     * @param refId the span id to which an attribute span belongs, for
     *        instance a relation id or an element id.
     */
    public void setReferentId(short refId) {
        this.referentId = refId;
    }

    /**
     * Returns the end position of the element to which an attribute span
     * belongs.
     *
     * @return an element end position
     */
    public int getElementEnd() {
        return elementEnd;
    }

    /**
     * Sets the end position of the element to which an attribute span belongs.
     *
     * @param elementEnd the end position of the element to which an attribute
     *        span belongs.
     */
    public void setElementEnd(int elementEnd) {
        this.elementEnd = elementEnd;
    }

    /**
     * Tells if the enumeration of the AttributeSpans has come to an end.
     *
     * @return true if the enumeration has finished.
     */
    public boolean isFinish() {
        return isFinish;
    }

    /**
     * Sets true if the enumeration of the AttributeSpans has come to an end.
     *
     * @param isFinish <code>true</code> if the enumeration of the
     *        AttributeSpans has come to an end, <code>false</code> otherwise.
     */
    public void setFinish(boolean isFinish) {
        this.isFinish = isFinish;
    }

    /**
     * Skips to the next match whose document number is greater than or equal
     * to the given target. Candidates that were collected from a document
     * before the target are discarded, so they are never reported as matches.
     *
     * @param target the smallest acceptable document number
     * @return <code>true</code> if a match is found, <code>false</code>
     *         otherwise.
     * @throws IOException
     */
    @Override
    public boolean skipTo(int target) throws IOException {
        // All pending candidates share one document, so checking the head is
        // sufficient to decide whether they are still eligible.
        if (!candidateList.isEmpty()
                && candidateList.get(0).getDoc() < target) {
            candidateList.clear();
        }
        if (hasMoreSpans && firstSpans.doc() < target) {
            hasMoreSpans = firstSpans.skipTo(target);
            if (!hasMoreSpans) {
                return false;
            }
            // re-synchronise the group marker with the new position
            currentDoc = firstSpans.doc();
            currentPosition = firstSpans.start();
        }
        // Candidates are deliberately NOT collected here: advance() does so
        // once the pending group is used up, which keeps groups of different
        // start positions from being merged and re-sorted together.
        matchPayload.clear();
        isStartEnumeration = false;
        return advance();
    }

    @Override
    public long cost() {
        return firstSpans.cost();
    }

    /**
     * CandidateAttributeSpan contains information about an Attribute span. All
     * attribute spans occurring in an identical position are collected as
     * CandidateAttributeSpans. The natural ordering of CandidateAttributeSpans
     * is ascending by the span id to which the attribute belongs; the
     * enclosing AttributeSpans reverses that order, so that the attributes
     * with bigger span ids come first on its candidate list.
     *
     * */
    class CandidateAttributeSpan extends CandidateSpan implements
            Comparable<CandidateSpan> {

        private short spanId;
        private int elementEnd;

        /**
         * Construct a CandidateAttributeSpan based on the given span, spanId,
         * and elementEnd.
         *
         * @param span an AttributeSpans
         * @param spanId the element or relation span id to which the current
         *        state of the specified AttributeSpans belongs to.
         * @param elementEnd the end position of the element or relation span
         *        to which the current state of the specified AttributeSpans
         *        belongs to.
         * @throws IOException
         */
        public CandidateAttributeSpan(Spans span, short spanId, int elementEnd)
                throws IOException {
            super(span);
            setSpanId(spanId);
            setElementEnd(elementEnd);
        }

        public void setSpanId(short spanId) {
            this.spanId = spanId;
        }

        public short getSpanId() {
            return spanId;
        }

        public int getElementEnd() {
            return elementEnd;
        }

        public void setElementEnd(int elementEnd) {
            this.elementEnd = elementEnd;
        }

        /**
         * Compares by span id in ascending order.
         *
         * @param o the span to compare with; it is cast to
         *        CandidateAttributeSpan without a type check
         * @return -1, 0 or 1 if this span id is smaller than, equal to or
         *         bigger than the span id of the other span
         */
        @Override
        public int compareTo(CandidateSpan o) {
            CandidateAttributeSpan other = (CandidateAttributeSpan) o;
            return Integer.compare(this.spanId, other.spanId);
        }
    }
}