blob: 43163c85168f1deaf820eb9f1f94f2ec93d35612 [file] [log] [blame]
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.query.SpanElementQuery;
/**
* @author Nils Diewald, margaretha
*
* Use copyFrom instead of clone
*/
public class ElementSpans extends SpansWithId {
private List<CandidateElementSpans> candidateList;
private int currentDoc, currentPosition;
private TermSpans termSpans;
private Logger logger = LoggerFactory.getLogger(ElementSpans.class);
public ElementSpans(SpanElementQuery spanElementQuery,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
super(spanElementQuery, context, acceptDocs, termContexts);
candidateList = new ArrayList<>();
termSpans = (TermSpans) firstSpans;
hasMoreSpans = termSpans.next();
if (hasMoreSpans) {
currentDoc = termSpans.doc();
currentPosition = termSpans.start();
}
}
@Override
public boolean next() throws IOException {
isStartEnumeration=false;
return advance();
}
/** Get the next match by first checking the candidate match list
* and setting the list when it is empty.
* */
private boolean advance() throws IOException {
while(hasMoreSpans || !candidateList.isEmpty()){
if (!candidateList.isEmpty()){
CandidateElementSpans cs = candidateList.get(0);
this.matchDocNumber = cs.getDoc();
this.matchStartPosition = cs.getStart();
this.matchEndPosition = cs.getEnd();
this.matchPayload = cs.getPayloads();
//this.setElementRef(cs.getSpanId());
this.setSpanId(cs.getSpanId());
candidateList.remove(0);
return true;
}
else{
//logger.info("Setting candidate list");
setCandidateList();
currentDoc = termSpans.doc();
currentPosition = termSpans.start();
}
}
return false;
}
/** Collect all the elements in the same start position and sort them by
* end position (smallest first).
* */
private void setCandidateList() throws IOException {
while (hasMoreSpans && termSpans.doc() == currentDoc &&
termSpans.start() == currentPosition){
CandidateElementSpans cs = new CandidateElementSpans(termSpans,
spanId);
//elementRef);
readPayload(cs);
candidateList.add(cs);
hasMoreSpans = termSpans.next();
}
Collections.sort(candidateList);
}
/** This method reads the payload of the termSpan and assigns the end
* position and element ref to the candidate match. The character offset
* payload is set as the candidate match payload.
* <br/><br/>
* <em>Note</em>: payloadbuffer should actually collects all other payload
* beside end position and element ref, but KorapIndex identify element's
* payload by its length (8), which is only the character offsets. So
* these offsets are directly set as the candidate match payload.
*
* @author margaretha
* */
private void readPayload(CandidateElementSpans cs) throws IOException {
List<byte[]> payload = (List<byte[]>) termSpans.getPayload();
int length = payload.get(0).length;
ByteBuffer bb = ByteBuffer.allocate(length);
bb.put(payload.get(0));
if (!payload.isEmpty()) {
// set element end position from payload
cs.setEnd(bb.getInt(8));
if (hasSpanId){ // copy element id
cs.setSpanId(bb.getShort(12));
}
else{ // set element id -1
cs.setSpanId((short) -1);
}
// Copy the start and end character offsets
byte[] b = new byte[8];
b = Arrays.copyOfRange(bb.array(), 0, 8);
cs.setPayloads(Collections.singletonList(b));
}
else {
cs.setEnd(cs.getStart());
cs.setSpanId((short) -1);
cs.setPayloads(null);
}
}
@Override
public boolean skipTo(int target) throws IOException {
if (hasMoreSpans && (firstSpans.doc() < target)){
if (!firstSpans.skipTo(target)){
candidateList.clear();
return false;
}
}
setCandidateList();
matchPayload.clear();
isStartEnumeration=false;
return advance();
}
@Override
public long cost() {
return termSpans.cost();
}
/** Match candidate for element spans.
* */
class CandidateElementSpans extends CandidateSpans {
private short elementRef;
public CandidateElementSpans(Spans span, short elementRef)
throws IOException {
super(span);
setSpanId(elementRef);
}
public void setSpanId(short elementRef) {
this.elementRef = elementRef;
}
public short getSpanId() {
return elementRef;
}
}
};