// blob: 9651cba1a70c81a5471ada11958600a20cdd0d25 [file] [log] [blame]
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.query.SpanNextQuery;
/**
 * NextSpans is an enumeration of span matches in which a span of the first
 * child (firstSpans) is immediately followed by a span of the second child
 * (secondSpans), i.e. the second span starts exactly at the position where
 * the first span ends. A match covers the start of the first span up to the
 * end of the second span.
 *
 * Update: allow multiple matches at the same firstspan position
 *
 * @author margaretha
 */
public class NextSpans extends SimpleSpans {

    private static final Logger log = LoggerFactory.getLogger(NextSpans.class);

    // Compile-time switch: while this is false, the logging blocks guarded
    // by it are never executed.
    public static final boolean DEBUG = false;

    // Matches computed for the current first span that have not been
    // returned to the caller yet.
    private List<CandidateSpan> matchList;

    // Second spans of document candidateListDocNum that matched the most
    // recently processed first span; they are re-checked against the next
    // first span of the same document.
    private List<CandidateSpan> candidateList;

    // Document number the entries of candidateList belong to.
    private int candidateListDocNum;

    // Result of the most recent firstSpans.next() call.
    private boolean hasMoreFirstSpan;

    /**
     * Creates the enumeration and positions secondSpans on its first span.
     * In this class hasMoreSpans records whether secondSpans still has a
     * current span; it is also cleared once firstSpans is exhausted.
     *
     * @param spanNextQuery the query this enumeration belongs to
     * @param context       the reader context passed on to the superclass
     * @param acceptDocs    the accepted documents passed on to the superclass
     * @param termContexts  the term contexts passed on to the superclass
     * @throws IOException if the underlying spans cannot be read
     */
    public NextSpans(SpanNextQuery spanNextQuery,
                     AtomicReaderContext context,
                     Bits acceptDocs,
                     Map<Term, TermContext> termContexts) throws IOException {
        super(spanNextQuery, context, acceptDocs, termContexts);
        collectPayloads = spanNextQuery.isCollectPayloads();
        hasMoreSpans = secondSpans.next();
        matchList = new ArrayList<>();
        candidateList = new ArrayList<>();
    }

    /**
     * Moves to the next match.
     *
     * @return true if a match was found, false if the enumeration is exhausted
     * @throws IOException if the underlying spans cannot be read
     */
    @Override
    public boolean next() throws IOException {
        isStartEnumeration = false;
        matchPayload.clear();
        return advance();
    }

    /**
     * Returns the next buffered match if there is one; otherwise forwards
     * firstSpans span by span and collects the matches of each first span
     * until a match is found or firstSpans is exhausted.
     *
     * Invariant relied upon: every entry of matchList was computed for the
     * span firstSpans is currently positioned on, so the match document and
     * start position can be read from firstSpans directly.
     */
    private boolean advance() throws IOException {
        while (hasMoreSpans || !matchList.isEmpty() || !candidateList.isEmpty()) {
            if (!matchList.isEmpty()) {
                CandidateSpan match = matchList.remove(0);
                matchDocNumber = firstSpans.doc();
                matchStartPosition = firstSpans.start();
                matchEndPosition = match.getEnd();
                if (collectPayloads) {
                    matchPayload.addAll(match.getPayloads());
                }
                if (DEBUG) {
                    log.trace("Match doc#: {}", matchDocNumber);
                    log.trace("Match positions: {}-{}", matchStartPosition,
                            matchEndPosition);
                }
                return true;
            }

            // Forward firstspan
            hasMoreFirstSpan = firstSpans.next();
            if (hasMoreFirstSpan) {
                setMatchList();
            }
            else {
                // Without a first span nothing can match anymore.
                hasMoreSpans = false;
                candidateList.clear();
            }
        }
        return false;
    }

    /**
     * Collects into matchList all matches for the span firstSpans is
     * currently positioned on. Within the candidate document the buffered
     * candidates are re-checked first and then further second spans are
     * read. For any other document the candidates are discarded and both
     * spans are aligned on a common document before searching.
     */
    private void setMatchList() throws IOException {
        if (firstSpans.doc() == candidateListDocNum) {
            searchCandidates();
            searchMatches();
            return;
        }

        candidateList.clear();
        if (hasMoreSpans && ensureSameDoc(firstSpans, secondSpans)) {
            candidateListDocNum = firstSpans.doc();
            searchMatches();
        }
    }

    /**
     * Re-checks the buffered candidates against the current first span:
     * candidates starting at the end of the first span become matches, all
     * others are removed from the candidate list.
     */
    private void searchCandidates() throws IOException {
        int firstEnd = firstSpans.end();
        Iterator<CandidateSpan> candidates = candidateList.iterator();
        while (candidates.hasNext()) {
            CandidateSpan candidate = candidates.next();
            if (candidate.getStart() == firstEnd) {
                addMatch(candidate);
            }
            else {
                candidates.remove();
            }
        }
    }

    /**
     * Reads second spans of the candidate document until one starts after
     * the end of the current first span. Second spans starting exactly at
     * the end of the first span are recorded both as candidates (for the
     * following first spans) and as matches; earlier ones are skipped.
     */
    private void searchMatches() throws IOException {
        while (hasMoreSpans && candidateListDocNum == secondSpans.doc()) {
            int firstEnd = firstSpans.end();
            int secondStart = secondSpans.start();
            if (secondStart > firstEnd) {
                // Keep secondSpans here; it may match a later first span.
                break;
            }
            if (secondStart == firstEnd) {
                // Snapshot the second span only once and share the instance:
                // addMatch only reads from it, so a second copy (and a second
                // read of the span's state) is unnecessary.
                CandidateSpan candidate = new CandidateSpan(secondSpans);
                candidateList.add(candidate);
                addMatch(candidate);
            }
            hasMoreSpans = secondSpans.next();
        }
    }

    /**
     * Adds a match to matchList that starts at the current first span and
     * ends at the end of the given candidate. When payloads are collected,
     * the payloads of the first span are followed by those of the candidate.
     *
     * @param cs the second span following the current first span
     */
    private void addMatch(CandidateSpan cs) throws IOException {
        List<byte[]> payloads = new ArrayList<>();
        if (collectPayloads) {
            if (firstSpans.isPayloadAvailable()) {
                payloads.addAll(firstSpans.getPayload());
            }
            if (cs.getPayloads() != null) {
                payloads.addAll(cs.getPayloads());
            }
        }
        long cost = firstSpans.cost() + cs.getCost();
        matchList.add(new CandidateSpan(firstSpans.start(), cs.getEnd(),
                candidateListDocNum, cost, payloads));
    }

    /**
     * Moves to the next match whose document number is greater than or
     * equal to target. If firstSpans is already in such a document, this
     * behaves like next().
     *
     * When firstSpans has to be skipped forward, all buffered matches and
     * candidates belong to a document before target and are discarded, and
     * the span firstSpans lands on is evaluated before firstSpans is
     * forwarded any further.
     *
     * @param target the smallest acceptable document number
     * @return true if a match was found, false otherwise
     * @throws IOException if the underlying spans cannot be read
     */
    @Override
    public boolean skipTo(int target) throws IOException {
        matchPayload.clear();

        boolean hasPendingState = hasMoreSpans
                || !matchList.isEmpty() || !candidateList.isEmpty();
        if (hasPendingState && firstSpans.doc() < target) {
            // Everything buffered so far was computed for the document
            // firstSpans is leaving; it must not leak into the target document.
            matchList.clear();
            candidateList.clear();

            // With secondSpans exhausted and the candidates gone, no further
            // match is possible.
            if (!hasMoreSpans || !firstSpans.skipTo(target)) {
                hasMoreSpans = false;
                return false;
            }

            // Evaluate the span firstSpans was skipped to; advance() would
            // otherwise move past it with firstSpans.next().
            setMatchList();
        }
        return advance();
    }

    /**
     * @return the sum of the costs of both child spans
     */
    @Override
    public long cost() {
        return firstSpans.cost() + secondSpans.cost();
    }
}