// blob: b8fc20f4464863c71485101f9ef428469ea70f78 [file] [log] [blame]
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import de.ids_mannheim.korap.query.SpanDistanceQuery;
/**
 * Span enumeration of the first spans which do NOT occur together
 * with the second spans within a range of some element-based distance
 * (sentence or paragraph). Note: the element distance units do not
 * overlap each other.
 *
 * @author margaretha
 */
public class ElementDistanceExclusionSpan extends DistanceSpans {

    /** Spans of the element query; every element counts as one distance unit. */
    private Spans elements;
    /** Result of the last {@code elements.next()} call. */
    private boolean hasMoreElements;
    /**
     * Running count of the elements stepped over by
     * {@link #advanceElementTo(Spans)}; used as the position (in element
     * units) of the spans contained in the current element.
     */
    private int elementPosition;
    /** Whether only second spans to the right of a first span are considered. */
    private boolean isOrdered;
    /** Result of the last {@code next()}/{@code skipTo()} call on the second spans. */
    private boolean hasMoreSecondSpans;
    /**
     * {@code candidateList} buffers second spans (with their element
     * position) against which first spans are checked; {@code targetList}
     * buffers first spans that were read while collecting right-hand
     * candidates and still have to be checked.
     */
    protected List<CandidateSpan> candidateList, targetList;
    /** Document of the first span that is currently being processed. */
    private int currentDocNum;
    private int minDistance, maxDistance;
    /** Element position of the first span that is currently being checked. */
    private int firstSpanPosition;

    public static final boolean DEBUG = false;

    /**
     * Creates the span enumeration and positions the element, first and
     * second spans on their first entries.
     *
     * @param query        the distance query providing the element query and
     *                     the minimum/maximum distance
     * @param context      passed through to the super constructor and to the
     *                     element query's {@code getSpans}
     * @param acceptDocs   passed through like {@code context}
     * @param termContexts passed through like {@code context}
     * @param isOrdered    {@code true} to check only second spans on the
     *                     right side of a first span, {@code false} to check
     *                     both sides
     * @throws IOException if reading any of the underlying spans fails
     */
    public ElementDistanceExclusionSpan(SpanDistanceQuery query,
            AtomicReaderContext context, Bits acceptDocs,
            Map<Term, TermContext> termContexts, boolean isOrdered)
            throws IOException {
        super(query, context, acceptDocs, termContexts);

        elements = query.getElementQuery().
                getSpans(context, acceptDocs, termContexts);
        hasMoreElements = elements.next();
        hasMoreSpans = firstSpans.next() && hasMoreElements;
        hasMoreSecondSpans = secondSpans.next();

        elementPosition = 0;
        this.isOrdered = isOrdered;
        candidateList = new ArrayList<CandidateSpan>();
        targetList = new ArrayList<CandidateSpan>();
        currentDocNum = firstSpans.doc();

        minDistance = query.getMinDistance();
        maxDistance = query.getMaxDistance();
    }

    /**
     * Moves to the next first span that has no second span within the
     * configured element distance. Buffered targets are processed before
     * new first spans are read.
     *
     * @return {@code true} if a match was found and the match properties
     *         were set, {@code false} if there are no more matches
     */
    @Override
    protected boolean advance() throws IOException {
        while (!targetList.isEmpty()
                || (hasMoreSpans && ensureSameDoc(firstSpans, elements))) {
            if (!targetList.isEmpty()) {
                if (isTargetValid()) {
                    return true;
                }
                continue;
            }
            if (checkFirstSpan()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Takes the oldest buffered first span from {@code targetList} and
     * checks it against the candidates, collecting further right-hand
     * candidates first.
     *
     * @return {@code true} if the target has no candidate within distance
     *         and was set as the current match
     */
    private boolean isTargetValid() throws IOException {
        CandidateSpan target = targetList.remove(0);
        firstSpanPosition = target.getPosition();
        filterCandidateList(firstSpanPosition);
        collectRightCandidates();

        if (isWithinDistance()) {
            return false;
        }
        setMatchProperties(target);
        return true;
    }

    /**
     * Checks the current first span. Candidates of a previous document are
     * discarded when the first span has moved to a new document, and second
     * spans lagging behind are skipped to the first span's document.
     *
     * @return {@code true} if the current first span is a match
     */
    private boolean checkFirstSpan() throws IOException {
        if (firstSpans.doc() != currentDocNum) {
            currentDocNum = firstSpans.doc();
            candidateList.clear();
        }

        if (hasMoreSecondSpans) {
            if (secondSpans.doc() == firstSpans.doc()) {
                return findMatch();
            }
            else if (secondSpans.doc() < firstSpans.doc()) {
                // Catch up; the caller loops and re-checks the same first span.
                hasMoreSecondSpans = secondSpans.skipTo(firstSpans.doc());
                return false;
            }
        }
        // No second span left to read in this document.
        return isFirstSpanValid();
    }

    /**
     * Checks the current first span when no further second span is
     * available in its document. With an empty candidate list the first
     * span matches as soon as it lies within an element; otherwise the
     * remaining candidates are checked through {@link #findMatch()}.
     * The first spans are advanced in either case.
     *
     * @return {@code true} if the first span is a match
     */
    private boolean isFirstSpanValid() throws IOException {
        if (candidateList.isEmpty()) {
            if (isFirstSpanInElement()) {
                setMatchProperties(new CandidateSpan(firstSpans, elementPosition));
                hasMoreSpans = firstSpans.next();
                return true;
            }
            hasMoreSpans = firstSpans.next();
            return false;
        }
        return findMatch();
    }

    /**
     * Advances the elements, within the current document, until one
     * contains the given span or the elements start at or after the end of
     * the span. {@code elementPosition} is incremented for every element
     * stepped over.
     *
     * @param span the span to locate
     * @return {@code true} if the current element contains the span
     */
    private boolean advanceElementTo(Spans span) throws IOException {
        while (hasMoreElements
                && elements.doc() == currentDocNum
                && elements.start() < span.end()) {
            if (span.start() >= elements.start()
                    && span.end() <= elements.end()) {
                return true;
            }
            hasMoreElements = elements.next();
            elementPosition++;
        }
        return false;
    }

    /**
     * Checks whether the current first span is free of second spans within
     * distance: on the left side (unordered case only) and on the right
     * side. The first spans are advanced before the right side is
     * collected, so later first spans may be buffered in {@code targetList}.
     *
     * @return {@code true} if the first span was set as the current match
     */
    private boolean findMatch() throws IOException {
        if (!isOrdered) {
            collectLeftCandidates();
        }

        if (isFirstSpanInElement()) {
            CandidateSpan target = new CandidateSpan(firstSpans, elementPosition);
            hasMoreSpans = firstSpans.next();
            // Checking the second spans on the left side
            if (!isOrdered && isWithinDistance()) {
                return false;
            }
            // Checking the second spans on the right side
            collectRightCandidates();
            if (isWithinDistance()) {
                return false;
            }
            setMatchProperties(target);
            return true;
        }
        hasMoreSpans = firstSpans.next();
        return false;
    }

    /**
     * Reads second spans of the current document and buffers those lying
     * within an element as candidates, until the element position exceeds
     * {@code firstSpanPosition + maxDistance}. First spans of the current
     * document that start before the current second span are consumed on
     * the way and buffered in {@code targetList} when they lie within an
     * element.
     */
    private void collectRightCandidates() throws IOException {
        while (hasMoreSecondSpans && secondSpans.doc() == currentDocNum) {

            if (elementPosition > firstSpanPosition + maxDistance) {
                break;
            }
            if (hasMoreSpans && firstSpans.start() < secondSpans.start()
                    && firstSpans.doc() == currentDocNum) {
                if (advanceElementTo(firstSpans)) {
                    targetList.add(new CandidateSpan(firstSpans, elementPosition));
                }
                hasMoreSpans = firstSpans.next();
                continue;
            }

            if (advanceElementTo(secondSpans)) {
                candidateList.add(new CandidateSpan(secondSpans, elementPosition));
            }
            hasMoreSecondSpans = secondSpans.next();
        }
    }

    /**
     * Reads the second spans that start before the end of the current
     * first span (same document) and buffers those lying within an element
     * as candidates, pruning candidates that fall out of range.
     */
    private void collectLeftCandidates() throws IOException {
        while (hasMoreSecondSpans && secondSpans.doc() == firstSpans.doc()
                && secondSpans.start() < firstSpans.end()) {
            if (advanceElementTo(secondSpans)) {
                candidateList.add(new CandidateSpan(secondSpans, elementPosition));
                filterCandidateList(elementPosition);
            }
            hasMoreSecondSpans = secondSpans.next();
        }
    }

    /**
     * Tells whether any buffered candidate lies within
     * {@code [minDistance, maxDistance]} elements of the current first
     * span. In the unordered case the absolute distance is used.
     *
     * @return {@code true} if at least one candidate is within distance
     */
    private boolean isWithinDistance() {
        int actualDistance;
        for (CandidateSpan cs : candidateList) {
            actualDistance = cs.getPosition() - firstSpanPosition;
            if (!isOrdered) {
                actualDistance = Math.abs(actualDistance);
            }
            if (minDistance <= actualDistance && actualDistance <= maxDistance) {
                return true;
            }
        }
        return false;
    }

    /**
     * Locates the element containing the current first span. On success
     * the element position becomes the first span position and the
     * candidates that are out of range for it are pruned.
     *
     * @return {@code true} if the first span lies within an element
     */
    private boolean isFirstSpanInElement() throws IOException {
        if (advanceElementTo(firstSpans)) {
            firstSpanPosition = elementPosition;
            filterCandidateList(firstSpanPosition);
            return true;
        }
        return false;
    }

    /**
     * Removes candidates from the head of {@code candidateList} until one
     * is found whose position equals {@code position} or whose position
     * plus {@code maxDistance} reaches {@code position}; that candidate and
     * everything after it are kept.
     *
     * @param position the element position to filter against
     */
    private void filterCandidateList(int position) {
        Iterator<CandidateSpan> i = candidateList.iterator();
        CandidateSpan cs;
        while (i.hasNext()) {
            cs = i.next();
            if (cs.getPosition() == position
                    || cs.getPosition() + maxDistance >= position) {
                break;
            }
            i.remove();
        }
    }

    /**
     * Copies document number, start, end and (when payloads are collected)
     * payloads of the given span into the match properties.
     *
     * @param match the first span to expose as the current match
     */
    private void setMatchProperties(CandidateSpan match) throws IOException {
        matchDocNumber = match.getDoc();
        matchStartPosition = match.getStart();
        matchEndPosition = match.getEnd();
        if (collectPayloads && match.getPayloads() != null) {
            matchPayload.addAll(match.getPayloads());
        }
        setMatchFirstSpan(match);

        if (DEBUG) {
            log.trace("doc# {}, start {}, end {}",matchDocNumber,matchStartPosition,
                    matchEndPosition);
        }
    }

    /**
     * Skips to the next match in a document whose number is greater than
     * or equal to {@code target}.
     *
     * @param target the lowest acceptable document number
     * @return {@code true} if such a match exists
     */
    @Override
    public boolean skipTo(int target) throws IOException {
        // First spans buffered for documents before the target must not be
        // evaluated any more; otherwise advance() could report a match in a
        // document smaller than the requested target.
        Iterator<CandidateSpan> pending = targetList.iterator();
        while (pending.hasNext()) {
            if (pending.next().getDoc() < target) {
                pending.remove();
            }
        }

        if (hasMoreSpans && firstSpans.doc() < target) {
            if (!firstSpans.skipTo(target)) {
                hasMoreSpans = false;
                return false;
            }
        }
        // NOTE(review): matchPayload is only appended to in this class and is
        // presumably reset by the inherited next(); confirm whether it should
        // be reset here as well.
        return advance();
    }

    /**
     * @return the summed cost of the element, first and second spans
     */
    @Override
    public long cost() {
        return elements.cost() + firstSpans.cost() + secondSpans.cost();
    }
}