blob: e4a77d6d0b9f488b47d31561c8477146e3b6f3c2 [file] [log] [blame]
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import de.ids_mannheim.korap.query.SpanDistanceQuery;
/**
* Span enumeration of element-based distance span matches. Each match consists
* of two child spans. The element-distance between the child spans is the
* difference between the element position numbers where the child spans are.
* The element-distance unit can be a sentence or a paragraph. All other child
* spans' occurrences which are not in a sentence or a paragraph (with respect
* to the element distance type currently used), are ignored.
*
* Note: elements cannot overlap with each other.
*
* @author margaretha
* */
public class ElementDistanceSpans extends OrderedDistanceSpans {
private Spans elements;
private boolean hasMoreElements;
private int elementPosition;
private int secondSpanPostion;
/**
* Constructs ElementDistanceSpans based on the given SpanDistanceQuery.
*
* @param query a SpanDistanceQuery
* @param context
* @param acceptDocs
* @param termContexts
* @throws IOException
*/
public ElementDistanceSpans(SpanDistanceQuery query,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
super(query, context, acceptDocs, termContexts);
elements = query.getElementQuery().getSpans(context, acceptDocs,
termContexts);
hasMoreElements = elements.next();
hasMoreSpans = hasMoreFirstSpans && hasMoreElements;
elementPosition = 0;
}
@Override
protected boolean findMatch() throws IOException {
CandidateSpan candidateSpan = candidateList.get(candidateListIndex);
int actualDistance = secondSpanPostion - candidateSpan.getPosition();
// In the same element
if (minDistance == 0 && actualDistance == 0) {
setMatchProperties(candidateSpan, true);
return true;
}
if (minDistance <= actualDistance && actualDistance <= maxDistance) {
setMatchProperties(candidateSpan, false);
return true;
}
return false;
}
@Override
protected void setCandidateList() throws IOException {
if (candidateListDocNum == elements.doc()
&& candidateListDocNum == secondSpans.doc()) {
candidateListIndex = -1;
addNewCandidates();
} else {
candidateList.clear();
if (hasMoreFirstSpans
&& findSameDoc(firstSpans, secondSpans, elements)) {
candidateListDocNum = firstSpans.doc();
elementPosition = 0;
candidateListIndex = -1;
addNewCandidates();
}
}
}
/**
* Add new possible (candidate) firstspans. Candidate firstspans must be in
* an element and not too far from the secondspan.
*
* @throws IOException
*/
private void addNewCandidates() throws IOException {
while (hasMoreFirstSpans && firstSpans.doc() == candidateListDocNum
&& firstSpans.start() < secondSpans.end()) {
if (advanceElementTo(firstSpans)) {
candidateList
.add(new CandidateSpan(firstSpans, elementPosition));
filterCandidateList(elementPosition);
}
hasMoreFirstSpans = firstSpans.next();
}
}
/**
* Advance elements until encountering a span within the given document.
*
* @return true iff an element containing the span, is found.
*/
private boolean advanceElementTo(Spans span) throws IOException {
while (hasMoreElements && elements.doc() == candidateListDocNum
&& elements.start() < span.end()) {
if (span.start() >= elements.start()
&& span.end() <= elements.end()) {
return true;
}
hasMoreElements = elements.next();
elementPosition++;
}
return false;
}
/**
* Reduce the number of candidates by removing all candidates that are not
* within the max distance from the given element position.
*
* @param position an element position
*/
private void filterCandidateList(int position) {
Iterator<CandidateSpan> i = candidateList.iterator();
CandidateSpan cs;
while (i.hasNext()) {
cs = i.next();
if (cs.getPosition() == position
|| cs.getPosition() + maxDistance >= position) {
break;
}
i.remove();
}
// System.out.println("pos "+position+" " +candidateList.size());
}
@Override
protected boolean isSecondSpanValid() throws IOException {
if (advanceElementTo(secondSpans)) {
secondSpanPostion = elementPosition;
filterCandidateList(secondSpanPostion);
return true;
}
// second span is not in an element
return false;
}
@Override
public long cost() {
CandidateSpan candidateSpan = candidateList.get(candidateListIndex);
return elements.cost() + candidateSpan.getCost() + secondSpans.cost();
}
}