blob: 3d9fd0a85f5fe3297d8c8d078954acc6ec0063bc [file] [log] [blame]
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import de.ids_mannheim.korap.query.SpanDistanceQuery;
/** Enumeration of span matches, whose two child spans have a specific range
* of distance (within a min and a max distance) and can be in any order.
*
* @author margaretha
* */
public abstract class UnorderedDistanceSpans extends DistanceSpans{
protected int minDistance, maxDistance;
protected boolean hasMoreFirstSpans, hasMoreSecondSpans;
protected List<CandidateSpan> firstSpanList, secondSpanList;
protected List<CandidateSpan> matchList;
private long matchCost;
private int matchListSpanNum;
protected int currentDocNum;
public UnorderedDistanceSpans(SpanDistanceQuery query,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
super(query, context, acceptDocs, termContexts);
minDistance = query.getMinDistance();
maxDistance = query.getMaxDistance();
firstSpanList = new ArrayList<CandidateSpan>();
secondSpanList = new ArrayList<CandidateSpan>();
matchList = new ArrayList<CandidateSpan>();
hasMoreFirstSpans = firstSpans.next();
hasMoreSecondSpans = secondSpans.next();
hasMoreSpans = hasMoreFirstSpans && hasMoreSecondSpans;
}
@Override
protected boolean advance() throws IOException {
while (hasMoreSpans || !matchList.isEmpty()){
if (!matchList.isEmpty()){
setMatchProperties();
return true;
}
if (prepareLists()) setMatchList();
}
return false;
}
/** Add the next possible first and second spans. Both the spans must be in the
* same document. In UnorderedElementDistanceSpans, a span that is not in
* an element, is not added to its candidate list. The element must also be in
* the same document.
*
* @return true iff at least one of the candidate lists can be filled.
* */
protected abstract boolean prepareLists() throws IOException;
/** Set the list of matches between the span having the smallest position, and
* its candidates. Simply remove the span if it does not have any candidates.
* */
protected void setMatchList() throws IOException {
hasMoreFirstSpans = setCandidateList(firstSpanList,firstSpans,
hasMoreFirstSpans,secondSpanList);
hasMoreSecondSpans = setCandidateList(secondSpanList,secondSpans,
hasMoreSecondSpans,firstSpanList);
// System.out.println("--------------------");
// System.out.println("firstSpanList:");
// for (CandidateSpan cs: firstSpanList) {
// System.out.println(cs.getStart() +" "+ cs.getEnd());
// }
//
// System.out.println("secondSpanList:");
// for (CandidateSpan cs: secondSpanList) {
// System.out.println(cs.getStart() +" "+ cs.getEnd());
// }
CandidateSpan currentFirstSpan, currentSecondSpan;
if (!firstSpanList.isEmpty() && !secondSpanList.isEmpty()){
currentFirstSpan = firstSpanList.get(0) ;
currentSecondSpan = secondSpanList.get(0);
if (currentFirstSpan.getEnd() < currentSecondSpan.getEnd() ||
isLastCandidateSmaller(currentFirstSpan, currentSecondSpan)){
// System.out.println("current target: "+firstSpanList.get(0).getStart() +" "+firstSpanList.get(0).getEnd());
// System.out.println("candidates:");
// for (CandidateSpan cs: secondSpanList) {
// System.out.println(cs.getStart() +" "+ cs.getEnd());
// }
matchList = findMatches(currentFirstSpan, secondSpanList);
setMatchFirstSpan(currentFirstSpan);
matchListSpanNum = 2;
updateList(firstSpanList);
}
else {
// System.out.println("current target: "+secondSpanList.get(0).getStart() +" "+secondSpanList.get(0).getEnd());
// System.out.println("candidates:");
// for (CandidateSpan cs: firstSpanList) {
// System.out.println(cs.getStart() +" "+ cs.getEnd());
// }
matchList = findMatches(currentSecondSpan, firstSpanList);
setMatchSecondSpan(currentSecondSpan);
matchListSpanNum = 1;
updateList(secondSpanList);
}
}
else if (firstSpanList.isEmpty()){
// System.out.println("current target: "+secondSpanList.get(0).getStart() +" "+secondSpanList.get(0).getEnd());
// System.out.println("candidates: empty");
updateList(secondSpanList);
}
else{
// System.out.println("current target: "+firstSpanList.get(0).getStart() +" "+firstSpanList.get(0).getEnd());
// System.out.println("candidates: empty");
updateList(firstSpanList);
}
}
private boolean isLastCandidateSmaller(CandidateSpan currentFirstSpan, CandidateSpan
currentSecondSpan){
if (currentFirstSpan.getEnd() == currentSecondSpan.getEnd() ){
int secondEnd = secondSpanList.get(secondSpanList.size()-1).getEnd();
int firstEnd = firstSpanList.get(firstSpanList.size()-1).getEnd();
return (secondEnd < firstEnd ? true : false);
}
return false;
}
protected abstract void updateList(List<CandidateSpan> candidateList);
/** Set the candidate list for the first element in the target list.
* @return true iff the spans enumeration still has a next element
* to be a candidate
*/
protected abstract boolean setCandidateList(List<CandidateSpan>
candidateList, Spans candidate, boolean hasMoreCandidates,
List<CandidateSpan> targetList) throws IOException;
/** Search all matches between the target span and its candidates in the candidate
* list.
* @return the matches in a list
* */
protected abstract List<CandidateSpan> findMatches(CandidateSpan target,
List<CandidateSpan> candidateList);
/** Compute match properties and create a candidate span match
* to be added to the match list.
* @return a candidate span match
* */
protected CandidateSpan createMatchCandidate(CandidateSpan target,
CandidateSpan cs, boolean isDistanceZero) {
int start = Math.min(target.getStart(), cs.getStart());
int end = Math.max(target.getEnd(),cs.getEnd());
int doc = target.getDoc();
long cost = target.getCost() + cs.getCost();
Collection<byte[]> payloads = new LinkedList<byte[]>();
if (collectPayloads) {
if (target.getPayloads() != null){
payloads.addAll(target.getPayloads());
}
if (cs.getPayloads() != null){
payloads.addAll(cs.getPayloads());
}
}
CandidateSpan match = new CandidateSpan(start,end,doc,cost,payloads);
match.setChildSpan(cs);
return match;
}
/** Assign the first candidate span in the match list as the current span match.
* */
private void setMatchProperties() {
CandidateSpan cs = matchList.get(0);
matchDocNumber = cs.getDoc();
matchStartPosition = cs.getStart();
matchEndPosition = cs.getEnd();
matchCost = cs.getCost();
matchPayload.addAll(cs.getPayloads());
matchList.remove(0);
if (matchListSpanNum == 1)
setMatchFirstSpan(cs.getChildSpan());
else setMatchSecondSpan(cs.getChildSpan());
log.trace("Match doc#={} start={} end={}", matchDocNumber,
matchStartPosition,matchEndPosition);
//System.out.println("firstspan "+getMatchFirstSpan().getStart()+" "+ getMatchFirstSpan().getEnd());
//System.out.println("secondspan "+getMatchSecondSpan().getStart()+" "+ getMatchSecondSpan().getEnd());
}
@Override
public boolean skipTo(int target) throws IOException {
if (hasMoreSpans && (secondSpans.doc() < target)){
if (!secondSpans.skipTo(target)){
hasMoreSpans = false;
return false;
}
}
firstSpanList.clear();
secondSpanList.clear();
matchPayload.clear();
isStartEnumeration=false;
return advance();
}
@Override
public long cost() {
return matchCost;
}
}