| package de.ids_mannheim.korap.query.spans; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.TermContext; |
| import org.apache.lucene.util.Bits; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import de.ids_mannheim.korap.query.SpanNextQuery; |
| |
| /** |
| * NextSpans is an enumeration of Span matches, which ensures that a |
| * span is |
| * immediately followed by another span. |
| * |
| * The implementation allows multiple matches at the same firstspan |
| * position. |
| * |
| * @author margaretha |
| * @author diewald |
| */ |
| public class NextSpans extends SimpleSpans { |
| |
| private List<CandidateSpan> matchList; |
| private List<CandidateSpan> candidateList; |
| private int candidateListDocNum; |
| private boolean hasMoreFirstSpan; |
| |
| private Logger log = LoggerFactory.getLogger(NextSpans.class); |
| |
| |
| /** |
| * Constructs NextSpans for the given {@link SpanNextQuery}. |
| * |
| * @param spanNextQuery |
| * a SpanNextQuery |
| * @param context |
| * @param acceptDocs |
| * @param termContexts |
| * @throws IOException |
| */ |
| public NextSpans (SpanNextQuery spanNextQuery, LeafReaderContext context, |
| Bits acceptDocs, Map<Term, TermContext> termContexts) |
| throws IOException { |
| super(spanNextQuery, context, acceptDocs, termContexts); |
| collectPayloads = spanNextQuery.isCollectPayloads(); |
| hasMoreSpans = secondSpans.next(); |
| matchList = new ArrayList<>(); |
| candidateList = new ArrayList<>(); |
| } |
| |
| |
| @Override |
| public boolean next () throws IOException { |
| isStartEnumeration = false; |
| matchPayload.clear(); |
| return advance(); |
| } |
| |
| |
| /** |
| * Advances the NextSpans to the next match by checking the |
| * matchList or |
| * setting the matchlist first, if it is empty. |
| * |
| * @return <code>true</code> if a match is found, |
| * <code>false</code> |
| * otherwise. |
| * @throws IOException |
| */ |
| private boolean advance () throws IOException { |
| |
| while (hasMoreSpans || !matchList.isEmpty() || !candidateList.isEmpty()) { |
| if (!matchList.isEmpty()) { |
| matchDocNumber = firstSpans.doc(); |
| matchStartPosition = firstSpans.start(); |
| matchEndPosition = matchList.get(0).getEnd(); |
| spanId = matchList.get(0).getSpanId(); |
| if (collectPayloads) |
| matchPayload.addAll(matchList.get(0).getPayloads()); |
| matchList.remove(0); |
| return true; |
| } |
| // Forward firstspan |
| hasMoreFirstSpan = firstSpans.next(); |
| if (hasMoreFirstSpan) |
| setMatchList(); |
| else { |
| hasMoreSpans = false; |
| candidateList.clear(); |
| } |
| } |
| return false; |
| } |
| |
| |
| /** |
| * Sets the matchlist by first searching the candidates and then |
| * find all |
| * the matches. |
| * |
| * @throws IOException |
| */ |
| private void setMatchList () throws IOException { |
| if (firstSpans.doc() == candidateListDocNum) { |
| searchCandidates(); |
| searchMatches(); |
| } |
| else { |
| candidateList.clear(); |
| if (hasMoreSpans && ensureSameDoc(firstSpans, secondSpans)) { |
| candidateListDocNum = firstSpans.doc(); |
| searchMatches(); |
| } |
| } |
| } |
| |
| |
| /** |
| * Removes all second span candidates whose start position is not |
| * the same |
| * as the firstspan's end position, otherwise creates a match and |
| * add it to |
| * the matchlist. |
| * |
| * @throws IOException |
| */ |
| private void searchCandidates () throws IOException { |
| Iterator<CandidateSpan> i = candidateList.iterator(); |
| CandidateSpan cs; |
| while (i.hasNext()) { |
| cs = i.next(); |
| if (cs.getStart() == firstSpans.end()) { |
| addMatch(cs); |
| } |
| else { |
| i.remove(); |
| } |
| } |
| } |
| |
| |
| /** |
| * Finds all secondspans whose start position is the same as the |
| * end |
| * position of the firstspans, until the secondspans' start |
| * position is |
| * bigger than the firstspans' end position. Adds those |
| * secondspans to the |
| * candidateList and creates matches. |
| * |
| * @throws IOException |
| */ |
| private void searchMatches () throws IOException { |
| |
| while (hasMoreSpans && candidateListDocNum == secondSpans.doc()) { |
| if (secondSpans.start() > firstSpans.end()) { |
| break; |
| } |
| if (secondSpans.start() == firstSpans.end()) { |
| candidateList.add(new CandidateSpan(secondSpans)); |
| addMatch(new CandidateSpan(secondSpans)); |
| } |
| hasMoreSpans = secondSpans.next(); |
| } |
| } |
| |
| |
| /** |
| * Creates a match from the given CandidateSpan representing a |
| * secondspan |
| * state whose start position is identical to the end position of |
| * the |
| * current firstspan, and adds it to the matchlist. |
| * |
| * @param cs |
| * a CandidateSpan |
| * @throws IOException |
| */ |
| private void addMatch (CandidateSpan cs) throws IOException { |
| |
| int start = firstSpans.start(); |
| long cost = firstSpans.cost() + cs.getCost(); |
| |
| List<byte[]> payloads = new ArrayList<byte[]>(); |
| if (collectPayloads) { |
| if (firstSpans.isPayloadAvailable()) |
| payloads.addAll(firstSpans.getPayload()); |
| if (cs.getPayloads() != null) |
| payloads.addAll(cs.getPayloads()); |
| } |
| |
| matchList.add(new CandidateSpan(start, cs.getEnd(), |
| candidateListDocNum, cost, payloads)); |
| } |
| |
| |
| @Override |
| public boolean skipTo (int target) throws IOException { |
| if (hasMoreSpans && (firstSpans.doc() < target)) { |
| if (!firstSpans.skipTo(target)) { |
| hasMoreSpans = false; |
| return false; |
| } |
| } |
| matchPayload.clear(); |
| return advance(); |
| } |
| |
| |
| @Override |
| public long cost () { |
| return firstSpans.cost() + secondSpans.cost(); |
| } |
| }; |