// blob: da70fd9b677478bc46e0b8a867f34a479bc04013 [file] [log] [blame]
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
import de.ids_mannheim.korap.query.SpanExpansionQuery;
/**
* Enumeration of spans expanded with minimum <code>m</code> and maximum
* <code>n</code> token positions to either left or right direction from the
* original spans. See examples in {@link SpanExpansionQuery}.
*
* The expansion offsets, namely the start and end position of an expansion
* part, can be stored in payloads. A class number is assigned to the offsets
* grouping them together.
*
* @author margaretha
* */
public class ExpandedSpans extends SimpleSpans {
    /** Minimum and maximum number of token positions to expand by. */
    private final int min, max;
    /** Class number written into the extension payload; only used when > 0. */
    private final byte classNumber;
    /** Expansion direction: negative expands to the left, otherwise to the right. */
    private final int direction;
    /**
     * Pending matches generated from the span the inner (first) spans are
     * currently positioned on. All entries therefore share one document.
     */
    private final List<CandidateSpan> candidateSpans;
    /** Cost of the current match, copied from the candidate in setMatch. */
    private long matchCost;

    /**
     * Constructs ExpandedSpans from the given {@link SpanExpansionQuery}.
     *
     * @param spanExpansionQuery a SpanExpansionQuery providing the expansion
     *        range (min, max), the direction and the class number
     * @param context passed on to the superclass constructor
     * @param acceptDocs passed on to the superclass constructor
     * @param termContexts passed on to the superclass constructor
     * @throws IOException
     */
    public ExpandedSpans(SpanExpansionQuery spanExpansionQuery,
            AtomicReaderContext context, Bits acceptDocs,
            Map<Term, TermContext> termContexts) throws IOException {
        super(spanExpansionQuery, context, acceptDocs, termContexts);
        this.min = spanExpansionQuery.getMin();
        this.max = spanExpansionQuery.getMax();
        this.direction = spanExpansionQuery.getDirection();
        this.classNumber = spanExpansionQuery.getClassNumber();
        candidateSpans = new ArrayList<CandidateSpan>();
        hasMoreSpans = true;
    }

    /**
     * Moves to the next expanded match. Pending candidates are consumed
     * first; only when none are left are the inner spans moved forward.
     *
     * @return <code>true</code> if a match is found, <code>false</code>
     *         otherwise.
     * @throws IOException
     */
    @Override
    public boolean next() throws IOException {
        matchPayload.clear();
        isStartEnumeration = false;
        if (candidateSpans.isEmpty() && hasMoreSpans) {
            hasMoreSpans = firstSpans.next();
        }
        return advance();
    }

    /**
     * Advances the ExpandedSpans to the next match by taking the first
     * element of the candidate list as the match. If the list is empty, it
     * is filled from the current inner span; if that span yields no
     * candidate at all, the inner spans are moved forward so that the loop
     * always makes progress.
     *
     * @return <code>true</code> if a match is found, <code>false</code>
     *         otherwise.
     * @throws IOException
     */
    private boolean advance() throws IOException {
        while (!candidateSpans.isEmpty() || hasMoreSpans) {
            if (!candidateSpans.isEmpty()) {
                setMatch(candidateSpans.remove(0));
                return true;
            }
            setCandidateList();
            if (candidateSpans.isEmpty()) {
                // The current inner span admits no expansion within
                // (min,max); without stepping forward this loop would
                // never terminate.
                hasMoreSpans = firstSpans.next();
            }
        }
        return false;
    }

    /**
     * Fills the candidate list with one candidate match per possible
     * expansion length between min and max for the current inner span.
     *
     * Left expansions are added from the longest to the shortest. An
     * expansion by <code>k</code> positions to the left is only possible if
     * the span starts at position <code>k</code> or later, so longer
     * expansions are skipped instead of being clamped to position 0
     * (clamping would produce duplicate matches and expansions shorter than
     * min).
     *
     * @throws IOException
     */
    private void setCandidateList() throws IOException {
        if (direction < 0) {
            int spanStart = firstSpans.start();
            for (int length = Math.min(max, spanStart); length >= min; length--) {
                int start = spanStart - length;
                candidateSpans.add(new CandidateSpan(start, firstSpans.end(),
                        firstSpans.doc(), firstSpans.cost(),
                        createPayloads(start, spanStart)));
            }
        } else {
            int spanEnd = firstSpans.end();
            for (int length = min; length <= max; length++) {
                // TODO: the end position is not checked against the end of
                // the document, so an expansion may reach beyond it.
                int end = spanEnd + length;
                candidateSpans.add(new CandidateSpan(firstSpans.start(), end,
                        firstSpans.doc(), firstSpans.cost(),
                        createPayloads(spanEnd, end)));
            }
        }
    }

    /**
     * Prepares the payloads for a candidate match (ExpandedSpans): the
     * payloads of the current inner span, if available, followed by the
     * extension offsets if the class number is greater than 0.
     *
     * @param start start position of the expansion part
     * @param end end position of the expansion part
     * @return the payloads for a candidate match
     * @throws IOException
     */
    private ArrayList<byte[]> createPayloads(int start, int end)
            throws IOException {
        ArrayList<byte[]> payload = new ArrayList<byte[]>();
        if (firstSpans.isPayloadAvailable()) {
            payload.addAll(firstSpans.getPayload());
        }
        if (classNumber > 0) {
            payload.add(createExtensionPayloads(start, end));
        }
        return payload;
    }

    /**
     * Prepares a byte array of extension offsets with the given start and
     * end positions and the class number, to be stored in payloads. The
     * array has 9 bytes: the start (int), the end (int) and the class
     * number (byte), in that order.
     *
     * @param start start position of the expansion part
     * @param end end position of the expansion part
     * @return a byte array of extension offsets and the class number
     */
    private byte[] createExtensionPayloads(int start, int end) {
        ByteBuffer buffer = ByteBuffer.allocate(9);
        buffer.putInt(start);
        buffer.putInt(end);
        buffer.put(classNumber);
        return buffer.array();
    }

    /**
     * Sets the properties of the given candidate match span as the current
     * match (state of ExpandedSpans). The payload list of the candidate is
     * taken over as is, not copied.
     *
     * @param candidateSpan the candidate to become the current match
     */
    private void setMatch(CandidateSpan candidateSpan) {
        matchDocNumber = candidateSpan.getDoc();
        matchStartPosition = candidateSpan.getStart();
        matchEndPosition = candidateSpan.getEnd();
        matchPayload = candidateSpan.getPayloads();
        matchCost = candidateSpan.getCost();
    }

    /**
     * Skips to the first match beyond the current one whose document number
     * is greater than or equal to the target.
     *
     * Candidates queued for a document before the target are discarded, so
     * that no match from an earlier document is returned. If the inner
     * spans are already at or beyond the target and no candidate is
     * pending, they are moved forward first, so the current inner span is
     * not expanded a second time.
     *
     * @param target the minimum document number of the next match
     * @return <code>true</code> if a match is found, <code>false</code>
     *         otherwise.
     * @throws IOException
     */
    @Override
    public boolean skipTo(int target) throws IOException {
        // All pending candidates share the document of the current inner
        // span, so checking the first one is sufficient.
        if (!candidateSpans.isEmpty()
                && candidateSpans.get(0).getDoc() < target) {
            candidateSpans.clear();
        }
        if (hasMoreSpans && (firstSpans.doc() < target)) {
            if (!firstSpans.skipTo(target)) {
                hasMoreSpans = false;
                return false;
            }
            matchPayload.clear();
            return advance();
        }
        matchPayload.clear();
        if (candidateSpans.isEmpty() && hasMoreSpans) {
            // The current inner span has already been expanded and all of
            // its candidates consumed; continue with the following one.
            hasMoreSpans = firstSpans.next();
        }
        return advance();
    }

    /**
     * Returns the cost of the current match, as taken from the candidate
     * that was last set as the match.
     */
    @Override
    public long cost() {
        return matchCost;
    }
}