// blob: a45e1ee98ba10b6a5c32865dcccc4a34222f1c67 [file] [log] [blame]
package de.ids_mannheim.korap.query.spans;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.util.Map;
import java.util.ArrayList;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.ByteBuffer;
/**
 * Modify matches to, for example, return only certain class or span ranges.
 *
 * Wraps the spans of another query and narrows every match to the range
 * covered by the class payloads carrying a given class number. Wrapped
 * matches without such a class payload are skipped. All other payloads
 * of the wrapped match are passed through unchanged.
 *
 * @author diewald
 */
public class MatchModifyClassSpans extends Spans {

    // Length of a class payload of structure <i>start<i>end<b>class
    private static final int CLASS_PAYLOAD_LENGTH = 9;

    private static final Logger log =
        LoggerFactory.getLogger(MatchModifyClassSpans.class);

    // This advises the Java compiler to ignore all logging
    public static final boolean DEBUG = false;

    // Payloads of the current wrapped span that are not focused class payloads
    private final List<byte[]> wrappedPayload;
    private final Spans spans;
    private final byte number;
    // Stored for the not yet implemented divide mode (see TODO in next())
    private final boolean divide;
    // Reusable scratch buffer for decoding a single class payload
    private final ByteBuffer bb;
    private final SpanQuery wrapQuery;

    private int start = -1, end;

    /**
     * Create spans focusing on a class of the wrapped query's matches.
     *
     * @param wrapQuery    The query whose spans are wrapped.
     * @param context      Passed on to {@code wrapQuery.getSpans()}.
     * @param acceptDocs   Passed on to {@code wrapQuery.getSpans()}.
     * @param termContexts Passed on to {@code wrapQuery.getSpans()}.
     * @param number       The class number to focus on.
     * @param divide       Stored, but currently not evaluated.
     * @throws IOException If the wrapped spans can't be retrieved.
     */
    public MatchModifyClassSpans (
        SpanQuery wrapQuery,
        AtomicReaderContext context,
        Bits acceptDocs,
        Map<Term,TermContext> termContexts,
        byte number,
        boolean divide) throws IOException {
        this.spans          = wrapQuery.getSpans(context, acceptDocs, termContexts);
        this.number         = number;
        this.divide         = divide;
        this.wrapQuery      = wrapQuery;
        this.bb             = ByteBuffer.allocate(CLASS_PAYLOAD_LENGTH);
        this.wrappedPayload = new ArrayList<byte[]>(6);
    }

    /**
     * Return all payloads of the current wrapped span
     * that are not class payloads of the focused class.
     */
    @Override
    public Collection<byte[]> getPayload() throws IOException {
        return wrappedPayload;
    }

    /**
     * Check if the current match has payloads left after
     * removing the focused class payloads.
     */
    @Override
    public boolean isPayloadAvailable() {
        return !wrappedPayload.isEmpty();
    }

    // inherit javadocs
    @Override
    public int doc() {
        return spans.doc();
    }

    // inherit javadocs
    @Override
    public int start() {
        return start;
    }

    // inherit javadocs
    @Override
    public int end() {
        return end;
    }

    // inherit javadocs
    @Override
    public boolean next() throws IOException {
        /* TODO:
         * In case of a split() (instead of a submatch())
         * Is the cache empty?
         * Otherwise: Next from list
         */
        if (DEBUG) {
            log.trace("Forward next match");
        }

        // Move through the wrapped spans until one contains the class
        while (spans.next()) {
            if (DEBUG) {
                log.trace("Forward next inner span");
            }

            if (focusCurrentSpan()) {
                return true;
            }
        }

        // No more spans
        return false;
    }

    // inherit javadocs
    @Override
    public boolean skipTo (int target) throws IOException {
        if (!spans.skipTo(target)) {
            return false;
        }

        // The wrapped span skipped to has to be focused as well,
        // otherwise start(), end() and getPayload() would still
        // describe the match before the skip. If it doesn't contain
        // the class, move on to the next match that does.
        return focusCurrentSpan() || next();
    }

    @Override
    public String toString () {
        return getClass().getName() + "(" + this.wrapQuery.toString() + ")@" +
            (doc() + ":" + start() + "-" + end());
    }

    @Override
    public long cost () {
        return spans.cost();
    }

    /**
     * Set start, end and payloads based on the current wrapped span.
     *
     * The focused range is the minimum start and the maximum end of
     * all class payloads with the requested class number.
     *
     * @return true, if the wrapped span contains the class.
     */
    private boolean focusCurrentSpan () throws IOException {

        // No classes stored
        wrappedPayload.clear();
        start = -1;

        if (!spans.isPayloadAvailable()) {
            return false;
        }

        end = 0;

        // Iterate over all payloads and find the maximum span per class
        for (byte[] current : spans.getPayload()) {

            // Todo: Implement Divide

            // Check length and class on the array itself - copying an
            // arbitrary payload into the 9 byte buffer first would
            // overflow for payloads longer than a class payload
            boolean focusedClass =
                current.length == CLASS_PAYLOAD_LENGTH &&
                current[CLASS_PAYLOAD_LENGTH - 1] == this.number;

            // No class payload - but keep!
            if (!focusedClass) {
                if (DEBUG) {
                    log.trace("Remember old payload {}", current);
                }
                wrappedPayload.add(current);
                continue;
            }

            // Found class payload of structure <i>start<i>end<b>class
            bb.clear();
            bb.put(current);
            bb.rewind();
            int classStart = bb.getInt();
            int classEnd   = bb.getInt();

            if (DEBUG) {
                log.trace("Found matching class {}-{}", classStart, classEnd);
            }

            // Set start position
            if (start == -1 || classStart < start) {
                start = classStart;
            }

            // Set end position
            if (classEnd > end) {
                end = classEnd;
            }
        }

        // Class not found
        if (start == -1) {
            return false;
        }

        if (DEBUG) {
            log.trace(
                "Start to focus on class {} from {} to {}",
                number,
                start,
                end
            );
        }

        return true;
    }
}