// blob: 902a33b8389f6393c3d12283bfe445ae556374bf [file] [log] [blame]
package de.ids_mannheim.korap.query.spans;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.BytesRef;
import java.nio.ByteBuffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Collections;
import java.util.Collection;
import java.util.LinkedList;
import java.util.ArrayList;
import java.util.List;
// TODO: Store payloads in 12 bytes instead of the complicated ByteBuffer stuff!
import de.ids_mannheim.korap.query.spans.KorapTermSpan;
public class ElementSpans extends Spans {
// Scratch array that receives payload bytes 8..11, which getPayloadEndPosition() decodes as the end position.
private byte[] payloadByte = new byte[4];
// Only touched in getPayloadEndPosition() (clear() and a wrap() call); it never holds data itself.
private ByteBuffer bb = ByteBuffer.allocate(4);
// Postings that are iterated; null for the instance created by the no-arg constructor.
protected final DocsAndPositionsEnum postings;
// Term handed to the constructor, used by toString(); null for the no-arg constructor.
protected final Term term;
// doc:      current document id (-1 before the first document)
// freq:     number of positions in the current document (postings.freq())
// count:    number of positions already read from the current document
// position: start position of the current span
// end:      cached end position of the current span; a negative value means "not resolved yet"
private int doc, freq, count, position, end;
// Set to true by getPayloadEndPosition() once the payload was read; set back to false at the
// end of next() and in skipTo(). Consulted by isPayloadAvailable().
protected boolean readPayload;
// Spans that were already read from the postings but not yet returned by next().
private LinkedList<KorapTermSpan> memory;
// Payload buffered for the current span: the first 8 payload bytes (logged as two int offsets)
// followed by the payload bytes from index 12 onwards.
private ByteBuffer storedPayload = ByteBuffer.allocate(128);
// True while storedPayload holds data belonging to the current span.
boolean hasStoredPayload = false;
// overflow: look-ahead span read in next() that has not been returned yet (start == -1 is
//           treated as "nothing pending" by next()).
// tempSpan: scratch span used by next() while swapping the current span with the overflow.
private KorapTermSpan overflow, tempSpan;
private final static Logger log = LoggerFactory.getLogger(ElementSpans.class);
/**
 * Creates element spans that read their positions and payloads from the given postings.
 *
 * @param postings the postings enumeration to iterate
 * @param term the term these spans were created for (used by {@link #toString()})
 */
public ElementSpans(DocsAndPositionsEnum postings, Term term) {
    this.term = term;
    this.postings = postings;

    // Nothing has been read yet: no current document and no resolved end position.
    this.end = -1;
    this.doc = -1;

    // Start with an empty payload buffer.
    this.hasStoredPayload = false;
    this.storedPayload.clear();

    // Buffers used by next() for look-ahead and reordering.
    this.memory = new LinkedList<KorapTermSpan>();
    this.overflow = new KorapTermSpan();
    this.tempSpan = new KorapTermSpan();
}
/**
 * Package-private constructor that attaches neither postings nor a term.
 * Only meant for EmptyElementSpans (below), which overrides every iteration method.
 */
ElementSpans() {
    this.postings = null;
    this.term = null;
}
/**
 * Moves to the next element span and makes it the current one (position / end / storedPayload).
 *
 * The postings are read one position ahead: the most recently read position is parked in
 * "overflow" and only handed out once the following position is known. Spans that were read
 * but must wait their turn are kept in "memory", which is sorted with Collections.sort
 * (i.e. by KorapTermSpan's natural ordering) before its first entry is returned.
 *
 * @return true if a span is available, false once the postings are exhausted
 * @throws IOException if reading from the postings fails
 */
@Override
public boolean next() throws IOException {
// Invalidate the cached end position; end() resolves it again on demand.
end = -1;
// Spans buffered by an earlier call are handed out first.
// NOTE(review): this return path (and the two other early "return true" paths below) does not
// reset readPayload, unlike the regular exit at the bottom - confirm this is intended.
if (memory.size() > 0) {
log.trace("There is a memory entry");
_setToCurrent(memory.removeFirst());
log.trace("Current1: [{}-{}]", position, end);
return true;
};
log.trace("There is no memory entry");
// All positions of the current document were consumed. This is also the case before the
// first document was loaded, because count and freq both start at 0.
if (count == freq) {
log.trace("last position in document");
// Check for overflow on document boundary
if (overflow.start != -1) {
log.trace(" but there is an overflow");
// _setToCurrent returns the span it was given, so the overflow is emptied right after use.
_setToCurrent(overflow).clear();
log.trace("Current2: [{}-{}]", position, end);
return true;
};
if (postings == null) {
log.trace("no more postings");
return false;
};
log.trace("Go to next doc");
doc = postings.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
log.trace("no more docs");
return false;
};
// New doc!
end = -1;
storedPayload.clear();
hasStoredPayload = false;
freq = postings.freq();
count = 0;
};
// Start position of the pending look-ahead span; -1 is treated as "no overflow pending".
int pos = overflow.start;
while (true) {
/*
if (DEBUG)
System.err.println(">> Reset end and payload");
storedPayload.clear();
end = -1;
*/
log.trace("pos is {}", pos);
_log_payloads(1);
// The current document has no unread positions left.
if (count == freq) {
log.trace("last position in document");
// NOTE(review): looks unreachable - a null postings already returned false above, or would
// have failed in nextPosition() on an earlier iteration. Confirm before relying on it.
if (postings == null) {
log.trace("no more postings");
// Check for overflow on document boundary
if (overflow.start != -1) {
log.trace(" but there is an overflow");
_setToCurrent(overflow).clear();
log.trace("Current3: [{}-{}]", position, end);
return true;
};
return false;
};
log.trace("go to next doc");
_log_payloads(2);
// Move a pending overflow into memory so it is returned before the document changes.
if (overflow.start != -1) {
log.trace("Storing overflow {} ...", overflow.toString());
log.trace("... in memory with {}-{}", overflow.startChar(), overflow.endChar());
memory.add((KorapTermSpan) overflow.clone());
overflow.clear();
};
_log_payloads(3);
// Anything still buffered belongs to the old document: sort it and return its first entry;
// the remaining entries are served by the memory check at the top of later calls.
if (memory.size() > 0) {
log.trace("sort and return first");
_log_payloads(4);
Collections.sort(memory);
_log_payloads(5);
_setToCurrent(memory.removeFirst());
_log_payloads(6);
// NOTE(review): the message has a stray "]" and reuses the "Current4" label of the final trace below.
log.trace("Current4: [{}-{}]]", position, end);
break;
};
// Nothing buffered: continue with the next document.
doc = postings.nextDoc();
// New doc
end = -1;
pos = -1;
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
log.trace("no more docs");
return false;
};
freq = postings.freq();
count = 0;
};
log.trace("Forward postings");
position = postings.nextPosition();
// New pos!
// The cached end and the buffered payload belonged to the previous position.
end = -1;
_log_payloads(9);
log.trace("CLEAR PAYLOAD");
storedPayload.clear();
hasStoredPayload = false;
_log_payloads(10);
count++;
log.trace("next position is {}", position);
// There was no overflow
// ... or the pending overflow starts at the very same position as the one just read.
if (pos == -1 || pos == position) {
if (pos == position) {
// Same start position: keep the previous span in memory and go on collecting.
log.trace("Add overflow to memory");
memory.add((KorapTermSpan) overflow.clone());
}
else {
log.trace("There was no overflow");
pos = position;
};
_log_payloads(8);
log.trace("*****************************");
// Park the position just read (start, end and payload) in overflow and read one more.
_setCurrentTo(overflow);
log.trace("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
log.trace("Set overflow and continue: {} ...", overflow.toString());
log.trace("... with {}-{}", overflow.startChar(), overflow.endChar());
continue;
}
// overflow was older
// (the condition is always true here, since pos == position was handled by the branch above)
else if (pos != position) {
log.trace("Overflow was older");
// Use memory
if (memory.size() > 0) {
log.trace("Add overflow to memory");
memory.add((KorapTermSpan) overflow.clone());
log.trace("Sort memory");
// Sort by end position
// NOTE(review): the actual order is whatever KorapTermSpan's natural ordering defines - confirm.
Collections.sort(memory);
// Store current information in overflow
_setCurrentTo(overflow);
log.trace("Set new overflow: {}", overflow.toString());
log.trace("Get first element from sorted memory");
_setToCurrent(memory.removeFirst());
}
// Nothing in memory - use overflow!
else {
log.trace("There is nothing in memory");
/* Make overflow active and store last position in overflow */
_setCurrentTo(tempSpan);
// NOTE(review): the message says "Temp" but the value logged is overflow, not tempSpan.
log.trace("Temp is now {}", overflow.toString());
_setToCurrent(overflow);
// Store current information in overflow
overflow.copyFrom(tempSpan);
log.trace("Overflow is now {}", overflow.toString());
};
break;
};
};
log.trace("Current4: [{}-{}]", position, end);
// A new current span was set, so its payload counts as unread again.
readPayload = false;
return true;
};
/**
 * Makes the given span the current one: copies its start and end into position / end and its
 * payload (if any) into storedPayload.
 *
 * A span without payload is supported: the trace output is guarded by the same null check as
 * the copy, so it no longer fails with a NullPointerException before the "Payload is null"
 * branch can run.
 *
 * @param act the span to activate
 * @return the span that was passed in, so callers can chain e.g. {@code .clear()}
 */
private KorapTermSpan _setToCurrent (KorapTermSpan act) {
    final boolean hasPayload = act.payload != null;

    if (hasPayload) {
        act.payload.rewind();
        // getInt(int) is an absolute read and does not move the buffer position.
        log.trace("Set to current with {}, meaning {} - {}", act.toString(), act.payload.getInt(0), act.payload.getInt(4));
    }
    else {
        log.trace("Set to current with {}", act.toString());
    }

    position = act.start;
    end = act.end;

    // Drop whatever payload belonged to the previous current span.
    storedPayload.clear();
    hasStoredPayload = false;

    if (hasPayload) {
        log.trace("Payload is not null");
        // Copy from the beginning of the span's payload buffer.
        act.payload.rewind();
        storedPayload.put(act.payload);
        hasStoredPayload = true;
    }
    else {
        log.trace("Payload is null");
    }
    return act;
}
/**
 * Trace helper: logs the two int values at the start of storedPayload, or that no payload is
 * buffered.
 *
 * @param nr marker printed in front of the message to tell the call sites apart
 */
private void _log_payloads (int nr) {
    if (!hasStoredPayload) {
        log.trace("[{}] payload is empty", nr);
        return;
    }

    // Absolute reads: the buffer position is left untouched.
    final int from = storedPayload.getInt(0);
    final int to = storedPayload.getInt(4);
    log.trace("[{}] payload offsets are {}-{}", nr, from, to);
}
/**
 * Stores the current span (position, end and buffered payload) in the overflow span.
 *
 * The buffered payload is rewound before it is copied; otherwise the copy would start at
 * wherever the last write to storedPayload stopped and transfer the bytes behind the payload.
 * Both buffers are rewound afterwards so that later reads start at the payload's first byte,
 * matching what the KorapTermSpan overload of this method does.
 *
 * NOTE(review): no call to this no-arg variant is visible in this file.
 */
private void _setCurrentTo () {
    overflow.start = position;
    overflow.end = this.end();
    overflow.payload.clear();

    if (hasStoredPayload) {
        storedPayload.rewind();
        overflow.payload.put(storedPayload);
        overflow.payload.rewind();
        storedPayload.rewind();
    }

    log.trace("Set current to Overflow {} with {}-{}", overflow.toString(), overflow.startChar(), overflow.endChar());
}
/**
 * Copies the current span (position, end and buffered payload) into the given span object.
 *
 * @param target the span that receives the current state
 */
private void _setCurrentTo (KorapTermSpan target) {
    _log_payloads(7);

    target.start = position;
    target.end = this.end();
    target.payload.clear();

    // Sampled after end(), because resolving the end may have just read the payload.
    final boolean copyPayload = hasStoredPayload;

    if (copyPayload) {
        storedPayload.rewind();
        target.payload.put(storedPayload);
        log.trace("Object now has offset {}-{}", target.payload.getInt(0), target.payload.getInt(4));
        // Important: leave the copied payload positioned at its first byte.
        target.payload.rewind();
    }

    log.trace("Set current to object {} ...", target.toString());

    if (!copyPayload) {
        return;
    }

    log.trace("with {}-{} from {}-{}", target.startChar(), target.endChar(), storedPayload.getInt(0), storedPayload.getInt(4));
    storedPayload.rewind();
}
/**
 * Advances the postings to the first document at or beyond {@code target}, discards all
 * buffered spans and positions on the first occurrence in that document.
 *
 * @param target the document id to advance to; asserted to be greater than the current one
 * @return false if the postings are exhausted, true otherwise
 * @throws IOException if reading from the postings fails
 */
@Override
public boolean skipTo(int target) throws IOException {
    assert target > doc;
    doc = postings.advance(target);

    // Everything buffered so far belongs to a position that is being skipped.
    hasStoredPayload = false;
    storedPayload.clear();
    overflow.clear();
    if (memory != null) {
        memory.clear();
    }
    end = -1;

    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
        return false;
    }

    // Read the first position of the new document.
    freq = postings.freq();
    count = 0;
    position = postings.nextPosition();
    count++;

    readPayload = false;
    return true;
}
/**
 * @return the id of the current document, as last set by the constructor, next() or skipTo()
 */
@Override
public int doc() {
    return this.doc;
}
/**
 * @return the start position of the current span
 */
@Override
public int start() {
    return this.position;
}
/**
 * Returns the end position of the current span. The value is resolved from the payload on
 * first use and cached as long as it is non-negative; if resolving throws, the start position
 * is used instead.
 *
 * @return the end position of the current span
 */
@Override
public int end() {
    if (end < 0) {
        try {
            end = this.getPayloadEndPosition();
        }
        catch (Exception e) {
            // Best effort: fall back to the start position when the payload cannot be decoded.
            end = position;
        }
    }
    return end;
}
/**
 * @return the cost reported by the underlying postings
 */
@Override
public long cost() {
    final long postingsCost = postings.cost();
    return postingsCost;
}
/**
 * Returns the first 8 bytes of the buffered payload (logged as two int values) as a
 * single-element collection.
 *
 * @return a singleton collection holding a fresh 8-byte array
 * @throws IOException declared by the overridden method
 */
@Override
public Collection<byte[]> getPayload() throws IOException {
    // The payload is (re-)read from the postings whenever the buffer's position is at 0.
    final boolean bufferAtStart = storedPayload.position() <= 0;
    if (bufferAtStart) {
        this.getPayloadEndPosition();
    }

    if (!hasStoredPayload) {
        log.trace("storedPayload is empty");
    }
    else {
        log.trace("storedPayload: {} - {}", storedPayload.getInt(0), storedPayload.getInt(4));
    }

    // Hand out a copy so callers cannot modify the internal buffer.
    final byte[] offsetCharacters = new byte[8];
    System.arraycopy(storedPayload.array(), 0, offsetCharacters, 0, 8);
    return Collections.singletonList(offsetCharacters);
}
/**
 * @return true only while the payload has not been read yet (readPayload is false) and the
 *         postings report a payload for the current position
 * @throws IOException if the postings cannot be read
 */
@Override
public boolean isPayloadAvailable() throws IOException {
    if (readPayload) {
        return false;
    }
    return postings.getPayload() != null;
}
/**
 * Describes the spans as {@code spans(<term>)@<state>}, where the state is "START" before the
 * first document, "END" once the documents are exhausted and {@code <doc>-<start>} otherwise.
 *
 * The term is rendered null-safely because the no-arg constructor leaves it null, and the
 * state is taken from doc() / start() so that subclasses overriding them (such as the empty
 * spans, whose doc() is NO_MORE_DOCS) are described correctly.
 *
 * @return a short textual description of the spans and their current state
 */
@Override
public String toString() {
    final int currentDoc = doc();

    final String state;
    if (currentDoc == -1) {
        state = "START";
    }
    else if (currentDoc == DocIdSetIterator.NO_MORE_DOCS) {
        state = "END";
    }
    else {
        state = currentDoc + "-" + start();
    }

    return "spans(" + String.valueOf(term) + ")@" + state;
}
/**
 * @return the postings these spans iterate over (null for spans created without postings)
 */
public DocsAndPositionsEnum getPostings() {
    return this.postings;
}
/**
 * Reads the payload of the current posting, buffers it in storedPayload and decodes the end
 * position of the element.
 *
 * Payload layout as consumed here: bytes 0-7 are copied to the front of storedPayload (they
 * are logged elsewhere as two int offsets), bytes 8-11 hold the end position as a big-endian
 * int, and any bytes from index 12 onwards are appended to storedPayload.
 *
 * If the posting has no payload, or the payload is too short to contain an end position, the
 * start position is returned. That is the value end() has always reported in this situation
 * (previously through its catch block), but it is now produced without throwing, so
 * getPayload() no longer fails and no shared scratch state is left in a broken condition.
 *
 * @return the decoded end position; the start position if no usable payload exists; -1 if
 *         reading the payload raised an IOException
 */
private int getPayloadEndPosition () {
    log.trace("getPayloadEndPosition of element ...");
    try {
        final BytesRef payload = postings.getPayload();

        readPayload = true;
        storedPayload.clear();
        hasStoredPayload = false;

        // The end position lives in bytes 8-11, so at least 12 bytes are required.
        if (payload == null || payload.length < 12) {
            log.trace("There's no payload available");
            return position;
        }

        log.trace(" BytesRef: {}", payload);
        log.trace("Do bit magic");

        // Keep everything except the 4 end-position bytes.
        storedPayload.put(payload.bytes, payload.offset, 8);
        storedPayload.put(payload.bytes, payload.offset + 12, payload.length - 12);
        hasStoredPayload = true;

        // Decode the end position directly from the payload (ByteBuffer reads big-endian by default).
        final int endPosition = ByteBuffer.wrap(payload.bytes, payload.offset + 8, 4).getInt();

        log.trace("~~ Bytes: {}-{}-{}",
                  storedPayload.getInt(0),
                  storedPayload.getInt(4),
                  endPosition);
        log.trace(" |-> {}", endPosition);
        return endPosition;
    }
    catch (IOException e) {
        log.trace("IOException {}", e);
    }
    return -1;
}
/**
 * Element spans that never match: iteration ends immediately, the document is reported as
 * exhausted and no payload is offered.
 */
private static final class EmptyElementSpans extends ElementSpans {

    @Override
    public boolean next() {
        return false;
    }

    @Override
    public boolean skipTo(int target) {
        return false;
    }

    @Override
    public int doc() {
        return DocIdSetIterator.NO_MORE_DOCS;
    }

    @Override
    public int start() {
        return -1;
    }

    @Override
    public int end() {
        return -1;
    }

    @Override
    public Collection<byte[]> getPayload() {
        return null;
    }

    @Override
    public boolean isPayloadAvailable() {
        return false;
    }

    @Override
    public long cost() {
        return 0;
    }
}
// Shared instance of the empty spans; it is created via the no-arg constructor, so it has neither postings nor a term.
public static final ElementSpans EMPTY_ELEMENT_SPANS = new EmptyElementSpans();
};