Bugfixing in ElementSpans and KorapMatch
diff --git a/src/main/java/de/ids_mannheim/korap/KorapMatch.java b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
index 3467b53..dc28449 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapMatch.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapMatch.java
@@ -42,7 +42,7 @@
private final static Logger log = LoggerFactory.getLogger(KorapMatch.class);
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = true;
+ public static final boolean DEBUG = false;
// Mapper for JSON serialization
ObjectMapper mapper = new ObjectMapper();
@@ -841,13 +841,23 @@
HighlightCombinatorElement lastComb;
this.tempStack.clear();
- StringBuilder sb = new StringBuilder("Stack for checking with ");
- sb.append(number).append(" is ");
- for (int s : this.balanceStack) {
- sb.append('[').append(s).append(']');
+ // Shouldn't happen
+ if (this.balanceStack.size() == 0) {
+ if (DEBUG)
+ log.trace("The balance stack is empty");
+ return;
};
- if (DEBUG)
+
+ if (DEBUG) {
+ StringBuilder sb = new StringBuilder(
+ "Stack for checking with class "
+ );
+ sb.append(number).append(" is ");
+ for (int s : this.balanceStack) {
+ sb.append('[').append(s).append(']');
+ };
log.trace(sb.toString());
+ };
// class number of the last element
int eold = this.balanceStack.removeLast();
@@ -1091,6 +1101,11 @@
if (openList.isEmpty()) {
stack.addAll(closeList);
break;
+ }
+
+ // Not sure about this, but it can happen
+ else if (closeList.isEmpty()) {
+ break;
};
if (openList.peekFirst()[0] < closeList.peekFirst()[1]) {
@@ -1129,6 +1144,9 @@
// Match position
startPosChar = this.positionsToOffset.start(ldid, this.startPos);
+ if (DEBUG)
+ log.trace("Unaltered startPosChar is {}", startPosChar);
+
// Check potential differing start characters
// e.g. from element spans
if (potentialStartPosChar != -1 &&
@@ -1136,27 +1154,22 @@
startPosChar = potentialStartPosChar;
endPosChar = this.positionsToOffset.end(ldid, this.endPos - 1);
-
+
if (DEBUG)
- log.trace("Match offset is pos {}-{} (chars {}-{})",
+ log.trace("Unaltered endPosChar is {}", endPosChar);
+
+ // Potential end characters may come from spans with
+ // defined character offsets like sentences including .", ... etc.
+ if (endPosChar < potentialEndPosChar)
+ endPosChar = potentialEndPosChar;
+
+ if (DEBUG)
+ log.trace("Refined: Match offset is pos {}-{} (chars {}-{})",
this.startPos,
this.endPos,
startPosChar,
endPosChar);
- // Potential end characters may come from spans with
- // defined character offsets like sentences including .", ... etc.
- if (endPosChar < potentialEndPosChar) {
- endPosChar = potentialEndPosChar;
-
- if (DEBUG)
- log.trace("Refined: Match offset is pos {}-{} (chars {}-{})",
- this.startPos,
- this.endPos,
- startPosChar,
- endPosChar);
- };
-
// left context
if (leftTokenContext) {
if (DEBUG)
@@ -1222,6 +1235,7 @@
}
else {
this.tempSnippet = this.getPrimaryData(startOffsetChar);
+ // endPosChar = this.tempSnippet.length() - 1 + startOffsetChar;
endMore = false;
};
@@ -1241,12 +1255,23 @@
-1,
0};
+ if (DEBUG)
+ log.trace("The match entry is {}-{} ({}-{}) with startOffsetChar {}",
+ startPosChar - startOffsetChar,
+ endPosChar - startOffsetChar,
+ startPosChar,
+ endPosChar,
+ startOffsetChar);
+
// Add match span
this.span.add(intArray);
// highlights
// -- I'm not sure about this.
if (this.highlight != null) {
+ if (DEBUG)
+ log.trace("There are highlights!");
+
for (Highlight highlight : this.highlight) {
int start = this.positionsToOffset.start(
ldid, highlight.start
diff --git a/src/main/java/de/ids_mannheim/korap/index/PositionsToOffset.java b/src/main/java/de/ids_mannheim/korap/index/PositionsToOffset.java
index cab5410..eae79c1 100644
--- a/src/main/java/de/ids_mannheim/korap/index/PositionsToOffset.java
+++ b/src/main/java/de/ids_mannheim/korap/index/PositionsToOffset.java
@@ -76,6 +76,9 @@
public void add (PositionsToOffsetArray ptoa) {
if (DEBUG)
log.trace("Add positionsToOffsetArray {}/{}", ptoa.docID, ptoa.pos);
+ if (ptoa.pos < 0)
+ return;
+
if (this.processed && this.exists(ptoa))
return;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index d32c520..9f39290 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -21,27 +21,27 @@
import java.util.List;
// TODO: Store payloads in 12 byte instead of the complicated ByteBuffer stuff!
+// TODO: Use copyFrom() instead of clone()
/**
* @author Nils Diewald
*
- * TODO: Support lazy loading for .end()
+ * TODO: Use copyFrom() instead of clone()
*/
public class ElementSpans extends Spans {
- private byte[] payloadByte = new byte[4];
+ private byte[] payloadByte;
private ByteBuffer bb = ByteBuffer.allocate(4);
protected final DocsAndPositionsEnum postings;
protected final Term term;
- private int doc, freq, count, position, end;
- protected boolean readPayload;
-
+ private int freq = 0, count = 0;
+
private LinkedList<KorapTermSpan> memory;
- private ByteBuffer storedPayload = ByteBuffer.allocate(128);
- boolean hasStoredPayload = false;
-
- private KorapTermSpan overflow, tempSpan;
+ private KorapTermSpan overflow, current, temp;
+
+ public static final ElementSpans EMPTY_ELEMENT_SPANS
+ = new EmptyElementSpans();
private final static Logger log = LoggerFactory.getLogger(ElementSpans.class);
// This advices the java compiler to ignore all loggings
@@ -54,523 +54,379 @@
public ElementSpans(DocsAndPositionsEnum postings, Term term) {
this.postings = postings;
this.term = term;
- this.doc = -1;
- this.end = -1;
- storedPayload.clear();
- hasStoredPayload = false;
+
// storedPayload = null;
- memory = new LinkedList<KorapTermSpan>();
- overflow = new KorapTermSpan();
- tempSpan = new KorapTermSpan();
+ this.memory = new LinkedList<KorapTermSpan>();
+
+ // Overflow span
+ this.overflow = new KorapTermSpan();
+
+ // Current span
+ this.current = new KorapTermSpan();
+
+ // Temporary span
+ this.temp = new KorapTermSpan();
};
+
// only for EmptyElementSpans (below)
public ElementSpans() {
- term = null;
- postings = null;
+ this.term = null;
+ this.postings = null;
};
@Override
public boolean next() throws IOException {
- end = -1;
-
- if (memory.size() > 0) {
- if (DEBUG)
- log.trace("There is a memory entry");
-
- _setToCurrent(memory.removeFirst());
+
+ // There is a memory
+ if (this.memory.size() > 0) {
+ this.setToCurrent(memory.removeFirst(), 1);
if (DEBUG)
- log.trace("Current1: [{}-{}]", position, end);
-
+ log.trace(" --- MATCH --- Fetch from memory {}",
+ this.current.toString());
+
return true;
};
- if (DEBUG)
- log.trace("There is no memory entry");
+ // Last element in document is reached
+ if (this.count == this.freq) {
- if (count == freq) {
+ if (this.postings == null)
+ return false;
- if (DEBUG)
- log.trace("last position in document");
- // Check for overflow on document boundary
- if (overflow.start != -1) {
+ // There is an overflow
+ if (this.overflow.doc != -1) {
+ if (DEBUG)
+ log.trace("Fetch from overflow");
+
+ this.setToCurrent(this.overflow, 2);
+
+ // Reset overflow
+ this.overflow.reset();
if (DEBUG)
- log.trace(" but there is an overflow");
-
- _setToCurrent(overflow).clear();
-
- if (DEBUG)
- log.trace("Current2: [{}-{}]", position, end);
-
+ log.trace(" --- MATCH --- Fetch from memory {}",
+ this.current.toString());
+
return true;
};
- if (postings == null) {
- if (DEBUG)
- log.trace("no more postings");
+ // There is no next document
+ if (!this.nextDoc())
return false;
- };
-
- if (DEBUG)
- log.trace("Go to next doc");
-
- doc = postings.nextDoc();
-
- if (doc == DocIdSetIterator.NO_MORE_DOCS) {
- if (DEBUG)
- log.trace("no more docs");
- return false;
- };
-
- // New doc!
- end = -1;
- storedPayload.clear();
- hasStoredPayload = false;
-
- freq = postings.freq();
- count = 0;
};
- int pos = overflow.start;
-
- while (true) {
- if (DEBUG) {
- log.trace("pos is {}", pos);
- _log_payloads(1);
+ // overflow is not empty - let's treat this as current
+ if (this.overflow.doc != -1) {
+
+ if (DEBUG)
+ log.trace("Overflow is not empty");
+
+ this.setToCurrent(this.overflow, 3);
+
+ // TODO: newOverflow() ???
+ this.overflow.reset();
+ }
+ else {
+ if (DEBUG)
+ log.trace("Overflow is empty");
+
+ // Get next posting - count is still < freq
+ this.setToCurrent(4);
+
+ if (this.count == this.freq) {
+ if (DEBUG)
+ log.trace(" --- MATCH --- Direct {}",
+ this.current.toString());
+ return true;
};
+ };
- if (count == freq) {
- if (DEBUG)
- log.trace("last position in document");
+ while (this.count < this.freq) {
- if (postings == null) {
+ // Temp is now the old current
+ this.setCurrentToTemp();
+ // Get new current
+ this.setToCurrent(5);
+
+ if (DEBUG)
+ log.trace("Compare {} with {}",
+ this.current.toString(),
+ this.temp.toString());
+
+ // The next span is not at the same position
+ if (this.current.start != this.temp.start) {
+
+ // Add this to memory
+ if (this.memory.size() > 0) {
if (DEBUG)
- log.trace("no more postings");
-
- // Check for overflow on document boundary
- if (overflow.start != -1) {
- if (DEBUG)
- log.trace(" but there is an overflow");
-
- _setToCurrent(overflow).clear();
- if (DEBUG)
- log.trace("Current3: [{}-{}]", position, end);
-
- return true;
- };
-
- return false;
- };
-
- if (DEBUG) {
- log.trace("go to next doc");
- _log_payloads(2);
- };
-
- if (overflow.start != -1) {
- if (DEBUG) {
- log.trace("Storing overflow {} ...", overflow.toString());
- log.trace("... in memory with {}-{}", overflow.startChar(), overflow.endChar());
- };
- memory.add((KorapTermSpan) overflow.clone());
- overflow.clear();
- };
- if (DEBUG)
- _log_payloads(3);
-
- if (memory.size() > 0) {
- if (DEBUG) {
- log.trace("sort and return first");
- _log_payloads(4);
- };
-
- Collections.sort(memory);
-
- if (DEBUG)
- _log_payloads(5);
-
- _setToCurrent(memory.removeFirst());
-
- if (DEBUG)
- _log_payloads(6);
-
- if (DEBUG)
- log.trace("Current4: [{}-{}]]", position, end);
+ log.trace("[1] Add to memory {}", this.temp.toString());
+ this.memory.add((KorapTermSpan) this.temp.clone());
+ this.overflow = this.current;
break;
};
- doc = postings.nextDoc();
- // New doc
- end = pos = -1;
-
- if (doc == DocIdSetIterator.NO_MORE_DOCS) {
- if (DEBUG)
- log.trace("no more docs");
- return false;
- };
-
- freq = postings.freq();
- count = 0;
- };
-
-
- if (DEBUG)
- log.trace("Forward postings");
-
- position = postings.nextPosition();
- // New pos!
- end = -1;
-
- if (DEBUG) {
- _log_payloads(9);
- log.trace("CLEAR PAYLOAD");
- };
-
- storedPayload.clear();
- hasStoredPayload = false;
-
- if (DEBUG) {
- _log_payloads(10);
- log.trace("next position is {}", position);
- };
-
- count++;
-
- // There was no overflow
- if (pos == -1 || pos == position) {
- if (pos == position) {
- if (DEBUG)
- log.trace("Add overflow to memory");
-
- memory.add((KorapTermSpan) overflow.clone());
- }
-
- else {
- if (DEBUG)
- log.trace("There was no overflow");
- pos = position;
- };
-
- if (DEBUG) {
- _log_payloads(8);
- log.trace("*****************************");
- };
-
- _setCurrentTo(overflow);
-
- if (DEBUG) {
- log.trace("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
- log.trace("Set overflow and continue: {} ...", overflow.toString());
- log.trace("... with {}-{}", overflow.startChar(), overflow.endChar());
- };
-
- continue;
- }
-
- // overflow was older
- else if (pos != position) {
+ // There is no reason to start a memory
+ this.overflow = this.current;
+ this.current = this.temp;
if (DEBUG)
- log.trace("Overflow was older");
+ log.trace(" --- MATCH --- Fetch from memory {}",
+ this.current.toString());
- // Use memory
- if (memory.size() > 0) {
+ return true;
+ }
- if (DEBUG)
- log.trace("Add overflow to memory");
-
- memory.add((KorapTermSpan) overflow.clone());
-
- if (DEBUG)
- log.trace("Sort memory");
-
- // Sort by end position
- Collections.sort(memory);
-
- // Store current information in overflow
- _setCurrentTo(overflow);
-
- if (DEBUG) {
- log.trace("Set new overflow: {}", overflow.toString());
- log.trace("Get first element from sorted memory");
- };
-
- _setToCurrent(memory.removeFirst());
- }
-
- // Nothing in memory - use overflow!
- else {
-
- if (DEBUG)
- log.trace("There is nothing in memory");
-
- /* Make overflow active and store last position in overflow */
- _setCurrentTo(tempSpan);
-
- if (DEBUG)
- log.trace("Temp is now {}", overflow.toString());
-
- _setToCurrent(overflow);
-
- // Store current information in overflow
- overflow.copyFrom(tempSpan);
-
- if (DEBUG)
- log.trace("Overflow is now {}", overflow.toString());
-
- };
- break;
+ // The positions are equal
+ else {
+ if (DEBUG)
+ log.trace("[2] Add to memory {}", this.temp.toString());
+ this.memory.add((KorapTermSpan) this.temp.clone());
};
};
- if (DEBUG)
- log.trace("Current4: [{}-{}]", position, end);
-
- readPayload = false;
- return true;
- };
-
- private KorapTermSpan _setToCurrent (KorapTermSpan act) {
- if (act.payload != null)
- act.payload.rewind();
-
- if (DEBUG)
- log.trace("Set to current with {}, meaning {} - {}",
- act.toString(),
- act.payload.getInt(0),
- act.payload.getInt(4)
- );
-
- if (act.payload != null)
- act.payload.rewind();
-
- position = act.start;
- end = act.end;
- storedPayload.clear();
- hasStoredPayload = false;
-
- if (act.payload != null) {
+ if (this.temp.doc == this.current.doc &&
+ this.temp.start == this.current.start) {
if (DEBUG)
- log.trace("Payload is not null");
-
- act.payload.rewind();
- storedPayload.put(act.payload);
- hasStoredPayload = true;
- }
- else if (DEBUG)
- log.trace("Payload is null");
-
- return act;
- };
-
- private void _log_payloads (int nr) {
- if (!DEBUG)
- return;
-
- if (hasStoredPayload)
- log.trace(
- "[{}] payload offsets are {}-{}",
- nr,
- storedPayload.getInt(0),
- storedPayload.getInt(4)
- );
- else
- log.trace("[{}] payload is empty", nr);
- };
-
- private void _setCurrentTo () {
- overflow.start = position;
- overflow.end = this.end();
- overflow.payload.clear();
-
- if (hasStoredPayload)
- overflow.payload.put(storedPayload);
-
- if (DEBUG)
- log.trace("Set current to Overflow {} with {}-{}", overflow.toString(), overflow.startChar(), overflow.endChar());
- };
-
- private void _setCurrentTo (KorapTermSpan o) {
-
- if (DEBUG)
- _log_payloads(7);
-
- o.start = position;
- o.end = this.end();
- o.payload.clear();
-
- if (hasStoredPayload) {
- storedPayload.rewind();
- o.payload.put(storedPayload);
-
- if (DEBUG)
- log.trace("Object now has offset {}-{}", o.payload.getInt(0), o.payload.getInt(4));
-
- // Import:
- o.payload.rewind();
+ log.trace("[3] Add to memory {}", this.current.toString());
+ this.memory.add((KorapTermSpan) this.current.clone());
};
- if (DEBUG)
- log.trace("Set current to object {} ...", o.toString());
-
- if (hasStoredPayload) {
- if (DEBUG)
- log.trace("with {}-{} from {}-{}", o.startChar(), o.endChar(), storedPayload.getInt(0), storedPayload.getInt(4));
+ // Sort the memory
+ Collections.sort(memory);
- storedPayload.rewind();
- };
+ // There is now a memory
+ return this.next();
};
+
+ // get next doc
+ private boolean nextDoc () throws IOException {
- @Override
- public boolean skipTo(int target) throws IOException {
- assert target > doc;
- doc = postings.advance(target);
-
- end = -1;
- overflow.clear();
- storedPayload.clear();
- hasStoredPayload = false;
-
- if (memory != null)
- memory.clear();
-
- if (doc == DocIdSetIterator.NO_MORE_DOCS)
+ // Check if this doc is the last
+ if (this.current.doc == DocIdSetIterator.NO_MORE_DOCS)
return false;
- freq = postings.freq();
- count = 0;
- position = postings.nextPosition();
- count++;
- readPayload = false;
+ if (DEBUG)
+ log.trace("Go to next document");
+
+ this.current.reset();
+
+ // Advance to next doc
+ this.current.doc = this.postings.nextDoc();
+
+ // Check if this doc is the last
+ if (this.current.doc == DocIdSetIterator.NO_MORE_DOCS)
+ return false;
+
+ // check frequencies
+ this.freq = this.postings.freq();
+
+ if (DEBUG)
+ log.trace("Document <{}> has {} occurrences",
+ this.current.doc,
+ this.freq);
+
+
+ this.count = 0;
return true;
};
+
+ @Override
+ public boolean skipTo(int target) throws IOException {
+
+ assert target > this.current.doc;
+
+ // Get this doc
+ this.current.doc = postings.advance(target);
+
+ if (this.current.doc == DocIdSetIterator.NO_MORE_DOCS)
+ return false;
+
+ if (this.memory != null)
+ this.memory.clear();
+
+ this.overflow.reset();
+
+
+ this.freq = this.postings.freq();
+
+ if (DEBUG)
+ log.trace("Document {} has {} occurrences", this.current.doc, this.freq);
+
+
+ this.count = 0;
+
+ if (this.next())
+ return true;
+
+ return false;
+ };
+
+
@Override
public int doc() {
- return doc;
+ return this.current.doc;
};
+
@Override
public int start() {
- return position;
+ return this.current.start;
};
+
@Override
public int end() {
- if (end >= 0)
- return end;
+ if (this.current.end >= 0)
+ return this.current.end;
try {
- end = this.getPayloadEndPosition();
+ this.current.end = this.getPayloadEndPosition();
}
catch (Exception e) {
- end = position;
+ this.current.end = this.current.start;
};
- return end;
+ return this.current.end;
};
+
@Override
public long cost() {
- return postings.cost();
+ // ???
+ return this.postings.cost();
};
+
@Override
public Collection<byte[]> getPayload() throws IOException {
byte[] offsetCharacters = new byte[8];
-
- if (storedPayload.position() <= 0)
+ if (this.current.end <= 0)
this.getPayloadEndPosition();
- if (DEBUG) {
- if (hasStoredPayload)
- log.trace("storedPayload: {} - {}",
- storedPayload.getInt(0),
- storedPayload.getInt(4));
- else
- log.trace("storedPayload is empty");
- };
-
- System.arraycopy(storedPayload.array(), 0, offsetCharacters, 0, 8);
+ System.arraycopy(this.current.payload.array(), 0, offsetCharacters, 0, 8);
return Collections.singletonList(offsetCharacters);
};
- @Override
- public boolean isPayloadAvailable() throws IOException {
- return readPayload == false && postings.getPayload() != null;
+
+ /**
+ * Sets the current element to a copy of the given KorapTermSpan
+ */
+ private void setToCurrent (KorapTermSpan act, int debugNumber) {
+
+ if (DEBUG)
+ log.trace(
+ "[{}] Set to current with {}",
+ debugNumber,
+ act.toString()
+ );
+
+ this.current = (KorapTermSpan) act.clone();
};
- @Override
- public String toString() {
- return "spans(" + term.toString() + ")@" +
- (doc == -1 ? "START" : (doc == Integer.MAX_VALUE) ? "END" : doc + "-" + position);
+ /**
+ * Reads the next posting position and its payload into the current element
+ */
+ private void setToCurrent (int debugNumber) throws IOException {
+
+ this.current.start = this.postings.nextPosition();
+
+ // This will directly save stored payloads
+ this.current.end = this.getPayloadEndPosition();
+
+ if (DEBUG)
+ log.trace(
+ "[{}] Set new to current with {}",
+ debugNumber,
+ this.current.toString()
+ );
+
+ this.count++;
};
- public DocsAndPositionsEnum getPostings() {
- return postings;
+ private void setCurrentToTemp () {
+ this.temp = (KorapTermSpan) this.current.clone();
};
+
private int getPayloadEndPosition () {
- if (DEBUG)
- log.trace("getPayloadEndPosition of element ...");
-
try {
BytesRef payload = postings.getPayload();
- if (DEBUG)
- log.trace(" BytesRef: {}", payload.toString());
+ this.current.clearPayload();
- readPayload = true;
- storedPayload.clear();
- hasStoredPayload = false;
-
if (payload != null) {
- if (DEBUG)
- log.trace("Do bit magic");
-
- storedPayload.put(payload.bytes, payload.offset, 8);
- storedPayload.put(payload.bytes, payload.offset + 12, payload.length - 12);
- System.arraycopy(payload.bytes, payload.offset + 8, payloadByte, 0, 4);
- hasStoredPayload = true;
- if (DEBUG)
- log.trace("~~ Bytes: {}-{}-{}",
- storedPayload.getInt(0),
- storedPayload.getInt(4),
- payloadByte);
+ this.payloadByte = new byte[4];
+
+ // Copy some payloads like start character and end character
+ this.current.payload.put(payload.bytes, payload.offset, 8);
+ this.current.payload.put(payload.bytes, payload.offset + 12, payload.length - 12);
+
+ // Copy end position integer to payloadByte
+ System.arraycopy(payload.bytes, payload.offset + 8, this.payloadByte, 0, 4);
}
- else {
- if (DEBUG)
- log.trace("There's no payload available");
-
- payloadByte = null;
+ else {
+ this.payloadByte = null;
};
- if (payloadByte != null) {
+ // Todo: REWRITE!
+ if (this.payloadByte != null) {
+
+ // Todo: This is weird!
+
bb.clear();
int t = bb.wrap(payloadByte).getInt();
+
if (DEBUG)
- log.trace(" |-> {}", t);
+ log.trace("Get Endposition and payload: {}-{} with end position {} in doc {}",
+ this.current.payload.getInt(0),
+ this.current.payload.getInt(4),
+ t,
+ this.current.doc);
return t;
+ }
+ else if (DEBUG) {
+ log.trace("Get Endposition and payload: None found");
};
-
}
catch (IOException e) {
if (DEBUG)
log.trace("IOException {}", e);
};
+
return -1;
};
+ @Override
+ public boolean isPayloadAvailable() throws IOException {
+
+ if (current.payload != null)
+ return true;
+
+ return false;
+ };
+
+
+ @Override
+ public String toString() {
+ return "spans(" + this.term.toString() + ")@" +
+ (this.current.doc == -1 ? "START" : (this.current.doc == Integer.MAX_VALUE) ? "END" : this.current.doc + "-" + this.current.start);
+ };
+
+ public DocsAndPositionsEnum getPostings() {
+ return postings;
+ };
+
private static final class EmptyElementSpans extends ElementSpans {
@Override
@@ -597,6 +453,4 @@
@Override
public long cost() { return 0; };
};
-
- public static final ElementSpans EMPTY_ELEMENT_SPANS = new EmptyElementSpans();
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java
index a865ca4..098573c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/KorapSpan.java
@@ -23,7 +23,7 @@
this.start = o.start;
this.end = o.end;
this.doc = o.doc;
- clearPayload();
+ this.clearPayload();
return this;
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java
index 4d50e14..c1fe75e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/KorapTermSpan.java
@@ -21,21 +21,23 @@
public Object clone() {
KorapTermSpan span = new KorapTermSpan();
span.start = this.start;
- span.end = this.end;
- span.doc = this.doc;
+ span.end = this.end;
+ span.doc = this.doc;
- this.payload.rewind();
- span.payload.put(this.payload);
+ if (this.payload != null) {
+ this.payload.rewind();
+ span.payload.put(this.payload);
- if (DEBUG) {
- log.trace("Clone payload {} to payload {} ...",
- this.payload.toString(),
- span.payload.toString());
- log.trace("... from {}-{} to {}-{}",
- this.startChar(),
- this.endChar(),
- span.startChar(),
- span.endChar());
+ if (DEBUG) {
+ log.trace("[TS] Clone payload {} to payload {} ...",
+ this.payload.toString(),
+ span.payload.toString());
+ log.trace("[TS] ... from {}-{} to {}-{}",
+ this.startChar(),
+ this.endChar(),
+ span.startChar(),
+ span.endChar());
+ };
};
return span;
@@ -47,11 +49,18 @@
return this;
};
+ public KorapSpan shallowCopyFrom (KorapTermSpan o) {
+ super.copyFrom((KorapSpan) o);
+ this.payload = o.payload;
+ return this;
+ };
+
+
@Override
public void clearPayload () {
if (this.payload != null) {
this.payload.clear();
- this.payload.rewind();
+ // this.payload.rewind();
};
};
@@ -60,12 +69,13 @@
this.payload = ByteBuffer.allocate(128);
};
-
@Override
public String toString () {
StringBuilder sb = new StringBuilder("[");
return sb.append(this.start).append('-')
.append(this.end)
+ .append("#")
+ .append(this.startChar()).append('-').append(this.endChar())
.append('(').append(this.doc).append(')')
.append('$').append(this.payload.toString())
.append(']')
@@ -79,4 +89,11 @@
public int endChar () {
return this.payload.getInt(4);
};
+
+ public void reset () {
+ this.clearPayload();
+ this.start = -1;
+ this.end = -1;
+ this.doc = -1;
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
index 6a1b93a..94413ed 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
@@ -337,14 +337,15 @@
this.wrapStart = -1;
this.wrapEnd = -1;
+ // Retrieve doc information
+ this.wrapDoc = this.wrapSpans.doc();
+
if (DEBUG)
log.trace(
" Forward wrap span to {}",
_currentWrap().toString()
);
- // Retrieve doc information
- this.wrapDoc = this.wrapSpans.doc();
if (this.embeddedDoc != this.wrapDoc) {
if (DEBUG)
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index ac757b7..f48fe94 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,11 +1,11 @@
## logger file can be used with
-#log4j.rootLogger = DEBUG, stdout
+log4j.rootLogger = DEBUG, stdout
# Spans:
-# log4j.logger.de.ids_mannheim.korap.query.spans.ElementSpans = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.query.spans.ElementSpans = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.KorapTermSpan = TRACE, stdout
-# log4j.logger.de.ids_mannheim.korap.query.spans.WithinSpans = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.query.spans.WithinSpans = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.NextSpans = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.SimpleSpans = TRACE, stdout
@@ -19,8 +19,8 @@
# Results:
# log4j.logger.de.ids_mannheim.korap.KorapIndex = TRACE, stdout
-// log4j.logger.de.ids_mannheim.korap.KorapMatch = TRACE, stdout
-# log4j.logger.de.ids_mannheim.korap.index.PositionsToOffset = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.KorapMatch = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.index.PositionsToOffset = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.index.TestSegmentIndex = TRACE, stdout