Load payloads of ElementSpans lazily (benefit is unconfirmed)
diff --git a/Changes b/Changes
index de2ac93..38fca92 100644
--- a/Changes
+++ b/Changes
@@ -7,6 +7,7 @@
- [feature] Presorting of element terms in the index for coherent
SpanQuery sorting; Removed buffering of element candidates (diewald)
Warning: This is a breaking change!
+ - [performance] Payloads in ElementSpans can now be lazily loaded (diewald)
- [cleanup] Renamed /filter to /collection,
merge KorapHTML and KorapString (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index c51665e..fc2408e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -2,8 +2,8 @@
import java.io.IOException;
import java.nio.ByteBuffer;
-import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -20,12 +20,15 @@
/**
* Enumeration of special spans which length is stored in their payload,
* representing elements such as phrases, sentences and paragraphs.
+ *
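+ * Payloads are processed lazily, i.e. not before the end position, the span id or the payload itself is requested.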
*
* @author margaretha
* @author diewald
*/
public class ElementSpans extends SpansWithId {
private TermSpans termSpans;
+ private boolean lazyLoaded = false;
/**
* Constructs ElementSpans for the given {@link SpanElementQuery}.
@@ -44,7 +47,6 @@
hasMoreSpans = true;
};
-
@Override
public boolean next() throws IOException {
isStartEnumeration = false;
@@ -62,9 +64,34 @@
// Get payload
this.matchStartPosition = termSpans.start();
this.matchDocNumber = termSpans.doc();
+ this.lazyLoaded = false;
+ return true;
+ };
+
+
+ /*
+ * Process the payload lazily.
+ * This may slightly benefit queries such as
+ * position queries, where spans can be rejected
+ * solely based on their document and start position.
+ */
+ private void processPayload () {
+ if (this.lazyLoaded)
+ return;
+
+ // Mark as processed up front, so a payload read failing with an IOException is not retried
+ this.lazyLoaded = true;
// No need to check if there is a pl - there has to be a payload!
- this.matchPayload = termSpans.getPayload();
+ try {
+ this.matchPayload = termSpans.getPayload();
+ }
+ catch (IOException e) {
+ this.matchEndPosition = this.matchStartPosition;
+ this.setSpanId((short) -1);
+ this.matchPayload = null;
+ return;
+ };
List<byte[]> payload = (List<byte[]>) this.matchPayload;
@@ -85,15 +112,39 @@
byte[] b = new byte[8];
b = Arrays.copyOfRange(bb.array(), 0, 8);
this.matchPayload = Collections.singletonList(b);
+ return;
}
- // The span is extremely short ... well ...
- else {
- this.matchEndPosition = this.matchStartPosition;
- this.setSpanId((short) -1);
- this.matchPayload = null;
- };
- return true;
+ this.matchEndPosition = this.matchStartPosition;
+ this.setSpanId((short) -1);
+ this.matchPayload = null;
+ };
+
+ // Returns the end position of the current match; it is assigned in processPayload(), which therefore runs first.
+ @Override
+ public int end () {
+ this.processPayload();
+ return this.matchEndPosition;
+ };
+ // Returns the payload of the current match (may be null), processing it lazily on first access.
+ @Override
+ public Collection<byte[]> getPayload() {
+ this.processPayload();
+ return this.matchPayload;
+ };
+
+ // True if a non-empty payload exists; processPayload() may leave matchPayload null, hence the null guard.
+ @Override
+ public boolean isPayloadAvailable() {
+ this.processPayload();
+ return this.matchPayload != null && !this.matchPayload.isEmpty();
+ };
+
+ // Returns the span id; processPayload() runs first because it calls setSpanId().
+ @Override
+ public short getSpanId () {
+ this.processPayload();
+ return spanId;
+ };
@@ -110,7 +161,6 @@
return false;
};
-
@Override
public long cost() {
return termSpans.cost();