Remove ByteBuffer in favor of KorapByte in MatchModifyClassSpans
diff --git a/Changes b/Changes
index edeae96..9f310c4 100644
--- a/Changes
+++ b/Changes
@@ -7,7 +7,8 @@
- [feature] Presorting of element terms in the index for coherent
SpanQuery sorting; Removed buffering of element candidates (diewald)
Warning: This is a breaking change!
- - [performance] Payloads in ElementSpans can now be lazily loaded (diewald)
+ - [performance] Payloads in ElementSpans can now be lazily loaded,
+ MatchModifyClassSpans no longer rely on payload copies (diewald)
- [cleanup] Renamed /filter to /collection,
merge KorapHTML and KorapString,
removed KorapSpan, KorapTermSpan and KorapLongSpan (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanMatchModifyClassQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanMatchModifyClassQuery.java
index b86b818..1f3d3f6 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanMatchModifyClassQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanMatchModifyClassQuery.java
@@ -22,34 +22,19 @@
* Shrinks spans to a classed span.
*/
public class SpanMatchModifyClassQuery extends SpanClassQuery {
- private boolean divide = false;
-
- public SpanMatchModifyClassQuery (SpanQuery operand, byte number, boolean divide) {
- super(operand, number);
- this.divide = divide;
- };
-
- public SpanMatchModifyClassQuery (SpanQuery operand, boolean divide) {
- this(operand, (byte) 1, divide);
- };
public SpanMatchModifyClassQuery (SpanQuery operand, byte number) {
- this(operand, number, false);
+ super(operand, number);
};
public SpanMatchModifyClassQuery (SpanQuery operand) {
- this(operand, (byte) 1, false);
+ this(operand, (byte) 1);
};
@Override
public String toString (String field) {
StringBuffer buffer = new StringBuffer();
- if (divide) {
- buffer.append("split(");
- }
- else {
- buffer.append("shrink(");
- };
+ buffer.append("shrink(");
short classNr = (short) this.number;
buffer.append(classNr & 0xFF).append(": ");
buffer.append(this.operand.toString());
@@ -61,7 +46,7 @@
@Override
public Spans getSpans (final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
- return (Spans) new MatchModifyClassSpans(this.operand, context, acceptDocs, termContexts, number, divide);
+ return (Spans) new MatchModifyClassSpans(this.operand, context, acceptDocs, termContexts, number);
};
@Override
@@ -85,8 +70,7 @@
public SpanMatchModifyClassQuery clone() {
SpanMatchModifyClassQuery spanMatchQuery = new SpanMatchModifyClassQuery(
(SpanQuery) this.operand.clone(),
- this.number,
- this.divide
+ this.number
);
spanMatchQuery.setBoost(getBoost());
return spanMatchQuery;
@@ -103,7 +87,7 @@
if (!this.operand.equals(spanMatchModifyClassQuery.operand)) return false;
if (this.number != spanMatchModifyClassQuery.number) return false;
- if (this.divide != spanMatchModifyClassQuery.divide) return false;
+ // if (this.divide != spanMatchModifyClassQuery.divide) return false;
return getBoost() == spanMatchModifyClassQuery.getBoost();
};
@@ -114,7 +98,7 @@
int result = 1;
result = operand.hashCode();
result += number + 33_333;
- result += divide ? 1 : 0;
+ // result += divide ? 1 : 0;
result ^= (result << 15) | (result >>> 18);
result += Float.floatToRawIntBits(getBoost());
return result;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
index f6bcbef..a9f196a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/MatchModifyClassSpans.java
@@ -1,5 +1,7 @@
package de.ids_mannheim.korap.query.spans;
+import static de.ids_mannheim.korap.util.KorapByte.*;
+
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.index.AtomicReaderContext;
@@ -17,10 +19,20 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.nio.ByteBuffer;
/**
- * Modify matches to, for example, return only certain class or span ranges.
+ * Spans that can focus on the span boundaries of classed subqueries.
+ * The boundaries of the classed subquery may exceed the boundaries of the
+ * nested query.
+ *
+ * In case multiple classes are found with the very same number, the span
+ * is maximized to start on the first occurrence from the left and end on
+ * the last occurrence on the right.
+ *
+ * In case the class to focus on is not found in the payloads,
+ * the match is ignored.
+ *
+ * <strong>Warning</strong>: Payloads other than class payloads won't bubble up.
*
* @author diewald
*/
@@ -30,8 +42,6 @@
private Collection<byte[]> payload;
private final Spans spans;
private byte number;
- private boolean divide;
- private ByteBuffer bb;
private SpanQuery wrapQuery;
private final Logger log = LoggerFactory.getLogger(MatchModifyClassSpans.class);
@@ -39,145 +49,154 @@
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
- private int start = -1, end;
- private int tempStart = 0, tempEnd = 0;
+ private int start = -1,
+ end;
+ private int tempStart = 0,
+ tempEnd = 0;
- public MatchModifyClassSpans (
- SpanQuery wrapQuery,
- AtomicReaderContext context,
- Bits acceptDocs,
- Map<Term,TermContext> termContexts,
- byte number,
- boolean divide) throws IOException {
- this.spans = wrapQuery.getSpans(context, acceptDocs, termContexts);
- this.number = number;
- this.divide = divide;
- this.wrapQuery = wrapQuery;
- this.bb = ByteBuffer.allocate(9);
- this.wrappedPayload = new ArrayList<byte[]>(6);
+ /**
+ * Construct a MatchModifyClassSpans object for the given {@link SpanQuery}.
+ *
+ * @param wrapQuery A {@link SpanQuery}.
+ * @param context The {@link AtomicReaderContext}.
+ * @param acceptDocs Bit vector representing the documents
+ * to be searched in.
+ * @param termContexts A map managing {@link TermState TermStates}.
+ * @param number The class number to focus on.
+ * @throws IOException
+ */
+ public MatchModifyClassSpans (SpanQuery wrapQuery,
+ AtomicReaderContext context,
+ Bits acceptDocs,
+ Map<Term,TermContext> termContexts,
+ byte number) throws IOException {
+ this.spans = wrapQuery.getSpans(context, acceptDocs, termContexts);
+ this.number = number;
+ this.wrapQuery = wrapQuery;
+ this.wrappedPayload = new ArrayList<byte[]>(6);
};
+
@Override
public Collection<byte[]> getPayload() throws IOException {
- return wrappedPayload;
+ return wrappedPayload;
};
+
@Override
- public boolean isPayloadAvailable() {
- return wrappedPayload.isEmpty() == false;
+ public boolean isPayloadAvailable () {
+ return wrappedPayload.isEmpty() == false;
};
- public int doc() { return spans.doc(); }
- // inherit javadocs
@Override
- public int start() { return start; }
+ public int doc () {
+ return spans.doc();
+ };
- // inherit javadocs
+
@Override
- public int end() { return end; }
+ public int start () {
+ return start;
+ };
- // inherit javadocs
+ @Override
+ public int end () {
+ return end;
+ };
+
+
@Override
public boolean next() throws IOException {
- /* TODO:
- * In case of a split() (instead of a submatch())
- * Is the cache empty?
- * Otherwise: Next from list
- */
+ if (DEBUG) log.trace("Forward next match");
- if (DEBUG)
- log.trace("Forward next match");
+ // Next span
+ while (spans.next()) {
+ if (DEBUG) log.trace("Forward next inner span");
- // Next span
- while (spans.next()) {
+ // No classes stored
+ wrappedPayload.clear();
- if (DEBUG)
- log.trace("Forward next inner span");
+ start = -1;
+ if (spans.isPayloadAvailable()) {
+ end = 0;
- // No classes stored
- wrappedPayload.clear();
+ // Iterate over all payloads and find the maximum span per class
+ for (byte[] payload : spans.getPayload()) {
- start = -1;
- if (spans.isPayloadAvailable()) {
- end = 0;
+ // No class payload - ignore
+ // this may be problematic for other calculated payloads!
+ if (payload.length != 9) {
+ if (DEBUG) log.trace("Ignore old payload {}", payload);
+ continue;
+ };
- // Iterate over all payloads and find the maximum span per class
- for (byte[] payload : spans.getPayload()) {
+ // Found class payload of structure <i>start<i>end<b>class
+ // and the class number matches!
+ if (payload[8] == this.number) {
+ tempStart = byte2int(payload, 0);
+ tempEnd = byte2int(payload, 4);
- // No class payload
- if (payload.length != 9) {
- if (DEBUG)
- log.trace("Ignore old payload {}", payload);
- continue;
- };
+ if (DEBUG) {
+ log.trace(
+ "Found matching class {}-{}",
+ tempStart,
+ tempEnd
+ );
+ };
- // Todo: Implement Divide
- // Found class payload of structure <i>start<i>end<b>class
- // and classes are matches!
- if (payload[8] == this.number) {
- bb.clear();
- bb.put(payload);
- bb.rewind();
- tempStart = bb.getInt();
- tempEnd = bb.getInt();
-
- if (DEBUG)
- log.trace("Found matching class {}-{}", tempStart, tempEnd);
-
- // Set start position
- if (start == -1)
- start = tempStart;
- else if (tempStart < start)
- start = tempStart;
+ // Set start position
+ if (start == -1 || tempStart < start)
+ start = tempStart;
- // Set end position
- if (tempEnd > end)
- end = tempEnd;
- };
+ // Set end position
+ if (tempEnd > end)
+ end = tempEnd;
+ };
- // Definately keep class information
- // Even if it is already used for shrinking
- wrappedPayload.add(payload);
- };
- };
+ // Definitely keep class information
+ // Even if it is already used for shrinking
+ wrappedPayload.add(payload);
+ };
+ };
- // Class not found
- if (start == -1)
- continue;
+ // Class not found
+ if (start == -1)
+ continue;
- if (DEBUG)
- log.trace(
- "Start to focus on class {} from {} to {}",
- number,
- start,
- end
- );
+ if (DEBUG) {
+ log.trace(
+ "Start to focus on class {} from {} to {}",
+ number,
+ start,
+ end
+ );
+ };
+ return true;
+ };
- return true;
- };
-
- // No more spans
- return false;
+ // No more spans
+ this.wrappedPayload.clear();
+ return false;
};
- // inherit javadocs
+ // Todo: Check for this on document boundaries!
@Override
public boolean skipTo (int target) throws IOException {
- return spans.skipTo(target);
+ return spans.skipTo(target);
};
+
@Override
public String toString () {
- return getClass().getName() + "(" + this.wrapQuery.toString() + ")@" +
- (doc() + ":" + start() + "-" + end());
+ return getClass().getName() + "(" + this.wrapQuery.toString() + ")@" +
+ (doc() + ":" + start() + "-" + end());
};
-
@Override
public long cost () {
- return spans.cost();
+ return spans.cost();
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/util/KorapByte.java b/src/main/java/de/ids_mannheim/korap/util/KorapByte.java
index 5d9285e..00e605b 100644
--- a/src/main/java/de/ids_mannheim/korap/util/KorapByte.java
+++ b/src/main/java/de/ids_mannheim/korap/util/KorapByte.java
@@ -45,16 +45,16 @@
* Convert a byte array to an integer.
*
* @param data The byte array to convert.
- * @param offset The byte offset.
+ * @param offset The byte offset (Not integer offset!).
* @return The translated integer.
*/
- // Based on
+ // Roughly based on
// http://www.tutorials.de/java/228129-konvertierung-von-integer-byte-array.html
public static int byte2int (byte[] data, int offset) {
- int number = 0;
- int i = (offset * 4);
- for (; i < 4; ++i)
- number |= (data[3-i] & 0xff) << (i << 3);
+ offset += 3;
+ int number = 0;
+ for (int i = 0; i < 4; ++i)
+ number |= (data[offset-i] & 0xff) << (i << 3);
return number;
};
};