Merge branch 'payload-id' of ssh://korap.ids-mannheim.de:29418/KorAP/Krill into payload-id
diff --git a/misc/payloads.md b/misc/payloads.md
index 400c866..5454f89 100644
--- a/misc/payloads.md
+++ b/misc/payloads.md
@@ -157,7 +157,7 @@
For example:
- @:class=header$<b>17<s>1<i>6
+ @:class=header$<b>17<i>6<s>1
means the attribute belongs to the term, element, or relation in the
same token position whose TUI is 1 and end position is 6.
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index 9616ba3..49ac41e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -20,23 +20,38 @@
import de.ids_mannheim.korap.query.SpanElementQuery;
/**
- * Enumeration of special spans which length is stored in their
- * payload,
- * representing elements such as phrases, sentences and paragraphs.
+ * Enumeration of spans representing elements such as phrases, sentences and
+ * paragraphs. Span length is stored as a payload.
+ *
+ * Depth and certainty value payloads are not yet loaded or handled.
*
* @author margaretha
* @author diewald
*/
public final class ElementSpans extends SimpleSpans {
private final TermSpans termSpans;
- private boolean lazyLoaded = false;
+ private boolean isPayloadLoaded;
private final Logger log = LoggerFactory.getLogger(ElementSpans.class);
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
private byte[] b = new byte[8];
+
+ public static enum PayloadTypeIdentifier {
+ ELEMENT(64),
+ ELEMENT_WITH_TUI(65),
+ ELEMENT_WITH_CERTAINTY_VALUE (66),
+ ELEMENT_WITH_TUI_AND_CERTAINTY_VALUE (67),
+ MILESTONE (68);
+
+ private int value;
+ private PayloadTypeIdentifier(int value) {
+ this.value = value;
+ }
+ }
+
/**
* Constructs ElementSpans for the given {@link SpanElementQuery}.
@@ -58,31 +73,28 @@
throws IOException {
super(spanElementQuery, context, acceptDocs, termContexts);
termSpans = (TermSpans) this.firstSpans;
- hasMoreSpans = true;
- // hasSpanId = true;
+ hasMoreSpans = true;
+ // termSpans.next();
};
@Override
public boolean next () throws IOException {
- isStartEnumeration = false;
+ isStartEnumeration = false;
+ this.matchPayload = null;
+ matchEndPosition = -1;
+ return advance();
+ };
- if (!hasMoreSpans || !(hasMoreSpans = termSpans.next()))
- return false;
+ private boolean advance() throws IOException {
+ if (!hasMoreSpans || !(hasMoreSpans = termSpans.next()))
+ return false;
- // Set current values
- return this.setToCurrent();
- };
-
-
- // Set term values to current
- private boolean setToCurrent () throws IOException {
- // Get payload
- this.matchStartPosition = termSpans.start();
- this.matchDocNumber = termSpans.doc();
- this.lazyLoaded = false;
- return true;
- };
+ this.matchStartPosition = termSpans.start();
+ this.matchDocNumber = termSpans.doc();
+ isPayloadLoaded = false;
+ return true;
+ };
/*
@@ -91,48 +103,51 @@
* position queries, where spans can be rejected
* solely based on their starting and doc position.
*/
- private void processPayload () {
- if (this.lazyLoaded)
+ private void loadPayload () {
+ if (this.isPayloadLoaded) {
return;
-
- // This will prevent failures for IOExceptions
- this.lazyLoaded = true;
-
- // No need to check if there is a pl - there has to be a payload!
+ }
+ else{
+ this.isPayloadLoaded = true;
+ }
+
+ List<byte[]> payload;
try {
- this.matchPayload = termSpans.getPayload();
+ payload = (List<byte[]>) termSpans.getPayload();
}
catch (IOException e) {
+ // On IOException, silently fall back to an empty element without payload
this.matchEndPosition = this.matchStartPosition;
this.setSpanId((short) -1);
this.matchPayload = null;
return;
- };
+ }
- List<byte[]> payload = (List<byte[]>) this.matchPayload;
- if (!payload.isEmpty()) {
+ if (!payload.isEmpty()) {
// Get payload one by one
final int length = payload.get(0).length;
final ByteBuffer bb = ByteBuffer.allocate(length);
bb.put(payload.get(0));
+
+ this.payloadTypeIdentifier = bb.get(0);
+ this.matchEndPosition = bb.getInt(9);
- // set element end position from payload
- this.matchEndPosition = bb.getInt(8);
-
- // Copy element id
- if (length >= 14) {
- this.setSpanId(bb.getShort(12));
- this.hasSpanId = true;
- }
+ if (payloadTypeIdentifier == PayloadTypeIdentifier.ELEMENT_WITH_TUI.value
+ || payloadTypeIdentifier == PayloadTypeIdentifier.ELEMENT_WITH_TUI_AND_CERTAINTY_VALUE.value) {
+ this.setSpanId(bb.getShort(14));
+ this.hasSpanId = true;
+ }
else {
+ // System.out.println(payloadTypeIdentifier);
this.setSpanId((short) -1);
}
- // Copy the start and end character offsets
- b = Arrays.copyOfRange(bb.array(), 0, 8);
- this.matchPayload = Collections.singletonList(b);
+ // FIXME
+ // Copy the start and end character offsets
+ b = Arrays.copyOfRange(bb.array(), 1, 9);
+ this.matchPayload = Collections.singletonList(b);
return;
}
@@ -144,28 +159,28 @@
@Override
public int end () {
- this.processPayload();
+ this.loadPayload();
return this.matchEndPosition;
};
@Override
public Collection<byte[]> getPayload () {
- this.processPayload();
+ this.loadPayload();
return this.matchPayload;
};
@Override
public boolean isPayloadAvailable () {
- this.processPayload();
+ this.loadPayload();
return !this.matchPayload.isEmpty();
};
@Override
public short getSpanId () {
- this.processPayload();
+ this.loadPayload();
return spanId;
};
@@ -178,7 +193,7 @@
if (hasMoreSpans && firstSpans.doc() < target
&& firstSpans.skipTo(target)) {
- return this.setToCurrent();
+ return this.advance();
};
hasMoreSpans = false;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
index 76a1cec..4a383ee 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
@@ -36,6 +36,7 @@
protected short spanId;
protected boolean hasSpanId = false;
+ protected byte payloadTypeIdentifier;
public SimpleSpans () {
collectPayloads = true;
@@ -126,10 +127,10 @@
}
- @Override
+ @Override
public int end () {
return matchEndPosition;
- }
+ }
@Override
@@ -176,4 +177,22 @@
this.spanId = spanId;
}
+ /**
+ * Gets the payload type identifier (PTI) of the current span
+ *
+ * @return a payload type identifier
+ */
+ public byte getPayloadTypeIdentifier() {
+ return payloadTypeIdentifier;
+ }
+
+ /**
+ * Sets the payload type identifier (PTI) of the current span
+ *
+ * @param payloadTypeIdentifier the payload type identifier to set
+ */
+ public void setPayloadTypeIdentifier(byte payloadTypeIdentifier) {
+ this.payloadTypeIdentifier = payloadTypeIdentifier;
+ }
+
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
index fb61883..ceee21d 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
@@ -36,13 +36,33 @@
fd.addTV(
"base",
"bcbabd",
- "[(0-1)s:a|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|<>:div#0-2$<i>2<s>2|@:class=header$<i>3<s>1|@:class=header$<i>2<s>2]"
- + "[(1-2)s:e|_2#1-2|<>:a#1-2$<i>2<s>1|@:class=header$<i>2<s>1]"
- + "[(2-3)s:e|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=time$<i>5<s>1]"
- + "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=header$<i>5<s>1]"
- + "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|<>:a#4-5$<i>5<s>2|@:class=header$<i>5<s>2]"
- + "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>1|<>:div#5-6$<i>6<s>-1|@:class=header$<i>6<s>1]"
- + "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<i>7<s>1|@:class=header$<i>7<s>2]");
+ "[(0-1)s:a|_1#0-1|"
+ + "<>:div$<b>65<i>0<i>2<i>2<b>0<s>2|"
+ + "<>:div$<b>65<i>0<i>3<i>3<b>0<s>1|"
+ + "<>:s$<b>65<i>0<i>5<i>5<b>0<s>3|"
+ + "@:class=header$<i>3<s>1|@:class=header$<i>2<s>2]"
+
+ + "[(1-2)s:e|_2#1-2|"
+ + "<>:a$<b>65<i>1<i>2<i>2<b>0<s>1|@:class=header$<i>2<s>1]"
+
+ + "[(2-3)s:e|_3#2-3|"
+ + "<>:div$<b>65<i>2<i>5<i>5<b>0<s>1|@:class=time$<i>5<s>1]"
+
+ + "[(3-4)s:a|_4#3-4|"
+ + "<>:div$<b>65<i>3<i>5<i>5<b>0<s>1|@:class=header$<i>5<s>1]"
+
+ + "[(4-5)s:b|_5#4-5|"
+ + "<>:div$<b>65<i>4<i>5<i>5<b>0<s>1|"
+ + "<>:a$<b>65<i>4<i>5<i>5<b>0<s>2|@:class=header$<i>5<s>2]"
+
+ + "[(5-6)s:d|_6#5-6|"
+ + "<>:s$<b>65<i>5<i>6<i>6<b>0<s>1|"
+ + "<>:div$<b>65<i>5<i>6<i>6<b>0<s>2|@:class=header$<i>6<s>1]"
+
+ + "[(6-7)s:d|_7#6-7|"
+ + "<>:div$<b>65<i>6<i>7<i>7<b>0<s>1"
+ + "<>:s$<b>65<i>6<i>7<i>7<b>0<s>2|"
+ + "|@:class=header$<i>7<s>1|@:class=header$<i>7<s>2]");
return fd;
}
@@ -54,13 +74,24 @@
fd.addTV(
"base",
"bcbabd",
- "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|@:class=header$<i>3<s>1|@:class=title$<i>3<s>1|@:class=book$<i>3<s>1]"
- + "[(1-2)s:c|_2#1-2|<>:div#1-2$<i>2<s>1|@:class=header$<i>2<s>1|@:class=title$<i>2<s>1]"
- + "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=book$<i>5<s>1]"
- + "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<i>5<s>1]"
- + "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<i>5<s>1|@:class=book$<i>5<s>1|@:class=title$<i>5<s>1]"
- + "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<i>6<s>1]"
- + "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<i>7<s>1|@:class=title$<i>7<s>1]");
+ "[(0-1)s:b|_1#0-1|"
+ + "<>:div$<b>65<i>0<i>3<i>3<b>0<s>1|@:class=header$<i>3<s>1|@:class=title$<i>3<s>1|@:class=book$<i>3<s>1]"
+ + "<>:s<b>65<i>0<i>5<i>5<b>0<s>2|"
+ + "[(1-2)s:c|_2#1-2|"
+ + "<>:div$<b>65<i>1<i>2<i>2<b>0<s>1|@:class=header$<i>2<s>1|@:class=title$<i>2<s>1]"
+ + "[(2-3)s:b|_3#2-3|"
+ + "<>:div$<b>65<i>2<i>3<i>5<b>0<s>1|@:class=book$<i>5<s>1]"
+ + "[(3-4)s:a|_4#3-4|"
+ + "<>:div$<b>65<i>3<i>5<i>5<b>0<s>1|@:class=title$<i>5<s>1]"
+ + "[(4-5)s:b|_5#4-5|"
+ + "<>:div$<b>65<i>4<i>5<i>5<b>0<s>1|@:class=header$<i>5<s>1|@:class=book$<i>5<s>1|@:class=title$<i>5<s>1]"
+ + "[(5-6)s:d|_6#5-6|"
+ + "<>:div$<b>65<i>5<i>6<i>6<b>0<s>1|@:class=header$<i>6<s>1]"
+ + "<>:s$<b>65<i>5<i>6<i>6<b>0<s>2|"
+ + "[(6-7)s:d|_7#6-7|"
+ + "<>:div$<b>65<i>6<i>7<i>7<b>0<s>1|"
+ + "<>:s$<b>65<i>6<i>7<i>7<b>0<s>2|"
+ + "@:class=header$<i>7<s>1|@:class=title$<i>7<s>1]");
return fd;
}
@@ -72,13 +103,26 @@
fd.addTV(
"base",
"bcbabd",
- "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>1|<>:div#0-3$<i>3<s>2|@:class=header$<i>3<s>2|@:class=book$<i>5<s>1|@:class=book$<i>3<s>2]"
- + "[(1-2)s:e|_2#1-2|<>:div#1-2$<i>2<s>1|<>:a#1-2$<i>2<s>2|@:class=book$<i>2<s>2|@:class=header$<i>2<s>1]"
- + "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|<>:a#1-2$<i>2<s>2|@:class=header$<i>2<s>2|@:class=book$<i>5<s>1]"
- + "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<i>5<s>1]"
- + "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<i>5<s>1|@:class=book$<i>5<s>1]"
- + "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<i>6<s>1]"
- + "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<i>7<s>1|@:class=book$<i>7<s>2]");
+ "[(0-1)s:b|_1#0-1|"
+ + "<>:s$<b>65<i>0<i>5<i>5<b>0<s>1|"
+ + "<>:div$<b>65<i>0<i>3<i>3<b>0<s>2|@:class=header$<i>3<s>2|@:class=book$<i>5<s>1|@:class=book$<i>3<s>2]"
+ + "[(1-2)s:e|_2#1-2|"
+ + "<>:div$<b>65<i>1<i>2<i>2<b>0<s>1|"
+ + "<>:a$<b>65<i>1<i>2<i>2<b>0<s>2|@:class=book$<i>2<s>2|@:class=header$<i>2<s>1]"
+ + "[(2-3)s:b|_3#2-3|"
+ + "<>:div$<b>65<i>2<i>3<i>5<b>0<s>1|"
+ + "<>:a$<b>65<i>1<i>2<i>2<b>0<s>2|@:class=header$<i>2<s>2|@:class=book$<i>5<s>1]"
+ + "[(3-4)s:a|_4#3-4|"
+ + "<>:div$<b>65<i>3<i>5<i>5<b>0<s>1|@:class=title$<i>5<s>1]"
+ + "[(4-5)s:b|_5#4-5|"
+ + "<>:div$<b>65<i>4<i>5<i>5<b>0<s>1|@:class=header$<i>5<s>1|@:class=book$<i>5<s>1]"
+ + "[(5-6)s:d|_6#5-6|"
+ + "<>:s$<b>65<i>5<i>6<i>6<b>0<s>1|"
+ + "<>:div$<b>65<i>5<i>6<i>6<b>0<s>1|@:class=header$<i>6<s>1]"
+ + "[(6-7)s:d|_7#6-7|"
+ + "<>:div$<b>65<i>6<i>7<i>7<b>0<s>1|"
+ + "<>:s$<b>65<i>6<i>7<i>7<b>0<s>2|"
+ + "@:class=header$<i>7<s>1|@:class=book$<i>7<s>2]");
return fd;
}
@@ -97,21 +141,27 @@
SpanAttributeQuery saq = new SpanAttributeQuery(new SpanTermQuery(
new Term("base", "@:class=header")), true);
- // div with @class=header
- SpanQuery sq = new SpanWithAttributeQuery(new SpanElementQuery("base",
- "div"), saq, true);
+ SpanElementQuery seq = new SpanElementQuery("base", "div");
+
+ // div with @class=header
+ SpanQuery sq = new SpanWithAttributeQuery(seq, saq, true);
kr = ki.search(sq, (short) 10);
- assertEquals((long) 4, kr.getTotalResults());
- assertEquals(0, kr.getMatch(0).getStartPos());
- assertEquals(2, kr.getMatch(0).getEndPos());
- assertEquals(0, kr.getMatch(1).getStartPos());
- assertEquals(3, kr.getMatch(1).getEndPos());
- assertEquals(3, kr.getMatch(2).getStartPos());
- assertEquals(5, kr.getMatch(2).getEndPos());
- assertEquals(6, kr.getMatch(3).getStartPos());
- assertEquals(7, kr.getMatch(3).getEndPos());
+ // for (int i = 0; i < kr.getTotalResults(); i++) {
+ // System.out.println(kr.getMatch(i).getLocalDocID() + " "
+ // + kr.getMatch(i).startPos + " " + kr.getMatch(i).endPos);
+ // }
+ //
+ assertEquals((long) 4, kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).getStartPos());
+ assertEquals(2, kr.getMatch(0).getEndPos());
+ assertEquals(0, kr.getMatch(1).getStartPos());
+ assertEquals(3, kr.getMatch(1).getEndPos());
+ assertEquals(3, kr.getMatch(2).getStartPos());
+ assertEquals(5, kr.getMatch(2).getEndPos());
+ assertEquals(6, kr.getMatch(3).getStartPos());
+ assertEquals(7, kr.getMatch(3).getEndPos());
}
@@ -217,10 +267,7 @@
assertEquals(3, kr.getMatch(0).getStartPos());
assertEquals(5, kr.getMatch(0).getEndPos());
- // for (int i = 0; i < kr.getTotalResults(); i++) {
- // System.out.println(kr.getMatch(i).getLocalDocID() + " "
- // + kr.getMatch(i).startPos + " " + kr.getMatch(i).endPos);
- // }
+
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java
index 6309d59..8f61549 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java
@@ -1,6 +1,6 @@
package de.ids_mannheim.korap.index;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
import java.io.IOException;
@@ -10,11 +10,11 @@
import org.junit.Test;
import de.ids_mannheim.korap.KrillIndex;
-import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.query.DistanceConstraint;
import de.ids_mannheim.korap.query.SpanDistanceQuery;
import de.ids_mannheim.korap.query.SpanElementQuery;
import de.ids_mannheim.korap.query.SpanNextQuery;
+import de.ids_mannheim.korap.response.Result;
public class TestDistanceExclusionIndex {
@@ -122,6 +122,7 @@
false, true), true);
kr = ki.search(sq, (short) 10);
+
assertEquals((long) 1, kr.getTotalResults());
assertEquals(9, kr.getMatch(0).getStartPos());
assertEquals(10, kr.getMatch(0).getEndPos());
@@ -178,12 +179,14 @@
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-0");
fd.addTV("base", "text", "[(0-1)s:c|_1#0-1]" + "[(1-2)s:e|_2#1-2]"
- + "[(2-3)s:c|_3#2-3|<>:y#2-4$<i>4]"
- + "[(3-4)s:c|_4#3-4|<>:x#3-7$<i>7]"
- + "[(4-5)s:d|_5#4-5|<>:y#4-6$<i>6]"
- + "[(5-6)s:c|_6#5-6|<>:y#5-8$<i>8]" + "[(6-7)s:d|_7#6-7]"
- + "[(7-8)s:e|_8#7-8|<>:x#7-9$<i>9]" + "[(8-9)s:e|_9#8-9]"
- + "[(9-10)s:d|_10#9-10|<>:x#9-10$<i>10]");
+ + "[(2-3)s:c|_3#2-3|<>:y$<b>64<i>2<i>4<i>4]"
+ + "[(3-4)s:c|_4#3-4|<>:x$<b>64<i>3<i>7<i>7]"
+ + "[(4-5)s:d|_5#4-5|<>:y$<b>64<i>4<i>6<i>6]"
+ + "[(5-6)s:c|_6#5-6|<>:y$<b>64<i>5<i>8<i>8]"
+ + "[(6-7)s:d|_7#6-7]"
+ + "[(7-8)s:e|_8#7-8|<>:x$<b>64<i>7<i>9<i>9]"
+ + "[(8-9)s:e|_9#8-9]"
+ + "[(9-10)s:d|_10#9-10|<>:x$<b>64<i>9<i>10<i>10]");
return fd;
}