Fixed sentence extension tests for payload IDs
Change-Id: I95173fdd581f939163a1ec3256667181c444ec82
diff --git a/Changes b/Changes
index 174026b..95bc323 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,6 @@
-0.55 2015-12-02
- - Introducing payload ids to the index structure
+0.55 2016-02-04
+ - [feature] Introducing payload identifiers to the index
+ structure (diewald, margaretha)
0.54 2015-11-13
- [performance] Updated Lucene dependency from 4.10.3
diff --git a/src/main/java/de/ids_mannheim/korap/index/MultiTermTokenStream.java b/src/main/java/de/ids_mannheim/korap/index/MultiTermTokenStream.java
index 4b363dd..02f0d5e 100644
--- a/src/main/java/de/ids_mannheim/korap/index/MultiTermTokenStream.java
+++ b/src/main/java/de/ids_mannheim/korap/index/MultiTermTokenStream.java
@@ -419,10 +419,6 @@
payload.put(mt.payload.bytes);
- if (payload.position() > 18) {
- System.err.println(mt.toString() + " has " + payload.toString());
- };
-
if (DEBUG)
log.trace("Create payload[1] {}", payload.toString());
};
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 9bbef70..2027ab5 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -786,7 +786,7 @@
try {
// Store character offsets in ByteBuffer
- ByteBuffer bb = ByteBuffer.allocate(8);
+ ByteBuffer bb = ByteBuffer.allocate(24);
SpanElementQuery cquery = new SpanElementQuery(field, element);
@@ -797,7 +797,7 @@
int newStartChar = -1, newEndChar = -1;
if (DEBUG)
- log.trace("Extend match to context boundary with {} in {}",
+ log.trace("Extend match to context boundary with {} in docID {}",
cquery.toString(), this.localDocID);
while (true) {
@@ -831,18 +831,19 @@
for (byte[] b : contextSpans.getPayload()) {
// Not an element span
- if (b.length != 8)
+ if (b[0] != (byte) 64)
continue;
- bb.put(b);
bb.rewind();
+ bb.put(b);
+ bb.position(1);
newStartChar = bb.getInt();
newEndChar = bb.getInt();
break;
};
}
catch (Exception e) {
- log.warn(e.getMessage());
+ log.warn("Some problems with ByteBuffer: " + e.getMessage());
};
};
}
@@ -862,11 +863,12 @@
for (byte[] b : contextSpans.getPayload()) {
// Not an element span
- if (b.length != 8)
+ if (b[0] != (byte) 64)
continue;
- bb.put(b);
bb.rewind();
+ bb.put(b);
+ bb.position(1);
newEndChar = bb.getInt(1);
break;
};
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 850a404..5fed33b 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,4 +1,4 @@
-# log4j.rootLogger = ERROR, stdout
+log4j.rootLogger = ERROR, stdout
# Queries:
# log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
@@ -28,7 +28,7 @@
# log4j.logger.de.ids_mannheim.korap.response.Match = TRACE, stdout
# Index:
-log4j.logger.de.ids_mannheim.korap.KrillIndex = TRACE, stdout
+# log4j.logger.de.ids_mannheim.korap.KrillIndex = TRACE, stdout
# log4j.logger.de.ids_mannheim.korap.index.PositionsToOffset = TRACE, stdout
# log4j.logger.de.ids_mannheim.korap.index.MultiTermTokenStream = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestReferenceIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestReferenceIndex.java
index b7125d5..8bc0242 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestReferenceIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestReferenceIndex.java
@@ -72,11 +72,14 @@
// System.out.println(ref.toString());
kr = ki.search(ref, (short) 10);
+ /*
for (Match km : kr.getMatches()) {
+
System.out.println(km.getStartPos() + "," + km.getEndPos() + " "
+ km.getSnippetBrackets());
}
System.out.println(kr.getTotalResults());
+ */
assertEquals(
"spanReference(spanDistance(focus(1: focus(#[1,2]spanSegment("
+ "focus(#1: spanSegment(spanRelation(base:<:child-of), focus(2: spanNext("
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
index 4c4ab60..6f899a8 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
@@ -233,7 +233,6 @@
assertEquals(9, kr.getMatch(6).getEndPos());
// check target
-
}
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
index bdbb580..4c69178 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
@@ -951,7 +951,8 @@
// Construct index
KrillIndex ki = new KrillIndex();
// Indexing test files
- for (String i : new String[] { "00001", "00002", "00003", "00004",
+ for (String i : new String[] { "00001",
+ "00002", "00003", "00004",
"00005", "00006", "02439" }) {
ki.addDoc(
getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
@@ -975,12 +976,15 @@
assertEquals(kr.getTotalResults(), 3);
assertEquals(0, kr.getStartIndex());
assertEquals(25, kr.getItemsPerPage());
+
assertFalse(kr.getContext().toJsonNode().toString().equals("\"s\""));
json = getString(getClass().getResource(
"/queries/bsp-context-sentence.jsonld").getFile());
kr = new Krill(json).apply(ki);
+ assertEquals(kr.getContext().toJsonNode().toString(), "\"s\"");
+
assertEquals(kr.getMatch(0).getSnippetBrackets(),
"steht a für den dezimalen [Wert] 97 sowohl im ASCII-"
+ " als auch im Unicode-Zeichensatz");
@@ -991,8 +995,6 @@
"In einem Zahlensystem mit einer Basis größer "
+ "als 10 steht A oder a häufig für den dezimalen"
+ " [Wert] 10, siehe auch Hexadezimalsystem.");
-
- assertEquals(kr.getContext().toJsonNode().toString(), "\"s\"");
};