Update to Lucene 5 (third step)
Change-Id: I9edf56d338778cae9d68f86d4fada37fbb2cc96b
diff --git a/pom.xml b/pom.xml
index a72452b..e27e37d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -110,7 +110,7 @@
<artifactId>lucene-core</artifactId>
<groupId>org.apache.lucene</groupId>
<type>jar</type>
- <version>5.3.1</version>
+ <version>5.0.0</version>
</dependency>
<!-- Lucene queryparser dependency -->
@@ -118,7 +118,7 @@
<artifactId>lucene-queryparser</artifactId>
<groupId>org.apache.lucene</groupId>
<type>jar</type>
- <version>5.3.1</version>
+ <version>5.0.0</version>
</dependency>
<!-- Lucene analyzers dependency -->
@@ -126,7 +126,7 @@
<artifactId>lucene-analyzers-common</artifactId>
<groupId>org.apache.lucene</groupId>
<type>jar</type>
- <version>5.3.1</version>
+ <version>5.0.0</version>
</dependency>
<dependency>
diff --git a/src/main/java/de/ids_mannheim/korap/index/MultiTermTokenStream.java b/src/main/java/de/ids_mannheim/korap/index/MultiTermTokenStream.java
index 13bc89f..4b363dd 100644
--- a/src/main/java/de/ids_mannheim/korap/index/MultiTermTokenStream.java
+++ b/src/main/java/de/ids_mannheim/korap/index/MultiTermTokenStream.java
@@ -48,7 +48,7 @@
.compile("\\[(?:\\([0-9]+-[0-9]+\\))?([^\\]]+?)\\]");
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
+ public static final boolean DEBUG = true;
private final Logger log = LoggerFactory
.getLogger(MultiTermTokenStream.class);
@@ -56,6 +56,8 @@
private int mttIndex = 0, mtIndex = 0;
private short i = 0;
+ private ByteBuffer payload = ByteBuffer.allocate(36);
+
/**
* Construct a new MultiTermTokenStream object.
@@ -399,7 +401,7 @@
charTermAttr.setEmpty();
charTermAttr.append(mt.term);
- ByteBuffer payload = ByteBuffer.allocateDirect(16);
+ payload.rewind();
// There is offset information
if (mt.start != mt.end) {
@@ -408,20 +410,41 @@
mt.end);
// Add offsets to BytesRef payload
- payload.put(int2byte(mt.start));
- payload.put(int2byte(mt.end));
+ payload.putInt(mt.start);
+ payload.putInt(mt.end);
};
// There is payload in the MultiTerm
if (mt.payload != null) {
+
payload.put(mt.payload.bytes);
+
+ if (payload.position() > 18) {
+ System.err.println(mt.toString() + " has " + payload.toString());
+ };
+
if (DEBUG)
log.trace("Create payload[1] {}", payload.toString());
};
// There is payload in the current token to index
+ // NOTE: risky - the payload buffer is shared and reused across tokens, so only a copy of the written bytes may be handed out.
if (payload.position() > 0) {
- payloadAttr.setPayload(new BytesRef(payload.array()));
+
+ if (payload.hasArray()) {
+ payloadAttr.setPayload(
+ new BytesRef(
+ Arrays.copyOfRange(payload.array(),
+ payload.arrayOffset(),
+ payload.arrayOffset() + payload.position()
+ )
+ )
+ );
+ }
+ else {
+ log.error("This should never happen!");
+ };
+
if (DEBUG)
log.trace("Set payload[2] {}", payload.toString());
};
diff --git a/src/main/java/de/ids_mannheim/korap/util/KrillByte.java b/src/main/java/de/ids_mannheim/korap/util/KrillByte.java
index 6ae3256..419a43f 100644
--- a/src/main/java/de/ids_mannheim/korap/util/KrillByte.java
+++ b/src/main/java/de/ids_mannheim/korap/util/KrillByte.java
@@ -19,6 +19,7 @@
*/
// Based on
// http://www.tutorials.de/java/228129-konvertierung-von-integer-byte-array.html
+ // TODO: check whether int2byte is still used anywhere; it may be obsolete
public static byte[] int2byte (int number) {
byte[] data = new byte[4];
for (int i = 0; i < 4; ++i) {
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
index 8618592..22cbc67 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
@@ -95,7 +95,7 @@
fd.addTV(
"base",
"ceccecdeed",
- "[(0-1)s:c$<s>2|<>:p$#0-3$<i>3<s>1|_0#0-1|"
+ "[(0-1)s:c$<s>2|<>:p#0-3$<i>3<s>1|_0#0-1|"
+ ">:xip/syntax-dep_rel$<i>3<i>6<i>9<s>2<s>1<s>1|"
+ ">:xip/syntax-dep_rel$<i>6<i>9<s>1<s>2<s>1|"
+ "r@:func=subj$<s>2]"
@@ -104,7 +104,7 @@
+ "[(3-4)s:c|s:b|_3#3-4]"
+ "[(4-5)s:e|s:d|_4#4-5]"
+ "[(5-6)s:c|_5#5-6]"
- + "[(6-7)s:d$<s>2|<>:p$#6-9$<i>9<s>1|_6#6-7|"
+ + "[(6-7)s:d$<s>2|<>:p#6-9$<i>9<s>1|_6#6-7|"
+ "<:xip/syntax-dep_rel$<i>9<b>0<i>1<s>1<s>1<s>2|"
+ ">:xip/syntax-dep_rel$<i>9<b>0<i>9<s>3<s>1<s>1|"
+ "<:xip/syntax-dep_rel$<i>9<i>1<i>3<s>2<s>1<s>1|"
@@ -634,5 +634,4 @@
assertEquals(4, kr.getMatch(3).getStartPos());
assertEquals(7, kr.getMatch(3).getEndPos());
}
-
-}
\ No newline at end of file
+}