Updated Lucene to 4.3.1
diff --git a/CHANGES b/CHANGES
index b591594..77d7ffb 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,10 +1,13 @@
-0.25.1 2013-12-28
- - Support for pubDate collections.
- - Changed versioning scheme.
+0.25.3 2014-01-10
+ - Updated Lucene to 4.3.1.
0.25.2 2013-12-29
- Fixed highlight bug with empty reopened tags.
+0.25.1 2013-12-28
+ - Support for pubDate collections.
+ - Changed versioning scheme.
+
0.25 2013-12-20
- Support for Wildcard Queries.
- Improved support for regular expressions.
diff --git a/pom.xml b/pom.xml
index 50707e7..36ec331 100644
--- a/pom.xml
+++ b/pom.xml
@@ -9,7 +9,7 @@
<groupId>KorAP-modules</groupId>
<artifactId>KorAP-lucene-index</artifactId>
- <version>0.25.2</version>
+ <version>0.25.3</version>
<packaging>jar</packaging>
<name>KorAP-lucene-index</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index 2384f12..9b740ea 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -573,25 +573,18 @@
// Iterate over all terms in the document
while (termsEnum.next() != null) {
- log.trace("> {}", termsEnum.term().utf8ToString());
-
docs = termsEnum.docsAndPositions(
- null, //bitset.bits(),
- null, //docs,
- DocsAndPositionsEnum.FLAG_PAYLOADS
+ null, //bitset.bits(),
+ docs,
+ DocsAndPositionsEnum.FLAG_PAYLOADS
);
docs.nextDoc();
// log.trace("Check for '{}'({}) in document {}({}) from {}", termsEnum.term().utf8ToString(), termsEnum.totalTermFreq(), docs.docID(), localDocID, bitset.cardinality());
- docs.nextPosition();
- if (docs.docID() == DocIdSetIterator.NO_MORE_DOCS ||
- (docs.docID() != localDocID && docs.advance(localDocID) != localDocID))
+ if (docs.docID() == DocIdSetIterator.NO_MORE_DOCS)
continue;
- log.trace("Frequencies: {}!", docs.getPayload());
-
-
// Init docs
/*
if (docs.advance(localDocID) == DocIdSetIterator.NO_MORE_DOCS || docs.docID() != localDocID)
@@ -600,6 +593,7 @@
// How often does this term occur in the document?
int termOccurrences = docs.freq();
+ String termString = termsEnum.term().utf8ToString();
// Iterate over all occurrences
for (int i = 0; i < termOccurrences; i++) {
@@ -607,13 +601,24 @@
// Init positions and get the current
int pos = docs.nextPosition();
+ log.trace(">> {}: {}-{}-{}!",
+ termString, docs.freq(), pos, docs.getPayload());
+
+ BytesRef payload = docs.getPayload();
+
+ byte[] pl = new byte[12];
+
+ if (payload != null)
+ System.arraycopy(payload.bytes, payload.offset, pl, 0, payload.length);
+
+
// Check, if the position of the term is in the interesting area
if (pos >= startPos && pos <= endPos) {
termList.add(new TermInfo(
- termsEnum.term().utf8ToString(),
+ termString,
pos,
- docs.getPayload()
- ));
+ pl
+ ));
};
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
index 6859c8f..379a57e 100644
--- a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
+++ b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
@@ -1,18 +1,17 @@
package de.ids_mannheim.korap.index;
import java.util.*;
-import org.apache.lucene.util.BytesRef;
public class TermInfo {
private String prefix, foundry, layer, value;
private int pos = 0;
- private BytesRef payload;
+ private byte[] payload;
// Temporary:
private String name;
- public TermInfo (String name, int pos, BytesRef payload) {
+ public TermInfo (String name, int pos, byte[] payload) {
this.name = name;
this.pos = pos;
this.payload = payload;
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index a6e96e5..acf1c2f 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -64,7 +64,7 @@
);
};
ki.commit();
- System.err.println(ki.getMatch("test").toJSON());
+ // System.err.println(ki.getMatch("test").toJSON());
};
};