Fix minor indexer bugs
diff --git a/pom.xml b/pom.xml
index 9072dc8..651e2b5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -126,6 +126,7 @@
$ java -jar target/KorAP-lucene-index-X.XX-jar-with-dependencies.jar src/test/resources/wiki/
or
$ java -jar target/KorAP-lucene-index-X.XX-jar-with-dependencies.jar /home/ndiewald/Repositories/korap/KorAP-modules/KorAP-lucene-index/sandbox/toindex/A00
+ $ java -jar target/KorAP-lucene-index-0.21-jar-with-dependencies.jar /home/ndiewald/Repositories/KorAP/KorAP-modules/KorAP-lucene-index/sandbox/toindex/A00
-->
<plugin>
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndex.java b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
index fc0d31c..7492c7d 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndex.java
@@ -216,20 +216,26 @@
};
public FieldDocument addDoc (File json) throws IOException {
- FieldDocument fd = this.mapper.readValue(json, FieldDocument.class);
- return this.addDoc(fd);
+ FieldDocument fd = this.mapper.readValue(json, FieldDocument.class);
+ return this.addDoc(fd);
};
public FieldDocument addDocFile(String json) throws IOException {
- return this.addDocFile(json, false);
+ return this.addDocFile(json, false);
};
- public FieldDocument addDocFile(String json, boolean gzip) throws IOException {
+ public FieldDocument addDocFile(String json, boolean gzip) {
+ try {
if (gzip) {
- FieldDocument fd = this.mapper.readValue(new GZIPInputStream(new FileInputStream(json)), FieldDocument.class);
- return this.addDoc(fd);
+ FieldDocument fd = this.mapper.readValue(new GZIPInputStream(new FileInputStream(json)), FieldDocument.class);
+ return this.addDoc(fd);
};
return this.addDoc(json);
+ }
+ catch (IOException e) {
+ log.error("Unable to read or index file " + json, e); // report the actual path and keep the cause; not every IOException here is "not found"
+ };
+ return (FieldDocument) null;
};
public void commit () throws IOException {
diff --git a/src/main/java/de/ids_mannheim/korap/KorapIndexer.java b/src/main/java/de/ids_mannheim/korap/KorapIndexer.java
index cf9355b..1bdfb8d 100644
--- a/src/main/java/de/ids_mannheim/korap/KorapIndexer.java
+++ b/src/main/java/de/ids_mannheim/korap/KorapIndexer.java
@@ -17,7 +17,10 @@
public KorapIndexer () throws IOException {
Properties prop = new Properties();
- FileReader fr = new FileReader(getClass().getResource("/korap.conf").getFile());
+
+ ClassLoader classLoader = getClass().getClassLoader();
+ InputStream fr = classLoader.getResourceAsStream("korap.conf");
+
prop.load(fr);
this.indexDir = prop.getProperty("lucene.index");
@@ -33,10 +36,7 @@
if (file.matches("^[^\\.].+?\\.json\\.gz$")) {
String found = dir.getPath() + '/' + file;
System.out.print(" Index " + found + " ... ");
- try {
- this.index.addDocFile(found, true);
- }
- catch (IOException e) {
+ if (this.index.addDocFile(found, true) == null) {
System.out.println("fail.");
continue;
};
diff --git a/src/main/resources/korap.conf b/src/main/resources/korap.conf
index e69de29..debbd62 100644
--- a/src/main/resources/korap.conf
+++ b/src/main/resources/korap.conf
@@ -0,0 +1,5 @@
+# Lucene Backend properties
+lucene.properties = true
+lucene.index = /home/ndiewald/Repositories/korap/KorAP-modules/KorAP-lucene-index/sandbox/index
+lucene.index.commit.count = 10000
+lucene.index.commit.log = log/korap.commit.log
\ No newline at end of file
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 0b4991c..bf8fd29 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,6 +1,6 @@
## logger file can be used with
-# log4j.rootLogger = DEBUG, stdout
+log4j.rootLogger = DEBUG, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.ElementSpans = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.WithinSpans = TRACE, stdout
@@ -9,8 +9,8 @@
#log4j.logger.de.ids_mannheim.korap.query.spans.KorapTermSpan = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.ClassSpans = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.query.spans.MatchSpans = TRACE, stdout
-log4j.logger.de.ids_mannheim.korap.KorapIndex = TRACE, stdout
-log4j.logger.de.ids_mannheim.korap.KorapMatch = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.KorapIndex = TRACE, stdout
+#log4j.logger.de.ids_mannheim.korap.KorapMatch = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.KorapCollection = TRACE, stdout
#log4j.logger.de.ids_mannheim.korap.index.PositionsToOffset = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java
index ea35a63..bcd9ed3 100644
--- a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java
+++ b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java
@@ -83,16 +83,15 @@
// Construct index
KorapIndex ki = new KorapIndex();
- FieldDocument fd;
// Indexing test files
for (String i : new String[] {"00001", "00002", "00003", "00004", "00005", "00006", "02439"}) {
- fd = ki.addDocFile(
+ ki.addDocFile(
getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
);
};
ki.commit();
- fd = ki.addDocFile(getClass().getResource("/wiki/AUG-55286.json.gz").getFile(), true);
+ ki.addDocFile(getClass().getResource("/wiki/AUG-55286.json.gz").getFile(), true);
ki.commit();
diff --git a/src/test/resources/korap.conf b/src/test/resources/korap.conf
index 555b1a8..debbd62 100644
--- a/src/test/resources/korap.conf
+++ b/src/test/resources/korap.conf
@@ -1,5 +1,5 @@
# Lucene Backend properties
lucene.properties = true
lucene.index = /home/ndiewald/Repositories/korap/KorAP-modules/KorAP-lucene-index/sandbox/index
-lucene.index.commit.count = 5000
+lucene.index.commit.count = 10000
lucene.index.commit.log = log/korap.commit.log
\ No newline at end of file