Improve reader/writer handling (fixes #55)
Change-Id: Ia2e81a211b6a8c0ee7c2de133accfdb1e6bc3bc2
diff --git a/Changes b/Changes
index 23d6a7d..f967ed6 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,5 @@
-0.58.6 2019-05-02
- - [bugfix] Updated cache loading (fixed #55) (diewald, margaretha)
+0.58.6 2019-05-28
+ - [bugfix] Updated cache loading (fixed #55) (diewald, margaretha)
0.58.5 2019-03-18
- [bugfix] Fix bug where duplicate keys occured in
@@ -17,7 +17,7 @@
- [bugfix] Fixed #53 element distance query bug (margaretha)
- [bugfix] Workaround for #54 failing offsets due to
surrogate pairs (diewald)
- - Added isReaderOpen method (margaretha)
+ - [feature] Added isReaderOpen method (margaretha)
0.58.4 2019-02-05
- [cleanup] Remove deprecated methods setLicense/getLicense,
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index aecbf30..738748b 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -34,7 +34,6 @@
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.TermQuery;
@@ -155,9 +154,8 @@
private IndexReader reader;
private IndexWriter writer;
- private IndexWriterConfig config;
- private IndexSearcher searcher;
private boolean readerOpen = false;
+ private boolean writerOpen = false;
private Directory directory;
// The commit counter is only there for
@@ -217,18 +215,6 @@
*/
public KrillIndex (Directory directory) throws IOException {
this.directory = directory;
-
- // Add analyzers
- // TODO: Should probably not be here - make configurable
- Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
- analyzerPerField.put("textClass", new KeywordAnalyzer());
- analyzerPerField.put("keywords", new KeywordAnalyzer());
- analyzerPerField.put("foundries", new KeywordAnalyzer());
- PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
- new TextAnalyzer(), analyzerPerField);
-
- // Create configuration with base analyzer
- this.config = new IndexWriterConfig(analyzer);
};
@@ -279,37 +265,37 @@
*/
public IndexWriter writer () throws IOException {
// Open writer if not already opened
- if (this.writer == null)
- this.writer = new IndexWriter(this.directory, this.config);
+ if (!writerOpen)
+ this.openWriter();
+ if (!writerOpen)
+ return null;
return this.writer;
};
-
-
- /**
- * The Lucene {@link IndexSearcher} object.
- *
- * Will be created, in case it doesn't exist yet.
- *
- * @return The {@link IndexSearcher} object.
- */
- public IndexSearcher searcher () {
- if (this.searcher == null)
- this.searcher = new IndexSearcher(this.reader());
- return this.searcher;
- };
-
+
// Open index reader
- private void openReader () {
+ private void openWriter () {
+ if (writerOpen) {
+ return;
+ };
+
try {
- // open reader
- this.reader = DirectoryReader.open(this.directory);
- readerOpen = true;
- if (this.searcher != null)
- this.searcher = new IndexSearcher(reader);
+
+ // Add analyzers
+ // This is just for legacy reasons
+ Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
+ analyzerPerField.put("textClass", new KeywordAnalyzer());
+ analyzerPerField.put("keywords", new KeywordAnalyzer());
+ analyzerPerField.put("foundries", new KeywordAnalyzer());
+ PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
+ new TextAnalyzer(), analyzerPerField);
+
+ // Create configuration with base analyzer
+ this.writer = new IndexWriter(this.directory, new IndexWriterConfig(analyzer));
+ writerOpen = true;
}
- // Failed to open reader
+ // Failed to open writer
catch (IOException e) {
// e.printStackTrace();
log.warn(e.getLocalizedMessage());
@@ -317,19 +303,47 @@
};
+ // Open index reader
+ private void openReader () {
+ if (readerOpen) {
+ // this.reader = DirectoryReader.openIfChanged(this.reader)
+ return;
+ };
+
+ try {
+ // open reader
+ this.reader = DirectoryReader.open(this.directory);
+ readerOpen = true;
+ }
+
+ // Failed to open reader
+ catch (IOException e) {
+ // This is in tests most of the time
+ // no problem, because the message just says
+ // "No segments found", because the reader
+ // is empty initially.
+ log.warn(e.getLocalizedMessage());
+ };
+ };
+
+
// Close index reader
public void closeReader () throws IOException {
- if (readerOpen) {
+ if (readerOpen || this.reader != null) {
this.reader.close();
+ this.reader = null;
readerOpen = false;
};
};
// Close index writer
- private void closeWriter () throws IOException {
- if (this.writer != null)
+ public void closeWriter () throws IOException {
+ if (writerOpen || this.writer != null) {
this.writer.close();
+ this.writer = null;
+ writerOpen = false;
+ };
};
@@ -341,8 +355,8 @@
* @throws IOException
*/
public void close () throws IOException {
- this.closeReader();
this.closeWriter();
+ this.closeReader();
};
@@ -472,6 +486,9 @@
};
this.delDocs("textSigle", textSigle);
};
+ }
+ else {
+ log.warn("Reader is null");
};
}
@@ -683,12 +700,17 @@
try {
if (gzip) {
+ GZIPInputStream gzipFile = new GZIPInputStream(json);
+
// Create json field document
FieldDocument fd = this.mapper.readValue(
- new GZIPInputStream(json), FieldDocument.class);
+ gzipFile, FieldDocument.class);
+ gzipFile.close();
return fd;
};
- return this.mapper.readValue(json, FieldDocument.class);
+ FieldDocument field = this.mapper.readValue(json, FieldDocument.class);
+ json.close();
+ return field;
}
// Fail to add json object
diff --git a/src/main/java/de/ids_mannheim/korap/index/Indexer.java b/src/main/java/de/ids_mannheim/korap/index/Indexer.java
index 3759329..d685b63 100644
--- a/src/main/java/de/ids_mannheim/korap/index/Indexer.java
+++ b/src/main/java/de/ids_mannheim/korap/index/Indexer.java
@@ -107,7 +107,7 @@
try {
if (addInsteadOfUpsert) {
- log.info("Add " + file + " to the index. ");
+ log.info("{} Add {} to the index. ", this.count, file);
if (this.index.addDoc(new FileInputStream(file),
true) == null) {
log.warn("fail.");
@@ -115,7 +115,7 @@
}
}
else {
- log.info("Add or update " + file + " to the index. ");
+ log.info("{} Add or update {} to the index. ", this.count, file);
if (this.index.upsertDoc(new FileInputStream(file),
true) == null) {
log.warn("fail.");
@@ -128,8 +128,12 @@
}
// Commit in case the commit count is reached
- if ((this.count % this.commitCount) == 0)
+ if ((this.count % this.commitCount) == 0) {
+
+ // This will be done in addition to the
+ // autocommit initiated by KrillIndex
this.commit();
+ }
}
catch (FileNotFoundException e) {
log.error("File " + file + " is not found!");
@@ -156,9 +160,10 @@
}
}
- private void closeIndex() throws IOException{
+ private void closeIndex() throws IOException {
this.commit();
- index.close();
+ this.index.closeReader();
+ this.index.closeWriter();
}
/**
@@ -170,7 +175,7 @@
* and a list of directories
* @throws IOException
*/
- public static void main (String[] argv) throws IOException {
+ public static void main (String[] argv) {
Options options = new Options();
options.addOption(Option.builder("c").longOpt("config")
@@ -190,7 +195,6 @@
options.addOption(Option.builder("a").longOpt("addInsteadofUpsert")
.desc("Always add files to the index, never update")
.build());
-
CommandLineParser parser = new DefaultParser();
@@ -212,6 +216,7 @@
if (cmd.hasOption("a")) {
addInsteadOfUpsert = true;
};
+
}
catch (MissingOptionException e) {
HelpFormatter formatter = new HelpFormatter();
@@ -229,30 +234,35 @@
// Load properties
Properties prop = KrillProperties.loadProperties(propFile);
- // Get indexer object
- Indexer indexer = new Indexer(prop);
+ try {
+ // Get indexer object
+ Indexer indexer = new Indexer(prop);
- // Iterate over list of directories
- for (String arg : inputDirectories) {
- log.info("Indexing files in " + arg);
- File f = new File(arg);
- if (f.isDirectory())
- indexer.parse(f);
+ // Iterate over list of directories
+ for (String arg : inputDirectories) {
+ log.info("Indexing files in " + arg);
+ File f = new File(arg);
+ if (f.isDirectory())
+ indexer.parse(f);
+ }
+ indexer.closeIndex();
+
+ // Final commit
+ log.info("Finished indexing.");
+ // Finish indexing
+ String message = "Added ";
+ if (!addInsteadOfUpsert)
+ message += "or updated ";
+ message += indexer.count + " file";
+ if (indexer.count > 1) {
+ message += "s";
+ }
+ System.out.println(message + ".");
}
-
- indexer.closeIndex();
-
- // Final commit
- log.info("Finished indexing.");
- // Finish indexing
- String message = "Added ";
- if (!addInsteadOfUpsert)
- message += "or updated ";
- message += indexer.count + " file";
- if (indexer.count > 1) {
- message += "s";
+
+ catch (IOException e) {
+ log.error("Unexpected error: " + e);
+ e.printStackTrace();
}
- System.out.print(message + ".");
-
}
}
diff --git a/src/test/java/de/ids_mannheim/korap/TestIndexer.java b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
index dc78958..c575f12 100644
--- a/src/test/java/de/ids_mannheim/korap/TestIndexer.java
+++ b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
@@ -28,22 +28,22 @@
@Test
public void testArguments () throws IOException {
Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",
- "-i", "src/test/resources/bzk" });
- assertEquals("Added or updated 1 file.", outputStream.toString());
+ "-i", "src/test/resources/bzk"});
+ assertEquals("Added or updated 1 file.\n", outputStream.toString());
}
@Test
public void testOutputArgument () throws IOException {
Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",
- "-i", "src/test/resources/bzk", "-o", "test-output"});
- assertEquals("Added or updated 1 file.", outputStream.toString());
+ "-i", "src/test/resources/bzk", "-o", "test-output"});
+ assertEquals("Added or updated 1 file.\n", outputStream.toString());
}
@Test
public void testMultipleInputFiles () throws IOException {
Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",
- "-i", "src/test/resources/wiki" });
- assertEquals("Added or updated 18 files.", outputStream.toString());
+ "-i", "src/test/resources/wiki"});
+ assertEquals("Added or updated 18 files.\n", outputStream.toString());
}
@@ -52,19 +52,19 @@
Indexer.main(new String[] {
"-c", "src/test/resources/krill.properties",
"-i", "src/test/resources/bzk",
- "-a" });
+ "-a"});
logger.info(outputStream.toString());
- assertEquals(outputStream.toString(), "Added 1 file.");
+ assertEquals(outputStream.toString(), "Added 1 file.\n");
}
@Test
public void testMultipleInputDirectories () throws IOException {
Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",
- "-i",
- "src/test/resources/bzk;src/test/resources/goe;src/test/resources/sgbr",
- "-o", "test-index" });
- assertEquals("Added or updated 5 files.", outputStream.toString());
+ "-i",
+ "src/test/resources/bzk;src/test/resources/goe;src/test/resources/sgbr",
+ "-o", "test-index"});
+ assertEquals("Added or updated 5 files.\n", outputStream.toString());
}
@Test
@@ -78,7 +78,7 @@
@Test
public void testMissingConfig () throws IOException {
Indexer.main(new String[] { "-i", "src/test/resources/bzk",
- "-o test-index" });
+ "-o test-index"});
logger.info(outputStream.toString());
assertEquals(true, outputStream.toString().startsWith(info));
}
@@ -86,7 +86,7 @@
@Test
public void testMissingInput () throws IOException {
Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",
- "-o", "test-index" });
+ "-o", "test-index"});
logger.info(outputStream.toString());
assertEquals(true, outputStream.toString().startsWith(info));
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
index 923fec1..282ee80 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
@@ -669,6 +669,18 @@
ki.upsertDoc(getClass().getResourceAsStream("/wiki/WPD17-H81-63495.json.gz"), true);
ki.commit();
assertEquals(ki.numberOf("documents"), 3);
+
+ ki.close();
+
+ fd = new FieldDocument();
+ fd.addString("textSigle", "AAA/DDD/005");
+ fd.addString("content", "Example4");
+
+ ki.upsertDoc(fd);
+ ki.commit();
+
+ assertEquals(ki.numberOf("documents"), 4);
+
};