Improve reader/writer handling (fixes #55)
Change-Id: Ia2e81a211b6a8c0ee7c2de133accfdb1e6bc3bc2
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index aecbf30..738748b 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -34,7 +34,6 @@
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.TermQuery;
@@ -155,9 +154,8 @@
private IndexReader reader;
private IndexWriter writer;
- private IndexWriterConfig config;
- private IndexSearcher searcher;
private boolean readerOpen = false;
+ private boolean writerOpen = false;
private Directory directory;
// The commit counter is only there for
@@ -217,18 +215,6 @@
*/
public KrillIndex (Directory directory) throws IOException {
this.directory = directory;
-
- // Add analyzers
- // TODO: Should probably not be here - make configurable
- Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
- analyzerPerField.put("textClass", new KeywordAnalyzer());
- analyzerPerField.put("keywords", new KeywordAnalyzer());
- analyzerPerField.put("foundries", new KeywordAnalyzer());
- PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
- new TextAnalyzer(), analyzerPerField);
-
- // Create configuration with base analyzer
- this.config = new IndexWriterConfig(analyzer);
};
@@ -279,37 +265,37 @@
*/
public IndexWriter writer () throws IOException {
// Open writer if not already opened
- if (this.writer == null)
- this.writer = new IndexWriter(this.directory, this.config);
+ if (!writerOpen)
+ this.openWriter();
+ if (!writerOpen)
+ return null;
return this.writer;
};
-
-
- /**
- * The Lucene {@link IndexSearcher} object.
- *
- * Will be created, in case it doesn't exist yet.
- *
- * @return The {@link IndexSearcher} object.
- */
- public IndexSearcher searcher () {
- if (this.searcher == null)
- this.searcher = new IndexSearcher(this.reader());
- return this.searcher;
- };
-
+
// Open index reader
- private void openReader () {
+ private void openWriter () {
+ if (writerOpen) {
+ return;
+ };
+
try {
- // open reader
- this.reader = DirectoryReader.open(this.directory);
- readerOpen = true;
- if (this.searcher != null)
- this.searcher = new IndexSearcher(reader);
+
+ // Add analyzers
+ // This is just for legacy reasons
+ Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
+ analyzerPerField.put("textClass", new KeywordAnalyzer());
+ analyzerPerField.put("keywords", new KeywordAnalyzer());
+ analyzerPerField.put("foundries", new KeywordAnalyzer());
+ PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
+ new TextAnalyzer(), analyzerPerField);
+
+ // Create configuration with base analyzer
+ this.writer = new IndexWriter(this.directory, new IndexWriterConfig(analyzer));
+ writerOpen = true;
}
- // Failed to open reader
+ // Failed to open writer
catch (IOException e) {
// e.printStackTrace();
log.warn(e.getLocalizedMessage());
@@ -317,19 +303,47 @@
};
+ // Open index reader
+ private void openReader () {
+ if (readerOpen) {
+ // this.reader = DirectoryReader.openIfChanged(this.reader)
+ return;
+ };
+
+ try {
+ // open reader
+ this.reader = DirectoryReader.open(this.directory);
+ readerOpen = true;
+ }
+
+ // Failed to open reader
+ catch (IOException e) {
+ // In tests this is usually harmless:
+ // the message just says
+ // "No segments found", because the index
+ // is empty initially.
+ log.warn(e.getLocalizedMessage());
+ };
+ };
+
+
// Close index reader
public void closeReader () throws IOException {
- if (readerOpen) {
+ if (readerOpen || this.reader != null) {
this.reader.close();
+ this.reader = null;
readerOpen = false;
};
};
// Close index writer
- private void closeWriter () throws IOException {
- if (this.writer != null)
+ public void closeWriter () throws IOException {
+ if (writerOpen || this.writer != null) {
this.writer.close();
+ this.writer = null;
+ writerOpen = false;
+ };
};
@@ -341,8 +355,8 @@
* @throws IOException
*/
public void close () throws IOException {
- this.closeReader();
this.closeWriter();
+ this.closeReader();
};
@@ -472,6 +486,9 @@
};
this.delDocs("textSigle", textSigle);
};
+ }
+ else {
+ log.warn("Reader is null");
};
}
@@ -683,12 +700,17 @@
try {
if (gzip) {
+ GZIPInputStream gzipFile = new GZIPInputStream(json);
+
// Create json field document
FieldDocument fd = this.mapper.readValue(
- new GZIPInputStream(json), FieldDocument.class);
+ gzipFile, FieldDocument.class);
+ gzipFile.close();
return fd;
};
- return this.mapper.readValue(json, FieldDocument.class);
+ FieldDocument field = this.mapper.readValue(json, FieldDocument.class);
+ json.close();
+ return field;
}
// Fail to add json object
diff --git a/src/main/java/de/ids_mannheim/korap/index/Indexer.java b/src/main/java/de/ids_mannheim/korap/index/Indexer.java
index 3759329..d685b63 100644
--- a/src/main/java/de/ids_mannheim/korap/index/Indexer.java
+++ b/src/main/java/de/ids_mannheim/korap/index/Indexer.java
@@ -107,7 +107,7 @@
try {
if (addInsteadOfUpsert) {
- log.info("Add " + file + " to the index. ");
+ log.info("{} Add {} to the index. ", this.count, file);
if (this.index.addDoc(new FileInputStream(file),
true) == null) {
log.warn("fail.");
@@ -115,7 +115,7 @@
}
}
else {
- log.info("Add or update " + file + " to the index. ");
+ log.info("{} Add or update {} to the index. ", this.count, file);
if (this.index.upsertDoc(new FileInputStream(file),
true) == null) {
log.warn("fail.");
@@ -128,8 +128,12 @@
}
// Commit in case the commit count is reached
- if ((this.count % this.commitCount) == 0)
+ if ((this.count % this.commitCount) == 0) {
+
+ // This will be done in addition to the
+ // autocommit initiated by KrillIndex
this.commit();
+ }
}
catch (FileNotFoundException e) {
log.error("File " + file + " is not found!");
@@ -156,9 +160,10 @@
}
}
- private void closeIndex() throws IOException{
+ private void closeIndex() throws IOException {
this.commit();
- index.close();
+ this.index.closeReader();
+ this.index.closeWriter();
}
/**
@@ -170,7 +175,7 @@
* and a list of directories
* @throws IOException
*/
- public static void main (String[] argv) throws IOException {
+ public static void main (String[] argv) {
Options options = new Options();
options.addOption(Option.builder("c").longOpt("config")
@@ -190,7 +195,6 @@
options.addOption(Option.builder("a").longOpt("addInsteadofUpsert")
.desc("Always add files to the index, never update")
.build());
-
CommandLineParser parser = new DefaultParser();
@@ -212,6 +216,7 @@
if (cmd.hasOption("a")) {
addInsteadOfUpsert = true;
};
+
}
catch (MissingOptionException e) {
HelpFormatter formatter = new HelpFormatter();
@@ -229,30 +234,35 @@
// Load properties
Properties prop = KrillProperties.loadProperties(propFile);
- // Get indexer object
- Indexer indexer = new Indexer(prop);
+ try {
+ // Get indexer object
+ Indexer indexer = new Indexer(prop);
- // Iterate over list of directories
- for (String arg : inputDirectories) {
- log.info("Indexing files in " + arg);
- File f = new File(arg);
- if (f.isDirectory())
- indexer.parse(f);
+ // Iterate over list of directories
+ for (String arg : inputDirectories) {
+ log.info("Indexing files in " + arg);
+ File f = new File(arg);
+ if (f.isDirectory())
+ indexer.parse(f);
+ }
+ indexer.closeIndex();
+
+ // The final commit was done by closeIndex()
+ log.info("Finished indexing.");
+ // Report how many files were indexed
+ String message = "Added ";
+ if (!addInsteadOfUpsert)
+ message += "or updated ";
+ message += indexer.count + " file";
+ if (indexer.count > 1) {
+ message += "s";
+ }
+ System.out.println(message + ".");
}
-
- indexer.closeIndex();
-
- // Final commit
- log.info("Finished indexing.");
- // Finish indexing
- String message = "Added ";
- if (!addInsteadOfUpsert)
- message += "or updated ";
- message += indexer.count + " file";
- if (indexer.count > 1) {
- message += "s";
+
+ catch (IOException e) {
+ log.error("Unexpected error: " + e);
+ e.printStackTrace();
}
- System.out.print(message + ".");
-
}
}