Improve reader/writer handling (fixes #55)

Change-Id: Ia2e81a211b6a8c0ee7c2de133accfdb1e6bc3bc2
diff --git a/Changes b/Changes
index 23d6a7d..f967ed6 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,5 @@
-0.58.6 2019-05-02
-	- [bugfix] Updated cache loading (fixed #55) (diewald, margaretha)
+0.58.6 2019-05-28
+    - [bugfix] Updated cache loading (fixed #55) (diewald, margaretha)
 
 0.58.5 2019-03-18
     - [bugfix] Fix bug where duplicate keys occured in
@@ -17,7 +17,7 @@
     - [bugfix] Fixed #53 element distance query bug (margaretha)
     - [bugfix] Workaround for #54 failing offsets due to
       surrogate pairs (diewald)
-    - Added isReaderOpen method (margaretha)
+    - [feature] Added isReaderOpen method (margaretha)
 
 0.58.4 2019-02-05
     - [cleanup] Remove deprecated methods setLicense/getLicense,
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index aecbf30..738748b 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -34,7 +34,6 @@
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryWrapperFilter;
 import org.apache.lucene.search.TermQuery;
@@ -155,9 +154,8 @@
     private IndexReader reader;
 
     private IndexWriter writer;
-    private IndexWriterConfig config;
-    private IndexSearcher searcher;
     private boolean readerOpen = false;
+    private boolean writerOpen = false;
     private Directory directory;
 
     // The commit counter is only there for
@@ -217,18 +215,6 @@
      */
     public KrillIndex (Directory directory) throws IOException {
         this.directory = directory;
-
-        // Add analyzers
-        // TODO: Should probably not be here - make configurable
-        Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
-        analyzerPerField.put("textClass", new KeywordAnalyzer());
-        analyzerPerField.put("keywords", new KeywordAnalyzer());
-        analyzerPerField.put("foundries", new KeywordAnalyzer());
-        PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
-                new TextAnalyzer(), analyzerPerField);
-
-        // Create configuration with base analyzer
-        this.config = new IndexWriterConfig(analyzer);
     };
 
 
@@ -279,37 +265,37 @@
      */
     public IndexWriter writer () throws IOException {
         // Open writer if not already opened
-        if (this.writer == null)
-            this.writer = new IndexWriter(this.directory, this.config);
+        if (!writerOpen)
+            this.openWriter();
+        if (!writerOpen)
+            return null;
         return this.writer;
     };
-
-
-    /**
-     * The Lucene {@link IndexSearcher} object.
-     * 
-     * Will be created, in case it doesn't exist yet.
-     * 
-     * @return The {@link IndexSearcher} object.
-     */
-    public IndexSearcher searcher () {
-        if (this.searcher == null)
-            this.searcher = new IndexSearcher(this.reader());
-        return this.searcher;
-    };
-
+    
 
     // Open index reader
-    private void openReader () {
+    private void openWriter () {
+        if (writerOpen) {
+            return;
+        };
+
         try {
-            // open reader
-            this.reader = DirectoryReader.open(this.directory);
-            readerOpen = true;
-            if (this.searcher != null)
-                this.searcher = new IndexSearcher(reader);
+
+            // Add analyzers
+            // This is just for legacy reasons
+            Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
+            analyzerPerField.put("textClass", new KeywordAnalyzer());
+            analyzerPerField.put("keywords", new KeywordAnalyzer());
+            analyzerPerField.put("foundries", new KeywordAnalyzer());
+            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
+                new TextAnalyzer(), analyzerPerField);
+
+            // Create configuration with base analyzer
+            this.writer = new IndexWriter(this.directory, new IndexWriterConfig(analyzer));
+            writerOpen = true;
         }
 
-        // Failed to open reader
+        // Failed to open writer
         catch (IOException e) {
             // e.printStackTrace();
             log.warn(e.getLocalizedMessage());
@@ -317,19 +303,47 @@
     };
 
 
+    // Open index reader
+    private void openReader () {
+        if (readerOpen) {
+            // this.reader = DirectoryReader.openIfChanged(this.reader)
+            return;
+        };
+
+        try {
+            // open reader
+            this.reader = DirectoryReader.open(this.directory);
+            readerOpen = true;
+        }
+
+        // Failed to open reader
+        catch (IOException e) {
+            // This is in tests most of the time
+            // no problem, because the message just says
+            // "No segments found", because the reader
+            // is empty initially.
+            log.warn(e.getLocalizedMessage());
+        };
+    };
+
+
     // Close index reader
     public void closeReader () throws IOException {
-        if (readerOpen) {
+        if (readerOpen || this.reader != null) {
             this.reader.close();
+            this.reader = null;
             readerOpen = false;
         };
     };
 
 
     // Close index writer
-    private void closeWriter () throws IOException {
-        if (this.writer != null)
+    public void closeWriter () throws IOException {
+        if (writerOpen || this.writer != null) {
             this.writer.close();
+            this.writer = null;
+            writerOpen = false;
+        };
     };
 
 
@@ -341,8 +355,8 @@
      * @throws IOException
      */
     public void close () throws IOException {
-        this.closeReader();
         this.closeWriter();
+        this.closeReader();
     };
 
 
@@ -472,6 +486,9 @@
                         };
                         this.delDocs("textSigle", textSigle);
                     };
+                }
+                else {
+                    log.warn("Reader is null");
                 };
             }
 
@@ -683,12 +700,17 @@
         try {
             if (gzip) {
 
+                GZIPInputStream gzipFile = new GZIPInputStream(json);
+
                 // Create json field document
                 FieldDocument fd = this.mapper.readValue(
-                        new GZIPInputStream(json), FieldDocument.class);
+                        gzipFile, FieldDocument.class);
+                gzipFile.close();
                 return fd;
             };
-            return this.mapper.readValue(json, FieldDocument.class);
+            FieldDocument field = this.mapper.readValue(json, FieldDocument.class);
+            json.close();
+            return field;
         }
 
         // Fail to add json object
diff --git a/src/main/java/de/ids_mannheim/korap/index/Indexer.java b/src/main/java/de/ids_mannheim/korap/index/Indexer.java
index 3759329..d685b63 100644
--- a/src/main/java/de/ids_mannheim/korap/index/Indexer.java
+++ b/src/main/java/de/ids_mannheim/korap/index/Indexer.java
@@ -107,7 +107,7 @@
 
                 try {
                     if (addInsteadOfUpsert) {
-                        log.info("Add " + file + " to the index. ");
+                        log.info("{} Add {} to the index. ", this.count, file);
                         if (this.index.addDoc(new FileInputStream(file),
                                               true) == null) {
                             log.warn("fail.");
@@ -115,7 +115,7 @@
                         }
                     }
                     else {
-                        log.info("Add or update " + file + " to the index. ");
+                        log.info("{} Add or update {} to the index. ", this.count, file);
                         if (this.index.upsertDoc(new FileInputStream(file),
                                                  true) == null) {
                             log.warn("fail.");
@@ -128,8 +128,12 @@
                     }
 
                     // Commit in case the commit count is reached
-                    if ((this.count % this.commitCount) == 0)
+                    if ((this.count % this.commitCount) == 0) {
+
+                        // This will be done in addition to the
+                        // autocommit initiated by KrillIndex
                         this.commit();
+                    }
                 }
                 catch (FileNotFoundException e) {
                     log.error("File " + file + " is not found!");
@@ -156,9 +160,10 @@
         }
     }
 
-    private void closeIndex() throws IOException{
+    private void closeIndex() throws IOException {
         this.commit();
-        index.close();
+        this.index.closeReader();
+        this.index.closeWriter();
     }
 
     /**
@@ -170,7 +175,7 @@
      *            and a list of directories
      * @throws IOException
      */
-    public static void main (String[] argv) throws IOException {
+    public static void main (String[] argv) {
         
         Options options = new Options();
         options.addOption(Option.builder("c").longOpt("config")
@@ -190,7 +195,6 @@
         options.addOption(Option.builder("a").longOpt("addInsteadofUpsert")
                 .desc("Always add files to the index, never update")
                 .build());
-
         
         CommandLineParser parser = new DefaultParser();
 
@@ -212,6 +216,7 @@
             if (cmd.hasOption("a")) {
                 addInsteadOfUpsert = true;
             };
+
         }
         catch (MissingOptionException e) {
             HelpFormatter formatter = new HelpFormatter();
@@ -229,30 +234,35 @@
         // Load properties
         Properties prop = KrillProperties.loadProperties(propFile);
 
-        // Get indexer object
-        Indexer indexer = new Indexer(prop);
+        try {
+            // Get indexer object
+            Indexer indexer = new Indexer(prop);
 
-        // Iterate over list of directories
-        for (String arg : inputDirectories) {
-            log.info("Indexing files in " + arg);
-            File f = new File(arg);
-            if (f.isDirectory())
-                indexer.parse(f);
+            // Iterate over list of directories
+            for (String arg : inputDirectories) {
+                log.info("Indexing files in " + arg);
+                File f = new File(arg);
+                if (f.isDirectory())
+                    indexer.parse(f);
+            }
+            indexer.closeIndex();
+
+            // Final commit
+            log.info("Finished indexing.");
+            // Finish indexing
+            String message = "Added ";
+            if (!addInsteadOfUpsert)
+                message += "or updated ";
+            message += indexer.count + " file";
+            if (indexer.count > 1) {
+                message += "s";
+            }
+            System.out.println(message + ".");
         }
-        
-        indexer.closeIndex();
-        
-        // Final commit
-        log.info("Finished indexing.");
-        // Finish indexing
-        String message = "Added ";
-        if (!addInsteadOfUpsert)
-            message += "or updated ";
-        message += indexer.count + " file";
-        if (indexer.count > 1) {
-            message += "s";
+
+        catch (IOException e) {
+            log.error("Unexpected error: " + e);
+            e.printStackTrace();
         }
-        System.out.print(message + ".");
-        
     }
 }
diff --git a/src/test/java/de/ids_mannheim/korap/TestIndexer.java b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
index dc78958..c575f12 100644
--- a/src/test/java/de/ids_mannheim/korap/TestIndexer.java
+++ b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
@@ -28,22 +28,22 @@
     @Test

     public void testArguments () throws IOException {

         Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",

-                "-i", "src/test/resources/bzk" });

-        assertEquals("Added or updated 1 file.", outputStream.toString());

+                                    "-i", "src/test/resources/bzk"});

+        assertEquals("Added or updated 1 file.\n", outputStream.toString());

     }

 

     @Test

     public void testOutputArgument () throws IOException {

         Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",

-                "-i", "src/test/resources/bzk", "-o", "test-output"});

-        assertEquals("Added or updated 1 file.", outputStream.toString());

+                                    "-i", "src/test/resources/bzk", "-o", "test-output"});

+        assertEquals("Added or updated 1 file.\n", outputStream.toString());

     }

 

     @Test

     public void testMultipleInputFiles () throws IOException {

         Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",

-                "-i", "src/test/resources/wiki" });

-        assertEquals("Added or updated 18 files.", outputStream.toString());

+                                    "-i", "src/test/resources/wiki"});

+        assertEquals("Added or updated 18 files.\n", outputStream.toString());

     }

 

 

@@ -52,19 +52,19 @@
         Indexer.main(new String[] {

                 "-c", "src/test/resources/krill.properties",

                 "-i", "src/test/resources/bzk",

-                "-a" });

+                "-a"});

         logger.info(outputStream.toString());

-        assertEquals(outputStream.toString(), "Added 1 file.");

+        assertEquals(outputStream.toString(), "Added 1 file.\n");

     }

 

     

     @Test

     public void testMultipleInputDirectories () throws IOException {

         Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",

-                "-i",

-                "src/test/resources/bzk;src/test/resources/goe;src/test/resources/sgbr",

-                "-o", "test-index" });

-        assertEquals("Added or updated 5 files.", outputStream.toString());

+                                    "-i",

+                                    "src/test/resources/bzk;src/test/resources/goe;src/test/resources/sgbr",

+                                    "-o", "test-index"});

+        assertEquals("Added or updated 5 files.\n", outputStream.toString());

     }

 

     @Test

@@ -78,7 +78,7 @@
     @Test

     public void testMissingConfig () throws IOException {

         Indexer.main(new String[] { "-i", "src/test/resources/bzk",

-                "-o test-index" });

+                                    "-o test-index"});

         logger.info(outputStream.toString());

         assertEquals(true, outputStream.toString().startsWith(info));

     }

@@ -86,7 +86,7 @@
     @Test

     public void testMissingInput () throws IOException {

         Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",

-                "-o", "test-index" });

+                                    "-o", "test-index"});

         logger.info(outputStream.toString());

         assertEquals(true, outputStream.toString().startsWith(info));

     }

diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
index 923fec1..282ee80 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
@@ -669,6 +669,18 @@
         ki.upsertDoc(getClass().getResourceAsStream("/wiki/WPD17-H81-63495.json.gz"), true);
         ki.commit();
         assertEquals(ki.numberOf("documents"), 3);
+
+        ki.close();
+
+        fd = new FieldDocument();
+        fd.addString("textSigle", "AAA/DDD/005");
+        fd.addString("content", "Example4");
+        
+        ki.upsertDoc(fd);
+        ki.commit();
+
+        assertEquals(ki.numberOf("documents"), 4);
+
     };