Add fingerprint method to KrillIndex (closes #62)

Change-Id: Ic9ded4e07587edfe5edde931b928c40673af0e83
diff --git a/Changes b/Changes
index c851d6d..59f093c 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.59.2 2020-06-02
+    - [feature] Add fingerprint method to index (diewald)
+
 0.59.1 2020-04-08
     - [bugfix] Fix bug in classed group queries (diewald)
     - [bugfix] Fix bug in segments with negated components (diewald)
diff --git a/pom.xml b/pom.xml
index 064f57f..2f61a5a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
 
 	<groupId>de.ids_mannheim.korap</groupId>
 	<artifactId>Krill</artifactId>
-	<version>0.59.1</version>
+	<version>0.59.2</version>
 	<packaging>jar</packaging>
 
 	<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 49c4d7d..60805bd 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -10,6 +10,9 @@
 
 import java.time.LocalDate;
 
+import java.security.NoSuchAlgorithmException;
+import java.security.MessageDigest;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
 import org.apache.lucene.document.Document;
@@ -148,6 +151,8 @@
     private String version = "Unknown";
     private String name = "Unknown";
 
+    private String indexRevision;
+    
     // Temp:
     private IndexReader reader;
 
@@ -554,6 +559,7 @@
                 this.commit();
                 commitCounter = 0;
             };
+            this.indexRevision = null;
         }
 
         // Failed to add document
@@ -582,10 +588,11 @@
                 commitCounter = 0;
             };
 
+            this.indexRevision = null;
             return true;
         }
 
-        // Failed to add document
+        // Failed to delete document
         catch (IOException e) {
             log.error("Unable to delete documents");
         };
@@ -1687,12 +1694,51 @@
     };
 
 
-	
     public void getValues (String field) {
-
+            
     };
 
 
+    /**
+     * Return a fingerprint of the current state of the index.
+     * Contains information about the number of segments, docs per segment
+     * and deletions per segment. The value is cached in indexRevision.
+     * NOTE(review): the cache is reset when docs are added or deleted, not
+     * on commit - confirm a pre-commit call can't cache a stale value.
+     *
+     * @return Base64 encoded MD5 fingerprint, "null" without a reader,
+     *         or the exception message if MD5 is unavailable.
+     */
+    public String getFingerprint () {
+        // indexRevision is cached
+        if (this.indexRevision != null) {
+            return this.indexRevision;
+        };
+
+        // Fetch the reader only once; without a reader there is no state
+        final IndexReader currentReader = this.reader();
+        if (currentReader == null) {
+            return "null";
+        };
+
+        MessageDigest md;
+        try {
+            // MD5 used for fingerprinting (no security implications here)
+            md = MessageDigest.getInstance("MD5");
+        }
+        catch (NoSuchAlgorithmException e) {
+            log.error(e.getMessage());
+            return e.getMessage();
+        };
+
+        // Explicit charset keeps the digest independent of the platform
+        String hash = currentReader.getCombinedCoreAndDeletesKey().toString();
+        md.update(hash.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+        this.indexRevision = Base64.getEncoder().encodeToString(md.digest());
+        return this.indexRevision;
+    };
+    
+
     // Collect matches
     public MatchCollector collect (Krill ks, MatchCollector mc) {
         if (DEBUG)
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestIndexRevision.java b/src/test/java/de/ids_mannheim/korap/index/TestIndexRevision.java
new file mode 100644
index 0000000..2128d4f
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/index/TestIndexRevision.java
@@ -0,0 +1,103 @@
+package de.ids_mannheim.korap.index;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertNotEquals;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.junit.Test;
+import org.junit.Ignore;
+
+import de.ids_mannheim.korap.Krill;
+import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.KrillMeta;
+import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.util.StatusCodes;
+
+import java.nio.file.Paths;
+import org.apache.lucene.store.MMapDirectory;
+
+/**
+ * Tests that KrillIndex.getFingerprint() changes with the committed
+ * index state and is reproducible for identical sequences of changes.
+ */
+public class TestIndexRevision {
+
+    // Adding documents yields a new, reproducible fingerprint per commit
+    @Test
+    public void testIndexRevisionAdd () throws IOException {
+        KrillIndex ki = new KrillIndex();
+        assertEquals("null", ki.getFingerprint());
+
+        ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
+        ki.commit();
+
+        String x1 = ki.getFingerprint();
+        assertEquals("ibtSULzKIMrfGAtES3GXRA==", x1);
+
+        ki.addDoc(getClass().getResourceAsStream("/wiki/00002.json.gz"), true);
+        ki.addDoc(getClass().getResourceAsStream("/wiki/00003.json.gz"), true);
+        ki.addDoc(getClass().getResourceAsStream("/wiki/00004.json.gz"), true);
+        ki.commit();
+
+        String x2 = ki.getFingerprint();
+        assertEquals("0UIQZpZVfiGDD2leAq6YQA==", x2);
+
+        ki.addDoc(getClass().getResourceAsStream("/wiki/00006.json.gz"), true);
+        ki.commit();
+
+        String x3 = ki.getFingerprint();
+        assertEquals("fS3GqnKynhPQ5wFyC9/XWw==", x3);
+
+        // Check if the same changes will have the same effect
+        KrillIndex ki2 = new KrillIndex();
+        assertEquals("null", ki2.getFingerprint());
+
+        ki2.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
+        ki2.commit();
+        assertEquals(x1, ki2.getFingerprint());
+
+        ki2.addDoc(getClass().getResourceAsStream("/wiki/00002.json.gz"), true);
+        ki2.addDoc(getClass().getResourceAsStream("/wiki/00003.json.gz"), true);
+        ki2.addDoc(getClass().getResourceAsStream("/wiki/00004.json.gz"), true);
+        ki2.commit();
+        assertEquals(x2, ki2.getFingerprint());
+
+        ki2.addDoc(getClass().getResourceAsStream("/wiki/00006.json.gz"), true);
+        ki2.commit();
+        assertEquals(x3, ki2.getFingerprint());
+    };
+
+    // Deleting documents has to change the fingerprint
+    @Test
+    public void testIndexRevisionDel () throws IOException {
+        KrillIndex ki = new KrillIndex();
+        assertEquals("null", ki.getFingerprint());
+
+        ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
+        ki.commit();
+
+        String x1 = ki.getFingerprint();
+        assertEquals("ibtSULzKIMrfGAtES3GXRA==", x1);
+
+        assertTrue(ki.delDocs("title", "A"));
+        ki.commit();
+
+        String x2 = ki.getFingerprint();
+        assertNotEquals(x1, x2);
+    };
+
+    // Disabled via @Ignore; expects the fingerprint of /sample-index
+    @Test
+    @Ignore
+    public void testIndexRevisionSample () throws IOException {
+        KrillIndex ki = new KrillIndex(new MMapDirectory(
+                Paths.get(getClass().getResource("/sample-index").getFile())));
+
+        assertEquals("Wes8Bd4h1OypPqbWF5njeQ==", ki.getFingerprint());
+    };
+};