Add fingerprint method to KrillIndex (closes #62)
Change-Id: Ic9ded4e07587edfe5edde931b928c40673af0e83
diff --git a/Changes b/Changes
index c851d6d..59f093c 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.59.2 2020-06-02
+ - [feature] Add fingerprint method to index (diewald)
+
0.59.1 2020-04-08
- [bugfix] Fix bug in classed group queries (diewald)
- [bugfix] Fix bug in segments with negated components (diewald)
diff --git a/pom.xml b/pom.xml
index 064f57f..2f61a5a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
<groupId>de.ids_mannheim.korap</groupId>
<artifactId>Krill</artifactId>
- <version>0.59.1</version>
+ <version>0.59.2</version>
<packaging>jar</packaging>
<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 49c4d7d..60805bd 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -10,6 +10,9 @@
import java.time.LocalDate;
+import java.security.NoSuchAlgorithmException;
+import java.security.MessageDigest;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.document.Document;
@@ -148,6 +151,8 @@
private String version = "Unknown";
private String name = "Unknown";
+ private String indexRevision;
+
// Temp:
private IndexReader reader;
@@ -554,6 +559,7 @@
this.commit();
commitCounter = 0;
};
+ this.indexRevision = null;
}
// Failed to add document
@@ -582,10 +588,11 @@
commitCounter = 0;
};
+ this.indexRevision = null;
return true;
}
- // Failed to add document
+ // Failed to delete document
catch (IOException e) {
log.error("Unable to delete documents");
};
@@ -1687,12 +1694,51 @@
};
-
public void getValues (String field) {
-
+
};
+    /**
+     * Return a fingerprint of the current state of the index.
+     * <p>
+     * The fingerprint is the Base64 encoded MD5 digest of the string
+     * representation of the reader's combined core and deletes key.
+     * Once computed, the value is cached in {@code indexRevision} and
+     * returned as is until that field is reset to {@code null}.
+     *
+     * @return the cached or newly computed fingerprint; the literal
+     *         string {@code "null"} if no reader is available; or the
+     *         exception message if MD5 is not supported by the platform.
+     */
+    public String getFingerprint () {
+
+        // indexRevision is cached
+        if (this.indexRevision != null) {
+            return this.indexRevision;
+        };
+
+        // Fetch the reader only once, so the null check and the
+        // key lookup are guaranteed to refer to the same instance
+        final IndexReader currentReader = this.reader();
+
+        // Reader is empty
+        if (currentReader == null) {
+            return "null";
+        };
+
+        MessageDigest md;
+        try {
+            // MD5 used for fingerprinting (no security implications here)
+            md = MessageDigest.getInstance("MD5");
+        }
+        catch (NoSuchAlgorithmException e) {
+            // Kept for backward compatibility: the message is returned
+            // in place of a fingerprint and is not cached
+            log.error(e.getMessage());
+            return e.getMessage();
+        };
+
+        String key = currentReader.getCombinedCoreAndDeletesKey().toString();
+
+        // Use an explicit charset, so the fingerprint does not
+        // depend on the platform default encoding
+        byte[] digest = md.digest(
+            key.getBytes(java.nio.charset.StandardCharsets.UTF_8)
+        );
+
+        // Turn bytes into Base64 string
+        this.indexRevision = Base64.getEncoder().encodeToString(digest);
+
+        return this.indexRevision;
+    };
+
+
// Collect matches
public MatchCollector collect (Krill ks, MatchCollector mc) {
if (DEBUG)
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestIndexRevision.java b/src/test/java/de/ids_mannheim/korap/index/TestIndexRevision.java
new file mode 100644
index 0000000..2128d4f
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/index/TestIndexRevision.java
@@ -0,0 +1,103 @@
+package de.ids_mannheim.korap.index;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertNotEquals;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.junit.Test;
+import org.junit.Ignore;
+
+import de.ids_mannheim.korap.Krill;
+import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.KrillMeta;
+import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.util.StatusCodes;
+
+import java.nio.file.Paths;
+import org.apache.lucene.store.MMapDirectory;
+
+/** Tests for {@link KrillIndex#getFingerprint()}. */
+public class TestIndexRevision {
+
+    @Test
+    public void testIndexRevisionAdd () throws IOException {
+        KrillIndex ki = new KrillIndex();
+        // A fresh index is expected to report the placeholder "null"
+        assertEquals("null", ki.getFingerprint());
+
+        ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
+        ki.commit();
+
+        String x1 = ki.getFingerprint();
+        assertEquals("ibtSULzKIMrfGAtES3GXRA==", x1);
+
+        ki.addDoc(getClass().getResourceAsStream("/wiki/00002.json.gz"), true);
+        ki.addDoc(getClass().getResourceAsStream("/wiki/00003.json.gz"), true);
+        ki.addDoc(getClass().getResourceAsStream("/wiki/00004.json.gz"), true);
+        ki.commit();
+
+        String x2 = ki.getFingerprint();
+        assertEquals("0UIQZpZVfiGDD2leAq6YQA==", x2);
+
+        ki.addDoc(getClass().getResourceAsStream("/wiki/00006.json.gz"), true);
+        ki.commit();
+
+        String x3 = ki.getFingerprint();
+        assertEquals("fS3GqnKynhPQ5wFyC9/XWw==", x3);
+
+        // Check if the same changes will have the same effect
+        KrillIndex ki2 = new KrillIndex();
+
+        assertEquals("null", ki2.getFingerprint());
+
+        ki2.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
+        ki2.commit();
+
+        assertEquals(x1, ki2.getFingerprint());
+
+        ki2.addDoc(getClass().getResourceAsStream("/wiki/00002.json.gz"), true);
+        ki2.addDoc(getClass().getResourceAsStream("/wiki/00003.json.gz"), true);
+        ki2.addDoc(getClass().getResourceAsStream("/wiki/00004.json.gz"), true);
+        ki2.commit();
+
+        assertEquals(x2, ki2.getFingerprint());
+
+        ki2.addDoc(getClass().getResourceAsStream("/wiki/00006.json.gz"), true);
+        ki2.commit();
+
+        assertEquals(x3, ki2.getFingerprint());
+    };
+
+    @Test
+    public void testIndexRevisionDel () throws IOException {
+        KrillIndex ki = new KrillIndex();
+
+        assertEquals("null", ki.getFingerprint());
+
+        ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
+        ki.commit();
+
+        String x1 = ki.getFingerprint();
+        assertEquals("ibtSULzKIMrfGAtES3GXRA==", x1);
+        // Deleting documents must change the fingerprint
+        assertTrue(ki.delDocs("title", "A"));
+        ki.commit();
+
+        String x2 = ki.getFingerprint();
+        assertNotEquals(x1, x2);
+    };
+
+    @Test
+    @Ignore
+    public void testIndexRevisionSample () throws IOException {
+        KrillIndex ki = new KrillIndex(new MMapDirectory(
+            Paths.get(getClass().getResource("/sample-index").getFile())));
+
+        assertEquals("Wes8Bd4h1OypPqbWF5njeQ==", ki.getFingerprint());
+    };
+};