Added upsert method and fixed #43
Change-Id: I9b27c661a23e3d411b83e310fb4e772d06a0372c
diff --git a/Changes b/Changes
index 77c439f..f23dbba 100644
--- a/Changes
+++ b/Changes
@@ -4,6 +4,9 @@
- [bugfix] Fix bug where fields already set where lifted
again, but ignored in the fields order list (diewald)
- [feature] Added LocalDate handling to KrillDate (diewald)
+ - [feature] Added upsert method to index and fixed #43
+ by adding indexCreationDate and indexLastModified fields
+ (diewald)
0.58.4 2019-02-05
- [cleanup] Remove deprecated methods setLicense/getLicense,
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 7ebb4f7..7520f96 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -8,6 +8,8 @@
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
+import java.time.LocalDate;
+
import org.apache.lucene.analysis.Analyzer;
/*
import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -66,6 +68,7 @@
import de.ids_mannheim.korap.response.SearchContext;
import de.ids_mannheim.korap.response.Text;
import de.ids_mannheim.korap.util.KrillProperties;
+import de.ids_mannheim.korap.util.KrillDate;
import de.ids_mannheim.korap.util.QueryException;
/**
@@ -260,6 +263,8 @@
// Todo: Maybe use DirectoryReader.openIfChanged(DirectoryReader)
if (!readerOpen)
this.openReader();
+ if (!readerOpen)
+ return null;
return this.reader;
};
@@ -393,6 +398,96 @@
/**
+ * Update a document in the index as a {@link FieldDocument}
+ * if it already exists (based on the textSigle), otherwise
+ * insert it to the index.
+ *
+ * @param doc
+ * The {@link FieldDocument} to add to the index.
+ * @return The {@link FieldDocument}, which means, the same
+ * object, that was passed to the method.
+ */
+ public FieldDocument upsertDoc (FieldDocument doc) {
+ if (doc == null)
+ return doc;
+
+ // Look up an already indexed document by its textSigle
+ String textSigle = doc.getTextSigle();
+ KrillDate current = new KrillDate(LocalDate.now());
+ KrillDate indexCreationDate = current;
+ KrillDate indexLastModified = current;
+
+ // Delete the document if it exists, keeping its indexCreationDate
+ if (textSigle != null) {
+
+ // First find the document
+ Filter filter = (Filter) new QueryWrapperFilter(
+ new TermQuery(new Term("textSigle", textSigle))
+ );
+
+ try {
+ // Iterate over all atomic indices and find the matching document
+
+ if (this.reader() != null) {
+
+ for (LeafReaderContext atomic : this.reader().leaves()) {
+
+ // Retrieve the single document of interest
+ DocIdSet filterSet = filter.getDocIdSet(
+ atomic,
+ atomic.reader().getLiveDocs());
+
+ DocIdSetIterator filterIterator = filterSet.iterator();
+
+ if (filterIterator == null)
+ continue;
+
+ // Go to the matching doc - and remember its ID
+ int localDocID = filterIterator.nextDoc();
+
+ if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
+ continue;
+
+ // We've found the correct document! Hurray!
+ if (DEBUG)
+ log.trace("We've found a matching document");
+
+ // TODO: Probably use
+ // document(int docID, StoredFieldVisitor visitor)
+ Document storedDoc = atomic.reader().document(localDocID);
+
+ // Document is loadable
+ if (storedDoc != null) {
+ IndexableField indexCreationField =
+ storedDoc.getField("indexCreationDate");
+
+ if (indexCreationField == null) {
+ indexCreationDate = current;
+ }
+ else {
+ indexCreationDate = new KrillDate(
+ indexCreationField.numericValue().toString()
+ );
+ };
+ };
+ this.delDocs("textSigle", textSigle);
+ };
+ };
+ }
+
+ catch (IOException e) {
+ log.error("Unable to upsert document");
+ };
+ };
+
+ doc.addDate("indexCreationDate", indexCreationDate.toDisplay());
+ doc.addDate("indexLastModified", indexLastModified.toDisplay());
+
+ return this.addDoc(doc);
+ };
+
+
+ /**
* Add a document to the index as a {@link FieldDocument}.
*
* @param doc
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
index af0dbdc..802a319 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
@@ -26,6 +26,7 @@
import de.ids_mannheim.korap.query.QueryBuilder;
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
import de.ids_mannheim.korap.response.Match;
+import de.ids_mannheim.korap.response.MetaFields;
import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.util.QueryException;
@@ -614,6 +615,59 @@
assertTrue(res.at("/document/fields/3").isMissingNode());
};
+
+ @Test
+ public void indexUpsert () throws Exception {
+ KrillIndex ki = new KrillIndex();
+
+ // Add new document
+ FieldDocument fd = new FieldDocument();
+ fd.addString("textSigle", "AAA/BBB/001");
+ fd.addString("content", "Example1");
+ ki.upsertDoc(fd);
+ ki.commit();
+
+ MetaFields mfs = ki.getFields("AAA/BBB/001");
+ assertEquals(mfs.getFieldValue("indexCreationDate").length(), 10);
+ assertTrue(mfs.getFieldValue("indexCreationDate").matches("\\d{4}-\\d{2}-\\d{2}"));
+ assertEquals(
+ mfs.getFieldValue("indexCreationDate"),
+ mfs.getFieldValue("indexLastModified")
+ );
+ assertEquals(mfs.getFieldValue("content"), "Example1");
+
+
+ // Add new document
+ fd = new FieldDocument();
+ fd.addString("textSigle", "AAA/BBB/002");
+ fd.addString("content", "Example2");
+
+ ki.upsertDoc(fd);
+ ki.commit();
+
+ mfs = ki.getFields("AAA/BBB/002");
+ assertEquals(mfs.getFieldValue("indexCreationDate").length(), 10);
+
+ assertTrue(mfs.getFieldValue("indexCreationDate").matches("\\d{4}-\\d{2}-\\d{2}"));
+ assertEquals(mfs.getFieldValue("content"), "Example2");
+
+ fd = new FieldDocument();
+ fd.addString("textSigle", "AAA/BBB/001");
+ fd.addString("content", "Example3");
+
+ ki.upsertDoc(fd);
+ ki.commit();
+
+ mfs = ki.getFields("AAA/BBB/001");
+ assertEquals(mfs.getFieldValue("indexCreationDate").length(), 10);
+ assertTrue(mfs.getFieldValue("indexCreationDate").matches("\\d{4}-\\d{2}-\\d{2}"));
+ assertEquals(mfs.getFieldValue("content"), "Example3");
+
+ assertEquals(ki.numberOf("documents"), 2);
+
+ };
+
+
private static String createDocString1 () {
return new String(
"{"