Fixed deletion bug in virtual collections - two known bugs remain unfixed

Change-Id: Ib975976009ddfa74e9a9f3f07049bdad87a0486f
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java b/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java
index 894a747..f94b407 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java
@@ -7,11 +7,13 @@
 import de.ids_mannheim.korap.response.Notifications;
 
 import org.apache.lucene.search.*;
+import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.OpenBitSet;
 import org.apache.lucene.util.DocIdBitSet;
+import org.apache.lucene.search.BitsFilteredDocIdSet;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -58,6 +60,7 @@
         return filter.toString();
     };
 
+    /*
     public FixedBitSet bits (AtomicReaderContext atomic) throws IOException {
 
         int maxDoc = atomic.reader().maxDoc();
@@ -68,7 +71,7 @@
             return null;
 
         // Init vector
-        DocIdSet docids = filter.getDocIdSet(atomic, atomic.reader().getLiveDocs());
+        DocIdSet docids = filter.getDocIdSet(atomic, null);
         DocIdSetIterator filterIter = (docids == null) ? null : docids.iterator();
 
         if (filterIter == null) {
@@ -87,17 +90,60 @@
         };
 
         // Remove deleted docs
-        /*
-        System.err.println(atomic.reader().getClass());
-        FixedBitSet livedocs = (FixedBitSet) atomic.reader().getLiveDocs();
-        if (livedocs != null) {
-            bitset.and(livedocs);
-        };
-        */
+        return (FixedBitSet) BitsFilteredDocIdSet.wrap(
+            (DocIdSet) bitset,
+            (Bits) atomic.reader().getLiveDocs()
+                                                       ).iterator();
+    };
+    */
 
+
+    public FixedBitSet bits (AtomicReaderContext atomic) throws IOException {
+        AtomicReader r = atomic.reader();
+        FixedBitSet bitset = new FixedBitSet(r.maxDoc());
+        DocIdSet docids = this.getDocIdSet(atomic, (Bits) r.getLiveDocs());
+        if (docids == null)
+            return null;
+        bitset.or(docids.iterator());
         return bitset;
     };
 
+
+    public DocIdSet getDocIdSet (AtomicReaderContext atomic, Bits acceptDocs) throws IOException {
+
+        int maxDoc = atomic.reader().maxDoc();
+        FixedBitSet bitset = new FixedBitSet(maxDoc);
+
+        Filter filter;
+        if (this.cb == null || (filter = this.cb.toFilter()) == null)
+            return null;
+
+        // Fetch the filter's matches without acceptDocs (null); they are applied at the end
+        DocIdSet docids = filter.getDocIdSet(atomic, null);
+        DocIdSetIterator filterIter = (docids == null) ? null : docids.iterator();
+
+        if (filterIter == null) {
+            if (!this.cb.isNegative())
+                return null;
+
+            bitset.set(0, maxDoc);
+        }
+        else {
+            // Copy every doc matched by the filter into the bit set
+            bitset.or(filterIter);
+
+            // Invert the bit set for negated collections
+            if (this.cb.isNegative())
+                bitset.flip(0, maxDoc);
+        };
+
+        // Restrict to acceptDocs (the live docs when called from bits()) to drop deleted docs
+        return (DocIdSet) BitsFilteredDocIdSet.wrap(
+            (DocIdSet) bitset,
+            acceptDocs
+        );
+    };
+
     /**
      * Search for the number of occurrences of different types,
      * e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
diff --git a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java
index 8b2ff4d..5bb9968 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java
@@ -73,7 +73,8 @@
 
             return new CollectionBuilderRange(field, begin, end);
         };
-        return new CollectionBuilderTerm(field, dateDF.toString());
+
+        return new CollectionBuilderRange(field, dateDF.floor(), dateDF.ceil());
     };
 
 
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
index 5aade60..e764123 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
@@ -91,6 +91,12 @@
 
         kcn.fromBuilder(cb.andGroup(cb.term("textClass", "finanzen")).with(cb.term("textClass", "kultur")));
         assertEquals(0, kcn.docCount());
+
+        kcn.fromBuilder(cb.term("text", "Mann"));
+        assertEquals(3, kcn.docCount());
+
+        kcn.fromBuilder(cb.term("text", "Frau"));
+        assertEquals(1, kcn.docCount());
     };
 
     @Test
@@ -127,7 +133,7 @@
     };
 
     @Test
-    public void testIndexWithMultipleCommits () throws IOException {
+    public void testIndexWithMultipleCommitsAndDeletes () throws IOException {
         ki = new KrillIndex();
         ki.addDoc(createDoc1());
         ki.addDoc(createDoc2());
@@ -169,11 +175,112 @@
         assertEquals(1, kcn.docCount());
         kcn.fromBuilder(cb.term("author", "Michael").not());
         assertEquals(2, kcn.docCount());
+
+        // Re-add Peter's doc
+        ki.addDoc(createDoc2());
+        ki.commit();
+
+        kcn.fromBuilder(cb.term("author", "Frank"));
+        assertEquals(1, kcn.docCount());
+        kcn.fromBuilder(cb.term("author", "Peter"));
+        assertEquals(1, kcn.docCount());
+        kcn.fromBuilder(cb.term("author", "Sebastian"));
+        assertEquals(1, kcn.docCount());
+        kcn.fromBuilder(cb.term("author", "Michael").not());
+        assertEquals(3, kcn.docCount());
     };
 
-    // Todo: Test index with removes
-    // Todo: Test with dates
-    // Todo: Test with regex
+    @Test
+    public void testIndexWithDateRanges () throws IOException {
+        ki = new KrillIndex();
+        ki.addDoc(createDoc1());
+        ki.addDoc(createDoc2());
+        ki.addDoc(createDoc3());
+        ki.commit();
+        CollectionBuilderNew cb = new CollectionBuilderNew();
+        KrillCollectionNew kcn = new KrillCollectionNew(ki);
+
+        kcn.fromBuilder(cb.date("pubDate", "2005"));
+        assertEquals(3, kcn.docCount());
+        kcn.fromBuilder(cb.date("pubDate", "2005-12"));
+        assertEquals(3, kcn.docCount());
+
+        kcn.fromBuilder(cb.date("pubDate", "2005-12-10"));
+        assertEquals(1, kcn.docCount());
+        kcn.fromBuilder(cb.date("pubDate", "2005-12-16"));
+        assertEquals(1, kcn.docCount());
+        kcn.fromBuilder(cb.date("pubDate", "2005-12-07"));
+        assertEquals(1, kcn.docCount());
+
+        kcn.fromBuilder(cb.since("pubDate", "2005-12-07"));
+        assertEquals(3, kcn.docCount());
+        kcn.fromBuilder(cb.since("pubDate", "2005-12-10"));
+        assertEquals(2, kcn.docCount());
+        kcn.fromBuilder(cb.since("pubDate", "2005-12-16"));
+        assertEquals(1, kcn.docCount());
+
+        kcn.fromBuilder(cb.till("pubDate", "2005-12-16"));
+        assertEquals(3, kcn.docCount());
+        kcn.fromBuilder(cb.till("pubDate", "2005-12-10"));
+        assertEquals(2, kcn.docCount());
+        kcn.fromBuilder(cb.till("pubDate", "2005-12-07"));
+        assertEquals(1, kcn.docCount());
+
+        kcn.fromBuilder(cb.date("pubDate", "2005-12-10").not());
+        assertEquals(2, kcn.docCount());
+        kcn.fromBuilder(cb.date("pubDate", "2005-12-16").not());
+        assertEquals(2, kcn.docCount());
+        kcn.fromBuilder(cb.date("pubDate", "2005-12-07").not());
+        assertEquals(2, kcn.docCount());
+        kcn.fromBuilder(cb.date("pubDate", "2005-12-09").not());
+        assertEquals(3, kcn.docCount());
+
+
+        kcn.fromBuilder(cb.till("pubDate", "2005-12-16").not());
+        assertEquals(0, kcn.docCount());
+        kcn.fromBuilder(cb.till("pubDate", "2005-12-15").not());
+        assertEquals(1, kcn.docCount());
+        kcn.fromBuilder(cb.till("pubDate", "2005-12-10").not());
+        assertEquals(1, kcn.docCount());
+        kcn.fromBuilder(cb.till("pubDate", "2005-12-09").not());
+        assertEquals(2, kcn.docCount());
+        kcn.fromBuilder(cb.till("pubDate", "2005-12-07").not());
+        assertEquals(2, kcn.docCount());
+        kcn.fromBuilder(cb.till("pubDate", "2005-12-06").not());
+        assertEquals(3, kcn.docCount());
+    };
+
+
+    @Test
+    public void testIndexWithRegexes () throws IOException {
+        ki = new KrillIndex();
+
+        ki.addDoc(createDoc1());
+        ki.addDoc(createDoc2());
+        ki.addDoc(createDoc3());
+        ki.commit();
+
+        CollectionBuilderNew cb = new CollectionBuilderNew();
+        KrillCollectionNew kcn = new KrillCollectionNew(ki);
+
+        kcn.fromBuilder(cb.re("author", "Fran.*"));
+        assertEquals(1, kcn.docCount());
+        kcn.fromBuilder(cb.re("author", "Blin.*"));
+        assertEquals(0, kcn.docCount());
+        kcn.fromBuilder(cb.re("author", "Frank|Peter"));
+        assertEquals(2, kcn.docCount());
+
+        kcn.fromBuilder(cb.term("text", "Frau"));
+        assertEquals(1, kcn.docCount());
+
+        kcn.fromBuilder(cb.re("text", "Frau"));
+        assertEquals(1, kcn.docCount());
+
+        kcn.fromBuilder(cb.re("text", "Frau|Mann"));
+        System.err.println(kcn.toString());
+        assertEquals(3, kcn.docCount());
+    };
+
 
     private FieldDocument createDoc1 () {
         FieldDocument fd = new FieldDocument();
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionNew.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionNew.java
index 0440483..8408176 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionNew.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionNew.java
@@ -46,7 +46,7 @@
     @Test
     public void builderDateDay () throws IOException {
         CollectionBuilderNew kc = new CollectionBuilderNew();
-        assertEquals("pubDate:20051011",
+        assertEquals("pubDate:[20051011 TO 20051011]",
                      kc.date("pubDate", "2005-10-11").toString());
     };
 
@@ -56,7 +56,7 @@
         // CollectionBuilderNew.CollectionBuilderInterface kbi = ;
         assertNull(kc.date("pubDate", ""));
 
-        assertEquals("pubDate:20051580",
+        assertEquals("pubDate:[20051580 TO 20051580]",
                      kc.date("pubDate", "2005-15-80").toString());
 
         assertNull(kc.date("pubDate", "2005-15-8"));