Introduced new KrillCollection interface - but with a failing test for deleted documents

Change-Id: Ie5cd0cea3b651eb93c5b46e669cc9cd37503c8b3
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollection.java b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
index a89fdfc..1aef947 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
@@ -67,7 +67,7 @@
             .getLogger(KrillCollection.class);
 
     // This advices the java compiler to ignore all loggings
-    public static final boolean DEBUG = true;
+    public static final boolean DEBUG = false;
 
 
     /**
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java b/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java
new file mode 100644
index 0000000..894a747
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java
@@ -0,0 +1,152 @@
+package de.ids_mannheim.korap;
+
+import java.util.*;
+import java.io.IOException;
+
+import de.ids_mannheim.korap.collection.CollectionBuilderNew;
+import de.ids_mannheim.korap.response.Notifications;
+
+import org.apache.lucene.search.*;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.DocIdBitSet;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class KrillCollectionNew extends Notifications {
+    private KrillIndex index;
+    private CollectionBuilderNew.CollectionBuilderInterface cb;
+
+    // Logger
+    private final static Logger log = LoggerFactory
+            .getLogger(KrillCollection.class);
+
+    // Setting this to false advises the Java compiler to ignore all logging
+    public static final boolean DEBUG = true;
+
+
+    /**
+     * Construct a new KrillCollectionNew by passing a KrillIndex.
+     * 
+     * @param index
+     *            The {@link KrillIndex} object.
+     */
+    public KrillCollectionNew (KrillIndex index) {
+        this.index = index;
+    };
+
+    public KrillCollectionNew fromBuilder (CollectionBuilderNew.CollectionBuilderInterface cb) {
+        this.cb = cb;
+        return this;
+    };
+
+    public Filter toFilter () {
+        if (this.cb == null)
+            return null;
+
+        return this.cb.toFilter();
+    };
+
+    public String toString () {
+        Filter filter = this.toFilter();
+        if (filter == null)
+            return "";
+
+        return filter.toString();
+    };
+
+    /**
+     * Create the bit vector of all live documents of an index
+     * segment that are part of the collection.
+     *
+     * @param atomic
+     *            The {@link AtomicReaderContext} of the segment.
+     * @return The matching documents as a {@link FixedBitSet}, or
+     *         {@code null} if no filter is set or it yields no document set.
+     * @throws IOException
+     */
+    public FixedBitSet bits (AtomicReaderContext atomic) throws IOException {
+        Filter filter;
+        if (this.cb == null || (filter = this.cb.toFilter()) == null)
+            return null;
+
+        int maxDoc = atomic.reader().maxDoc();
+        Bits livedocs = atomic.reader().getLiveDocs();
+        FixedBitSet bitset = new FixedBitSet(maxDoc);
+        DocIdSet docids = filter.getDocIdSet(atomic, livedocs);
+        DocIdSetIterator filterIter = (docids == null) ? null : docids.iterator();
+        if (filterIter == null) {
+            if (!this.cb.isNegative())
+                return null;
+            bitset.set(0, maxDoc);
+        }
+        else {
+            bitset.or(filterIter);
+            if (this.cb.isNegative())
+                bitset.flip(0, maxDoc);
+        };
+
+        // Negation sets the bits of deleted docs, so clear them again
+        if (livedocs != null) {
+            for (int i = 0; i < maxDoc; i++)
+                if (!livedocs.get(i))
+                    bitset.clear(i);
+        };
+        return bitset;
+    };
+
+    /**
+     * Search for the number of occurrences of different types,
+     * e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
+     * collection.
+     * 
+     * @param field
+     *            The field containing the textual data and the
+     *            annotations as a string.
+     * @param type
+     *            The type of meta information,
+     *            e.g. <i>documents</i> or <i>sentences</i> as a
+     *            string.
+     * @return The number of the occurrences.
+     * @throws IOException
+     * @see KrillIndex#numberOf
+     */
+    public long numberOf (String field, String type) throws IOException {
+
+        // No index defined
+        if (this.index == null)
+            return (long) -1;
+
+        // This is redundant to index stuff
+        if (type.equals("documents"))
+            return this.docCount();
+        
+        return (long) 0;
+        // return this.index.numberOf(this, field, type);
+    };
+
+
+
+    public long docCount () {
+
+        // No index defined
+        if (this.index == null)
+            return (long) 0;
+
+        long docCount = 0;
+        try {
+            FixedBitSet bitset;
+            for (AtomicReaderContext atomic : this.index.reader().leaves()) {
+                if ((bitset = this.bits(atomic)) != null)
+                    docCount += bitset.cardinality();
+            };
+        }
+        catch (IOException e) {
+            log.warn(e.getLocalizedMessage());
+        };
+        return docCount;
+    };
+};
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 73a604a..bfdae31 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -419,6 +419,51 @@
 
 
     /**
+     * Delete documents of the index by passing field information.
+     * 
+     * @param field
+     *            The meta field name.
+     * @param term
+     *            The meta field term.
+     */
+    public boolean delDocs (String field, String term) {
+        // Nothing to delete without a complete field/term pair
+        if (field == null || term == null)
+            return false;
+        try {
+            this.writer().deleteDocuments(new Term(field, term));
+
+            // Commit as soon as the change counter exceeds autoCommit
+            if (++commitCounter > autoCommit) {
+                this.commit();
+                commitCounter = 0;
+            };
+            return true;
+        }
+
+        // Failed to delete documents
+        catch (IOException e) {
+            // Pass the exception on to the logger to keep the cause
+            log.error("Unable to delete documents", e);
+        };
+        return false;
+    };
+
+
+    /**
+     * Delete a document of the index by passing a UID.
+     * 
+     * @param uid
+     *            The unique identifier of the document.
+     * @return false for a null or negative UID, else the result of delDocs.
+     */
+    public boolean delDoc (Integer uid) {
+        return uid != null && uid >= 0
+            && this.delDocs("UID", uid.toString());
+    };
+
+
+    /**
      * Add a document to the index as a JSON string.
      * 
      * @param json
@@ -574,11 +619,11 @@
             };
 
             long docCount = 0;
-            int i = 1;
+            // int i = 1;
             try {
                 for (AtomicReaderContext atomic : this.reader().leaves()) {
                     docCount += collection.bits(atomic).cardinality();
-                    i++;
+                    // i++;
                 };
             }
             catch (IOException e) {
diff --git a/src/main/java/de/ids_mannheim/korap/collection/BooleanGroupFilter.java b/src/main/java/de/ids_mannheim/korap/collection/BooleanGroupFilter.java
new file mode 100644
index 0000000..7efcc60
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/collection/BooleanGroupFilter.java
@@ -0,0 +1,231 @@
+package de.ids_mannheim.korap.collection;
+
+import java.io.IOException;
+import java.util.*;
+
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.BitsFilteredDocIdSet;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.util.Bits;
+
+import de.ids_mannheim.korap.KrillCollection;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A container Filter that allows Boolean composition of Filters
+ * in groups (either or-groups or and-groups).
+ *
+ * @author Nils Diewald
+ *
+ * This filter is roughly based on org.apache.lucene.queries.BooleanFilter.
+ */
+public class BooleanGroupFilter extends Filter {
+    // Group is either an or- or an and-Group
+    private boolean isOptional;
+
+    // Logger
+    private final static Logger log = LoggerFactory.getLogger(KrillCollection.class);
+
+    // Setting this to false advises the Java compiler to ignore all logging
+    public static final boolean DEBUG = true;
+
+    // Init operands list
+    private final List<GroupFilterOperand> operands = new ArrayList<>(3);
+
+    // Operand in the filter group
+    private class GroupFilterOperand {
+        public Filter filter;
+        public boolean isNegative;
+
+        // Operand has filter and negativity information
+        public GroupFilterOperand (Filter filter, boolean negative) {
+            this.filter = filter;
+            this.isNegative = negative;
+        };
+    };
+
+    /**
+     * Create a new BooleanGroupFilter.
+     * Accepts a boolean parameter to make it an or-Group
+     * ({@code true}) or an and-Group ({@code false}).
+     */
+    public BooleanGroupFilter (boolean optional) {
+        this.isOptional = optional;
+    };
+
+
+    /**
+     * Add an operand to the list of filter operands.
+     * The operand is a positive filter that won't be flipped.
+     */
+    public final void with (Filter filter) {
+        this.operands.add(new GroupFilterOperand(filter, false));
+    };
+
+
+    /**
+     * Add an operand to the list of filter operands.
+     * The operand is a negative filter that will be flipped.
+     */
+    public final void without (Filter filter) {
+        this.operands.add(new GroupFilterOperand(filter, true));
+    };
+
+
+    @Override
+    public boolean equals (Object obj) {
+        if (this == obj)
+            return true;
+        
+        if ((obj == null) || (obj.getClass() != this.getClass()))
+            return false;
+
+        final BooleanGroupFilter other = (BooleanGroupFilter) obj;
+        return operands.equals(other.operands);
+    };
+
+
+    @Override
+    public int hashCode() {
+        return 657153719 ^ operands.hashCode();
+    };
+
+    
+    @Override
+    public String toString () {
+        StringBuilder buffer = new StringBuilder(
+            this.isOptional ? "OrGroup(" : "AndGroup("
+        );
+        boolean first = true;
+        for (final GroupFilterOperand operand : this.operands) {
+            if (first)
+                first = false;
+            else
+                buffer.append(" ");
+
+            if (operand.isNegative)
+                buffer.append('-');
+
+            buffer.append(operand.filter.toString());
+        };
+        return buffer.append(')').toString();
+    };
+
+  
+    /**
+     * Combine the document sets of all operands for an index segment.
+     * Operands are requested without acceptDocs, which are applied
+     * once to the combined result.
+     *
+     * @return The combined documents, or null if an and-group operand matches no document.
+     */
+    @Override
+    public DocIdSet getDocIdSet (AtomicReaderContext context, Bits acceptDocs) throws IOException {
+        final AtomicReader reader = context.reader();
+        int maxDoc = reader.maxDoc();
+        FixedBitSet bitset     = new FixedBitSet(maxDoc);
+        FixedBitSet combinator = new FixedBitSet(maxDoc);
+        boolean init = true;
+
+        if (DEBUG)
+            log.debug("Start trying to filter on bitset of length {}", maxDoc);
+
+        for (final GroupFilterOperand operand : this.operands) {
+            final DocIdSet docids = operand.filter.getDocIdSet(context, null);
+            final DocIdSetIterator filterIter = (docids == null) ? null : docids.iterator();
+
+            if (DEBUG)
+                log.debug("> Filter to bitset of {} ({} negative)",
+                          operand.filter.toString(),
+                          operand.isNegative);
+
+            // Filter resulted in no docs
+            if (filterIter == null) {
+                if (DEBUG) log.debug("- Filter is null");
+
+                // The negation of no docs matches all docs
+                if (operand.isNegative) {
+
+                    // In an or-group this means, everything is allowed
+                    if (this.isOptional) {
+                        if (DEBUG) log.debug("- Filter to allow all documents");
+                        bitset.set(0, maxDoc);
+                        return BitsFilteredDocIdSet.wrap(bitset, acceptDocs);
+                    };
+
+                    // In an and-group this restricts nothing
+                    if (DEBUG) log.debug("- Filter is ignorable");
+                    if (init) {
+                        bitset.set(0, maxDoc);
+                        init = false;
+                    };
+                    continue;
+                }
+
+                // The result is unimportant
+                else if (this.isOptional) {
+                    if (DEBUG) log.debug("- Filter is ignorable");
+                    continue;
+                };
+
+                // There is no possible match
+                if (DEBUG) log.debug("- Filter to allow no documents (2)");
+                return null;
+            }
+
+            // Initialize bitset
+            else if (init) {
+                bitset.or(filterIter);
+                if (DEBUG) log.debug("- Filter is inial with card {}", bitset.cardinality());
+                // Flip the matching documents
+                if (operand.isNegative) {
+                    bitset.flip(0, maxDoc);
+                    if (DEBUG) log.debug("- Filter is negative - so flipped to card {} (1)", bitset.cardinality());
+                };
+
+                init = false;
+            }
+            else {
+                if (DEBUG) log.debug("- Filter is fine and operating");
+
+                // Operator is negative and needs to be flipped
+                if (operand.isNegative) {
+                    if (this.isOptional) {
+                        if (DEBUG) log.debug("- Filter is negative optional");
+                        // Negative or ... may be slow
+                        combinator.or(filterIter);
+                        combinator.flip(0, maxDoc);
+                        if (DEBUG) log.debug("- Filter is negative - so flipped to card {} (2)", combinator.cardinality());
+                        bitset.or(combinator);
+                        combinator.clear(0, maxDoc);
+                    }
+
+                    // Negative and
+                    else {
+                        if (DEBUG) log.debug("- Filter is negative not optional");
+                        bitset.andNot(filterIter);
+                        if (DEBUG) log.debug("- Filter is negative - so andNotted");
+                    }
+                }
+                else if (this.isOptional) {
+                    if (DEBUG) log.debug("- Filter is simply optional");
+                    bitset.or(filterIter);
+                }
+                else {
+                    if (DEBUG) log.debug("- Filter is simply not optional");
+                    bitset.and(filterIter);
+                    // TODO: Check with nextSetBit() if the filter is not applicable
+                };
+
+                if (DEBUG) log.debug("- Subresult has card {} ", bitset.cardinality());
+            };
+        };
+        return BitsFilteredDocIdSet.wrap(bitset, acceptDocs);
+    };
+};
diff --git a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java
index 06bafd4..8b2ff4d 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java
@@ -2,8 +2,10 @@
 
 import java.util.*;
 import java.io.IOException;
+// TEMPORARY:
 import org.apache.lucene.queries.BooleanFilter;
 import org.apache.lucene.search.BooleanClause;
+
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.TermsFilter;
 import org.apache.lucene.search.*;
@@ -14,6 +16,7 @@
 import org.slf4j.LoggerFactory;
 
 import de.ids_mannheim.korap.KrillCollection;
+import de.ids_mannheim.korap.collection.BooleanGroupFilter;
 
 public class CollectionBuilderNew {
 
@@ -144,7 +147,6 @@
             return this.isOptional;
         };
 
-
         private ArrayList<CollectionBuilderInterface> operands;
 
         public CollectionBuilderGroup (boolean optional) {
@@ -159,28 +161,26 @@
             return this;
         };
 
-        public Filter toFilter () {
 
+        public Filter toFilter () {
             if (this.operands == null || this.operands.isEmpty())
                 return null;
 
             if (this.operands.size() == 1)
                 return this.operands.get(0).toFilter();
 
-            BooleanFilter bool = new BooleanFilter();
+            // BooleanFilter bool = new BooleanFilter();
+            BooleanGroupFilter bool = new BooleanGroupFilter(this.isOptional);
 
             Iterator<CollectionBuilderInterface> i = this.operands.iterator();
             while (i.hasNext()) {
                 CollectionBuilderInterface cb = i.next();
                 if (cb.isNegative()) {
-                    bool.add(cb.toFilter(), BooleanClause.Occur.MUST_NOT);
-                }
-                else if (this.isOptional()) {
-                    bool.add(cb.toFilter(), BooleanClause.Occur.SHOULD);
+                    bool.without(cb.toFilter());
                 }
                 else {
-                    bool.add(cb.toFilter(), BooleanClause.Occur.MUST);
-                }
+                    bool.with(cb.toFilter());
+                };
             };
 
             return bool;
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 5fed33b..eacf929 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,4 +1,4 @@
-log4j.rootLogger = ERROR, stdout
+# log4j.rootLogger = ERROR, stdout
 
 # Queries:
 # log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
@@ -21,7 +21,7 @@
 
 # Collections:
 # log4j.logger.de.ids_mannheim.korap.collection.Filter = TRACE, stdout
-# log4j.logger.de.ids_mannheim.korap.KrillCollection = TRACE, stdout
+log4j.logger.de.ids_mannheim.korap.KrillCollection = TRACE, stdout
 
 # Responses:
 # log4j.logger.de.ids_mannheim.korap.server.Node = TRACE, stdout