Introduced new KrillCollection interface - but with a failing test for deleted documents
Change-Id: Ie5cd0cea3b651eb93c5b46e669cc9cd37503c8b3
diff --git a/Changes b/Changes
index b1c4a45..862b218 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,10 @@
+0.53 2015-07-24
+ - [feature] Implemented new KrillCollection (diewald)
+ This should fix a lot of issues with deleted
+ documents and negation in virtual collections.
+ - [cleanup] REMOVED deprecated collection filtering (diewald)
+ - [feature] Added removal methods for documents (diewald)
+
0.52 2015-07-08
- [bugfix] Fixed payload filtering in FocusSpans (margaretha)
- [workaround] Reintroduced empty collection support,
diff --git a/pom.xml b/pom.xml
index e6d8a61..e0eb994 100644
--- a/pom.xml
+++ b/pom.xml
@@ -24,7 +24,7 @@
<groupId>de.ids_mannheim.korap</groupId>
<artifactId>Krill</artifactId>
- <version>0.52</version>
+ <version>0.53</version>
<packaging>jar</packaging>
<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollection.java b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
index a89fdfc..1aef947 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
@@ -67,7 +67,7 @@
.getLogger(KrillCollection.class);
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = true;
+ public static final boolean DEBUG = false;
/**
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java b/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java
new file mode 100644
index 0000000..894a747
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollectionNew.java
@@ -0,0 +1,202 @@
+package de.ids_mannheim.korap;
+
+import java.util.*;
+import java.io.IOException;
+
+import de.ids_mannheim.korap.collection.CollectionBuilderNew;
+import de.ids_mannheim.korap.response.Notifications;
+
+import org.apache.lucene.search.*;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.DocIdBitSet;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A virtual collection, i.e. the documents of a {@link KrillIndex}
+ * that are matched by the filter of a collection builder.
+ */
+public class KrillCollectionNew extends Notifications {
+    private KrillIndex index;
+    private CollectionBuilderNew.CollectionBuilderInterface cb;
+
+    // Logger
+    private final static Logger log = LoggerFactory
+            .getLogger(KrillCollection.class);
+
+    // This advises the java compiler to ignore all loggings
+    public static final boolean DEBUG = true;
+
+
+    /**
+     * Construct a new KrillCollection by passing a KrillIndex.
+     *
+     * @param index
+     *        The {@link KrillIndex} object.
+     */
+    public KrillCollectionNew (KrillIndex index) {
+        this.index = index;
+    };
+
+
+    /**
+     * Define the virtual collection by a collection builder.
+     *
+     * @param cb
+     *        The builder the filter is created from.
+     * @return This object, to allow chaining.
+     */
+    public KrillCollectionNew fromBuilder (CollectionBuilderNew.CollectionBuilderInterface cb) {
+        this.cb = cb;
+        return this;
+    };
+
+
+    /**
+     * Get the filter of the virtual collection.
+     *
+     * @return The filter created by the builder,
+     *         or <tt>null</tt> if no builder is set.
+     */
+    public Filter toFilter () {
+        if (this.cb == null)
+            return null;
+
+        return this.cb.toFilter();
+    };
+
+
+    /**
+     * Serialize the filter of the virtual collection.
+     *
+     * @return The string form of the filter,
+     *         or an empty string if there is no filter.
+     */
+    @Override
+    public String toString () {
+        Filter filter = this.toFilter();
+        if (filter == null)
+            return "";
+
+        return filter.toString();
+    };
+
+
+    /**
+     * Create the bit vector of all live documents of a segment
+     * that are part of the virtual collection.
+     *
+     * @param atomic
+     *        The segment to filter.
+     * @return The matching documents, or <tt>null</tt> if no filter
+     *         is defined or a non-negated filter matches nothing.
+     * @throws IOException
+     */
+    public FixedBitSet bits (AtomicReaderContext atomic) throws IOException {
+        Filter filter;
+        if (this.cb == null || (filter = this.cb.toFilter()) == null)
+            return null;
+
+        int maxDoc = atomic.reader().maxDoc();
+        Bits liveDocs = atomic.reader().getLiveDocs();
+        FixedBitSet bitset = new FixedBitSet(maxDoc);
+
+        // Init vector
+        DocIdSet docids = filter.getDocIdSet(atomic, liveDocs);
+        DocIdSetIterator filterIter = (docids == null) ? null : docids.iterator();
+
+        if (filterIter == null) {
+            // Nothing matches - which means everything, if negated
+            if (!this.cb.isNegative())
+                return null;
+
+            bitset.set(0, maxDoc);
+        }
+        else {
+            // Or bit set
+            bitset.or(filterIter);
+
+            // Revert for negation
+            if (this.cb.isNegative())
+                bitset.flip(0, maxDoc);
+        };
+
+        // Remove deleted docs, as negation marks them as matching.
+        // The live docs are null if the segment has no deletions.
+        if (liveDocs != null) {
+            for (int i = 0; i < maxDoc; i++) {
+                if (!liveDocs.get(i))
+                    bitset.clear(i);
+            };
+        };
+
+        return bitset;
+    };
+
+
+    /**
+     * Search for the number of occurrences of different types,
+     * e.g. <i>documents</i>, <i>sentences</i> etc. in the virtual
+     * collection.
+     *
+     * @param field
+     *        The field containing the textual data and the
+     *        annotations as a string.
+     * @param type
+     *        The type of meta information,
+     *        e.g. <i>documents</i> or <i>sentences</i> as a
+     *        string.
+     * @return The number of the occurrences, or <tt>-1</tt> if no
+     *         index is defined. For now only <i>documents</i> are
+     *         counted, all other types result in <tt>0</tt>.
+     * @throws IOException
+     * @see KrillIndex#numberOf
+     */
+    public long numberOf (String field, String type) throws IOException {
+
+        // No index defined
+        if (this.index == null)
+            return (long) -1;
+
+        // This is redundant to index stuff.
+        // The constant comes first, so a null type is no error.
+        if ("documents".equals(type))
+            return this.docCount();
+
+        return (long) 0;
+        // return this.index.numberOf(this, field, type);
+    };
+
+
+    /**
+     * Count the documents of the index that are part of the
+     * virtual collection.
+     *
+     * @return The number of documents, or <tt>0</tt> if no index
+     *         is defined. If reading the index fails, a warning is
+     *         logged and the count reached so far is returned.
+     */
+    public long docCount () {
+
+        // No index defined
+        if (this.index == null)
+            return (long) 0;
+
+        long docCount = 0;
+        try {
+            FixedBitSet bitset;
+            for (AtomicReaderContext atomic : this.index.reader().leaves()) {
+                if ((bitset = this.bits(atomic)) != null)
+                    docCount += bitset.cardinality();
+            };
+        }
+        catch (IOException e) {
+            log.warn(e.getLocalizedMessage(), e);
+        };
+        return docCount;
+    };
+};
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 73a604a..bfdae31 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -419,6 +419,51 @@
/**
+ * Delete documents of the index by passing field information.
+ *
+ * @param field
+ * The meta field name.
+ * @param term
+ * The meta field term.
+ */
+    public boolean delDocs (String field, String term) {
+        // Both parts of the term are needed to address documents
+        if (field == null || term == null)
+            return false;
+        try {
+            this.writer().deleteDocuments(new Term(field, term));
+
+            // Count the deletion and commit once the counter exceeds autoCommit
+            if (++commitCounter > autoCommit) {
+                this.commit();
+                commitCounter = 0;
+            };
+
+            return true;
+        }
+
+        // Failed to delete documents - log with the cause
+        catch (IOException e) {
+            log.error("Unable to delete documents", e);
+        };
+        return false;
+    };
+
+
+    /**
+     * Delete a document of the index by passing a UID.
+     *
+     * @param uid The unique identifier of the document.
+     * @return The result of {@link #delDocs}, or <tt>false</tt> for a null or negative uid.
+     */
+    public boolean delDoc (Integer uid) {
+        if (uid == null || uid < 0) // null would throw on unboxing
+            return false;
+        return this.delDocs("UID", uid.toString());
+    };
+
+
+ /**
* Add a document to the index as a JSON string.
*
* @param json
@@ -574,11 +619,11 @@
};
long docCount = 0;
- int i = 1;
+ // int i = 1;
try {
for (AtomicReaderContext atomic : this.reader().leaves()) {
docCount += collection.bits(atomic).cardinality();
- i++;
+ // i++;
};
}
catch (IOException e) {
diff --git a/src/main/java/de/ids_mannheim/korap/collection/BooleanGroupFilter.java b/src/main/java/de/ids_mannheim/korap/collection/BooleanGroupFilter.java
new file mode 100644
index 0000000..7efcc60
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/collection/BooleanGroupFilter.java
@@ -0,0 +1,281 @@
+package de.ids_mannheim.korap.collection;
+
+import java.io.IOException;
+import java.util.*;
+
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.BitsFilteredDocIdSet;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.util.Bits;
+
+import de.ids_mannheim.korap.KrillCollection;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A container Filter that allows Boolean composition of Filters
+ * in groups (either or-groups or and-groups).
+ *
+ * @author Nils Diewald
+ *
+ * This filter is roughly based on org.apache.lucene.queries.BooleanFilter.
+ */
+public class BooleanGroupFilter extends Filter {
+    // Group is either an or- or an and-Group
+    private final boolean isOptional;
+
+    // Logger
+    private final static Logger log = LoggerFactory.getLogger(KrillCollection.class);
+
+    // This advises the java compiler to ignore all loggings
+    public static final boolean DEBUG = true;
+
+    // Init operands list
+    private final List<GroupFilterOperand> operands = new ArrayList<>(3);
+
+    // Operand in the filter group.
+    // Static, as no access to the enclosing group is needed.
+    private static class GroupFilterOperand {
+        public final Filter filter;
+        public final boolean isNegative;
+
+        // Operand has filter and negativity information
+        public GroupFilterOperand (Filter filter, boolean negative) {
+            this.filter = filter;
+            this.isNegative = negative;
+        };
+
+        // Operands are equal if they wrap equal filters with equal
+        // negativity - otherwise equal groups would never be equal
+        @Override
+        public boolean equals (Object obj) {
+            if (this == obj)
+                return true;
+
+            if (!(obj instanceof GroupFilterOperand))
+                return false;
+
+            final GroupFilterOperand other = (GroupFilterOperand) obj;
+            return this.isNegative == other.isNegative
+                    && Objects.equals(this.filter, other.filter);
+        };
+
+        @Override
+        public int hashCode () {
+            return Objects.hashCode(this.filter) ^ (this.isNegative ? 1231 : 1237);
+        };
+    };
+
+    /**
+     * Create a new BooleanGroupFilter.
+     * Accepts a boolean parameter to make it an or-Group
+     * (<tt>true</tt>) or an and-Group (<tt>false</tt>).
+     */
+    public BooleanGroupFilter (boolean optional) {
+        this.isOptional = optional;
+    };
+
+
+    /**
+     * Add an operand to the list of filter operands.
+     * The operand is a positive filter that won't be flipped.
+     */
+    public final void with (Filter filter) {
+        this.operands.add(new GroupFilterOperand(filter, false));
+    };
+
+
+    /**
+     * Add an operand to the list of filter operands.
+     * The operand is a negative filter that will be flipped.
+     */
+    public final void without (Filter filter) {
+        this.operands.add(new GroupFilterOperand(filter, true));
+    };
+
+
+    /**
+     * Two groups are equal, if they are of the same kind
+     * (or- or and-Group) and have equal operands in the same order.
+     */
+    @Override
+    public boolean equals (Object obj) {
+        if (this == obj)
+            return true;
+
+        if ((obj == null) || (obj.getClass() != this.getClass()))
+            return false;
+
+        final BooleanGroupFilter other = (BooleanGroupFilter) obj;
+        return this.isOptional == other.isOptional
+                && this.operands.equals(other.operands);
+    };
+
+
+    @Override
+    public int hashCode () {
+        return 657153719 ^ this.operands.hashCode() ^ (this.isOptional ? 1231 : 1237);
+    };
+
+
+    /**
+     * Serialize the group as <tt>OrGroup(...)</tt> or
+     * <tt>AndGroup(...)</tt> with space separated operands.
+     * Negative operands are prefixed by <tt>-</tt>.
+     */
+    @Override
+    public String toString () {
+        StringBuilder buffer = new StringBuilder(
+            this.isOptional ? "OrGroup(" : "AndGroup("
+        );
+        boolean first = true;
+        for (final GroupFilterOperand operand : this.operands) {
+            if (first)
+                first = false;
+            else
+                buffer.append(" ");
+
+            if (operand.isNegative)
+                buffer.append('-');
+
+            buffer.append(operand.filter.toString());
+        };
+        return buffer.append(')').toString();
+    };
+
+
+    /**
+     * Combine the documents of all operands in a segment.
+     * Operands are applied in the order they were added.
+     *
+     * @param context
+     *        The segment to filter.
+     * @param acceptDocs
+     *        The documents allowed to match. The operands are
+     *        evaluated without it; it is applied to the final
+     *        result only.
+     * @return The matching documents, or <tt>null</tt> if it is
+     *         already known that no document can match.
+     * @throws IOException
+     */
+    @Override
+    public DocIdSet getDocIdSet (AtomicReaderContext context, Bits acceptDocs) throws IOException {
+        final AtomicReader reader = context.reader();
+        int maxDoc = reader.maxDoc();
+        FixedBitSet bitset = new FixedBitSet(maxDoc);
+        FixedBitSet combinator = new FixedBitSet(maxDoc);
+        boolean init = true;
+
+        if (DEBUG)
+            log.debug("Start trying to filter on bitset of length {}", maxDoc);
+
+        for (final GroupFilterOperand operand : this.operands) {
+            final DocIdSet docids = operand.filter.getDocIdSet(context, null);
+            final DocIdSetIterator filterIter = (docids == null) ? null : docids.iterator();
+
+            if (DEBUG)
+                log.debug("> Filter to bitset of {} ({} negative)",
+                          operand.filter.toString(),
+                          operand.isNegative);
+
+            // Filter resulted in no docs
+            if (filterIter == null) {
+
+                if (DEBUG) log.debug("- Filter is null");
+
+                // The negation of no docs matches all docs
+                if (operand.isNegative) {
+
+                    // All docs or anything else are all docs
+                    if (this.isOptional) {
+                        if (DEBUG) log.debug("- Filter to allow all documents");
+
+                        bitset.set(0, maxDoc);
+                        return BitsFilteredDocIdSet.wrap(bitset, acceptDocs);
+                    };
+
+                    // All docs and anything else are anything else,
+                    // so the operand doesn't restrict the and-Group
+                    if (DEBUG) log.debug("- Filter is ignorable");
+
+                    // Without a previous operand, start with all docs
+                    if (init) {
+                        bitset.set(0, maxDoc);
+                        init = false;
+                    };
+                    continue;
+                }
+
+                // The result is unimportant
+                else if (this.isOptional) {
+                    if (DEBUG) log.debug("- Filter is ignorable");
+                    continue;
+                };
+
+                // There is no possible match
+                if (DEBUG) log.debug("- Filter to allow no documents (2)");
+                return null;
+            }
+
+            // Initialize bitset
+            else if (init) {
+
+                bitset.or(filterIter);
+
+                if (DEBUG) log.debug("- Filter is initial with card {}", bitset.cardinality());
+
+                // Flip the matching documents
+                if (operand.isNegative) {
+                    bitset.flip(0, maxDoc);
+                    if (DEBUG) log.debug("- Filter is negative - so flipped to card {} (1)", bitset.cardinality());
+                };
+
+                init = false;
+            }
+            else {
+
+                if (DEBUG) log.debug("- Filter is fine and operating");
+
+                // Operator is negative and needs to be flipped
+                if (operand.isNegative) {
+                    if (this.isOptional) {
+                        if (DEBUG) log.debug("- Filter is negative optional");
+
+                        // Negative or ... may be slow
+                        combinator.or(filterIter);
+                        combinator.flip(0, maxDoc);
+
+                        if (DEBUG) log.debug("- Filter is negative - so flipped to card {} (2)", combinator.cardinality());
+
+                        bitset.or(combinator);
+                        combinator.clear(0, maxDoc);
+                    }
+
+                    // Negative and
+                    else {
+                        if (DEBUG) log.debug("- Filter is negative not optional");
+                        bitset.andNot(filterIter);
+                        if (DEBUG) log.debug("- Filter is negative - so andNotted");
+                    }
+                }
+                else if (this.isOptional) {
+                    if (DEBUG) log.debug("- Filter is simply optional");
+                    bitset.or(filterIter);
+                }
+                else {
+                    if (DEBUG) log.debug("- Filter is simply not optional");
+                    bitset.and(filterIter);
+                    // TODO: Check with nextSetBit() if the filter is not applicable
+                };
+
+                if (DEBUG) log.debug("- Subresult has card {} ", bitset.cardinality());
+            };
+        };
+        return BitsFilteredDocIdSet.wrap(bitset, acceptDocs);
+    };
+};
diff --git a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java
index 06bafd4..8b2ff4d 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilderNew.java
@@ -2,8 +2,10 @@
import java.util.*;
import java.io.IOException;
+// TEMPORARY:
import org.apache.lucene.queries.BooleanFilter;
import org.apache.lucene.search.BooleanClause;
+
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.*;
@@ -14,6 +16,7 @@
import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.KrillCollection;
+import de.ids_mannheim.korap.collection.BooleanGroupFilter;
public class CollectionBuilderNew {
@@ -144,7 +147,6 @@
return this.isOptional;
};
-
private ArrayList<CollectionBuilderInterface> operands;
public CollectionBuilderGroup (boolean optional) {
@@ -159,28 +161,26 @@
return this;
};
- public Filter toFilter () {
+ public Filter toFilter () {
if (this.operands == null || this.operands.isEmpty())
return null;
if (this.operands.size() == 1)
return this.operands.get(0).toFilter();
- BooleanFilter bool = new BooleanFilter();
+ // BooleanFilter bool = new BooleanFilter();
+ BooleanGroupFilter bool = new BooleanGroupFilter(this.isOptional);
Iterator<CollectionBuilderInterface> i = this.operands.iterator();
while (i.hasNext()) {
CollectionBuilderInterface cb = i.next();
if (cb.isNegative()) {
- bool.add(cb.toFilter(), BooleanClause.Occur.MUST_NOT);
- }
- else if (this.isOptional()) {
- bool.add(cb.toFilter(), BooleanClause.Occur.SHOULD);
+ bool.without(cb.toFilter());
}
else {
- bool.add(cb.toFilter(), BooleanClause.Occur.MUST);
- }
+ bool.with(cb.toFilter());
+ };
};
return bool;
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 5fed33b..eacf929 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,4 +1,4 @@
-log4j.rootLogger = ERROR, stdout
+# log4j.rootLogger = ERROR, stdout
# Queries:
# log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
@@ -21,7 +21,7 @@
# Collections:
# log4j.logger.de.ids_mannheim.korap.collection.Filter = TRACE, stdout
-# log4j.logger.de.ids_mannheim.korap.KrillCollection = TRACE, stdout
+log4j.logger.de.ids_mannheim.korap.KrillCollection = TRACE, stdout
# Responses:
# log4j.logger.de.ids_mannheim.korap.server.Node = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
new file mode 100644
index 0000000..5aade60
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
@@ -0,0 +1,207 @@
+package de.ids_mannheim.korap.collection;
+import java.io.IOException;
+
+import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.KrillCollectionNew;
+import de.ids_mannheim.korap.collection.CollectionBuilderNew;
+import de.ids_mannheim.korap.index.FieldDocument;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class TestKrillCollectionIndex {
+ private KrillIndex ki;
+ // Document counts for single terms, or-groups and and-groups on three committed documents
+ @Test
+ public void testIndexWithCollectionBuilder () throws IOException {
+ ki = new KrillIndex();
+ ki.addDoc(createDoc1());
+ ki.addDoc(createDoc2());
+ ki.addDoc(createDoc3());
+ ki.commit();
+ CollectionBuilderNew cb = new CollectionBuilderNew();
+ KrillCollectionNew kcn = new KrillCollectionNew(ki);
+
+ // Simple string tests
+ kcn.fromBuilder(cb.term("author", "Frank"));
+ assertEquals(1, kcn.docCount());
+
+ kcn.fromBuilder(cb.term("author", "Peter"));
+ assertEquals(1, kcn.docCount());
+
+ kcn.fromBuilder(cb.term("author", "Sebastian"));
+ assertEquals(1, kcn.docCount());
+
+ kcn.fromBuilder(cb.term("author", "Michael"));
+ assertEquals(0, kcn.docCount());
+ // NOTE(review): the docs add "Reisen", "Kultur" and "Finanzen" capitalized - presumably addKeyword() lowercases the terms; confirm
+ kcn.fromBuilder(cb.term("textClass", "reisen"));
+ assertEquals(3, kcn.docCount());
+
+ kcn.fromBuilder(cb.term("textClass", "kultur"));
+ assertEquals(2, kcn.docCount());
+
+ kcn.fromBuilder(cb.term("textClass", "finanzen"));
+ assertEquals(1, kcn.docCount());
+
+ // Simple orGroup tests
+ kcn.fromBuilder(cb.orGroup(cb.term("author", "Frank")).with(cb.term("author", "Michael")));
+ assertEquals(1, kcn.docCount());
+
+ kcn.fromBuilder(cb.orGroup(cb.term("author", "Frank")).with(cb.term("author", "Sebastian")));
+ assertEquals(2, kcn.docCount());
+
+ kcn.fromBuilder(cb.orGroup(cb.term("author", "Frank"))
+ .with(cb.term("author", "Sebastian"))
+ .with(cb.term("author", "Peter")));
+ assertEquals(3, kcn.docCount());
+
+ kcn.fromBuilder(cb.orGroup(cb.term("author", "Huhu"))
+ .with(cb.term("author", "Haha"))
+ .with(cb.term("author", "Hehe")));
+ assertEquals(0, kcn.docCount());
+
+ // Multi field orGroup tests
+ kcn.fromBuilder(cb.orGroup(cb.term("ID", "doc-1")).with(cb.term("author", "Peter")));
+ assertEquals(2, kcn.docCount());
+
+ kcn.fromBuilder(cb.orGroup(cb.term("ID", "doc-1")).with(cb.term("author", "Frank")));
+ assertEquals(1, kcn.docCount());
+
+ kcn.fromBuilder(cb.orGroup(cb.term("ID", "doc-1")).with(cb.term("author", "Michael")));
+ assertEquals(1, kcn.docCount());
+
+ // Simple andGroup tests
+ kcn.fromBuilder(cb.andGroup(cb.term("author", "Frank")).with(cb.term("author", "Michael")));
+ assertEquals(0, kcn.docCount());
+
+ kcn.fromBuilder(cb.andGroup(cb.term("ID", "doc-1")).with(cb.term("author", "Frank")));
+ assertEquals(1, kcn.docCount());
+
+ // andGroup in keyword field test
+ kcn.fromBuilder(cb.andGroup(cb.term("textClass", "reisen")).with(cb.term("textClass", "finanzen")));
+ assertEquals(1, kcn.docCount());
+
+ kcn.fromBuilder(cb.andGroup(cb.term("textClass", "reisen")).with(cb.term("textClass", "kultur")));
+ assertEquals(2, kcn.docCount());
+
+ kcn.fromBuilder(cb.andGroup(cb.term("textClass", "finanzen")).with(cb.term("textClass", "kultur")));
+ assertEquals(0, kcn.docCount());
+ };
+ // Document counts for negated terms, alone and as an operand of an or-group
+ @Test
+ public void testIndexWithNegation () throws IOException {
+ ki = new KrillIndex();
+ ki.addDoc(createDoc1());
+ ki.addDoc(createDoc2());
+ ki.addDoc(createDoc3());
+ ki.commit();
+ CollectionBuilderNew cb = new CollectionBuilderNew();
+ KrillCollectionNew kcn = new KrillCollectionNew(ki);
+
+ // Simple negation tests
+ kcn.fromBuilder(cb.term("author", "Frank").not());
+ assertEquals(2, kcn.docCount());
+
+ kcn.fromBuilder(cb.term("textClass", "reisen").not());
+ assertEquals(0, kcn.docCount());
+
+ kcn.fromBuilder(cb.term("textClass", "kultur").not());
+ assertEquals(1, kcn.docCount());
+
+ // orGroup with simple Negation
+ kcn.fromBuilder(
+ cb.orGroup(cb.term("textClass", "kultur").not()).with(cb.term("author", "Peter"))
+ );
+ assertEquals(2, kcn.docCount());
+
+ kcn.fromBuilder(
+ cb.orGroup(cb.term("textClass", "kultur").not()).with(cb.term("author", "Sebastian"))
+ );
+ assertEquals(1, kcn.docCount());
+
+ };
+ // Document counts after each of three commits: two initial documents, one added, one deleted
+ @Test
+ public void testIndexWithMultipleCommits () throws IOException {
+ ki = new KrillIndex();
+ ki.addDoc(createDoc1());
+ ki.addDoc(createDoc2());
+ ki.commit();
+ CollectionBuilderNew cb = new CollectionBuilderNew();
+ KrillCollectionNew kcn = new KrillCollectionNew(ki);
+
+ kcn.fromBuilder(cb.term("author", "Frank"));
+ assertEquals(1, kcn.docCount());
+ kcn.fromBuilder(cb.term("author", "Peter"));
+ assertEquals(1, kcn.docCount());
+ kcn.fromBuilder(cb.term("author", "Sebastian"));
+ assertEquals(0, kcn.docCount());
+ kcn.fromBuilder(cb.term("author", "Michael").not());
+ assertEquals(2, kcn.docCount());
+
+ // Add Sebastians doc
+ ki.addDoc(createDoc3());
+ ki.commit();
+
+ kcn.fromBuilder(cb.term("author", "Frank"));
+ assertEquals(1, kcn.docCount());
+ kcn.fromBuilder(cb.term("author", "Peter"));
+ assertEquals(1, kcn.docCount());
+ kcn.fromBuilder(cb.term("author", "Sebastian"));
+ assertEquals(1, kcn.docCount());
+ kcn.fromBuilder(cb.term("author", "Michael").not());
+ assertEquals(3, kcn.docCount());
+
+ // Remove one document
+ ki.delDocs("author", "Peter");
+ ki.commit();
+ // NOTE(review): the commit message announces a failing test for deleted documents - presumably one of the following checks; confirm
+ kcn.fromBuilder(cb.term("author", "Frank"));
+ assertEquals(1, kcn.docCount());
+ kcn.fromBuilder(cb.term("author", "Peter"));
+ assertEquals(0, kcn.docCount());
+ kcn.fromBuilder(cb.term("author", "Sebastian"));
+ assertEquals(1, kcn.docCount());
+ kcn.fromBuilder(cb.term("author", "Michael").not());
+ assertEquals(2, kcn.docCount());
+ };
+
+ // Todo: Test index with removes
+ // Todo: Test with dates
+ // Todo: Test with regex
+
+ private FieldDocument createDoc1 () {
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-1");
+ fd.addString("author", "Frank");
+ fd.addKeyword("textClass", "Nachricht Kultur Reisen");
+ fd.addInt("pubDate", 20051210);
+ fd.addText("text", "Der alte Mann ging über die Straße");
+ return fd;
+ };
+
+ private FieldDocument createDoc2 () {
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-2");
+ fd.addString("author", "Peter");
+ fd.addKeyword("textClass", "Kultur Reisen");
+ fd.addInt("pubDate", 20051207);
+ fd.addText("text", "Der junge Mann hatte keine andere Wahl");
+ return fd;
+ };
+
+ private FieldDocument createDoc3 () {
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-3");
+ fd.addString("author", "Sebastian");
+ fd.addKeyword("textClass", "Reisen Finanzen");
+ fd.addInt("pubDate", 20051216);
+ fd.addText("text", "Die Frau und der Mann küssten sich");
+ return fd;
+ };
+};
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionNew.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionNew.java
index 870b725..0440483 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionNew.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionNew.java
@@ -107,7 +107,7 @@
@Test
public void builderAndCombined () throws IOException {
CollectionBuilderNew kc = new CollectionBuilderNew();
- assertEquals("BooleanFilter(+author:tree +title:name)",
+ assertEquals("AndGroup(author:tree title:name)",
kc.andGroup(kc.term("author", "tree"))
.with(kc.term("title", "name")).toString());
};
@@ -115,7 +115,7 @@
@Test
public void builderAndNestedSimple () throws IOException {
CollectionBuilderNew kc = new CollectionBuilderNew();
- assertEquals("BooleanFilter(+author:tree +title:name)",
+ assertEquals("AndGroup(author:tree title:name)",
kc.andGroup(kc.andGroup(kc.term("author", "tree")).with(kc.term("title", "name"))).toString());
};
@@ -123,7 +123,7 @@
@Test
public void builderOrCombined () throws IOException {
CollectionBuilderNew kc = new CollectionBuilderNew();
- assertEquals("BooleanFilter(author:tree title:name)",
+ assertEquals("OrGroup(author:tree title:name)",
kc.orGroup(kc.term("author", "tree"))
.with(kc.term("title", "name")).toString());
};
@@ -131,7 +131,7 @@
@Test
public void builderOrNestedSimple () throws IOException {
CollectionBuilderNew kc = new CollectionBuilderNew();
- assertEquals("BooleanFilter(author:tree title:name)",
+ assertEquals("OrGroup(author:tree title:name)",
kc.orGroup(kc.orGroup(kc.term("author", "tree"))
.with(kc.term("title", "name"))).toString()
);
@@ -145,7 +145,7 @@
).with(
kc.andGroup(kc.term("author", "tree2")).with(kc.term("title", "name2"))
).toString();
- assertEquals("BooleanFilter(BooleanFilter(author:tree1 title:name1) BooleanFilter(+author:tree2 +title:name2))", g);
+ assertEquals("OrGroup(OrGroup(author:tree1 title:name1) AndGroup(author:tree2 title:name2))", g);
};
@Test
@@ -153,14 +153,14 @@
CollectionBuilderNew kc = new CollectionBuilderNew();
CollectionBuilderNew.CollectionBuilderInterface kbi = kc.orGroup(kc.term("author", "tree1")).with(kc.term("title", "name1"));
assertEquals(
- "BooleanFilter(author:tree1 title:name1)",
+ "OrGroup(author:tree1 title:name1)",
kbi.toString());
assertFalse(kbi.isNegative());
kbi = kc.andGroup(
kc.orGroup(kc.term("author", "tree1")).with(kc.term("title", "name1"))
).not();
- assertEquals("BooleanFilter(author:tree1 title:name1)", kbi.toString());
+ assertEquals("OrGroup(author:tree1 title:name1)", kbi.toString());
assertTrue(kbi.isNegative());
};