Working Virtual Collections | Feature Freeze
diff --git a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java
new file mode 100644
index 0000000..0419fdd
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java
@@ -0,0 +1,63 @@
+import java.io.*;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.index.FieldDocument;
+import de.ids_mannheim.korap.KorapCollection;
+import de.ids_mannheim.korap.KorapFilter;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.KorapQuery;
+import de.ids_mannheim.korap.filter.BooleanFilter;
+import org.apache.lucene.search.spans.SpanQuery;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class TestKorapCollection {
+
+    @Test
+    public void filterExample () throws IOException {
+	// Example walk-through: index the wiki fixtures, narrow a KorapCollection with filters, then search it.
+	// Construct the index
+	KorapIndex ki = new KorapIndex();
+	// Index the gzipped JSON test documents from the /wiki/ resource directory
+	for (String i : new String[] {"00001", "00002", "00003", "00004", "00005", "00006", "02439"}) {
+	    FieldDocument fd = ki.addDocFile(
+	      getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+            );
+	};
+	ki.commit();
+
+	KorapFilter kf = new KorapFilter();
+
+	// Create a virtual collection on top of the index:
+	KorapCollection kc = new KorapCollection(ki);
+
+	// The virtual collection consists of all documents that have the textClass "reisen" and "freizeit"
+	kc.filter( kf.and("textClass", "reisen").and("textClass", "freizeit") );
+
+	// Subset this to all documents that also have the textClass "kultur"
+	kc.filter( kf.and("textClass", "kultur") );
+
+	// Create a query
+	KorapQuery kq = new KorapQuery("tokens");
+	SpanQuery query = kq.seg("opennlp/p:NN").with("tt/p:NN").toQuery();
+	// NOTE(review): nothing below is asserted; the test only checks that filtering and searching run without throwing.
+	// Get some statistics (this can be improved; currently disabled):
+	/*
+	System.err.println("Tokens in this virtual collection: " + kc.numberOf("tokens", "t"));
+	System.err.println("Paragraphs in this virtual collection: " + kc.numberOf("tokens", "p"));
+	System.err.println("Sentences in this virtual collection: " + kc.numberOf("tokens", "s"));
+	*/
+
+	KorapResult kr = kc.search(query);
+	// System.err.println(kr.toJSON());
+    };
+};
+
+
+
+// kc.filter( kf.and("textClass", "kultur").or("textClass", "wissenschaft") );
diff --git a/src/test/java/de/ids_mannheim/korap/filter/TestKorapFilter.java b/src/test/java/de/ids_mannheim/korap/filter/TestKorapFilter.java
index 8ccefa8..661e45b 100644
--- a/src/test/java/de/ids_mannheim/korap/filter/TestKorapFilter.java
+++ b/src/test/java/de/ids_mannheim/korap/filter/TestKorapFilter.java
@@ -21,10 +21,10 @@
 
 	KorapFilter kf = new KorapFilter();
 
-	assertEquals("textClass:tree", kf.genre("tree").toString());
-	assertEquals("+textClass:tree +textClass:sport", kf.genre("tree").and("sport").toString());
-	assertEquals("(+textClass:tree +textClass:sport) textClass:news", kf.genre("tree").and("sport").or("news").toString());
-	assertEquals("textClass:tree textClass:sport textClass:news", kf.genre("tree", "sport", "news").toString());
+	assertEquals("+textClass:tree", kf.and("textClass","tree").toString());
+	assertEquals("+textClass:tree +textClass:sport", kf.and("textClass","tree").and("textClass","sport").toString());
+	assertEquals("+textClass:tree +textClass:sport textClass:news", kf.and("textClass","tree").and("textClass","sport").or("textClass","news").toString());
+	assertEquals("+textClass:tree +textClass:sport +textClass:news", kf.and("textClass", "tree", "sport", "news").toString());
     };
 
     @Test
@@ -32,48 +32,53 @@
 
 	KorapFilter kf = new KorapFilter();
 
-	assertEquals("pubDate:[20030604 TO 20030899]", kf.between("2003-06-04", "2003-08-99").toString());
-	assertEquals("pubDate:[0 TO 20030604]", kf.till("2003-06-04").toString());
-	assertEquals("pubDate:[20030604 TO 99999999]", kf.since("2003-06-04").toString());
-	assertEquals("pubDate:20030604", kf.date("2003-06-04").toString());
+	assertEquals("+pubDate:[20030604 TO 20030899]", kf.between("2003-06-04", "2003-08-99").toString());
+	assertEquals("+pubDate:[0 TO 20030604]", kf.till("2003-06-04").toString());
+	assertEquals("+pubDate:[20030604 TO 99999999]", kf.since("2003-06-04").toString());
+	assertEquals("+pubDate:20030604", kf.date("2003-06-04").toString());
     };
 
     @Test
     public void rangeLimited () throws IOException {
 
 	KorapFilter kf = new KorapFilter();
-	assertEquals("pubDate:[20050000 TO 20099999]", kf.between("2005", "2009").toString());
-       	assertEquals("pubDate:[20051000 TO 20090899]", kf.between("200510", "200908").toString());
-	assertEquals("pubDate:[20051000 TO 20090899]", kf.between("2005-10", "2009-08").toString());
-	assertEquals("pubDate:[20051006 TO 20090803]", kf.between("2005-1006", "2009-0803").toString());
-	assertEquals("pubDate:[20051006 TO 20090803]", kf.between("2005-10-06", "2009-08-03").toString());
 
-	assertEquals("pubDate:[0 TO 20059999]", kf.till("2005").toString());
-	assertEquals("pubDate:[0 TO 20051099]", kf.till("200510").toString());
-	assertEquals("pubDate:[0 TO 20051099]", kf.till("2005-10").toString());
-	assertEquals("pubDate:[0 TO 20051006]", kf.till("2005-1006").toString());
-	assertEquals("pubDate:[0 TO 20051006]", kf.till("2005-10-06").toString());
+	assertEquals("+pubDate:[20050000 TO 20099999]", kf.between("2005", "2009").toString());
+       	assertEquals("+pubDate:[20051000 TO 20090899]", kf.between("200510", "200908").toString());
+	assertEquals("+pubDate:[20051000 TO 20090899]", kf.between("2005-10", "2009-08").toString());
+	assertEquals("+pubDate:[20051006 TO 20090803]", kf.between("2005-1006", "2009-0803").toString());
+	assertEquals("+pubDate:[20051006 TO 20090803]", kf.between("2005-10-06", "2009-08-03").toString());
 
-	assertEquals("pubDate:[20050000 TO 99999999]", kf.since("2005").toString());
-	assertEquals("pubDate:[20051000 TO 99999999]", kf.since("200510").toString());
-	assertEquals("pubDate:[20051000 TO 99999999]", kf.since("2005-10").toString());
-	assertEquals("pubDate:[20051006 TO 99999999]", kf.since("2005-1006").toString());
-	assertEquals("pubDate:[20051006 TO 99999999]", kf.since("2005-10-06").toString());
+	assertEquals("+pubDate:[0 TO 20059999]", kf.till("2005").toString());
+	assertEquals("+pubDate:[0 TO 20051099]", kf.till("200510").toString());
+	assertEquals("+pubDate:[0 TO 20051099]", kf.till("2005-10").toString());
+	assertEquals("+pubDate:[0 TO 20051006]", kf.till("2005-1006").toString());
+	assertEquals("+pubDate:[0 TO 20051006]", kf.till("2005-10-06").toString());
 
-	assertEquals("pubDate:[20050000 TO 20059999]", kf.date("2005").toString());
-	assertEquals("pubDate:[20051000 TO 20051099]", kf.date("200510").toString());
-	assertEquals("pubDate:[20051000 TO 20051099]", kf.date("2005-10").toString());
-	assertEquals("pubDate:20051006", kf.date("2005-1006").toString());
-	assertEquals("pubDate:20051006", kf.date("2005-10-06").toString());
+	assertEquals("+pubDate:[20050000 TO 99999999]", kf.since("2005").toString());
+	assertEquals("+pubDate:[20051000 TO 99999999]", kf.since("200510").toString());
+	assertEquals("+pubDate:[20051000 TO 99999999]", kf.since("2005-10").toString());
+	assertEquals("+pubDate:[20051006 TO 99999999]", kf.since("2005-1006").toString());
+	assertEquals("+pubDate:[20051006 TO 99999999]", kf.since("2005-10-06").toString());
+
+	assertEquals("+pubDate:[20050000 TO 20059999]", kf.date("2005").toString());
+	assertEquals("+pubDate:[20051000 TO 20051099]", kf.date("200510").toString());
+	assertEquals("+pubDate:[20051000 TO 20051099]", kf.date("2005-10").toString());
+	assertEquals("+pubDate:20051006", kf.date("2005-1006").toString());
+	assertEquals("+pubDate:20051006", kf.date("2005-10-06").toString());
     };
 
     @Test
     public void rangeFailure () throws IOException {
 
 	KorapFilter kf = new KorapFilter();
-	assertNull(kf.between("aaaa-bb-cc", "aaaabbcc"));
-	assertNull(kf.till("aaaa-bb-cc"));
-	assertNull(kf.since("aaaa-bb-cc"));
-	assertNull(kf.date("aaaa-bb-cc"));
+	assertEquals("", kf.between("aaaa-bb-cc", "aaaabbcc").toString());
+	assertEquals("", kf.till("aaaa-bb-cc").toString());
+	assertEquals("", kf.since("aaaa-bb-cc").toString());
+	assertEquals("", kf.date("aaaa-bb-cc").toString());
     };
+
+
+    // TODO: More extensive testing!
+
 };
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestClassIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestClassIndex.java
index ff3abaf..4aa9d5f 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestClassIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestClassIndex.java
@@ -189,8 +189,8 @@
 	assertEquals("StartPos (1)", 4, kr.match(1).startPos);
 	assertEquals("EndPos (1)", 6, kr.match(1).endPos);
 
-	assertEquals("Document count", 1, ki.numberOf("documents"));
-	assertEquals("Token count", 10, ki.numberOf("t"));
+	assertEquals("Document count", 1, ki.numberOf("base", "documents"));
+	assertEquals("Token count", 10, ki.numberOf("base", "t"));
 
 
 	sq = new SpanNextQuery(
@@ -211,8 +211,8 @@
 	assertEquals("StartPos (1)", 3, kr.match(1).startPos);
 	assertEquals("EndPos (1)", 6, kr.match(1).endPos);
 
-	assertEquals(1, ki.numberOf("documents"));
-	assertEquals(10, ki.numberOf("t"));
+	assertEquals(1, ki.numberOf("base", "documents"));
+	assertEquals(10, ki.numberOf("base", "t"));
     };
 
 
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestKorapIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestKorapIndex.java
index a4ba6a2..2200069 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestKorapIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestKorapIndex.java
@@ -49,8 +49,8 @@
 	/* Save documents */
 	ki.commit();
 
-	assertEquals(2, ki.numberOf("documents"));
-	assertEquals(7, ki.numberOf("sentences"));
+	assertEquals(2, ki.numberOf("base", "documents"));
+	assertEquals(7, ki.numberOf("base", "sentences"));
 
 
 	fd = new FieldDocument();
@@ -65,8 +65,8 @@
 	/* Save documents */
 	ki.commit();
 
-	assertEquals(3, ki.numberOf("documents"));
-	assertEquals(10, ki.numberOf("sentences"));
+	assertEquals(3, ki.numberOf("base", "documents"));
+	assertEquals(10, ki.numberOf("base", "sentences"));
 
 
 	// KorapQuery kq = new KorapQuery("text");
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
index c081f7c..32e9a72 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
@@ -154,8 +154,8 @@
 	assertEquals("EndPos (1)", 6, kr.match(1).endPos);
 	assertEquals("SnippetBrackets (1)", "abca[{1:b}{2:c}]abac", kr.match(1).snippetBrackets());
 
-	assertEquals("Document count", 1, ki.numberOf("documents"));
-	assertEquals("Token count", 10, ki.numberOf("t"));
+	assertEquals("Document count", 1, ki.numberOf("base", "documents"));
+	assertEquals("Token count", 10, ki.numberOf("base", "t"));
 
 
 	sq = new SpanMatchModifyQuery(
@@ -180,7 +180,7 @@
 	assertEquals("EndPos (1)", 6, kr.match(1).endPos);
 	assertEquals("SnippetBrackets (1)", "abca[bc]abac", kr.match(1).snippetBrackets());
 
-	assertEquals(1, ki.numberOf("documents"));
-	assertEquals(10, ki.numberOf("t"));
+	assertEquals(1, ki.numberOf("base", "documents"));
+	assertEquals(10, ki.numberOf("base", "t"));
     };
 };
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java
index a3860ea..9b9aae7 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java
@@ -85,8 +85,8 @@
 	assertEquals("StartPos (1)", 4, kr.match(1).startPos);
 	assertEquals("EndPos (1)", 6, kr.match(1).endPos);
 
-	assertEquals(1, ki.numberOf("documents"));
-	assertEquals(10, ki.numberOf("t"));
+	assertEquals(1, ki.numberOf("base", "documents"));
+	assertEquals(10, ki.numberOf("base", "t"));
 
 
 	sq = new SpanNextQuery(
@@ -105,8 +105,8 @@
 	assertEquals("StartPos (1)", 3, kr.match(1).startPos);
 	assertEquals("EndPos (1)", 6, kr.match(1).endPos);
 
-	assertEquals(1, ki.numberOf("documents"));
-	assertEquals(10, ki.numberOf("t"));
+	assertEquals(1, ki.numberOf("base", "documents"));
+	assertEquals(10, ki.numberOf("base", "t"));
 
     };
 
diff --git a/src/test/resources/wiki/readme.txt b/src/test/resources/wiki/readme.txt
new file mode 100644
index 0000000..32fc818
--- /dev/null
+++ b/src/test/resources/wiki/readme.txt
@@ -0,0 +1,7 @@
+00001: freizeit-unterhaltung,reisen,wissenschaft,populaerwissenschaft
+00002: freizeit-unterhaltung,reisen
+00003: kultur,musik
+00004: wissenschaft,populaerwissenschaft
+00005: freizeit-unterhaltung,reisen
+00006: freizeit-unterhaltung,reisen
+02439: kultur,musik,freizeit-unterhaltung,reisen