Real index test [broken]
diff --git a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java
index 2e557b4..ea35a63 100644
--- a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java
+++ b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollection.java
@@ -52,6 +52,13 @@
assertEquals("Sentences", 75, kc.numberOf("sentences"));
assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
+ kc.filter(kf.and("corpusID", "WPD"));
+
+ assertEquals("Documents", 1, kc.numberOf("documents"));
+ assertEquals("Tokens", 405, kc.numberOf("tokens"));
+ assertEquals("Sentences", 75, kc.numberOf("sentences"));
+ assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
+
// Create a query
KorapQuery kq = new KorapQuery("tokens");
SpanQuery query = kq.seg("opennlp/p:NN").with("tt/p:NN").toQuery();
@@ -67,7 +74,55 @@
assertEquals("Tokens", 1669, kc.numberOf("tokens"));
assertEquals("Sentences", 188, kc.numberOf("sentences"));
assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));
- System.err.println(kr.toJSON());
+ // System.err.println(kr.toJSON());
+ };
+
+
+    // Indexes the wiki fixtures in two commits, then checks the collection
+    // counts after each of two filter() calls and the hit count of a search.
+    @Test
+    public void filterExample2 () throws IOException {
+        // Construct index and add the first batch of test files
+        KorapIndex ki = new KorapIndex();
+        for (String i : new String[] {"00001", "00002", "00003", "00004", "00005", "00006", "02439"}) {
+            ki.addDocFile(
+                getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+            );
+        }
+        ki.commit();
+
+        // One more document is added after the first commit, followed by a second commit
+        ki.addDocFile(getClass().getResource("/wiki/AUG-55286.json.gz").getFile(), true);
+
+        ki.commit();
+
+        KorapFilter kf = new KorapFilter();
+
+        // Create a virtual collection filtered by two text classes
+        KorapCollection kc = new KorapCollection(ki);
+        kc.filter( kf.and("textClass", "reisen").and("textClass", "freizeit-unterhaltung") );
+        assertEquals("Documents", 6, kc.numberOf("documents"));
+        assertEquals("Tokens", 2089, kc.numberOf("tokens"));
+        assertEquals("Sentences", 234, kc.numberOf("sentences"));
+        assertEquals("Paragraphs", 141, kc.numberOf("paragraphs"));
+
+        // Second filter() call on the same collection; the expected counts drop
+        kc.filter( kf.and("corpusID", "A00") );
+
+        assertEquals("Documents", 1, kc.numberOf("documents"));
+        assertEquals("Tokens", 411, kc.numberOf("tokens"));
+        assertEquals("Sentences", 40, kc.numberOf("sentences"));
+        assertEquals("Paragraphs", 2, kc.numberOf("paragraphs"));
+
+        // Create a query
+        KorapQuery kq = new KorapQuery("tokens");
+        SpanQuery query = kq.seg("opennlp/p:NN").with("tt/p:NN").toQuery();
+
+        // Search within the filtered collection
+        KorapResult kr = kc.search(query);
+
+        assertEquals(87, kr.totalResults());
+        // System.out.println(kr.toJSON());
     };
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRealIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRealIndex.java
new file mode 100644
index 0000000..f7fcac1
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRealIndex.java
@@ -0,0 +1,74 @@
+import java.util.*;
+import java.io.*;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.index.FieldDocument;
+import de.ids_mannheim.korap.KorapCollection;
+import de.ids_mannheim.korap.KorapFilter;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.KorapQuery;
+import org.apache.lucene.store.MMapDirectory;
+import de.ids_mannheim.korap.filter.BooleanFilter;
+import org.apache.lucene.search.spans.SpanQuery;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Smoke test that queries a real index whose location is read from korap.conf. */
+@RunWith(JUnit4.class)
+public class TestRealIndex {
+    /**
+     * Opens the index named by lucene.index in korap.conf, filters by textClass
+     * "kultur", runs a nested span query and prints the result JSON to stderr.
+     */
+    @Test
+    public void realExample1 () throws IOException {
+        // Load the configuration as a stream: URL.getFile() returns a URL-encoded
+        // path, which FileReader cannot open once it contains e.g. spaces.
+        Properties prop = new Properties();
+        InputStream conf = getClass().getResourceAsStream("/korap.conf");
+        assertNotNull("korap.conf is missing from the test classpath", conf);
+        try {
+            prop.load(conf);
+        } finally {
+            conf.close();
+        }
+
+        // Check if the configuration was loaded fine (expected value goes first)
+        assertEquals("true", prop.getProperty("lucene.properties"));
+
+        String indexDir = prop.getProperty("lucene.index");
+        assertNotNull("lucene.index is not set in korap.conf", indexDir);
+
+        // Get the real index
+        KorapIndex ki = new KorapIndex(new MMapDirectory(new File(indexDir)));
+
+        // Container for virtual collections plus the filter generator
+        KorapCollection kc = new KorapCollection(ki);
+        KorapFilter kf = new KorapFilter();
+
+        // This is real slow atm - sorry
+        kc.filter(kf.and("textClass", "kultur"));
+
+        // Create a query
+        KorapQuery kq = new KorapQuery("tokens");
+        SpanQuery query =
+            kq.within(
+                kq.tag("xip/const:NPA"),
+                kq._(1,
+                    kq.seq(
+                        kq._(2, kq.seg("cnx/p:A").with("mate/m:number:sg"))
+                    ).append(
+                        kq.seg("opennlp/p:NN").with("tt/p:NN")
+                    )
+                )
+            ).toQuery();
+
+        // No assertion on the hits: the result is only dumped for inspection
+        KorapResult kr = kc.search(query);
+        System.err.println(kr.toJSON());
+    };
+};
\ No newline at end of file
diff --git a/src/test/resources/korap.conf b/src/test/resources/korap.conf
new file mode 100644
index 0000000..555b1a8
--- /dev/null
+++ b/src/test/resources/korap.conf
@@ -0,0 +1,5 @@
+# Lucene backend properties (lucene.index is a machine-specific absolute path; adjust it to a local index)
+lucene.properties = true
+lucene.index = /home/ndiewald/Repositories/korap/KorAP-modules/KorAP-lucene-index/sandbox/index
+lucene.index.commit.count = 5000
+lucene.index.commit.log = log/korap.commit.log
\ No newline at end of file
diff --git a/src/test/resources/queries/bsp1.json b/src/test/resources/queries/bsp1.json
new file mode 100644
index 0000000..4085309
--- /dev/null
+++ b/src/test/resources/queries/bsp1.json
@@ -0,0 +1,76 @@
+{
+ "@context":{
+ "korap":"http://korap.ids-mannheim.de/ns/query",
+ "@language":"de",
+ "operands":{
+ "@id":"korap:operands",
+ "@container":"@list"
+ },
+ "relation":{
+ "@id":"korap:relation",
+ "@type":"korap:relation#types"
+ },
+ "class":{
+ "@id":"korap:class",
+ "@type":"xsd:integer"
+ },
+ "query":"korap:query",
+ "filter":"korap:filter",
+ "meta":"korap:meta"
+ },
+ "query":{
+ "@type":"korap:group",
+ "relation":"or",
+ "operands":[
+ {
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"base:foo",
+ "relation":"="
+ }
+ },
+ {
+ "@type":"korap:sequence",
+ "operands":[
+ {
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"base:foo",
+ "relation":"="
+ }
+ },
+ {
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"base:bar",
+ "relation":"="
+ },
+ "repetition":"*"
+ }
+ ]
+ }
+ ]
+ },
+ "meta":{
+ "@type":"korap:meta",
+ "@value":{
+ "@type":"korap:group",
+ "operands":[
+ {
+ "@type":"korap:term",
+ "@value":"author:Goethe",
+ "relation":"="
+ },
+ {
+ "@type":"korap:term",
+ "@value":"year:1815",
+ "relation":"="
+ }
+ ],
+ "relation":"and"
+ }
+ }
+}
diff --git a/src/test/resources/queries/bsp2.json b/src/test/resources/queries/bsp2.json
new file mode 100644
index 0000000..f97a4fd
--- /dev/null
+++ b/src/test/resources/queries/bsp2.json
@@ -0,0 +1,56 @@
+{
+ "@context":{
+ "korap":"http://korap.ids-mannheim.de/ns/query",
+ "@language":"de",
+ "operands":{
+ "@id":"korap:operands",
+ "@container":"@list"
+ },
+ "relation":{
+ "@id":"korap:relation",
+ "@type":"korap:relation#types"
+ },
+ "class":{
+ "@id":"korap:class",
+ "@type":"xsd:integer"
+ },
+ "query":"korap:query",
+ "filter":"korap:filter",
+ "meta":"korap:meta"
+ },
+ "query":{
+ "@type":"korap:sequence",
+ "operands":[
+ {
+ "@type":"korap:group",
+ "relation":"or",
+ "operands":[
+ {
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"base:foo",
+ "relation":"="
+ }
+ },
+ {
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"base:bar",
+ "relation":"="
+ }
+ }
+ ]
+ },
+ {
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"base:foobar",
+ "relation":"="
+ }
+ }
+ ]
+ }
+}
diff --git a/src/test/resources/queries/bsp3.json b/src/test/resources/queries/bsp3.json
new file mode 100644
index 0000000..10a9f21
--- /dev/null
+++ b/src/test/resources/queries/bsp3.json
@@ -0,0 +1,41 @@
+{
+ "@context":{
+ "korap":"http://korap.ids-mannheim.de/ns/query",
+ "@language":"de",
+ "operands":{
+ "@id":"korap:operands",
+ "@container":"@list"
+ },
+ "relation":{
+ "@id":"korap:relation",
+ "@type":"korap:relation#types"
+ },
+ "class":{
+ "@id":"korap:class",
+ "@type":"xsd:integer"
+ },
+ "query":"korap:query",
+ "filter":"korap:filter",
+ "meta":"korap:meta"
+ },
+ "query":{
+ "@type":"korap:sequence",
+ "shrink":"0",
+ "operands":[
+ {
+ "@type":"korap:group",
+ "class":"0",
+ "operands":[
+ {
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"base:foo",
+ "relation":"="
+ }
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/test/resources/queries/bsp4.json b/src/test/resources/queries/bsp4.json
new file mode 100644
index 0000000..e98afbd
--- /dev/null
+++ b/src/test/resources/queries/bsp4.json
@@ -0,0 +1,49 @@
+{
+ "@context":{
+ "korap":"http://korap.ids-mannheim.de/ns/query",
+ "@language":"de",
+ "operands":{
+ "@id":"korap:operands",
+ "@container":"@list"
+ },
+ "relation":{
+ "@id":"korap:relation",
+ "@type":"korap:relation#types"
+ },
+ "class":{
+ "@id":"korap:class",
+ "@type":"xsd:integer"
+ },
+ "query":"korap:query",
+ "filter":"korap:filter",
+ "meta":"korap:meta"
+ },
+ "query":{
+ "@type":"korap:sequence",
+ "shrink":"0",
+ "operands":[
+ {
+ "@type":"korap:group",
+ "class":"0",
+ "operands":[
+ {
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"base:foo",
+ "relation":"="
+ }
+ }
+ ]
+ },
+ {
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"orth:bar",
+ "relation":"="
+ }
+ }
+ ]
+ }
+}
diff --git a/src/test/resources/queries/bsp5.json b/src/test/resources/queries/bsp5.json
new file mode 100644
index 0000000..246df1e
--- /dev/null
+++ b/src/test/resources/queries/bsp5.json
@@ -0,0 +1,49 @@
+{
+ "@context":{
+ "korap":"http://korap.ids-mannheim.de/ns/query",
+ "@language":"de",
+ "operands":{
+ "@id":"korap:operands",
+ "@container":"@list"
+ },
+ "relation":{
+ "@id":"korap:relation",
+ "@type":"korap:relation#types"
+ },
+ "class":{
+ "@id":"korap:class",
+ "@type":"xsd:integer"
+ },
+ "query":"korap:query",
+ "filter":"korap:filter",
+ "meta":"korap:meta"
+ },
+ "query":{
+ "@type":"korap:sequence",
+ "shrink":"1",
+ "operands":[
+ {
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"base:Der",
+ "relation":"="
+ }
+ },
+ {
+ "@type":"korap:group",
+ "class":"1",
+ "operands":[
+ {
+ "@type":"korap:token",
+ "@value":{
+ "@type":"korap:term",
+ "@value":"base:Mann",
+ "relation":"="
+ }
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/src/test/resources/queries/readme.txt b/src/test/resources/queries/readme.txt
new file mode 100644
index 0000000..b82454f
--- /dev/null
+++ b/src/test/resources/queries/readme.txt
@@ -0,0 +1,5 @@
+bsp1.json: [base=foo]|([base=foo][base=bar]*) meta author=Goethe&year=1815
+bsp2.json: ([base=foo]|[base=bar])[base=foobar]
+bsp3.json: shrink({[base=foo]})
+bsp4.json: shrink({[base=foo]}[orth=bar])
+bsp5.json: shrink(1:[base=Der]{1:[base=Mann]})