Support for 'or' and nested groups in collections
diff --git a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java
index 2d10b6f..bc8a5bb 100644
--- a/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/filter/TestKorapCollectionJSON.java
@@ -18,8 +18,8 @@
KorapCollection kc = new KorapCollection(metaQuery);
assertEquals("filter with QueryWrapperFilter(+textClass:wissenschaft)", kc.getFilter(0).toString());
- assertEquals("filter with QueryWrapperFilter(+pubPlace:Erfurt +author:Hesse)", kc.getFilter(1).toString());
- assertEquals("extend with QueryWrapperFilter(+pubDate:[20110429 TO 20131231] +textClass:freizeit)", kc.getFilter(2).toString());
+ assertEquals("filter with QueryWrapperFilter(+(+pubPlace:Erfurt +author:Hesse))", kc.getFilter(1).toString());
+ assertEquals("extend with QueryWrapperFilter(+(+pubDate:[20110429 TO 20131231] +textClass:freizeit))", kc.getFilter(2).toString());
assertEquals(3, kc.getCount());
};
@@ -29,7 +29,7 @@
String metaQuery = getString(getClass().getResource("/queries/metaquery2.jsonld").getFile());
KorapCollection kc = new KorapCollection(metaQuery);
assertEquals(1,kc.getCount());
- assertEquals("filter with QueryWrapperFilter(+author:Hesse +pubDate:[0 TO 20131205])",kc.getFilter(0).toString());
+ assertEquals("filter with QueryWrapperFilter(+(+author:Hesse +pubDate:[0 TO 20131205]))",kc.getFilter(0).toString());
};
@Test
@@ -40,6 +40,16 @@
assertEquals("filter with QueryWrapperFilter(+pubDate:[20000101 TO 20131231])",kc.getFilter(0).toString());
};
+ @Test
+ public void metaQuery7 () { // Parses metaquery7.jsonld and checks how its "or" and "and" groups are rendered as filters.
+ String metaQuery = getString(getClass().getResource("/queries/metaquery7.jsonld").getFile());
+ KorapCollection kc = new KorapCollection(metaQuery);
+ assertEquals(2,kc.getCount()); // the fixture declares two meta-filters in its "collections" array
+ assertEquals("filter with QueryWrapperFilter(+(corpusID:c-1 corpusID:c-2))",kc.getFilter(0).toString()); // "or" group: inner clauses carry no '+' prefix
+ assertEquals("filter with QueryWrapperFilter(+(+corpusID:d-1 +corpusID:d-2))",kc.getFilter(1).toString()); // "and" group: every inner clause carries a '+' prefix
+ };
+
+
public static String getString (String path) {
StringBuilder contentBuilder = new StringBuilder();
diff --git a/src/test/java/de/ids_mannheim/korap/filter/TestKorapFilter.java b/src/test/java/de/ids_mannheim/korap/filter/TestKorapFilter.java
index 661e45b..4651dbf 100644
--- a/src/test/java/de/ids_mannheim/korap/filter/TestKorapFilter.java
+++ b/src/test/java/de/ids_mannheim/korap/filter/TestKorapFilter.java
@@ -25,6 +25,8 @@
assertEquals("+textClass:tree +textClass:sport", kf.and("textClass","tree").and("textClass","sport").toString());
assertEquals("+textClass:tree +textClass:sport textClass:news", kf.and("textClass","tree").and("textClass","sport").or("textClass","news").toString());
assertEquals("+textClass:tree +textClass:sport +textClass:news", kf.and("textClass", "tree", "sport", "news").toString());
+
+ assertEquals("corpusID:c-1 corpusID:c-2 corpusID:c-3", kf.or("corpusID", "c-1", "c-2", "c-3").toString());
};
@Test
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRealIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRealIndex.java
index 3ac5a32..6be837f 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRealIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRealIndex.java
@@ -12,6 +12,7 @@
import org.apache.lucene.store.MMapDirectory;
import de.ids_mannheim.korap.filter.BooleanFilter;
import org.apache.lucene.search.spans.SpanQuery;
+import de.ids_mannheim.korap.util.QueryException;
import static org.junit.Assert.*;
import org.junit.Test;
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
index 9b5be8b..ecbf426 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKorapSearch.java
@@ -279,6 +279,69 @@
};
+ @Test
+ public void searchJSONCollection () throws IOException { // Runs one token query against a small index under progressively stricter collection filters.
+
+ // Construct index
+ KorapIndex ki = new KorapIndex();
+ // Indexing test files
+ for (String i : new String[] {"00001", "00002", "00003", "00004", "00005", "00006", "02439"}) {
+ ki.addDocFile(
+ getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+ );
+ };
+ ki.commit();
+
+ String json = getString(getClass().getResource("/queries/metaquery8-nocollection.jsonld").getFile()); // baseline: fixture has no "collections" section
+
+ KorapSearch ks = new KorapSearch(json);
+ KorapResult kr = ks.run(ki);
+ assertEquals(276, kr.getTotalResults());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(10, kr.getItemsPerPage());
+
+ json = getString(getClass().getResource("/queries/metaquery8.jsonld").getFile()); // one filter: ID is WPD_AAA.00001 or WPD_AAA.00002
+
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+
+ assertEquals(147, kr.getTotalResults());
+ assertEquals("WPD_AAA.00001", kr.getMatch(0).getDocID());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(10, kr.getItemsPerPage());
+
+ json = getString(getClass().getResource("/queries/metaquery8-filtered.jsonld").getFile()); // adds a second filter: ID is WPD_AAA.00003 or WPD_AAA.00002
+
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+
+ assertEquals(28, kr.getTotalResults());
+ assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID()); // NOTE(review): presumably only WPD_AAA.00002 satisfies both filters - confirm against KorapCollection
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(10, kr.getItemsPerPage());
+
+ json = getString(getClass().getResource("/queries/metaquery8-filtered-further.jsonld").getFile()); // adds a third filter: ID is WPD_AAA.00001 or WPD_AAA.00005
+
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+
+ assertEquals(0, kr.getTotalResults()); // no hits are expected once all three filters apply
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(10, kr.getItemsPerPage());
+
+ json = getString(getClass().getResource("/queries/metaquery8-filtered-nested.jsonld").getFile()); // second filter nests an "and" group on tokens inside an "or" group on ID
+
+ ks = new KorapSearch(json);
+ kr = ks.run(ki);
+
+ assertEquals("filter with QueryWrapperFilter(+(ID:WPD_AAA.00003 (+tokens:s:die +tokens:s:Schriftzeichen)))", ks.getCollection().getFilter(1).toString()); // nested group is expected as a parenthesised sub-clause
+
+ assertEquals(119, kr.getTotalResults());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(10, kr.getItemsPerPage());
+
+ };
+
public static String getString (String path) {
StringBuilder contentBuilder = new StringBuilder();
diff --git a/src/test/resources/queries/metaquery7.jsonld b/src/test/resources/queries/metaquery7.jsonld
new file mode 100644
index 0000000..a7114e6
--- /dev/null
+++ b/src/test/resources/queries/metaquery7.jsonld
@@ -0,0 +1,60 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "meta":{
+ "startPage" : 1,
+ "count" : 5,
+ "context" : {
+ "left" : [ "token", 3 ],
+ "right" : [ "char", 6 ]
+ }
+ },
+ "query":{
+ "@type":"korap:token",
+ "wrap":{
+ "@type":"korap:term",
+ "foundry":"mate",
+ "layer":"l",
+ "key":"lediglich"
+ }
+ },
+ "collections": [
+ {
+ "@type": "korap:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "korap:group",
+ "relation": "or",
+ "@field": "korap:field#corpusID",
+ "operands": [
+ {
+ "@type": "korap:term",
+ "@value": "c-1"
+ },
+ {
+ "@type": "korap:term",
+ "@value": "c-2"
+ }
+ ]
+ }
+ },
+ {
+ "@type": "korap:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "korap:group",
+ "relation": "and",
+ "@field": "korap:field#corpusID",
+ "operands": [
+ {
+ "@type": "korap:term",
+ "@value": "d-1"
+ },
+ {
+ "@type": "korap:term",
+ "@value": "d-2"
+ }
+ ]
+ }
+ }
+ ]
+}
diff --git a/src/test/resources/queries/metaquery8-filtered-further.jsonld b/src/test/resources/queries/metaquery8-filtered-further.jsonld
new file mode 100644
index 0000000..945a2fa
--- /dev/null
+++ b/src/test/resources/queries/metaquery8-filtered-further.jsonld
@@ -0,0 +1,80 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type": "korap:token",
+ "wrap": {
+ "@type": "korap:term",
+ "foundry": "mate",
+ "layer" : "lemma",
+ "key":"der",
+ "match": "match:eq"
+ }
+ },
+ "meta":{
+ "startPage":1,
+ "count": 10,
+ "context":{
+ "left":["char",90],
+ "right":["char",90]
+ }
+ },
+ "collections": [
+ {
+ "@type": "korap:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "korap:group",
+ "relation": "or",
+ "@field": "korap:field#ID",
+ "operands": [
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00001"
+ },
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00002"
+ }
+ ]
+ }
+ },
+ {
+ "@type": "korap:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "korap:group",
+ "relation": "or",
+ "@field": "korap:field#ID",
+ "operands": [
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00003"
+ },
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00002"
+ }
+ ]
+ }
+ },
+ {
+ "@type": "korap:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "korap:group",
+ "relation": "or",
+ "@field": "korap:field#ID",
+ "operands": [
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00001"
+ },
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00005"
+ }
+ ]
+ }
+ }
+ ]
+}
diff --git a/src/test/resources/queries/metaquery8-filtered-nested.jsonld b/src/test/resources/queries/metaquery8-filtered-nested.jsonld
new file mode 100644
index 0000000..ebb56ab
--- /dev/null
+++ b/src/test/resources/queries/metaquery8-filtered-nested.jsonld
@@ -0,0 +1,73 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type": "korap:token",
+ "wrap": {
+ "@type": "korap:term",
+ "foundry": "mate",
+ "layer" : "lemma",
+ "key":"der",
+ "match": "match:eq"
+ }
+ },
+ "meta":{
+ "startPage":1,
+ "count": 10,
+ "context":{
+ "left":["char",90],
+ "right":["char",90]
+ }
+ },
+ "collections": [
+ {
+ "@type": "korap:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "korap:group",
+ "relation": "or",
+ "@field": "korap:field#ID",
+ "operands": [
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00001"
+ },
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00002"
+ }
+ ]
+ }
+ },
+ {
+ "@type": "korap:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "korap:group",
+ "relation": "or",
+ "@field": "korap:field#ID",
+ "operands": [
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00003"
+ },
+ {
+ "@type": "korap:group",
+ "relation": "and",
+ "@field": "korap:field#tokens",
+ "operands": [
+ {
+ "@type": "korap:term",
+ "@value": "s:die"
+ },
+ {
+ "@type": "korap:term",
+ "@value": "s:Schriftzeichen"
+ }
+
+ ]
+ }
+ ]
+ }
+ }
+ ]
+}
diff --git a/src/test/resources/queries/metaquery8-filtered.jsonld b/src/test/resources/queries/metaquery8-filtered.jsonld
new file mode 100644
index 0000000..ddb51cf
--- /dev/null
+++ b/src/test/resources/queries/metaquery8-filtered.jsonld
@@ -0,0 +1,61 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type": "korap:token",
+ "wrap": {
+ "@type": "korap:term",
+ "foundry": "mate",
+ "layer" : "lemma",
+ "key":"der",
+ "match": "match:eq"
+ }
+ },
+ "meta":{
+ "startPage":1,
+ "count": 10,
+ "context":{
+ "left":["char",90],
+ "right":["char",90]
+ }
+ },
+ "collections": [
+ {
+ "@type": "korap:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "korap:group",
+ "relation": "or",
+ "@field": "korap:field#ID",
+ "operands": [
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00001"
+ },
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00002"
+ }
+ ]
+ }
+ },
+ {
+ "@type": "korap:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "korap:group",
+ "relation": "or",
+ "@field": "korap:field#ID",
+ "operands": [
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00003"
+ },
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00002"
+ }
+ ]
+ }
+ }
+ ]
+}
diff --git a/src/test/resources/queries/metaquery8-nocollection.jsonld b/src/test/resources/queries/metaquery8-nocollection.jsonld
new file mode 100644
index 0000000..6edf506
--- /dev/null
+++ b/src/test/resources/queries/metaquery8-nocollection.jsonld
@@ -0,0 +1,21 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type": "korap:token",
+ "wrap": {
+ "@type": "korap:term",
+ "foundry": "mate",
+ "layer" : "lemma",
+ "key":"der",
+ "match": "match:eq"
+ }
+ },
+ "meta":{
+ "startPage":1,
+ "count": 10,
+ "context":{
+ "left":["char",90],
+ "right":["char",90]
+ }
+ }
+}
diff --git a/src/test/resources/queries/metaquery8.jsonld b/src/test/resources/queries/metaquery8.jsonld
new file mode 100644
index 0000000..77dfc4c
--- /dev/null
+++ b/src/test/resources/queries/metaquery8.jsonld
@@ -0,0 +1,42 @@
+{
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query": {
+ "@type": "korap:token",
+ "wrap": {
+ "@type": "korap:term",
+ "foundry": "mate",
+ "layer" : "lemma",
+ "key":"der",
+ "match": "match:eq"
+ }
+ },
+ "meta":{
+ "startPage":1,
+ "count": 10,
+ "context":{
+ "left":["char",90],
+ "right":["char",90]
+ }
+ },
+ "collections": [
+ {
+ "@type": "korap:meta-filter",
+ "@id": "korap-filter#id-1223232",
+ "@value": {
+ "@type": "korap:group",
+ "relation": "or",
+ "@field": "korap:field#ID",
+ "operands": [
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00001"
+ },
+ {
+ "@type": "korap:term",
+ "@value": "WPD_AAA.00002"
+ }
+ ]
+ }
+ }
+ ]
+}