Adopted more legacy tests and removed search API from collections
Change-Id: I30d544b3f09b2b4971e2c39e793894498a62778e
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
index a41bc59..398c7b0 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
@@ -6,10 +6,21 @@
import de.ids_mannheim.korap.collection.CollectionBuilder;
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.index.TextAnalyzer;
+import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.KrillQuery;
+import de.ids_mannheim.korap.query.QueryBuilder;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+
+
import static org.junit.Assert.*;
import org.junit.Test;
@@ -316,6 +327,174 @@
};
+ @Test
+ public void filterExampleFromLegacy () throws Exception {
+
+ // Construct index
+ KrillIndex ki = new KrillIndex();
+ // Indexing test files
+ for (String i : new String[] { "00001", "00002", "00003", "00004",
+ "00005", "00006", "02439" }) {
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
+ true);
+ };
+ ki.commit();
+
+ // Create Virtual collections:
+ KrillCollection kc = new KrillCollection(ki);
+
+ assertEquals("Documents", 7, kc.numberOf("documents"));
+
+ // The virtual collection consists of all documents that have
+ // the textClass "reisen" and "freizeit"
+
+ /* kc.filter(kf.and("textClass", "reisen").and("textClass",
+ "freizeit-unterhaltung"));
+ */
+
+ kc.fromBuilder(kc.build().andGroup().with(kc.build().term("textClass", "reisen")).with(kc.build().term("textClass", "freizeit-unterhaltung")));
+
+ assertEquals("Documents", 5, kc.numberOf("documents"));
+ assertEquals("Tokens", 1678, kc.numberOf("tokens"));
+ assertEquals("Sentences", 194, kc.numberOf("sentences"));
+ assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));
+
+
+ // Subset this to all documents that have also the text
+ // kc.filter(kf.and("textClass", "kultur"));
+ /*
+ kc.fromBuilder(
+ kc.build().andGroup().with(
+ kc.getBuilder()
+ ).with(
+ kc.build().term("textClass", "kultur")
+ )
+ );
+ */
+
+ kc.filter(kc.build().term("textClass", "kultur"));
+
+ assertEquals("Documents", 1, kc.numberOf("documents"));
+ assertEquals("Tokens", 405, kc.numberOf("tokens"));
+ assertEquals("Sentences", 75, kc.numberOf("sentences"));
+ assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
+
+
+ // kc.filter(kf.and("corpusID", "WPD"));
+ kc.filter(kc.build().term("corpusID", "WPD"));
+
+ assertEquals("Documents", 1, kc.numberOf("documents"));
+ assertEquals("Tokens", 405, kc.numberOf("tokens"));
+ assertEquals("Sentences", 75, kc.numberOf("sentences"));
+ assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
+
+ // Create a query
+ QueryBuilder kq = new QueryBuilder("tokens");
+ SpanQuery query = kq.seg("opennlp/p:NN").with("tt/p:NN").toQuery();
+
+ Result kr = ki.search(kc, query, 0, (short) 20, true, (short) 5, true, (short) 5);
+ assertEquals(kr.getTotalResults(), 70);
+
+
+ kc.extend(kc.build().term("textClass", "uninteresting"));
+ assertEquals("Documents", 1, kc.numberOf("documents"));
+
+ kc.extend(kc.build().term("textClass", "wissenschaft"));
+
+ assertEquals("Documents", 3, kc.numberOf("documents"));
+ assertEquals("Tokens", 1669, kc.numberOf("tokens"));
+ assertEquals("Sentences", 188, kc.numberOf("sentences"));
+ assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));
+ // System.err.println(kr.toJSON());
+ };
+
+
+ @Test
+ public void filterExampleAtomicLegacy () throws Exception {
+
+ // That's exactly the same test class, but with multiple atomic indices
+
+ // Construct index
+ KrillIndex ki = new KrillIndex();
+ // Indexing test files
+ for (String i : new String[] { "00001", "00002", "00003", "00004",
+ "00005", "00006", "02439" }) {
+ ki.addDoc(
+ getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
+ true);
+ ki.commit();
+ };
+
+ CollectionBuilder kf = new CollectionBuilder();
+
+ // Create Virtual collections:
+ KrillCollection kc = new KrillCollection(ki);
+
+ assertEquals("Documents", 7, kc.numberOf("documents"));
+
+ // If this is set - everything is fine automatically ...
+ kc.filter(kc.build().term("corpusID", "WPD"));
+
+ assertEquals("Documents", 7, kc.numberOf("documents"));
+
+ // The virtual collection consists of all documents that have the textClass "reisen" and "freizeit"
+
+ /*
+ kc.filter(kf.and("textClass", "reisen").and("textClass",
+ "freizeit-unterhaltung"));
+ */
+ kc.filter(kc.build().andGroup().with(kc.build().term("textClass", "reisen")).with(kc.build().term("textClass", "freizeit-unterhaltung")));
+
+ assertEquals("Documents", 5, kc.numberOf("documents"));
+ assertEquals("Tokens", 1678, kc.numberOf("tokens"));
+ assertEquals("Sentences", 194, kc.numberOf("sentences"));
+ assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));
+
+ // Subset this to all documents that have also the text
+ // kc.filter(kf.and("textClass", "kultur"));
+
+ kc.filter(kc.build().term("textClass", "kultur"));
+
+ assertEquals("Documents", 1, kc.numberOf("documents"));
+ assertEquals("Tokens", 405, kc.numberOf("tokens"));
+ assertEquals("Sentences", 75, kc.numberOf("sentences"));
+ assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
+
+ // This is already filtered though ...
+ // kc.filter(kf.and("corpusID", "WPD"));
+ kc.filter(kc.build().term("corpusID", "WPD"));
+
+ assertEquals("Documents", 1, kc.numberOf("documents"));
+ assertEquals("Tokens", 405, kc.numberOf("tokens"));
+ assertEquals("Sentences", 75, kc.numberOf("sentences"));
+ assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
+
+ // Create a query
+ QueryBuilder kq = new QueryBuilder("tokens");
+ SpanQuery query = kq.seg("opennlp/p:NN").with("tt/p:NN").toQuery();
+
+ Result kr = ki.search(kc, query, 0, (short) 20, true, (short) 5, true, (short) 5);
+ assertEquals(kr.getTotalResults(), 70);
+
+ // kc.extend(kf.and("textClass", "uninteresting"));
+ kc.extend(kc.build().term("textClass", "uninteresting"));
+
+ /*
+
+
+ assertEquals("Documents", 1, kc.numberOf("documents"));
+
+ kc.extend(kf.and("textClass", "wissenschaft"));
+
+ assertEquals("Documents", 3, kc.numberOf("documents"));
+ assertEquals("Tokens", 1669, kc.numberOf("tokens"));
+ assertEquals("Sentences", 188, kc.numberOf("sentences"));
+ assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));
+ */
+ };
+
+
private FieldDocument createDoc1 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-1");
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionLegacy.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionLegacy.java
index 287ceba..8b067b5 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionLegacy.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionLegacy.java
@@ -14,10 +14,10 @@
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.search.spans.SpanQuery;
import static org.junit.Assert.*;
import org.junit.Test;
+import org.junit.Ignore;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@@ -25,6 +25,7 @@
public class TestKrillCollectionLegacy {
@Test
+ @Ignore
public void filterExample () throws Exception {
// Construct index
@@ -92,6 +93,7 @@
@Test
+ @Ignore
public void filterExampleAtomic () throws Exception {
// That's exactly the same test class, but with multiple atomic indices
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
index 82724b0..464f4e1 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIndex.java
@@ -480,7 +480,7 @@
fail("Skipping may go horribly wrong! (Known issue)");
- kr = kc.search(sq);
+ kr = ki.search(kc, sq, 0, (short) 20, true, (short) 5, true, (short) 5);
// System.err.println(kr.getOverview());
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
index 2f23d35..01d1a4b 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
@@ -944,6 +944,7 @@
sc.right.setLength((short) 10);
Result kr = ks.apply(ki);
+
assertEquals(kr.getMatch(1).getSnippetBrackets(),
"... dezimalen [Wert] 65 sowohl ...");
assertEquals(kr.getTotalResults(), 3);
diff --git a/src/test/resources/queries/bsp-context-2.jsonld b/src/test/resources/queries/bsp-context-2.jsonld
index 481ab44..ddefd0e 100644
--- a/src/test/resources/queries/bsp-context-2.jsonld
+++ b/src/test/resources/queries/bsp-context-2.jsonld
@@ -1,29 +1,36 @@
{
- "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
- "query":{
- "@type":"koral:token",
- "wrap":{
- "@type":"koral:term",
- "foundry" : "mate",
- "layer":"l",
- "key":"wert",
- "match":"match:eq"
- }
- },
- "collections":[
- {
- "@type":"koral:meta-filter",
- "@value":{
- "@type":"koral:term",
- "@field":"koral:field#corpusID",
- "@value":"WPD"
- }
- }
- ],
- "meta":{
- "startPage":1,
- "count":25,
- "context":{"left":["char",210],"right":["char",210]},
- "cutOff":true
+ "@context": "http://ids-mannheim.de/ns/KorAP/json-ld/v0.1/context.jsonld",
+ "query":{
+ "@type":"koral:token",
+ "wrap":{
+ "@type":"koral:term",
+ "foundry" : "mate",
+ "layer":"l",
+ "key":"wert",
+ "match":"match:eq"
}
+ },
+ "collection" : {
+ "@type": "koral:doc",
+ "key": "corpusID",
+ "match": "match:eq",
+ "value": "WPD",
+ "type": "type:string"
+ },
+ "collections":[
+ {
+ "@type":"koral:meta-filter",
+ "@value":{
+ "@type":"koral:term",
+ "@field":"koral:field#corpusID",
+ "@value":"WPD"
+ }
+ }
+ ],
+ "meta":{
+ "startPage":1,
+ "count":25,
+ "context":{"left":["char",210],"right":["char",210]},
+ "cutOff":true
+ }
}
diff --git a/src/test/resources/queries/bsp-context-sentence.jsonld b/src/test/resources/queries/bsp-context-sentence.jsonld
index 28488b3..3ba2e94 100644
--- a/src/test/resources/queries/bsp-context-sentence.jsonld
+++ b/src/test/resources/queries/bsp-context-sentence.jsonld
@@ -10,6 +10,13 @@
"match":"match:eq"
}
},
+ "collection" : {
+ "@type": "koral:doc",
+ "key": "corpusID",
+ "match": "match:eq",
+ "value": "WPD",
+ "type": "type:string"
+ },
"collections":[
{
"@type":"koral:meta-filter",
diff --git a/src/test/resources/queries/bugs/multiterm_rewrite.jsonld b/src/test/resources/queries/bugs/multiterm_rewrite.jsonld
index 3ea96ea..e42d555 100644
--- a/src/test/resources/queries/bugs/multiterm_rewrite.jsonld
+++ b/src/test/resources/queries/bugs/multiterm_rewrite.jsonld
@@ -50,6 +50,13 @@
}
]
},
+ "collection" : {
+ "@type": "koral:doc",
+ "key": "corpusID",
+ "match": "match:eq",
+ "value": "WPD",
+ "type": "type:string"
+ },
"collections":[
{
"@type":"koral:meta-filter",
diff --git a/src/test/resources/queries/metaquery4.jsonld b/src/test/resources/queries/metaquery4.jsonld
index 0b6c263..3673d69 100644
--- a/src/test/resources/queries/metaquery4.jsonld
+++ b/src/test/resources/queries/metaquery4.jsonld
@@ -18,6 +18,26 @@
"match":"match:eq"
}
},
+ "collection" : {
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "key": "pubDate",
+ "match": "match:geq",
+ "value": "2000-01-01",
+ "type": "type:date"
+ },
+ {
+ "@type": "koral:doc",
+ "key": "pubDate",
+ "match": "match:leq",
+ "value": "2013-12-31",
+ "type": "type:date"
+ }
+ ]
+ },
"collections": [
{
"@type": "koral:meta-filter",
diff --git a/src/test/resources/queries/metaquery5.jsonld b/src/test/resources/queries/metaquery5.jsonld
index 06141c3..2d2ccd4 100644
--- a/src/test/resources/queries/metaquery5.jsonld
+++ b/src/test/resources/queries/metaquery5.jsonld
@@ -18,6 +18,32 @@
"match":"match:eq"
}
},
+ "collection" : {
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": [
+ {
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "key": "pubDate",
+ "match": "match:geq",
+ "value": "2000-01-01",
+ "type": "type:date"
+ },
+ {
+ "@type": "koral:doc",
+ "key": "pubDate",
+ "match": "match:leq",
+ "value": "2013-12-31",
+ "type": "type:date"
+ }
+ ]
+ }
+ ]
+ },
"collections": [
{
"@type": "koral:meta-filter",
diff --git a/src/test/resources/queries/metaquery6.jsonld b/src/test/resources/queries/metaquery6.jsonld
index 2d4b8e1..4abcd6f 100644
--- a/src/test/resources/queries/metaquery6.jsonld
+++ b/src/test/resources/queries/metaquery6.jsonld
@@ -17,6 +17,32 @@
"key":"lediglich"
}
},
+ "collection" : {
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": [
+ {
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "key": "pubDate",
+ "match": "match:geq",
+ "value": "2005-01-01",
+ "type": "type:date"
+ },
+ {
+ "@type": "koral:doc",
+ "key": "pubDate",
+ "match": "match:leq",
+ "value": "2013-12-31",
+ "type": "type:date"
+ }
+ ]
+ }
+ ]
+ },
"collections": [
{
"@type": "koral:meta-filter",
diff --git a/src/test/resources/queries/metas/fields.jsonld b/src/test/resources/queries/metas/fields.jsonld
index 845f3f8..7bac417 100644
--- a/src/test/resources/queries/metas/fields.jsonld
+++ b/src/test/resources/queries/metas/fields.jsonld
@@ -1,6 +1,13 @@
{
"@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
"announcements" : [],
+ "collection" : {
+ "@type": "koral:doc",
+ "key": "corpusID",
+ "match": "match:eq",
+ "value": "WPD",
+ "type": "type:string"
+ },
"collections" : [
{
"@type" : "koral:meta-filter",
diff --git a/src/test/resources/queries/metas/fields_2.jsonld b/src/test/resources/queries/metas/fields_2.jsonld
index 02c2474..fa10af4 100644
--- a/src/test/resources/queries/metas/fields_2.jsonld
+++ b/src/test/resources/queries/metas/fields_2.jsonld
@@ -1,6 +1,14 @@
{
"@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.2/context.jsonld",
"announcements" : [],
+ "collection" : {
+ "@type": "koral:doc",
+ "key": "corpusID",
+ "match": "match:eq",
+ "value": "WPD",
+ "type": "type:string"
+ },
+
"collections" : [
{
"@type" : "koral:meta-filter",