Moved and updated cache-tests from TestKrillCollectionIndex to
TestVirtualCorpusCache (resolved #44).
Change-Id: Ia5a0bf208fe8f95bc38ea118bd6d27172eda808f
diff --git a/Changes b/Changes
index 48d360c..d58d561 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,8 @@
-0.60.2 2022-01-03
+0.60.2 2022-01-11
- [security] More log4j updates (diewald)
- [feature] Support for field value vector method (fixes #81; diewald)
+ - [cleanup] Moved and updated cache-tests from TestKrillCollectionIndex to
+ TestVirtualCorpusCache (resolved #44; margaretha)
0.60.1 2021-12-17
- [feature] Added vc loading from classpath (margaretha)
diff --git a/src/main/java/de/ids_mannheim/korap/collection/VirtualCorpusFilter.java b/src/main/java/de/ids_mannheim/korap/collection/VirtualCorpusFilter.java
index 883e069..489f944 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/VirtualCorpusFilter.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/VirtualCorpusFilter.java
@@ -100,6 +100,6 @@
@Override
public String toString () {
- return "VirtualCorpusReferenceFilter("+vcId+")";
+ return "vcFilter("+vcId+")";
}
}
diff --git a/src/test/java/de/ids_mannheim/korap/cache/TestVirtualCorpusCache.java b/src/test/java/de/ids_mannheim/korap/cache/TestVirtualCorpusCache.java
index 2235f2a..9630767 100644
--- a/src/test/java/de/ids_mannheim/korap/cache/TestVirtualCorpusCache.java
+++ b/src/test/java/de/ids_mannheim/korap/cache/TestVirtualCorpusCache.java
@@ -14,15 +14,25 @@
import org.junit.Test;
import de.ids_mannheim.korap.Krill;
+import de.ids_mannheim.korap.KrillCollection;
import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.collection.CollectionBuilder;
import de.ids_mannheim.korap.collection.DocBits;
+import de.ids_mannheim.korap.collection.TestKrillCollectionIndex;
+import de.ids_mannheim.korap.query.QueryBuilder;
import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.util.KrillProperties;
import de.ids_mannheim.korap.util.QueryException;
public class TestVirtualCorpusCache {
private KrillIndex ki;
private String queryRefJson;
+ private String queryRefJson2;
+ private String named_vc1 = "named-vc1";
+ private String named_vc2 = "named-vc2";
+ private String named_vc3 = "named-vc3";
+ private String named_vc4 = "named-vc4";
public TestVirtualCorpusCache () throws IOException {
ki = createIndex();
@@ -30,6 +40,10 @@
String file = "/queries/collections/vc-ref/query-with-vc-ref.jsonld";
InputStream is = getClass().getResourceAsStream(file);
queryRefJson = IOUtils.toString(is, "utf-8");
+
+ file = "/queries/collections/vc-ref/query-with-vc-ref2.jsonld";
+ is = getClass().getResourceAsStream(file);
+ queryRefJson2 = IOUtils.toString(is, "utf-8");
}
@@ -68,12 +82,13 @@
assertFalse(VirtualCorpusCache.contains(vcId));
}
+
@Test
public void testReferToUncachedVC () throws IOException, QueryException {
String vcId = "named-vc1";
assertFalse(VirtualCorpusCache.contains(vcId));
- Krill krill = new Krill(queryRefJson);
+ Krill krill = new Krill(queryRefJson2);
Result result = krill.apply(ki);
assertEquals(27, result.getTotalResults());
@@ -92,36 +107,33 @@
// VC cache will be marked for cleaning up
// because of storing a new VC
KrillIndex ki = createIndex();
- Krill krill = new Krill(queryRefJson);
+ Krill krill = new Krill(queryRefJson2);
Result result = krill.apply(ki);
assertEquals(27, result.getTotalResults());
- assertEquals(2,
- VirtualCorpusCache.map.get(vcId).keySet().size());
+ assertEquals(2, VirtualCorpusCache.map.get(vcId).keySet().size());
ki.delDoc(2);
ki.commit();
// VC cache will be marked for cleaning up again
// because of index change.
- krill = new Krill(queryRefJson);
+ krill = new Krill(queryRefJson2);
result = krill.apply(ki);
assertEquals(17, result.getTotalResults());
// The old leaf fingerprint should be cleaned up, thus the map
// should have the same size. But the fingerprints should be
// different from before the 1st cleaning up
- assertEquals(2,
- VirtualCorpusCache.map.get(vcId).keySet().size());
+ assertEquals(2, VirtualCorpusCache.map.get(vcId).keySet().size());
// VC cache will be cleaned up for the 2nd time
// resulting the same leaf-fingerprints
- krill = new Krill(queryRefJson);
+ krill = new Krill(queryRefJson2);
result = krill.apply(ki);
assertEquals(17, result.getTotalResults());
- assertEquals(2,
- VirtualCorpusCache.map.get(vcId).keySet().size());
+ assertEquals(2, VirtualCorpusCache.map.get(vcId).keySet().size());
ki.close();
@@ -129,22 +141,343 @@
assertFalse(VirtualCorpusCache.contains(vcId));
}
+
@Test
public void testCleanUpVC () throws QueryException, IOException {
VirtualCorpusCache.CAPACITY = 3;
-
+
VirtualCorpusCache.store("named-vc1", ki);
VirtualCorpusCache.store("named-vc2", ki);
VirtualCorpusCache.store("named-vc3", ki);
VirtualCorpusCache.store("named-vc4", ki);
-
+
assertEquals(3, VirtualCorpusCache.map.size());
assertEquals(4, VirtualCorpusCache.vcToCleanUp.size());
-
- Krill krill = new Krill(queryRefJson);
+
+ Krill krill = new Krill(queryRefJson2);
Result result = krill.apply(ki);
assertEquals(27, result.getTotalResults());
-
+
+ VirtualCorpusCache.reset();
+ assertFalse(VirtualCorpusCache.contains(named_vc1));
+ assertFalse(VirtualCorpusCache.contains(named_vc2));
+ assertFalse(VirtualCorpusCache.contains(named_vc3));
+ assertFalse(VirtualCorpusCache.contains(named_vc4));
+
+ }
+
+ // The following tests have been moved from TestKrillCollectionIndex
+ // and updated according to the new cache mechanism
+
+
+ @Test
+ public void testCache () throws IOException, QueryException {
+ KrillProperties.loadDefaultProperties();
+
+ KrillIndex ki = new KrillIndex();
+ ki.addDoc(TestKrillCollectionIndex.createDoc1());
+ ki.addDoc(TestKrillCollectionIndex.createDoc2());
+ ki.commit();
+
+ // add VC to cache manually
+ VirtualCorpusCache.store(named_vc1, ki);
+ VirtualCorpusCache.store(named_vc2, ki);
+
+ assertTrue(VirtualCorpusCache.contains(named_vc1));
+ assertTrue(VirtualCorpusCache.contains(named_vc2));
+
+ // Check for cache location
+ // assertFalse(KrillCollection.cache.isElementInMemory("named-vc1"));
+ // assertTrue(KrillCollection.cache.isElementOnDisk("named-vc1"));
+ // assertTrue(KrillCollection.cache.isElementInMemory("named-vc2"));
+ // assertTrue(KrillCollection.cache.isElementOnDisk("named-vc2"));
+
+ // references named-vc1: ID eq ["doc-2","doc-3"]
+
+ Krill krill = new Krill(queryRefJson);
+ // TODO: Better keep the reference
+ assertEquals("vcFilter(named-vc1)", krill.getCollection().toString());
+
+ Result result = krill.apply(ki);
+ assertEquals("[[a]] c d", result.getMatch(0).getSnippetBrackets());
+ assertEquals(result.getMatch(0).getUID(), 2);
+ assertEquals(result.getMatches().size(), 1);
+
+ ki.addDoc(TestKrillCollectionIndex.createDoc3());
+ ki.commit();
+
+ // Cache is not removed after index change
+ assertTrue(VirtualCorpusCache.contains(named_vc1));
+
+ krill = new Krill(queryRefJson);
+ assertEquals("vcFilter(named-vc1)", krill.getCollection().toString());
+ result = krill.apply(ki);
+
+ assertEquals("[[a]] c d", result.getMatch(0).getSnippetBrackets());
+ assertEquals("[[a]] d e", result.getMatch(1).getSnippetBrackets());
+ assertEquals(result.getMatches().size(), 2);
+
+ // Cache is not removed on deletion
+ ki.delDoc(2);
+ ki.commit();
+
+ // Check cache
+ assertTrue(VirtualCorpusCache.contains(named_vc1));
+
+ // Rerun query
+ krill = new Krill(queryRefJson);
+ assertEquals("vcFilter(named-vc1)", krill.getCollection().toString());
+ result = krill.apply(ki);
+ assertEquals("[[a]] d e", result.getMatch(0).getSnippetBrackets());
+ assertEquals(result.getMatches().size(), 1);
+
+ VirtualCorpusCache.reset();
+ assertFalse(VirtualCorpusCache.contains(named_vc1));
+ };
+
+
+ @Test
+ public void testNestedNamedVCs () throws IOException {
+ KrillProperties.loadDefaultProperties();
+
+ KrillIndex ki = new KrillIndex();
+ ki.addDoc(TestKrillCollectionIndex.createDoc1());
+ ki.addDoc(TestKrillCollectionIndex.createDoc2());
+ ki.addDoc(TestKrillCollectionIndex.createDoc3());
+ ki.commit();
+
+ // Check cache
+ assertFalse(VirtualCorpusCache.contains(named_vc1));
+ assertFalse(VirtualCorpusCache.contains(named_vc2));
+
+ QueryBuilder kq = new QueryBuilder("tokens");
+ KrillCollection kc = new KrillCollection(ki);
+ CollectionBuilder cb = kc.build();
+ Krill krill = new Krill(kq.seg("i:a"));
+
+ kc.fromBuilder(cb.orGroup().with(cb.referTo("named-vc1"))
+ .with(cb.referTo("named-vc2")));
+ krill.setCollection(kc);
+ // named-vc1: UID:[2,3]
+ // named-vc2: author:Frank (doc-1)
+
+ assertEquals("OrGroup(vcFilter(named-vc1) vcFilter(named-vc2))",
+ krill.getCollection().toString());
+
+ assertEquals("tokens:i:a", krill.getSpanQuery().toString());
+
+ Result result = krill.apply(ki);
+ assertEquals("[[a]] b c", result.getMatch(0).getSnippetBrackets());
+ assertEquals("[[a]] c d", result.getMatch(1).getSnippetBrackets());
+ assertEquals("[[a]] d e", result.getMatch(2).getSnippetBrackets());
+ assertEquals(3, result.getMatches().size());
+
+ assertTrue(VirtualCorpusCache.contains(named_vc2));
+
+ kc.fromBuilder(cb.orGroup().with(cb.referTo("named-vc1"))
+ .with(cb.referTo("named-vc2")));
+
+ assertEquals("OrGroup(vcFilter(named-vc1) vcFilter(named-vc2))",
+ krill.getCollection().toString());
+
+ result = krill.apply(ki);
+ assertEquals("[[a]] b c", result.getMatch(0).getSnippetBrackets());
+ assertEquals("[[a]] c d", result.getMatch(1).getSnippetBrackets());
+ assertEquals("[[a]] d e", result.getMatch(2).getSnippetBrackets());
+ assertEquals(3, result.getMatches().size());
+
+ // EM: Redundant?
+ kc.fromBuilder(cb.orGroup().with(cb.referTo("named-vc1"))
+ .with(cb.referTo("named-vc2")));
+
+ assertEquals("OrGroup(vcFilter(named-vc1) vcFilter(named-vc2))",
+ krill.getCollection().toString());
+
+ result = krill.apply(ki);
+ assertEquals("[[a]] b c", result.getMatch(0).getSnippetBrackets());
+ assertEquals("[[a]] c d", result.getMatch(1).getSnippetBrackets());
+ assertEquals("[[a]] d e", result.getMatch(2).getSnippetBrackets());
+ assertEquals(3, result.getMatches().size());
+
+ kc.fromBuilder(cb.referTo("named-vc1"));
+
+ assertEquals("vcFilter(named-vc1)", krill.getCollection().toString());
+
+ result = krill.apply(ki);
+ assertEquals("[[a]] c d", result.getMatch(0).getSnippetBrackets());
+ assertEquals("[[a]] d e", result.getMatch(1).getSnippetBrackets());
+ assertEquals(2, result.getMatches().size());
+
+
+ kc.fromBuilder(cb.referTo("named-vc2"));
+
+ assertEquals("vcFilter(named-vc2)", krill.getCollection().toString());
+
+ result = krill.apply(ki);
+ assertEquals("[[a]] b c", result.getMatch(0).getSnippetBrackets());
+ assertEquals(1, result.getMatches().size());
+
+ VirtualCorpusCache.reset();
+ };
+
+
+ @Test
+ public void testNamedVCsAfterQueryWithMissingDocs () throws IOException {
+ KrillProperties.loadDefaultProperties();
+
+ KrillIndex ki = new KrillIndex();
+ ki.addDoc(TestKrillCollectionIndex.createDoc1());
+ ki.commit();
+ ki.addDoc(TestKrillCollectionIndex.createDoc2());
+ ki.commit();
+ ki.addDoc(TestKrillCollectionIndex.createDoc3());
+ ki.commit();
+
+ // Check cache
+ assertFalse(VirtualCorpusCache.contains(named_vc1));
+ assertFalse(VirtualCorpusCache.contains(named_vc2));
+
+
+ QueryBuilder kq = new QueryBuilder("tokens");
+ KrillCollection kc = new KrillCollection(ki);
+ CollectionBuilder cb = kc.build();
+
+ // Check only for c and cache
+ Krill krill = new Krill(kq.seg("i:c"));
+
+ kc.fromBuilder(cb.orGroup().with(cb.referTo("named-vc1"))
+ .with(cb.referTo("named-vc2")));
+ krill.setCollection(kc);
+ // named-vc1: UID:[2,3]
+ // named-vc2: author:Frank (doc-1)
+
+ assertEquals("OrGroup(vcFilter(named-vc1) vcFilter(named-vc2))",
+ krill.getCollection().toString());
+
+ assertEquals("tokens:i:c", krill.getSpanQuery().toString());
+
+ Result result = krill.apply(ki);
+ assertEquals("a b [[c]]", result.getMatch(0).getSnippetBrackets());
+ assertEquals("a [[c]] d", result.getMatch(1).getSnippetBrackets());
+ assertEquals(2, result.getMatches().size());
+
+ assertTrue(VirtualCorpusCache.contains(named_vc2));
+
+ kc.fromBuilder(cb.orGroup().with(cb.referTo("named-vc1"))
+ .with(cb.referTo("named-vc2")));
+
+ assertEquals("OrGroup(vcFilter(named-vc1) vcFilter(named-vc2))",
+ krill.getCollection().toString());
+
+ // Check again for c with cache
+ result = krill.apply(ki);
+ assertEquals("a b [[c]]", result.getMatch(0).getSnippetBrackets());
+ assertEquals("a [[c]] d", result.getMatch(1).getSnippetBrackets());
+ assertEquals(2, result.getMatches().size());
+
+ // Check for a with cache
+ krill = new Krill(kq.seg("i:a"));
+ krill.setCollection(kc);
+
+ assertEquals("OrGroup(vcFilter(named-vc1) vcFilter(named-vc2))",
+ krill.getCollection().toString());
+
+ // Run the query for a with cache
+ result = krill.apply(ki);
+ assertEquals("[[a]] b c", result.getMatch(0).getSnippetBrackets());
+ assertEquals("[[a]] c d", result.getMatch(1).getSnippetBrackets());
+ assertEquals("[[a]] d e", result.getMatch(2).getSnippetBrackets());
+ assertEquals(3, result.getMatches().size());
+
+ VirtualCorpusCache.reset();
+ };
+
+
+ @Test
+ public void testNamedVCsAfterCorpusWithMissingDocs () throws IOException {
+ KrillProperties.loadDefaultProperties();
+
+ KrillIndex ki = new KrillIndex();
+ ki.addDoc(TestKrillCollectionIndex.createDoc1());
+ ki.commit();
+ ki.addDoc(TestKrillCollectionIndex.createDoc2());
+ ki.commit();
+ ki.addDoc(TestKrillCollectionIndex.createDoc3());
+ ki.commit();
+
+ // Check cache
+ assertFalse(VirtualCorpusCache.contains(named_vc1));
+ assertFalse(VirtualCorpusCache.contains(named_vc2));
+
+ QueryBuilder kq = new QueryBuilder("tokens");
+ KrillCollection kc = new KrillCollection(ki);
+ CollectionBuilder cb = kc.build();
+
+ // Check only for a and cache
+ Krill krill = new Krill(kq.seg("i:a"));
+
+ kc.fromBuilder(cb.andGroup().with(cb.term("textClass", "kultur"))
+ .with(cb.orGroup().with(cb.referTo("named-vc1"))
+ .with(cb.referTo("named-vc2"))));
+ krill.setCollection(kc);
+ // named-vc1: UID:[2,3]
+ // named-vc2: author:Frank (doc-1)
+ // textClass:kultur (doc-1,doc-2)
+
+ assertEquals(
+ "AndGroup(textClass:kultur OrGroup(vcFilter(named-vc1) vcFilter(named-vc2)))",
+ krill.getCollection().toString());
+
+ assertEquals("tokens:i:a", krill.getSpanQuery().toString());
+
+ Result result = krill.apply(ki);
+ assertEquals("[[a]] b c", result.getMatch(0).getSnippetBrackets());
+ assertEquals("[[a]] c d", result.getMatch(1).getSnippetBrackets());
+ assertEquals(2, result.getMatches().size());
+
+ // Check stored VC in cache
+ assertTrue(VirtualCorpusCache.contains(named_vc1));
+ assertTrue(VirtualCorpusCache.contains(named_vc2));
+
+ kc.fromBuilder(cb.orGroup().with(cb.referTo("named-vc1"))
+ .with(cb.referTo("named-vc2")));
+
+ assertEquals("OrGroup(vcFilter(named-vc1) vcFilter(named-vc2))",
+ krill.getCollection().toString());
+
+ // Check again for a with cache
+ result = krill.apply(ki);
+ assertEquals("[[a]] b c", result.getMatch(0).getSnippetBrackets());
+ assertEquals("[[a]] c d", result.getMatch(1).getSnippetBrackets());
+ assertEquals("[[a]] d e", result.getMatch(2).getSnippetBrackets());
+ assertEquals(3, result.getMatches().size());
+
VirtualCorpusCache.reset();
}
+
+
+ @Test
+ public void testCollectionWithVCRefAndPubDate ()
+ throws IOException, QueryException {
+ KrillIndex ki = new KrillIndex();
+ ki.addDoc(TestKrillCollectionIndex.createDoc2());
+ ki.addDoc(TestKrillCollectionIndex.createDoc3());
+ ki.addDoc(TestKrillCollectionIndex.createDoc5000());
+ ki.commit();
+
+ VirtualCorpusCache.store(named_vc3, ki);
+
+ assertTrue(VirtualCorpusCache.contains(named_vc3));
+
+ String file = "/queries/collections/collection-with-vc-ref-and-pubDate.jsonld";
+ InputStream is = getClass().getResourceAsStream(file);
+ String json = IOUtils.toString(is, "utf-8");
+
+ KrillCollection kc = new KrillCollection(json);
+ kc.setIndex(ki);
+ assertEquals(2, kc.numberOf("documents"));
+
+ VirtualCorpusCache.reset();
+ }
+
}
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilder.java b/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilder.java
index 9c500f1..c9d47ea 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilder.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilder.java
@@ -98,7 +98,7 @@
CollectionBuilder kc = new CollectionBuilder();
assertEquals(
- "OrGroup(VirtualCorpusReferenceFilter(example) opennlp:check)",
+ "OrGroup(vcFilter(example) opennlp:check)",
kc.orGroup().with(
kc.referTo("example")
).with(
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
index 7e80ba2..da7e158 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
@@ -619,7 +619,7 @@
KrillCollection kc = new KrillCollection(json);
assertEquals("referTo(https://korap.ids-mannheim.de/@ndiewald/MyCorpus)", kc.getBuilder().toString());
- assertEquals("VirtualCorpusReferenceFilter(https://korap.ids-mannheim.de/@ndiewald/MyCorpus)",kc.toString());
+ assertEquals("vcFilter(https://korap.ids-mannheim.de/@ndiewald/MyCorpus)",kc.toString());
QueryBuilder kq = new QueryBuilder("field");
@@ -631,7 +631,7 @@
assertEquals(StatusCodes.MISSING_COLLECTION, result.getError(0).getCode());
assertTrue(result.getError(0).getMessage().startsWith("Collection is not found"));
};
-
+
@Test
public void filterExampleFromLegacy () throws Exception {
@@ -1210,7 +1210,7 @@
return fd;
};
- private FieldDocument createDoc5000 () {
+ public static FieldDocument createDoc5000 () {
FieldDocument fd = new FieldDocument();
fd.addString("UID", "5000");
fd.addString("ID", "doc-5000");
diff --git a/src/test/resources/queries/collections/vc-ref/query-with-vc-ref.jsonld b/src/test/resources/queries/collections/vc-ref/query-with-vc-ref.jsonld
index 40d2c2c..86547ac 100644
--- a/src/test/resources/queries/collections/vc-ref/query-with-vc-ref.jsonld
+++ b/src/test/resources/queries/collections/vc-ref/query-with-vc-ref.jsonld
@@ -3,7 +3,7 @@
"wrap":{
"@type":"koral:term",
"layer":"orth",
- "key":"der",
+ "key":"a",
"match":"match:eq"
}
},
diff --git a/src/test/resources/queries/collections/vc-ref/query-with-vc-ref2.jsonld b/src/test/resources/queries/collections/vc-ref/query-with-vc-ref2.jsonld
new file mode 100644
index 0000000..40d2c2c
--- /dev/null
+++ b/src/test/resources/queries/collections/vc-ref/query-with-vc-ref2.jsonld
@@ -0,0 +1,14 @@
+{"query":{
+ "@type":"koral:token",
+ "wrap":{
+ "@type":"koral:term",
+ "layer":"orth",
+ "key":"der",
+ "match":"match:eq"
+ }
+ },
+ "collection": {
+ "@type": "koral:docGroupRef",
+ "ref": "named-vc1"
+ }
+}