Implemented auto-caching.
Change-Id: Id547073357ab626971e52c7d84f6e86deb05c2c0
diff --git a/Changes b/Changes
index d7a8a06..3cba834 100644
--- a/Changes
+++ b/Changes
@@ -1,8 +1,9 @@
-0.58.0 2018-07-26
+0.58.0 2018-07-30
- Implemented referencing cached collection (margaretha)
- Implemented deserialization of collection with array values and cache option (margaretha)
- Implemented caching collection (margaretha)
- Implemented KrillCollection cache clearing (margaretha)
+ - Implemented auto-caching (margaretha)
0.57 2018-04-05
- [feature] Support text queries in metadata
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollection.java b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
index 19f06ae..e408dfc 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
@@ -1,11 +1,14 @@
package de.ids_mannheim.korap;
+import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import org.apache.commons.io.IOUtils;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@@ -91,22 +94,6 @@
this.index = index;
};
-
- /**
- * Constructs a KrillCollection according to the given KrillIndex
- * and KoralQuery.
- *
- * KrillIndex is necessary for caching virtual corpora.
- *
- * @param index
- * @param jsonString
- */
- public KrillCollection (KrillIndex index, String jsonString) {
- this.index = index;
- createCollection(jsonString);
- };
-
-
/**
* Construct a new KrillCollection by passing a KoralQuery.
*
@@ -397,9 +384,26 @@
Element element = cache.get(ref);
if (element == null) {
- this.addError(StatusCodes.MISSING_COLLECTION,
- "Collection is not found.");
- return this.build().nothing();
+ String corpusQuery = loadVCFile(ref);
+ if (corpusQuery == null){
+ return this.build().nothing();
+ }
+ else{
+ JsonNode node;
+ try {
+ node = mapper.readTree(corpusQuery);
+ }
+ catch (IOException e) {
+ throw new QueryException(StatusCodes.INVALID_QUERY,
+ "Failed parsing collection query to JsonNode.");
+ }
+ if (!node.has("collection")){
+ this.addError(StatusCodes.MISSING_COLLECTION,
+ "KoralQuery does not contain a collection.");
+ return this.build().nothing();
+ }
+ return cb.toCacheVC(ref, this._fromKoral(node.at("/collection")));
+ }
}
else {
CachedVCData cc = (CachedVCData) element.getObjectValue();
@@ -412,6 +416,20 @@
throw new QueryException(813, "Collection type is not supported");
};
+
+    /**
+     * Reads the whole content of the file at the given path.
+     *
+     * If the file cannot be opened or read, an error with status
+     * code {@link StatusCodes#MISSING_COLLECTION} is added to this
+     * object and <tt>null</tt> is returned.
+     *
+     * @param ref
+     *            the path of the file to read
+     * @return the content of the file, or <tt>null</tt> if reading
+     *         failed
+     */
+    private String loadVCFile (String ref) {
+        // try-with-resources closes the stream on success as well as
+        // on failure; previously the stream was never closed.
+        try (FileInputStream fis = new FileInputStream(new File(ref))) {
+            // Decode explicitly as UTF-8 so the result does not
+            // depend on the platform default charset.
+            return IOUtils.toString(fis, "UTF-8");
+        }
+        catch (IOException e) {
+            this.addError(StatusCodes.MISSING_COLLECTION,
+                    "Collection is not found.");
+            return null;
+        }
+    }
/**
* Set the collection from a {@link CollectionBuilder} object.
@@ -798,7 +816,7 @@
this.cbi = cb.namedVC(cc);
return cc;
}
-
+
public String getName () {
return name;
}
diff --git a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
index b3ee552..5705f14 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
@@ -2,10 +2,13 @@
import java.io.IOException;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.Iterator;
+import java.util.Map;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.queries.TermsFilter;
+import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.PhraseQuery;
@@ -399,8 +402,47 @@
}
}
+
+    /**
+     * Wraps a sub {@link CollectionBuilder.Interface} to allow VC
+     * caching.
+     *
+     * The filter created by {@link #toFilter()} is a
+     * {@link ToCacheVCFilter} that receives the cache key, the doc id
+     * map of this wrapper, the wrapped builder and the filter of the
+     * wrapped builder.
+     *
+     * @author margaretha
+     *
+     */
+    public class ToCacheVC implements CollectionBuilder.Interface {
+
+        // Reference of the virtual corpus, passed on as cache key
+        private final String cacheKey;
+
+        // The wrapped collection builder
+        private final CollectionBuilder.Interface child;
+
+        // Handed to every filter created by toFilter()
+        private final Map<Integer, DocIdSet> docIdMap =
+                new HashMap<Integer, DocIdSet>();
+
+
+        public ToCacheVC (String vcRef, Interface cbi) {
+            this.cacheKey = vcRef;
+            this.child = cbi;
+        }
+
+
+        @Override
+        public Filter toFilter () {
+            final Filter childFilter = this.child.toFilter();
+            return new ToCacheVCFilter(this.cacheKey, this.docIdMap,
+                    this.child, childFilter);
+        }
+
+
+        @Override
+        public boolean isNegative () {
+            return this.child.isNegative();
+        }
+
+
+        /**
+         * Negation is not supported; the wrapper itself is returned
+         * unchanged.
+         */
+        @Override
+        public CollectionBuilder.Interface not () {
+            return this;
+        }
+    }
+
public Interface namedVC (CachedVCData cc) {
return new CollectionBuilder.CachedVC(cc);
}
+
+ /**
+  * Wraps the given collection builder in a {@link ToCacheVC}.
+  *
+  * @param vcRef
+  *            reference of the virtual corpus; the wrapper stores
+  *            it as its cache key
+  * @param cbi
+  *            the collection builder to wrap
+  * @return a new {@link ToCacheVC} around <tt>cbi</tt>
+  */
+ public Interface toCacheVC (String vcRef, Interface cbi) {
+ return new CollectionBuilder.ToCacheVC(vcRef, cbi);
+ }
};
diff --git a/src/main/java/de/ids_mannheim/korap/collection/ToCacheVCFilter.java b/src/main/java/de/ids_mannheim/korap/collection/ToCacheVCFilter.java
new file mode 100644
index 0000000..2928de3
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/collection/ToCacheVCFilter.java
@@ -0,0 +1,63 @@
+package de.ids_mannheim.korap.collection;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.FixedBitSet;
+
+import de.ids_mannheim.korap.KrillCollection;
+import de.ids_mannheim.korap.collection.CollectionBuilder.Interface;
+import net.sf.ehcache.Element;
+
+/**
+ * Filter wrapper that stores the doc id sets of a wrapped filter in
+ * the {@link KrillCollection#cache}.
+ *
+ * Each time a doc id set is requested for a segment, the documents
+ * accepted by the wrapped filter are copied into a bit set, the bit
+ * set is added to the doc id map under the hash code of the segment
+ * context, and the map is put into the cache under the cache key.
+ */
+public class ToCacheVCFilter extends Filter {
+
+    private final Filter filter;
+    private final CollectionBuilder.Interface cbi;
+    private final String cacheKey;
+    private final Map<Integer, DocIdSet> docIdMap;
+
+
+    /**
+     * @param cacheKey
+     *            key under which the cached data is put into the
+     *            cache
+     * @param docIdMap
+     *            map receiving one entry per requested segment
+     *            context
+     * @param cbi
+     *            the collection builder the filter was created from;
+     *            only consulted when the wrapped filter returns no
+     *            doc id set
+     * @param filter
+     *            the wrapped filter
+     */
+    public ToCacheVCFilter (String cacheKey, Map<Integer, DocIdSet> docIdMap,
+                            Interface cbi, Filter filter) {
+        this.cacheKey = cacheKey;
+        this.docIdMap = docIdMap;
+        this.cbi = cbi;
+        this.filter = filter;
+    }
+
+
+    /**
+     * Delegates to the wrapped filter and caches a copy of its result.
+     *
+     * @return the doc id set of the wrapped filter, unchanged (may be
+     *         <tt>null</tt>)
+     */
+    @Override
+    public DocIdSet getDocIdSet (LeafReaderContext context, Bits acceptDocs)
+            throws IOException {
+
+        DocIdSet docIdSet = filter.getDocIdSet(context, acceptDocs);
+
+        final LeafReader reader = context.reader();
+        // A new FixedBitSet starts with all bits cleared
+        FixedBitSet bitset = new FixedBitSet(reader.maxDoc());
+
+        if (docIdSet == null) {
+            // With a builder present the bit set stays empty;
+            // without one all documents are marked.
+            if (this.cbi == null) {
+                bitset.set(0, bitset.length());
+            }
+        }
+        else {
+            // DocIdSet.iterator() is allowed to return null when no
+            // document matches; FixedBitSet.or() would fail on null.
+            org.apache.lucene.search.DocIdSetIterator docs =
+                    docIdSet.iterator();
+            if (docs != null) {
+                bitset.or(docs);
+            }
+        }
+
+        docIdMap.put(context.hashCode(), new SerializableDocIdSet(bitset));
+        CachedVCData cachedVCData = new CachedVCData(docIdMap);
+
+        KrillCollection.cache.put(new Element(cacheKey, cachedVCData));
+
+        return docIdSet;
+    }
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/response/Notifications.java b/src/main/java/de/ids_mannheim/korap/response/Notifications.java
index 877c655..722dc6e 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Notifications.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Notifications.java
@@ -46,7 +46,7 @@
public class Notifications {
// Create object mapper for JSON generation
- ObjectMapper mapper = new ObjectMapper();
+ protected ObjectMapper mapper = new ObjectMapper();
private Messages warnings, errors, messages;
diff --git a/src/main/resources/ehcache.xml b/src/main/resources/ehcache.xml
index 0cb69b7..00465bf 100644
--- a/src/main/resources/ehcache.xml
+++ b/src/main/resources/ehcache.xml
@@ -1,4 +1,4 @@
-<ehcache xsi:noNamespaceSchemaLocation="ehcache.xsd"
+<ehcache xsi:noNamespaceSchemaLocation="http://www.ehcache.org/ehcache.xsd"
updateCheck="true" monitoring="autodetect" dynamicConfig="true">
<diskStore path="./krill_cache" />
@@ -10,11 +10,12 @@
timeToLiveSeconds="1200">
</defaultCache>
- <cache name='named_vc'
- eternal='true'
+ <cache name="named_vc"
+ eternal="true"
memoryStoreEvictionPolicy="LRU"
- maxBytesLocalHeap="256M"
- maxBytesLocalDisk="1G"
- overflowToDisk='true' />
+ maxBytesLocalHeap="256M" >
+ <!-- maxBytesLocalDisk="1G">
+ <persistence strategy="localTempSwap"/> -->
+ </cache>
</ehcache>
\ No newline at end of file
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestVCCaching.java b/src/test/java/de/ids_mannheim/korap/collection/TestVCCaching.java
index 745a606..427451d 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestVCCaching.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestVCCaching.java
@@ -37,6 +37,8 @@
testAddToCache();
testSearchCachedVC();
testClearCache();
+ testAddDocToIndex();
+ testDelDocFromIndex();
}
private void testAddToCache () throws IOException {
@@ -78,7 +80,6 @@
assertNull(element);
}
- @Test
public void testAddDocToIndex () throws IOException {
testAddToCache();
@@ -93,7 +94,6 @@
assertNull(element);
}
- @Test
public void testDelDocFromIndex () throws IOException {
testAddToCache();
@@ -103,4 +103,15 @@
Element element = KrillCollection.cache.get("cache-goe");
assertNull(element);
}
+
+    /**
+     * Runs the query stored in the test resource
+     * <tt>collection/query-with-vc-ref.jsonld</tt> against the index
+     * and checks that a non-empty JSON response is returned.
+     */
+    @Test
+    public void testAutoCaching () throws IOException {
+        String json;
+        // Close the resource stream after reading; previously it
+        // was left open.
+        try (InputStream is = getClass().getClassLoader()
+                .getResourceAsStream("collection/query-with-vc-ref.jsonld")) {
+            // Fail with a clear message instead of a
+            // NullPointerException inside IOUtils when the resource
+            // is missing.
+            assertNotNull("Test resource not found", is);
+            json = IOUtils.toString(is);
+        }
+
+        String result = new Krill(json).apply(this.index).toJsonString();
+        assertNotNull(result);
+        assertTrue(!result.isEmpty());
+    }
}
diff --git a/src/test/resources/named-vc/named-vc-free.jsonld b/src/test/resources/named-vc/named-vc-free.jsonld
index 1f90e4c..65a6c52 100644
--- a/src/test/resources/named-vc/named-vc-free.jsonld
+++ b/src/test/resources/named-vc/named-vc-free.jsonld
@@ -9,4 +9,4 @@
"GOE/AGA/01784"
],
"cache" : "true"
-}}
\ No newline at end of file
+}}