Move caching into the toFilter() phase and make the external test suite internal
Change-Id: Id118ae1f67050e8d0362ae3338be7e65746e2e64
diff --git a/Changes b/Changes
index dbea656..9ed843c 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.58.0 2018-08-21
+0.58.0 2018-08-23
- [feature] Implemented referencing cached collection (margaretha)
- [feature] Implemented deserialization of collection with array values
and cache option (margaretha)
@@ -15,6 +15,9 @@
- [feature] Adding loading namedVC from gz (margaretha)
- [bugfix] Fixed VC deserialization and double negations in
CollectionBuilder.group (margaretha)
+ - [cleanup] Move caching mechanism to toFilter() phase (diewald)
+ - [cleanup] Rewrite test suite for caching to be part of
+ regular test suite (diewald)
0.57 2018-04-05
- [feature] Support text queries in metadata
diff --git a/pom.xml b/pom.xml
index d73ed2c..a3e8b08 100644
--- a/pom.xml
+++ b/pom.xml
@@ -289,7 +289,6 @@
<exclude>**/TestWPDIndex.java</exclude>
<exclude>**/TestRealIndex.java</exclude>
<exclude>**/TestSampleIndex.java</exclude>
- <exclude>**/TestVCCaching.java</exclude>
</excludes>
</configuration>
</plugin>
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollection.java b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
index 33811a5..6cdcdca 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
@@ -67,6 +67,8 @@
private CollectionBuilder cb = new CollectionBuilder();
private CollectionBuilder.Interface cbi;
private byte[] pl = new byte[4];
+
+ private Filter prefiltered = null;
// private static ByteBuffer bb = ByteBuffer.allocate(4);
// Logger
@@ -119,7 +121,7 @@
}
else {
this.addError(StatusCodes.MISSING_COLLECTION,
- "Collection is not found.");
+ "Collection is not found");
this.fromBuilder(this.build().nothing());
}
}
@@ -160,6 +162,7 @@
*/
public KrillCollection fromKoral (String jsonString) throws QueryException {
ObjectMapper mapper = new ObjectMapper();
+ this.prefiltered = null;
try {
this.fromKoral((JsonNode) mapper.readTree(jsonString));
}
@@ -172,6 +175,60 @@
};
+ public KrillCollection fromCache (String ref) throws QueryException {
+ Properties prop = KrillProperties.loadDefaultProperties();
+ this.prefiltered = null;
+
+ if (prop == null) {
+ this.addError(StatusCodes.MISSING_KRILL_PROPERTIES,
+ "krill.properties is not found.");
+ return null;
+ }
+
+ String namedVCPath = prop.getProperty("krill.namedVC");
+
+ if (!namedVCPath.endsWith("/")) {
+ namedVCPath += "/";
+ };
+
+ String fileName = namedVCPath + ref + ".jsonld";
+ File file;
+ String json = null;
+ if ((file= new File(fileName)).exists()) {
+ try (FileInputStream fis = new FileInputStream(file)) {
+ json = IOUtils.toString(fis,"utf-8");
+ }
+ catch (IOException e) {
+ this.addError(StatusCodes.READING_COLLECTION_FAILED,
+ e.getMessage());
+ return this;
+ }
+ }
+ // slower than plain text, but saves space
+ else if ((file = new File(fileName + ".gz")).exists()){
+ try (GZIPInputStream gzipInputStream =
+ new GZIPInputStream(new FileInputStream(file));
+ ByteArrayOutputStream bos =
+ new ByteArrayOutputStream(512);) {
+ bos.write(gzipInputStream);
+ json = bos.toString("utf-8");
+ }
+ catch (IOException e) {
+ this.addError(StatusCodes.READING_COLLECTION_FAILED,
+ e.getMessage());
+ return this;
+ }
+ }
+ else{
+ this.addError(StatusCodes.MISSING_COLLECTION,
+ "Collection is not found " + fileName);
+ return this;
+ };
+
+ return this.fromKoral(json);
+ };
+
+
/**
* Import the "collection" part of a KoralQuery.
*
@@ -182,13 +239,18 @@
*/
public KrillCollection fromKoral (JsonNode json) throws QueryException {
this.json = json;
+ this.prefiltered = null;
return this.fromBuilder(this._fromKoral(json));
};
// Create collection from KoralQuery
private CollectionBuilder.Interface _fromKoral (JsonNode json)
- throws QueryException {
+ throws QueryException {
+
+ if (json.has("collection")) {
+ return this._fromKoral(json.at("/collection"));
+ };
if (!json.has("@type")) {
throw new QueryException(701,
@@ -320,9 +382,7 @@
}
// nested group
- else if (type.equals("koral:docGroup"))
-
- {
+ else if (type.equals("koral:docGroup")) {
if (!json.has("operands") || !json.get("operands").isArray())
throw new QueryException(842,
@@ -347,8 +407,10 @@
};
return group;
}
+
// vc reference
else if (type.equals("koral:docGroupRef")) {
+
if (!json.has("ref")) {
throw new QueryException(StatusCodes.MISSING_VC_REFERENCE,
"ref is not found");
@@ -358,35 +420,9 @@
if (ref.isEmpty()) {
throw new QueryException(StatusCodes.MISSING_VC_REFERENCE,
"ref is empty");
- }
+ };
- Element element = KrillCollection.cache.get(ref);
- if (element == null) {
- String corpusQuery = loadVCFile(ref);
- if (corpusQuery == null){
- return this.build().nothing();
- }
- else{
- JsonNode node;
- try {
- node = mapper.readTree(corpusQuery);
- }
- catch (IOException e) {
- throw new QueryException(StatusCodes.INVALID_QUERY,
- "Failed parsing collection query to JsonNode.");
- }
- if (!node.has("collection")){
- this.addError(StatusCodes.MISSING_COLLECTION,
- "KoralQuery does not contain a collection.");
- return this.build().nothing();
- }
- return cb.toCacheVC(ref, this._fromKoral(node.at("/collection")));
- }
- }
- else {
- CachedVCData cc = (CachedVCData) element.getObjectValue();
- return cb.namedVC(cc);
- }
+ return this.cb.referTo(ref);
}
@@ -394,52 +430,6 @@
throw new QueryException(813, "Collection type is not supported");
};
-
- private String loadVCFile (String ref) {
- Properties prop = KrillProperties.loadDefaultProperties();
- if (prop == null) {
- this.addError(StatusCodes.MISSING_KRILL_PROPERTIES,
- "krill.properties is not found.");
- return null;
- }
-
- String namedVCPath = prop.getProperty("krill.namedVC");
- if (!namedVCPath.endsWith("/")) {
- namedVCPath += "/";
- }
- File file;
- String json = null;
- if ((file= new File(namedVCPath + ref + ".jsonld")).exists()) {
- try (FileInputStream fis = new FileInputStream(file)) {
- json = IOUtils.toString(fis,"utf-8");
- }
- catch (IOException e) {
- this.addError(StatusCodes.READING_COLLECTION_FAILED,
- e.getMessage());
- }
- }
- // slower than plain text, but save space
- else if ((file = new File(namedVCPath + ref + ".jsonld.gz")).exists()){
- try (GZIPInputStream gzipInputStream =
- new GZIPInputStream(new FileInputStream(file));
- ByteArrayOutputStream bos =
- new ByteArrayOutputStream(512);) {
- bos.write(gzipInputStream);
- json = bos.toString("utf-8");
- }
- catch (IOException e) {
- this.addError(StatusCodes.READING_COLLECTION_FAILED,
- e.getMessage());
- }
- }
- else{
- this.addError(StatusCodes.MISSING_COLLECTION,
- "Collection is not found.");
- }
-
- return json;
- }
-
/**
* Set the collection from a {@link CollectionBuilder} object.
*
@@ -447,6 +437,7 @@
* The CollectionBuilder object.
*/
public KrillCollection fromBuilder (CollectionBuilder.Interface cbi) {
+ this.prefiltered = null;
this.cbi = cbi;
return this;
};
@@ -485,6 +476,7 @@
* @return The {@link KrillCollection} object for chaining.
*/
public KrillCollection filterUIDs (String ... uids) {
+ this.prefiltered = null;
CollectionBuilder.Group cbg = this.cb.orGroup();
for (String uid : uids) {
cbg.with(this.cb.term("UID", uid));
@@ -496,10 +488,15 @@
/**
* Serialize collection to a {@link Filter} object.
*/
- public Filter toFilter () {
- if (this.cbi == null) return null;
-
- return this.cbi.toFilter();
+ public Filter toFilter () throws QueryException {
+ if (this.cbi == null)
+ return null;
+
+ if (this.prefiltered != null)
+ return this.prefiltered;
+
+ this.prefiltered = this.cbi.toFilter();
+ return this.prefiltered;
};
@@ -515,7 +512,7 @@
/**
- * Generate a string representatio of the virtual collection.
+ * Generate a string representation of the virtual collection.
*
* <strong>Warning</strong>: This currently does not generate a
* valid
@@ -524,10 +521,15 @@
* @return A string representation of the virtual collection.
*/
public String toString () {
- Filter filter = this.toFilter();
- if (filter == null) return "";
-
- return (this.isNegative() ? "-" : "") + filter.toString();
+ try {
+ Filter filter = this.toFilter();
+ if (filter == null) return "";
+ return (this.isNegative() ? "-" : "") + filter.toString();
+ }
+ catch (QueryException qe) {
+ log.warn(qe.getLocalizedMessage());
+ };
+ return "";
};
@@ -557,7 +559,7 @@
* virtual collection.
* @throws IOException
*/
- public FixedBitSet bits (LeafReaderContext atomic) throws IOException {
+ public FixedBitSet bits (LeafReaderContext atomic) throws IOException, QueryException {
LeafReader r = atomic.reader();
FixedBitSet bitset = new FixedBitSet(r.maxDoc());
@@ -590,46 +592,45 @@
* @throws IOException
*/
public DocIdSet getDocIdSet (LeafReaderContext atomic, Bits acceptDocs)
- throws IOException {
+ throws IOException, QueryException {
int maxDoc = atomic.reader().maxDoc();
FixedBitSet bitset = new FixedBitSet(maxDoc);
Filter filter;
- if (this.cbi == null || (filter = this.cbi.toFilter()) == null) {
- if (acceptDocs == null) return null;
+ if (this.cbi == null || (filter = this.toFilter()) == null) {
+ if (acceptDocs == null) return null;
+ bitset.set(0, maxDoc);
+ }
+ else {
- bitset.set(0, maxDoc);
- }
- else {
+ // Init vector
+ DocIdSet docids = filter.getDocIdSet(atomic, null);
+ DocIdSetIterator filterIter =
+ (docids == null) ? null : docids.iterator();
+
+ if (filterIter == null) {
+ if (!this.cbi.isNegative()) return null;
- // Init vector
- DocIdSet docids = filter.getDocIdSet(atomic, null);
- DocIdSetIterator filterIter =
- (docids == null) ? null : docids.iterator();
+ bitset.set(0, maxDoc);
+ }
+ else {
+ // Or bit set
+ bitset.or(filterIter);
+
+ // Revert for negation
+ if (this.cbi.isNegative()) bitset.flip(0, maxDoc);
+ };
+ };
- if (filterIter == null) {
- if (!this.cbi.isNegative()) return null;
-
- bitset.set(0, maxDoc);
- }
- else {
- // Or bit set
- bitset.or(filterIter);
-
- // Revert for negation
- if (this.cbi.isNegative()) bitset.flip(0, maxDoc);
- };
- };
-
- if (DEBUG) {
- log.debug("Bit set is {}", _bits(bitset));
- log.debug("Livedocs is {}", _bits(acceptDocs));
+ if (DEBUG) {
+ log.debug("Bit set is {}", _bits(bitset));
+ log.debug("Livedocs is {}", _bits(acceptDocs));
};
// Remove deleted docs
return (DocIdSet) BitsFilteredDocIdSet
- .wrap((DocIdSet) new BitDocIdSet(bitset), acceptDocs);
+ .wrap((DocIdSet) new BitDocIdSet(bitset), acceptDocs);
};
@@ -694,7 +695,12 @@
// Something went wrong
catch (IOException e) {
- log.warn(e.getMessage());
+ log.warn(e.getLocalizedMessage());
+ }
+
+ // E.g. reference corpus not found
+ catch (QueryException e) {
+ log.warn(e.getLocalizedMessage());
};
return occurrences;
@@ -792,6 +798,9 @@
}
catch (IOException e) {
log.warn(e.getLocalizedMessage());
+ }
+ catch (QueryException e) {
+ log.warn(e.getLocalizedMessage());
};
return docCount;
};
@@ -806,7 +815,7 @@
};
- public void storeInCache (String cacheKey) throws IOException {
+ public void storeInCache (String cacheKey) throws IOException, QueryException {
if (cacheKey ==null || cacheKey.isEmpty()) {
this.addError(StatusCodes.MISSING_ID,
"Collection name is required for caching.");
@@ -826,7 +835,7 @@
CachedVCData cc = new CachedVCData(docIdMap);
cache.put(new Element(cacheKey, cc));
- this.cbi = cb.namedVC(cc);
+ this.cbi = cb.namedVC(cacheKey, cc);
}
/*
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index dfcea3a..e47589e 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -1577,7 +1577,12 @@
catch (IOException e) {
kr.addError(600, "Unable to read index", e.getLocalizedMessage());
log.warn(e.getLocalizedMessage());
- };
+ }
+
+ catch (QueryException e) {
+ kr.addError(e.getErrorCode(),e.getLocalizedMessage());
+ log.warn(e.getLocalizedMessage());
+ };
// Stop timer thread
tthread.stopTimer();
@@ -1763,7 +1768,11 @@
catch (IOException e) {
mc.addError(600, "Unable to read index", e.getLocalizedMessage());
log.warn(e.getLocalizedMessage());
- };
+ }
+ catch (QueryException e) {
+ mc.addError(e.getErrorCode(),e.getLocalizedMessage());
+ log.warn(e.getLocalizedMessage());
+ };
mc.close();
return mc;
diff --git a/src/main/java/de/ids_mannheim/korap/collection/CachedVCData.java b/src/main/java/de/ids_mannheim/korap/collection/CachedVCData.java
index 8b73a45..ca97d52 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/CachedVCData.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/CachedVCData.java
@@ -70,4 +70,8 @@
}
return map;
}
+
+ public String toString () {
+ return this.docIdMap.toString();
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/collection/CachedVCFilter.java b/src/main/java/de/ids_mannheim/korap/collection/CachedVCFilter.java
index 24ce0b0..1c35233 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/CachedVCFilter.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/CachedVCFilter.java
@@ -16,8 +16,10 @@
public class CachedVCFilter extends Filter {
private CachedVCData cachedCollection;
+ private String cacheKey;
- public CachedVCFilter (CachedVCData cachedCollection) {
+ public CachedVCFilter (String cacheKey, CachedVCData cachedCollection) {
+ this.cacheKey = cacheKey;
this.cachedCollection = cachedCollection;
}
@@ -26,6 +28,7 @@
throws IOException {
DocBits docBits =
cachedCollection.getDocIdMap().get(context.hashCode());
+
if (docBits == null) {
// does not exist in the cache
return null;
@@ -33,4 +36,8 @@
return docBits.createBitDocIdSet();
}
+ @Override
+ public String toString () {
+ return "referTo(cached:" + this.cacheKey + ")";
+ };
}
diff --git a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
index 3e231bc..3abc0e4 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
@@ -5,6 +5,9 @@
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
+import java.util.Properties;
+import java.io.File;
+import java.io.FileInputStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.queries.TermsFilter;
@@ -17,9 +20,22 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.commons.io.IOUtils;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
import de.ids_mannheim.korap.KrillCollection;
import de.ids_mannheim.korap.index.TextPrependedTokenStream;
import de.ids_mannheim.korap.util.KrillDate;
+import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.util.StatusCodes;
+import de.ids_mannheim.korap.util.KrillProperties;
+
+import net.sf.ehcache.Cache;
+import net.sf.ehcache.CacheManager;
+import net.sf.ehcache.Element;
+
/*
* TODO: Optimize!
@@ -33,6 +49,10 @@
public class CollectionBuilder {
+ public final static CacheManager cacheManager = CacheManager.newInstance();
+ public final static Cache cache = cacheManager.getCache("named_vc");
+
+
// Logger
private final static Logger log = LoggerFactory
.getLogger(KrillCollection.class);
@@ -124,6 +144,10 @@
dateDF.ceil());
};
+ public CollectionBuilder.Interface referTo (String reference) {
+ return new CollectionBuilder.Reference(reference);
+ };
+
public CollectionBuilder.Group andGroup () {
return new CollectionBuilder.Group(false);
@@ -138,7 +162,7 @@
public String toString ();
- public Filter toFilter ();
+ public Filter toFilter () throws QueryException;
public boolean isNegative ();
@@ -214,7 +238,7 @@
// TODO:
// Currently this treatment is language specific and
- // does too mzch, I guess.
+ // does too much, I guess.
public Filter toFilter () {
PhraseQuery pq = new PhraseQuery();
int pos = 0;
@@ -257,6 +281,95 @@
};
};
+
+ public class Reference implements CollectionBuilder.Interface {
+ private boolean isNegative = false;
+ private String reference;
+ private Map<Integer, DocBits> docIdMap =
+ new HashMap<Integer, DocBits>();
+
+ public Reference (String reference) {
+ this.reference = reference;
+ };
+
+ public Filter toFilter () throws QueryException {
+ ObjectMapper mapper = new ObjectMapper();
+
+ Element element = KrillCollection.cache.get(this.reference);
+ if (element == null) {
+
+ KrillCollection kc = new KrillCollection();
+
+ kc.fromCache(this.reference);
+
+ if (kc.hasErrors()) {
+ throw new QueryException(
+ kc.getError(0).getCode(),
+ kc.getError(0).getMessage()
+ );
+ };
+
+ return new ToCacheVCFilter(
+ this.reference,
+ docIdMap,
+ kc.getBuilder(),
+ kc.toFilter()
+ );
+ }
+ else {
+ CachedVCData cc = (CachedVCData) element.getObjectValue();
+ return new CachedVCFilter(this.reference, cc);
+ }
+ };
+
+
+ public String toString () {
+ return "referTo(" + this.reference + ")";
+ };
+
+
+ public boolean isNegative () {
+ return this.isNegative;
+ };
+
+
+ public CollectionBuilder.Interface not () {
+ this.isNegative = true;
+ return this;
+ };
+
+ private String loadVCFile (String ref) {
+ Properties prop = KrillProperties.loadDefaultProperties();
+ if (prop == null){
+ /*
+ this.addError(StatusCodes.MISSING_KRILL_PROPERTIES,
+ "krill.properties is not found.");
+ */
+ return null;
+ }
+
+ String namedVCPath = prop.getProperty("krill.namedVC");
+ if (!namedVCPath.endsWith("/")){
+ namedVCPath += "/";
+ }
+ File file = new File(namedVCPath+ref+".jsonld");
+
+ String json = null;
+ try {
+ FileInputStream fis = new FileInputStream(file);
+ json = IOUtils.toString(fis);
+ }
+ catch (IOException e) {
+ /*
+ this.addError(StatusCodes.MISSING_COLLECTION,
+ "Collection is not found.");
+ */
+ return null;
+ }
+ return json;
+ }
+ };
+
public class Group implements CollectionBuilder.Interface {
private boolean isOptional = false;
@@ -300,7 +413,7 @@
};
- public Filter toFilter () {
+ public Filter toFilter () throws QueryException {
if (this.operands == null || this.operands.isEmpty())
return null;
@@ -326,10 +439,16 @@
public String toString () {
- Filter filter = this.toFilter();
- if (filter == null)
- return "";
- return filter.toString();
+ try {
+ Filter filter = this.toFilter();
+ if (filter == null)
+ return "";
+ return filter.toString();
+ }
+ catch (QueryException qe) {
+ log.warn(qe.getLocalizedMessage());
+ };
+ return "";
};
@@ -384,16 +503,18 @@
*/
public class CachedVC implements CollectionBuilder.Interface {
+ private String cacheKey;
private CachedVCData cachedCollection;
private boolean isNegative = false;
- public CachedVC (CachedVCData cc) {
- this.cachedCollection = cc;
+ public CachedVC (String vcRef, CachedVCData cc) {
+ this.cacheKey = vcRef;
+ this.cachedCollection = cc;
}
@Override
public Filter toFilter () {
- return new CachedVCFilter(cachedCollection);
+ return new CachedVCFilter(this.cacheKey, cachedCollection);
}
@Override
@@ -428,7 +549,7 @@
}
@Override
- public Filter toFilter () {
+ public Filter toFilter () throws QueryException {
return new ToCacheVCFilter(cacheKey,docIdMap, child, child.toFilter());
}
@@ -443,9 +564,10 @@
return this;
}
}
-
- public Interface namedVC (CachedVCData cc) {
- return new CollectionBuilder.CachedVC(cc);
+
+ // Maybe irrelevant
+ public Interface namedVC (String vcRef, CachedVCData cc) {
+ return new CollectionBuilder.CachedVC(vcRef, cc);
}
public Interface toCacheVC (String vcRef, Interface cbi) {
diff --git a/src/main/java/de/ids_mannheim/korap/collection/ToCacheVCFilter.java b/src/main/java/de/ids_mannheim/korap/collection/ToCacheVCFilter.java
index 981d0ed..98d911e 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/ToCacheVCFilter.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/ToCacheVCFilter.java
@@ -21,8 +21,6 @@
*
*/
public class ToCacheVCFilter extends Filter {
-
-
private Filter filter;
private CollectionBuilder.Interface cbi;
private String cacheKey;
@@ -66,7 +64,12 @@
KrillCollection.cache.remove(cacheKey);
KrillCollection.cache.put(new Element(cacheKey, cachedVCData));
+
return docIdSet;
}
+ @Override
+ public String toString () {
+ return "referTo(" + this.cacheKey + ")";
+ };
}
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilder.java b/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilder.java
index 213fb86..f923423 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilder.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestCollectionBuilder.java
@@ -86,6 +86,14 @@
@Test
+ public void builderReference () throws IOException {
+ CollectionBuilder kc = new CollectionBuilder();
+ assertEquals("referTo(ndiewald/myCorpus)",
+ kc.referTo("ndiewald/myCorpus").toString());
+ };
+
+
+ @Test
public void builderTill () throws IOException {
CollectionBuilder kc = new CollectionBuilder();
assertEquals("pubDate:[0 TO 20059999]",
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
index d874905..cb633ef 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
@@ -2,6 +2,8 @@
import java.io.IOException;
+import java.util.Properties;
+
import de.ids_mannheim.korap.KrillIndex;
import de.ids_mannheim.korap.KrillCollection;
import de.ids_mannheim.korap.collection.CollectionBuilder;
@@ -9,6 +11,8 @@
import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.response.SearchContext;
import de.ids_mannheim.korap.util.StatusCodes;
+import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.util.KrillProperties;
import de.ids_mannheim.korap.Krill;
import de.ids_mannheim.korap.query.QueryBuilder;
@@ -18,16 +22,21 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
+import static de.ids_mannheim.korap.TestSimple.*;
import static org.junit.Assert.*;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
+import net.sf.ehcache.Element;
+
+
@RunWith(JUnit4.class)
public class TestKrillCollectionIndex {
private KrillIndex ki;
+ final String path = "/queries/collections/";
@Test
public void testKrillCollectionWithWrongJson () throws IOException {
@@ -396,7 +405,6 @@
kcn.fromBuilder(cb.term("text", sv));
assertEquals(1, kcn.docCount());
-
};
@Test
@@ -419,6 +427,133 @@
assertEquals(1, kcn.docCount());
};
+ @Test
+ public void testUnknownVC () throws IOException {
+ ki = new KrillIndex();
+ ki.addDoc(createDoc1());
+ ki.commit();
+
+ // This test was adapted from TestVCCaching,
+ // but it no longer fails at deserialization
+ String json = _getJSONString("unknown-vc-ref.jsonld");
+
+ KrillCollection kc = new KrillCollection(json);
+ assertEquals("referTo(https://korap.ids-mannheim.de/@ndiewald/MyCorpus)", kc.getBuilder().toString());
+
+ // Fails on filtering
+ assertEquals("", kc.toString());
+
+ QueryBuilder kq = new QueryBuilder("field");
+
+ Krill krill = new Krill(kq.seg("a").with("b"));
+ krill.setCollection(kc);
+
+ Result result = krill.apply(ki);
+
+ assertEquals(StatusCodes.MISSING_COLLECTION, result.getError(0).getCode());
+ };
+
+ @Test
+ public void testCache () throws IOException {
+
+ Properties prop = KrillProperties.loadDefaultProperties();
+
+ String vcPath = getClass().getResource(path + "named-vcs").getFile();
+ String tempVC = prop.getProperty("krill.namedVC");
+ prop.setProperty("krill.namedVC", vcPath);
+
+ ki = new KrillIndex();
+ ki.addDoc(createDoc1());
+ ki.addDoc(createDoc2());
+ ki.commit();
+
+ testManualAddToCache(ki, "named-vcs/named-vc1.jsonld", "named-vc1");
+ testManualAddToCache(ki, "named-vcs/named-vc2.jsonld", "named-vc2");
+
+ Element element = KrillCollection.cache.get("named-vc1");
+ CachedVCData cc = (CachedVCData) element.getObjectValue();
+ assertTrue(cc.getDocIdMap().size() > 0);
+
+ element = KrillCollection.cache.get("named-vc2");
+ cc = (CachedVCData) element.getObjectValue();
+ assertTrue(cc.getDocIdMap().size() > 0);
+
+ // Check for cache location
+ assertFalse(KrillCollection.cache.isElementInMemory("named-vc1"));
+ assertTrue(KrillCollection.cache.isElementOnDisk("named-vc1"));
+ assertTrue(KrillCollection.cache.isElementInMemory("named-vc2"));
+ assertTrue(KrillCollection.cache.isElementOnDisk("named-vc2"));
+
+ // testSearchCachedVC();
+ String json = _getJSONString("query-with-vc-ref.jsonld");
+ // references named-vc1: UID eq [2,3]
+
+ Krill krill = new Krill(json);
+ // TODO: Better keep the reference
+ testManualAddToCache(ki, "named-vcs/named-vc1.jsonld", "named-vc1");
+ assertEquals("referTo(cached:named-vc1)", krill.getCollection().toString());
+
+ Result result = krill.apply(ki);
+ assertEquals("[[a]] c d", result.getMatch(0).getSnippetBrackets());
+ assertEquals(result.getMatch(0).getUID(), 2);
+ assertEquals(result.getMatches().size(), 1);
+
+ // testAddDocToIndex();
+ ki.addDoc(createDoc3());
+ ki.commit();
+
+ // Cache is removed after index change
+ element = KrillCollection.cache.get("named-vc1");
+ assertNull(element);
+
+ // Restart search - this time it's not precached
+ krill = new Krill(json);
+ assertEquals("referTo(named-vc1)", krill.getCollection().toString());
+ result = krill.apply(ki);
+
+ assertEquals("[[a]] c d", result.getMatch(0).getSnippetBrackets());
+ assertEquals("[[a]] d e", result.getMatch(1).getSnippetBrackets());
+ assertEquals(result.getMatches().size(), 2);
+
+ // testAutoCachingMatch
+ // Check autocache
+ element = KrillCollection.cache.get("named-vc1");
+ cc = (CachedVCData) element.getObjectValue();
+ assertTrue(cc.getDocIdMap().size() > 0);
+
+ // Because of autocaching, this should work now
+ krill = new Krill(json);
+ assertEquals("referTo(cached:named-vc1)", krill.getCollection().toString());
+ result = krill.apply(ki);
+ assertEquals("[[a]] c d", result.getMatch(0).getSnippetBrackets());
+ assertEquals("[[a]] d e", result.getMatch(1).getSnippetBrackets());
+ assertEquals(result.getMatches().size(), 2);
+
+ // Cache is removed on any index change (here: doc 1 is added again)
+ ki.addDoc(createDoc1());
+ ki.commit();
+
+ // Check cache
+ element = KrillCollection.cache.get("named-vc1");
+ assertNull(element);
+
+ // Rerun query
+ krill = new Krill(json);
+ assertEquals("referTo(named-vc1)", krill.getCollection().toString());
+ result = krill.apply(ki);
+ assertEquals("[[a]] c d", result.getMatch(0).getSnippetBrackets());
+ assertEquals("[[a]] d e", result.getMatch(1).getSnippetBrackets());
+ assertEquals(result.getMatches().size(), 2);
+
+ prop.setProperty("krill.namedVC", tempVC);
+
+ // testClearCache
+ KrillCollection.cache.removeAll();
+
+ element = KrillCollection.cache.get("named-vc1");
+ assertNull(element);
+ };
+
@Test
public void filterExampleFromLegacy () throws Exception {
@@ -860,33 +995,59 @@
private FieldDocument createDoc1 () {
FieldDocument fd = new FieldDocument();
+ fd.addString("UID", "1");
fd.addString("ID", "doc-1");
fd.addString("author", "Frank");
fd.addKeyword("textClass", "Nachricht Kultur Reisen");
fd.addInt("pubDate", 20051210);
fd.addText("text", "Der alte Mann ging über die Straße");
+ fd.addTV("tokens", "a b c", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>3]"
+ + "[(2-3)s:b|i:b|_1$<i>2<i>3]" + "[(4-5)s:c|i:c|_2$<i>4<i>5]");
return fd;
};
private FieldDocument createDoc2 () {
FieldDocument fd = new FieldDocument();
- fd.addString("ID", "doc-2");
+ fd.addString("UID", "2");
+ fd.addString("ID", "doc-2");
fd.addString("author", "Peter");
fd.addKeyword("textClass", "Kultur Reisen");
fd.addInt("pubDate", 20051207);
fd.addText("text", "Der junge Mann hatte keine andere Wahl");
+ fd.addTV("tokens", "a c d", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>3]"
+ + "[(2-3)s:c|i:c|_1$<i>2<i>3]" + "[(4-5)s:d|i:d|_2$<i>4<i>5]");
return fd;
};
private FieldDocument createDoc3 () {
FieldDocument fd = new FieldDocument();
- fd.addString("ID", "doc-3");
+ fd.addString("UID", "3");
+ fd.addString("ID", "doc-3");
fd.addString("author", "Sebastian");
fd.addKeyword("textClass", "Reisen Finanzen");
fd.addInt("pubDate", 20051216);
fd.addText("text", "Die Frau und der Mann küssten sich");
+ fd.addTV("tokens", "a d e", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>3]"
+ + "[(2-3)s:d|i:d|_1$<i>2<i>3]" + "[(4-5)s:e|i:e|_2$<i>4<i>5]");
return fd;
};
+
+ private void testManualAddToCache (KrillIndex index, String filename, String vcName) throws IOException {
+ String json = _getJSONString(filename);
+
+ KrillCollection kc = new KrillCollection(json);
+ kc.setIndex(index);
+ try {
+ kc.storeInCache(vcName);
+ }
+ catch (QueryException qe) {
+ System.err.println(qe.getLocalizedMessage());
+ };
+ };
+
+ private String _getJSONString (String file) {
+ return getJsonString(getClass().getResource(path + file).getFile());
+ };
};
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestVCCaching.java b/src/test/java/de/ids_mannheim/korap/collection/TestVCCaching.java
deleted file mode 100644
index a333f70..0000000
--- a/src/test/java/de/ids_mannheim/korap/collection/TestVCCaching.java
+++ /dev/null
@@ -1,140 +0,0 @@
-package de.ids_mannheim.korap.collection;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.file.Paths;
-import java.util.List;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.lucene.store.MMapDirectory;
-import org.junit.Test;
-
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import de.ids_mannheim.korap.Krill;
-import de.ids_mannheim.korap.KrillCollection;
-import de.ids_mannheim.korap.KrillIndex;
-import de.ids_mannheim.korap.index.FieldDocument;
-import de.ids_mannheim.korap.response.Message;
-import de.ids_mannheim.korap.util.StatusCodes;
-import net.sf.ehcache.Element;
-
-public class TestVCCaching {
-
- public static final ObjectMapper mapper = new ObjectMapper();
-
- private KrillIndex getSampleIndex () throws IOException {
- return new KrillIndex(new MMapDirectory(
- Paths.get(getClass().getResource("/sample-index").getFile())));
- }
-
- private KrillIndex index;
-
- public TestVCCaching () throws IOException {
- index = getSampleIndex();
- }
-
- @Test
- public void testUnknownVC () throws IOException {
-
- InputStream is = getClass().getClassLoader()
- .getResourceAsStream("collection/unknown-vc-ref.jsonld");
- String json = IOUtils.toString(is,"utf-8");
-
- KrillCollection kc = new KrillCollection(json);
- List<Message> messages = kc.getErrors().getMessages();
- assertEquals(1, messages.size());
-
- assertEquals(StatusCodes.MISSING_COLLECTION, messages.get(0).getCode());
- }
-
- @Test
- public void testCache () throws IOException {
- testManualAddToCache("named-vc/named-vc1.jsonld", "named-vc1");
- testManualAddToCache("named-vc/named-vc2.jsonld", "named-vc2");
-
- Element element = KrillCollection.cache.get("named-vc1");
- CachedVCData cc = (CachedVCData) element.getObjectValue();
- assertTrue(cc.getDocIdMap().size() > 0);
-
- element = KrillCollection.cache.get("named-vc2");
- cc = (CachedVCData) element.getObjectValue();
- assertTrue(cc.getDocIdMap().size() > 0);
-
- assertFalse(KrillCollection.cache.isElementInMemory("named-vc1"));
- assertTrue(KrillCollection.cache.isElementOnDisk("named-vc1"));
- assertTrue(KrillCollection.cache.isElementInMemory("named-vc2"));
- assertTrue(KrillCollection.cache.isElementOnDisk("named-vc2"));
-
- testSearchCachedVC();
- testAddDocToIndex();
- testDelDocFromIndex();
- }
-
- private void testManualAddToCache (String filename, String vcName) throws IOException {
- InputStream is = getClass().getClassLoader()
- .getResourceAsStream(filename);
- String json = IOUtils.toString(is,"utf-8");
- is.close();
-
- KrillCollection kc = new KrillCollection(json);
- kc.setIndex(index);
- kc.storeInCache(vcName);
- }
-
- private void testSearchCachedVC () throws IOException {
- InputStream is = getClass().getClassLoader()
- .getResourceAsStream("collection/query-with-vc-ref.jsonld");
- String json = IOUtils.toString(is, "utf-8");
-
- String result = new Krill(json).apply(this.index).toJsonString();
- System.out.println(json);
- JsonNode node = mapper.readTree(result);
- assertTrue(node.at("/matches").size()>0);
- }
-
- private void testClearCache () {
- KrillCollection.cache.removeAll();
-
- Element element = KrillCollection.cache.get("named-vc1");
- assertNull(element);
- }
-
- public void testAddDocToIndex () throws IOException {
- testManualAddToCache("named-vc/named-vc1.jsonld", "named-vc1");
-
- FieldDocument fd = new FieldDocument();
- fd.addTV("base", "x y", "[(0-3)s:x]" + // 1
- "[(3-4)s:y]" // 2
- );
- index.addDoc(fd);
- index.commit();
-
- Element element = KrillCollection.cache.get("named-vc1");
- assertNull(element);
- }
-
- public void testDelDocFromIndex () throws IOException {
- testManualAddToCache("named-vc/named-vc1.jsonld", "named-vc1");
-
- index.delDocs("textSigle", "GOE/AGF/00000");
- index.commit();
-
- Element element = KrillCollection.cache.get("named-vc1");
- assertNull(element);
- }
-
- @Test
- public void testAutoCachingMatchNe () throws IOException {
- testSearchCachedVC();
- // search from cache
- testSearchCachedVC();
- testClearCache();
- }
-}
diff --git a/src/test/resources/named-vc/named-vc1.jsonld b/src/test/resources/named-vc/named-vc1.jsonld
deleted file mode 100644
index 1257134..0000000
--- a/src/test/resources/named-vc/named-vc1.jsonld
+++ /dev/null
@@ -1,10 +0,0 @@
-{"collection": {
- "@type": "koral:doc",
- "key": "textSigle",
- "match": "match:ne",
- "type" : "type:string",
- "value": [
- "GOE/AGF/00000",
- "GOE/AGA/01784"
- ]
-}}
diff --git a/src/test/resources/collection/availability-all.jsonld b/src/test/resources/queries/collections/availability-all.jsonld
similarity index 100%
rename from src/test/resources/collection/availability-all.jsonld
rename to src/test/resources/queries/collections/availability-all.jsonld
diff --git a/src/test/resources/queries/collections/named-vcs/named-vc1.jsonld b/src/test/resources/queries/collections/named-vcs/named-vc1.jsonld
new file mode 100644
index 0000000..ae2bc82
--- /dev/null
+++ b/src/test/resources/queries/collections/named-vcs/named-vc1.jsonld
@@ -0,0 +1,9 @@
+{"collection": {
+ "@type": "koral:doc",
+ "key": "UID",
+ "type" : "type:string",
+ "value": [
+ 2,
+ 3
+ ]
+}}
diff --git a/src/test/resources/named-vc/named-vc2.jsonld b/src/test/resources/queries/collections/named-vcs/named-vc2.jsonld
similarity index 100%
rename from src/test/resources/named-vc/named-vc2.jsonld
rename to src/test/resources/queries/collections/named-vcs/named-vc2.jsonld
diff --git a/src/test/resources/collection/query-with-vc-ref.jsonld b/src/test/resources/queries/collections/query-with-vc-ref.jsonld
similarity index 72%
rename from src/test/resources/collection/query-with-vc-ref.jsonld
rename to src/test/resources/queries/collections/query-with-vc-ref.jsonld
index ee0bb66..86547ac 100644
--- a/src/test/resources/collection/query-with-vc-ref.jsonld
+++ b/src/test/resources/queries/collections/query-with-vc-ref.jsonld
@@ -3,9 +3,8 @@
"wrap":{
"@type":"koral:term",
"layer":"orth",
- "key":"der",
- "match":"match:eq",
- "foundry":"opennlp"
+ "key":"a",
+ "match":"match:eq"
}
},
"collection": {
diff --git a/src/test/resources/collection/unknown-vc-ref.jsonld b/src/test/resources/queries/collections/unknown-vc-ref.jsonld
similarity index 100%
rename from src/test/resources/collection/unknown-vc-ref.jsonld
rename to src/test/resources/queries/collections/unknown-vc-ref.jsonld