Added restrictions to VC ID and handled non-existent VC
Change-Id: Ibd26ce749a9cbd4d0683cc381b94e44cf4670f5c
diff --git a/Changes b/Changes
index ad70b06..d4ececa 100644
--- a/Changes
+++ b/Changes
@@ -1,17 +1,21 @@
-0.60.3 2022-03-17
+0.60.3 2022-03-30
- [cleanup] Updated fingerprints to base64url (closed #83)
- [bugfix] Fixed ConcurrentModificationException in VC cache
handling (margaretha)
+ - [feature] Added restrictions to VC ID and handled non-existent VC
+ (margaretha)
!!! This will invalidate all VC caches. Please recache!
-0.60.2 2022-01-04
+0.60.2 2022-02-04
- [security] More log4j updates (diewald)
- [feature] Support for field value vector method (fixes #81; diewald)
- [cleanup] Moved and updated cache-tests from TestKrillCollectionIndex
to TestVirtualCorpusCache (resolved #44; margaretha)
- [feature] Added a Krill API returning textSigles for a given
corpus query (margaretha)
+ - [cleanup] Replaced array nodes of field values with a simple string
+ (margaretha)
0.60.1 2021-12-17
- [feature] Added vc loading from classpath (margaretha)
diff --git a/src/main/java/de/ids_mannheim/korap/Krill.java b/src/main/java/de/ids_mannheim/korap/Krill.java
index 01e20ed..4b1b642 100644
--- a/src/main/java/de/ids_mannheim/korap/Krill.java
+++ b/src/main/java/de/ids_mannheim/korap/Krill.java
@@ -257,7 +257,7 @@
*/
public Krill setIndex (KrillIndex index) {
this.index = index;
- VirtualCorpusCache.setIndexInfo(index);
+ VirtualCorpusCache.setIndexInfo(index);
return this;
};
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollection.java b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
index 8f8dbde..fbd2f4f 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
@@ -606,6 +606,9 @@
else if (t instanceof QueryException) {
throw new QueryException(((QueryException) t).getErrorCode(), t.getLocalizedMessage());
}
+ else {
+ throw e;
+ }
}
if (DEBUG) {
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 6421226..7f807fa 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -77,6 +77,7 @@
import de.ids_mannheim.korap.util.KrillDate;
import de.ids_mannheim.korap.util.KrillProperties;
import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.util.StatusCodes;
/**
* <p>KrillIndex implements a simple API for searching in and writing
@@ -1643,7 +1644,13 @@
catch (QueryException e) {
kr.addError(e.getErrorCode(),e.getLocalizedMessage());
log.warn(e.getLocalizedMessage());
- };
+ }
+ catch (Exception e) {
+ // 104 ILLEGAL_ARGUMENT, see Kustvakt core
+ // de.ids_mannheim.korap.exceptions.StatusCodes.ILLEGAL_ARGUMENT
+ kr.addError(104,e.getLocalizedMessage());
+ log.warn(e.getLocalizedMessage());
+ }
// Stop timer thread
tthread.stopTimer();
diff --git a/src/main/java/de/ids_mannheim/korap/cache/VirtualCorpusCache.java b/src/main/java/de/ids_mannheim/korap/cache/VirtualCorpusCache.java
index 26d74e2..eeb793b 100644
--- a/src/main/java/de/ids_mannheim/korap/cache/VirtualCorpusCache.java
+++ b/src/main/java/de/ids_mannheim/korap/cache/VirtualCorpusCache.java
@@ -13,6 +13,7 @@
import java.util.Map;
import java.util.Set;
import java.util.function.Supplier;
+import java.util.regex.Pattern;
import org.apache.lucene.index.LeafReaderContext;
@@ -32,6 +33,8 @@
*/
public class VirtualCorpusCache {
+ public static Pattern vcNamePattern = Pattern.compile("[a-zA-Z0-9]+[a-zA-Z_0-9-.]+");
+
public static final String CACHE_LOCATION = "vc-cache";
public static int CAPACITY = 5;
public static final Map<String, Map<String, DocBits>> map = Collections
@@ -59,8 +62,45 @@
}
+ /**
+ * Path traversal must not be allowed using the VC ID.
+ *
+ * VC id may only have one slash with the following format:
+ * [username]/[vc-name]
+ *
+ * VC name may only contains alphabets, numbers, dashes and
+ * full-stops. See {@link #vcNamePattern}
+ *
+ * @param vcId
+ * @return true if the given VC id is valid, false otherwise
+ */
+ private static boolean isVcIdValid (String vcId) {
+// if (vcId.contains("./")) {
+// return false;
+// }
+
+ String[] parts = vcId.split("/");
+ if (parts.length > 2) {
+ vcToCleanUp.remove(vcId);
+ return false;
+ }
+
+ String vcName = parts.length == 2 ? parts[1] : parts[0];
+ if (!vcNamePattern.matcher(vcName).matches()) {
+ vcToCleanUp.remove(vcId);
+ return false;
+ }
+
+ return true;
+ }
+
+
public static void storeOnDisk (String vcId, String leafFingerprint,
DocBits docBits) {
+ if (!isVcIdValid(vcId)) {
+ throw new IllegalArgumentException("Cannot cache VC due to invalid VC ID");
+ }
+
File dir = new File(CACHE_LOCATION + "/" + vcId);
if (!dir.exists()) {
dir.mkdirs();
@@ -84,15 +124,19 @@
}
- public static void store (String vcId, Map<String, DocBits> vcData) {
+ public static void store (String vcId, Map<String, DocBits> vcData){
map.put(vcId, vcData);
vcData.keySet().forEach(leafFingerprint -> {
storeOnDisk(vcId, leafFingerprint, vcData.get(leafFingerprint));
});
}
- public static void store (String vcId, KrillIndex index)
- throws QueryException, IOException {
+
+ public static void store (String vcId, KrillIndex index) {
+
+ if (!isVcIdValid(vcId)) {
+ throw new IllegalArgumentException("Cannot cache VC due to invalid VC ID");
+ }
DocBitsSupplier docBitsSupplier = new VirtualCorpusFilter(
vcId).new DocBitsSupplier();
@@ -100,7 +144,7 @@
for (LeafReaderContext context : index.reader().leaves()) {
leafFingerprint = Fingerprinter.create(
context.reader().getCombinedCoreAndDeletesKey().toString());
-
+
getDocBits(vcId, leafFingerprint, () -> {
try {
return docBitsSupplier.supplyDocBits(context,
@@ -143,6 +187,10 @@
public static boolean contains (String vcId) {
+ if (!isVcIdValid(vcId)) {
+ return false;
+ }
+
if (map.containsKey(vcId)) {
return true;
}
@@ -152,7 +200,19 @@
}
}
+
+ /**
+ * Deletes the VC from memory cache and disk cache. If VC doesn't
+ * exist, the method keeps silent about it and no error will be
+ * thrown because the deletion purpose has been achieved.
+ *
+ * @param vcId
+ */
public static void delete (String vcId) {
+ if (!isVcIdValid(vcId)) {
+ return;
+ }
+
vcToCleanUp.remove(vcId);
map.remove(vcId);
File vc = new File(CACHE_LOCATION + "/" + vcId);
@@ -166,6 +226,7 @@
}
}
+
public static void reset () {
vcToCleanUp.clear();
map.clear();
@@ -181,7 +242,7 @@
}
vcCache.delete();
}
-
+
/**
* Sets IndexInfo and checks if there is any VC to clean up. This
@@ -191,6 +252,8 @@
* map of a VC, it is marked for clean up. The cached VC will be
* cleaned up, next time the index is used in {@link Krill}.
* see {@link #getDocBits(String, String, Supplier)}
+ *
+ * @throws QueryException
*/
public static void setIndexInfo (IndexInfo indexInfo) {
VirtualCorpusCache.indexInfo = indexInfo;
@@ -240,6 +303,7 @@
* @param calculateDocBits
* a supplier calculating the DocBits
* @return DocBits
+ * @throws QueryException
*/
public static DocBits getDocBits (String vcId, String leafFingerprint,
Supplier<DocBits> calculateDocBits) {
diff --git a/src/main/java/de/ids_mannheim/korap/response/Response.java b/src/main/java/de/ids_mannheim/korap/response/Response.java
index 21712c7..55a97eb 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Response.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Response.java
@@ -3,7 +3,6 @@
import static de.ids_mannheim.korap.util.KrillString.quote;
import java.util.HashMap;
-import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
@@ -11,7 +10,6 @@
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import de.ids_mannheim.korap.KrillCollection;
diff --git a/src/test/java/de/ids_mannheim/korap/cache/TestInvalidVcId.java b/src/test/java/de/ids_mannheim/korap/cache/TestInvalidVcId.java
new file mode 100644
index 0000000..6e6f7c2
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/cache/TestInvalidVcId.java
@@ -0,0 +1,103 @@
+package de.ids_mannheim.korap.cache;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.Test;
+
+import de.ids_mannheim.korap.Krill;
+import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.response.Message;
+import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.util.StatusCodes;
+
+public class TestInvalidVcId {
+
+ private KrillIndex ki;
+
+ public TestInvalidVcId () throws IOException {
+ ki = TestVirtualCorpusCache.createIndex();
+ }
+
+ @Test
+ public void testStoreVcNotExist () {
+ String vcId = "snx";
+ RuntimeException ex = assertThrows(RuntimeException.class, () -> {
+ VirtualCorpusCache.store(vcId, ki);
+ });
+ assertEquals("de.ids_mannheim.korap.util.QueryException: "
+ + "Collection is not found queries/collections/named-vcs/snx.jsonld",
+ ex.getMessage());
+ }
+
+
+ @Test
+ public void testReferVcNotExist () throws IOException {
+ String file = "/queries/collections/vc-ref/query-with-unknown-vc.jsonld";
+ InputStream is = getClass().getResourceAsStream(file);
+ String queryRefJson = IOUtils.toString(is, "utf-8");
+
+ Krill krill = new Krill(queryRefJson);
+ Result result = krill.apply(ki);
+ Message m = result.getError(0);
+ assertEquals(StatusCodes.MISSING_COLLECTION, m.getCode());
+ assertEquals(
+ "Collection is not found queries/collections/named-vcs/unknown-vc.jsonld",
+ m.getMessage());
+ assertEquals(0, result.getTotalResults());
+ }
+
+
+ @Test
+ public void testDeleteVcNotExist () throws IOException {
+ VirtualCorpusCache.delete("unknown-vc-id");
+ }
+
+
+ @Test
+ public void testStoreVcInvalidChars () {
+ String vcId = "inval!d-vc-id";
+ IllegalArgumentException ex = assertThrows(
+ IllegalArgumentException.class, () -> {
+ VirtualCorpusCache.store(vcId, ki);
+ });
+ assertEquals("Cannot cache VC due to invalid VC ID", ex.getMessage());
+ }
+
+ @Test
+ public void testStoreVcInvalidParentPath () {
+ String vcId = "..";
+ IllegalArgumentException ex = assertThrows(
+ IllegalArgumentException.class, () -> {
+ VirtualCorpusCache.store(vcId, ki);
+ });
+ assertEquals("Cannot cache VC due to invalid VC ID", ex.getMessage());
+ }
+
+ @Test
+ public void testStoreVcInvalidNonASCII () {
+ String vcId = "aßäüö";
+ IllegalArgumentException ex = assertThrows(
+ IllegalArgumentException.class, () -> {
+ VirtualCorpusCache.store(vcId, ki);
+ });
+ assertEquals("Cannot cache VC due to invalid VC ID", ex.getMessage());
+ }
+
+ @Test
+ public void testReferVcInvalidChars () throws IOException {
+ String file = "/queries/collections/vc-ref/query-with-invalid-vc.jsonld";
+ InputStream is = getClass().getResourceAsStream(file);
+ String queryRefJson = IOUtils.toString(is, "utf-8");
+
+ Krill krill = new Krill(queryRefJson);
+ Result result = krill.apply(ki);
+ Message m = result.getError(0);
+ assertEquals("Cannot cache VC due to invalid VC ID", m.getMessage());
+ assertEquals(104, m.getCode());
+ }
+}
diff --git a/src/test/java/de/ids_mannheim/korap/cache/TestVirtualCorpusCache.java b/src/test/java/de/ids_mannheim/korap/cache/TestVirtualCorpusCache.java
index 9630767..2c8edc2 100644
--- a/src/test/java/de/ids_mannheim/korap/cache/TestVirtualCorpusCache.java
+++ b/src/test/java/de/ids_mannheim/korap/cache/TestVirtualCorpusCache.java
@@ -47,19 +47,18 @@
}
- private KrillIndex createIndex () throws IOException {
+ public static KrillIndex createIndex () throws IOException {
KrillIndex ki = new KrillIndex();
String[] docIds = new String[] { "00001", "00002", "00003" };
int uid = 1;
for (String i : docIds) {
- ki.addDoc(uid++,
- getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
- true);
+ ki.addDoc(uid++, TestVirtualCorpusCache.class
+ .getResourceAsStream("/wiki/" + i + ".json.gz"), true);
}
ki.commit();
- ki.addDoc(uid++, getClass().getResourceAsStream("/wiki/00004.json.gz"),
- true);
+ ki.addDoc(uid++, TestVirtualCorpusCache.class
+ .getResourceAsStream("/wiki/00004.json.gz"), true);
ki.commit();
return ki;
}
@@ -100,7 +99,6 @@
assertFalse(VirtualCorpusCache.contains(vcId));
}
-
@Test
public void testUpdateCachedVC () throws IOException {
String vcId = "named-vc1";
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
index c210cc5..340f460 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
@@ -1,9 +1,8 @@
package de.ids_mannheim.korap.index;
+import static de.ids_mannheim.korap.TestSimple.getJsonQuery;
import static org.junit.Assert.assertEquals;
-import static de.ids_mannheim.korap.TestSimple.*;
-
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -21,7 +20,6 @@
import de.ids_mannheim.korap.query.SpanWithAttributeQuery;
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
import de.ids_mannheim.korap.response.Result;
-
import de.ids_mannheim.korap.util.QueryException;
public class TestAttributeIndex {
@@ -446,7 +444,7 @@
/**
* Arbitrary elements with only not attributes.
*/
- @Test(expected = IllegalArgumentException.class)
+ @Test
public void testCase8 () throws IOException {
ki.addDoc(createFieldDoc2());
ki.commit();
@@ -461,6 +459,11 @@
SpanWithAttributeQuery swaq = new SpanWithAttributeQuery(sql, true);
kr = ki.search(swaq, (short) 10);
+
+ assertEquals(1, kr.getErrors().size());
+ assertEquals(104, kr.getError(0).getCode());
+ assertEquals("No (positive) attribute is defined.",kr.getError(0).getMessage());
+
}
@Test
diff --git "a/src/test/resources/queries/collections/named-vcs/inval\041d-vc-id.jsonld" "b/src/test/resources/queries/collections/named-vcs/inval\041d-vc-id.jsonld"
new file mode 100644
index 0000000..7f19bb0
--- /dev/null
+++ "b/src/test/resources/queries/collections/named-vcs/inval\041d-vc-id.jsonld"
@@ -0,0 +1,11 @@
+{"collection": {
+ "@type": "koral:doc",
+ "key": "textSigle",
+ "match": "match:eq",
+ "type" : "type:string",
+ "value": [
+ "WPD/AAA/00001",
+ "WPD/AAA/00002",
+ "WPD/AAA/00003"
+ ]
+}}
diff --git a/src/test/resources/queries/collections/vc-ref/query-with-invalid-vc.jsonld b/src/test/resources/queries/collections/vc-ref/query-with-invalid-vc.jsonld
new file mode 100644
index 0000000..ec9c6a3
--- /dev/null
+++ b/src/test/resources/queries/collections/vc-ref/query-with-invalid-vc.jsonld
@@ -0,0 +1,14 @@
+{"query":{
+ "@type":"koral:token",
+ "wrap":{
+ "@type":"koral:term",
+ "layer":"orth",
+ "key":"der",
+ "match":"match:eq"
+ }
+ },
+ "collection": {
+ "@type": "koral:docGroupRef",
+ "ref": "inval!d-vc-id"
+ }
+}
diff --git a/src/test/resources/queries/collections/vc-ref/query-with-unknown-vc.jsonld b/src/test/resources/queries/collections/vc-ref/query-with-unknown-vc.jsonld
new file mode 100644
index 0000000..123fc2e
--- /dev/null
+++ b/src/test/resources/queries/collections/vc-ref/query-with-unknown-vc.jsonld
@@ -0,0 +1,14 @@
+{"query":{
+ "@type":"koral:token",
+ "wrap":{
+ "@type":"koral:term",
+ "layer":"orth",
+ "key":"der",
+ "match":"match:eq"
+ }
+ },
+ "collection": {
+ "@type": "koral:docGroupRef",
+ "ref": "unknown-vc"
+ }
+}