Added restrictions to VC ID and handled non-existent VC

Change-Id: Ibd26ce749a9cbd4d0683cc381b94e44cf4670f5c
diff --git a/Changes b/Changes
index ad70b06..d4ececa 100644
--- a/Changes
+++ b/Changes
@@ -1,17 +1,21 @@
-0.60.3 2022-03-17
+0.60.3 2022-03-30
     - [cleanup] Updated fingerprints to base64url (closed #83)
     - [bugfix] Fixed ConcurrentModificationException in VC cache
       handling (margaretha)
+    - [feature] Added restrictions to VC ID and handled non-existent VC
+      (margaretha)  
 
     !!! This will invalidate all VC caches. Please recache!
 
-0.60.2 2022-01-04
+0.60.2 2022-02-04
     - [security] More log4j updates (diewald)
     - [feature] Support for field value vector method (fixes #81; diewald)
 	- [cleanup] Moved and updated cache-tests from TestKrillCollectionIndex 
 	  to TestVirtualCorpusCache (resolved #44; margaretha)
     - [feature] Added a Krill API returning textSigles for a given 
       corpus query (margaretha)
+    - [cleanup] Replaced array nodes of field values with a simple string
+      (margaretha)
 
 0.60.1 2021-12-17
     - [feature] Added vc loading from classpath (margaretha)
diff --git a/src/main/java/de/ids_mannheim/korap/Krill.java b/src/main/java/de/ids_mannheim/korap/Krill.java
index 01e20ed..4b1b642 100644
--- a/src/main/java/de/ids_mannheim/korap/Krill.java
+++ b/src/main/java/de/ids_mannheim/korap/Krill.java
@@ -257,7 +257,7 @@
      */
     public Krill setIndex (KrillIndex index) {
         this.index = index;
-        VirtualCorpusCache.setIndexInfo(index);
+            VirtualCorpusCache.setIndexInfo(index);
         return this;
     };
 
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollection.java b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
index 8f8dbde..fbd2f4f 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
@@ -606,6 +606,9 @@
             else if (t instanceof QueryException) {
                 throw new QueryException(((QueryException) t).getErrorCode(), t.getLocalizedMessage());
             }
+            else {
+                throw e;
+            }
         }
 
         if (DEBUG) {
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 6421226..7f807fa 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -77,6 +77,7 @@
 import de.ids_mannheim.korap.util.KrillDate;
 import de.ids_mannheim.korap.util.KrillProperties;
 import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.util.StatusCodes;
 
 /**
  * <p>KrillIndex implements a simple API for searching in and writing
@@ -1643,7 +1644,13 @@
 		catch (QueryException e) {
             kr.addError(e.getErrorCode(),e.getLocalizedMessage());
             log.warn(e.getLocalizedMessage());			
-		};
+		}
+		catch (Exception e) {
+		    // 104 ILLEGAL_ARGUMENT, see Kustvakt core
+		    // de.ids_mannheim.korap.exceptions.StatusCodes.ILLEGAL_ARGUMENT
+		    kr.addError(104,e.getLocalizedMessage());
+            log.warn(e.getLocalizedMessage());
+        }
 
         // Stop timer thread
         tthread.stopTimer();
diff --git a/src/main/java/de/ids_mannheim/korap/cache/VirtualCorpusCache.java b/src/main/java/de/ids_mannheim/korap/cache/VirtualCorpusCache.java
index 26d74e2..eeb793b 100644
--- a/src/main/java/de/ids_mannheim/korap/cache/VirtualCorpusCache.java
+++ b/src/main/java/de/ids_mannheim/korap/cache/VirtualCorpusCache.java
@@ -13,6 +13,7 @@
 import java.util.Map;
 import java.util.Set;
 import java.util.function.Supplier;
+import java.util.regex.Pattern;
 
 import org.apache.lucene.index.LeafReaderContext;
 
@@ -32,6 +33,8 @@
  */
 public class VirtualCorpusCache {
 
+    public static Pattern vcNamePattern = Pattern.compile("[a-zA-Z0-9]+[a-zA-Z_0-9-.]+");
+
     public static final String CACHE_LOCATION = "vc-cache";
     public static int CAPACITY = 5;
     public static final Map<String, Map<String, DocBits>> map = Collections
@@ -59,8 +62,45 @@
     }
 
 
+    /**
+     * Path traversal must not be allowed using the VC ID.
+     * 
+     * VC id may only have one slash with the following format:
+     * [username]/[vc-name]
+     * 
+     * VC name may only contains alphabets, numbers, dashes and
+     * full-stops. See {@link #vcNamePattern}
+     * 
+     * @param vcId
+     * @return true if the given VC id is valid, false otherwise
+     */
+    private static boolean isVcIdValid (String vcId) {
+//        if (vcId.contains("./")) {
+//            return false;
+//        }
+
+        String[] parts = vcId.split("/");
+        if (parts.length > 2) {
+            vcToCleanUp.remove(vcId);
+            return false;
+        }
+
+        String vcName = parts.length == 2 ? parts[1] : parts[0];
+        if (!vcNamePattern.matcher(vcName).matches()) {
+            vcToCleanUp.remove(vcId);
+            return false;
+        }
+
+        return true;
+    }
+
+
     public static void storeOnDisk (String vcId, String leafFingerprint,
             DocBits docBits) {
+        if (!isVcIdValid(vcId)) {
+            throw new IllegalArgumentException("Cannot cache VC due to invalid VC ID");
+        }
+
         File dir = new File(CACHE_LOCATION + "/" + vcId);
         if (!dir.exists()) {
             dir.mkdirs();
@@ -84,15 +124,19 @@
     }
 
 
-    public static void store (String vcId, Map<String, DocBits> vcData) {
+    public static void store (String vcId, Map<String, DocBits> vcData){
         map.put(vcId, vcData);
         vcData.keySet().forEach(leafFingerprint -> {
             storeOnDisk(vcId, leafFingerprint, vcData.get(leafFingerprint));
         });
     }
 
-    public static void store (String vcId, KrillIndex index)
-            throws QueryException, IOException {
+
+    public static void store (String vcId, KrillIndex index) {
+
+        if (!isVcIdValid(vcId)) {
+            throw new IllegalArgumentException("Cannot cache VC due to invalid VC ID");
+        }
         
         DocBitsSupplier docBitsSupplier = new VirtualCorpusFilter(
                 vcId).new DocBitsSupplier();
@@ -100,7 +144,7 @@
         for (LeafReaderContext context : index.reader().leaves()) {
             leafFingerprint = Fingerprinter.create(
                     context.reader().getCombinedCoreAndDeletesKey().toString());
-            
+
             getDocBits(vcId, leafFingerprint, () -> {
                 try {
                     return docBitsSupplier.supplyDocBits(context,
@@ -143,6 +187,10 @@
 
 
     public static boolean contains (String vcId) {
+        if (!isVcIdValid(vcId)) {
+            return false;
+        }
+
         if (map.containsKey(vcId)) {
             return true;
         }
@@ -152,7 +200,19 @@
         }
     }
 
+
+    /**
+     * Deletes the VC from memory cache and disk cache. If VC doesn't
+     * exist, the method keeps silent about it and no error will be
+     * thrown because the deletion purpose has been achieved.
+     * 
+     * @param vcId
+     */
     public static void delete (String vcId) {
+        if (!isVcIdValid(vcId)) {
+            return;
+        }
+
         vcToCleanUp.remove(vcId);
         map.remove(vcId);
         File vc = new File(CACHE_LOCATION + "/" + vcId);
@@ -166,6 +226,7 @@
         }
     }
 
+
     public static void reset () {
         vcToCleanUp.clear();
         map.clear();
@@ -181,7 +242,7 @@
         }
         vcCache.delete();
     }
-    
+
 
     /**
      * Sets IndexInfo and checks if there is any VC to clean up. This
@@ -191,6 +252,8 @@
      * map of a VC, it is marked for clean up. The cached VC will be
      * cleaned up, next time the index is used in {@link Krill}.
      * see {@link #getDocBits(String, String, Supplier)}
+     * 
+     * @throws QueryException
      */
     public static void setIndexInfo (IndexInfo indexInfo) {
         VirtualCorpusCache.indexInfo = indexInfo;
@@ -240,6 +303,7 @@
      * @param calculateDocBits
      *            a supplier calculating the DocBits
      * @return DocBits
+     * @throws QueryException
      */
     public static DocBits getDocBits (String vcId, String leafFingerprint,
             Supplier<DocBits> calculateDocBits) {
diff --git a/src/main/java/de/ids_mannheim/korap/response/Response.java b/src/main/java/de/ids_mannheim/korap/response/Response.java
index 21712c7..55a97eb 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Response.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Response.java
@@ -3,7 +3,6 @@
 import static de.ids_mannheim.korap.util.KrillString.quote;
 
 import java.util.HashMap;
-import java.util.List;
 
 import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
@@ -11,7 +10,6 @@
 import com.fasterxml.jackson.annotation.JsonInclude.Include;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.node.ArrayNode;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 
 import de.ids_mannheim.korap.KrillCollection;
diff --git a/src/test/java/de/ids_mannheim/korap/cache/TestInvalidVcId.java b/src/test/java/de/ids_mannheim/korap/cache/TestInvalidVcId.java
new file mode 100644
index 0000000..6e6f7c2
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/cache/TestInvalidVcId.java
@@ -0,0 +1,103 @@
+package de.ids_mannheim.korap.cache;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.Test;
+
+import de.ids_mannheim.korap.Krill;
+import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.response.Message;
+import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.util.StatusCodes;
+
+public class TestInvalidVcId {
+
+    private KrillIndex ki;
+    
+    public TestInvalidVcId () throws IOException {
+        ki = TestVirtualCorpusCache.createIndex();
+    }
+
+    @Test
+    public void testStoreVcNotExist () {
+        String vcId = "snx";
+        RuntimeException ex = assertThrows(RuntimeException.class, () -> {
+            VirtualCorpusCache.store(vcId, ki);
+        });
+        assertEquals("de.ids_mannheim.korap.util.QueryException: "
+                + "Collection is not found queries/collections/named-vcs/snx.jsonld",
+                ex.getMessage());
+    }
+
+
+    @Test
+    public void testReferVcNotExist () throws IOException {
+        String file = "/queries/collections/vc-ref/query-with-unknown-vc.jsonld";
+        InputStream is = getClass().getResourceAsStream(file);
+        String queryRefJson = IOUtils.toString(is, "utf-8");
+
+        Krill krill = new Krill(queryRefJson);
+        Result result = krill.apply(ki);
+        Message m = result.getError(0);
+        assertEquals(StatusCodes.MISSING_COLLECTION, m.getCode());
+        assertEquals(
+                "Collection is not found queries/collections/named-vcs/unknown-vc.jsonld",
+                m.getMessage());
+        assertEquals(0, result.getTotalResults());
+    }
+
+
+    @Test
+    public void testDeleteVcNotExist () throws IOException {
+        VirtualCorpusCache.delete("unknown-vc-id");
+    }
+
+
+    @Test
+    public void testStoreVcInvalidChars () {
+        String vcId = "inval!d-vc-id";
+        IllegalArgumentException ex = assertThrows(
+                IllegalArgumentException.class, () -> {
+                    VirtualCorpusCache.store(vcId, ki);
+                });
+        assertEquals("Cannot cache VC due to invalid VC ID", ex.getMessage());
+    }
+
+    @Test
+    public void testStoreVcInvalidParentPath () {
+        String vcId = "..";
+        IllegalArgumentException ex = assertThrows(
+                IllegalArgumentException.class, () -> {
+                    VirtualCorpusCache.store(vcId, ki);
+                });
+        assertEquals("Cannot cache VC due to invalid VC ID", ex.getMessage());
+    }
+    
+    @Test
+    public void testStoreVcInvalidNonASCII () {
+        String vcId = "aßäüö";
+        IllegalArgumentException ex = assertThrows(
+                IllegalArgumentException.class, () -> {
+                    VirtualCorpusCache.store(vcId, ki);
+                });
+        assertEquals("Cannot cache VC due to invalid VC ID", ex.getMessage());
+    }
+    
+    @Test
+    public void testReferVcInvalidChars () throws IOException {
+        String file = "/queries/collections/vc-ref/query-with-invalid-vc.jsonld";
+        InputStream is = getClass().getResourceAsStream(file);
+        String queryRefJson = IOUtils.toString(is, "utf-8");
+
+        Krill krill = new Krill(queryRefJson);
+        Result result = krill.apply(ki);
+        Message m = result.getError(0);
+        assertEquals("Cannot cache VC due to invalid VC ID", m.getMessage());
+        assertEquals(104, m.getCode());
+    }
+}
diff --git a/src/test/java/de/ids_mannheim/korap/cache/TestVirtualCorpusCache.java b/src/test/java/de/ids_mannheim/korap/cache/TestVirtualCorpusCache.java
index 9630767..2c8edc2 100644
--- a/src/test/java/de/ids_mannheim/korap/cache/TestVirtualCorpusCache.java
+++ b/src/test/java/de/ids_mannheim/korap/cache/TestVirtualCorpusCache.java
@@ -47,19 +47,18 @@
     }
 
 
-    private KrillIndex createIndex () throws IOException {
+    public static KrillIndex createIndex () throws IOException {
         KrillIndex ki = new KrillIndex();
         String[] docIds = new String[] { "00001", "00002", "00003" };
         int uid = 1;
         for (String i : docIds) {
-            ki.addDoc(uid++,
-                    getClass().getResourceAsStream("/wiki/" + i + ".json.gz"),
-                    true);
+            ki.addDoc(uid++, TestVirtualCorpusCache.class
+                    .getResourceAsStream("/wiki/" + i + ".json.gz"), true);
         }
         ki.commit();
 
-        ki.addDoc(uid++, getClass().getResourceAsStream("/wiki/00004.json.gz"),
-                true);
+        ki.addDoc(uid++, TestVirtualCorpusCache.class
+                .getResourceAsStream("/wiki/00004.json.gz"), true);
         ki.commit();
         return ki;
     }
@@ -100,7 +99,6 @@
         assertFalse(VirtualCorpusCache.contains(vcId));
     }
 
-
     @Test
     public void testUpdateCachedVC () throws IOException {
         String vcId = "named-vc1";
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
index c210cc5..340f460 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
@@ -1,9 +1,8 @@
 package de.ids_mannheim.korap.index;
 
+import static de.ids_mannheim.korap.TestSimple.getJsonQuery;
 import static org.junit.Assert.assertEquals;
 
-import static de.ids_mannheim.korap.TestSimple.*;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -21,7 +20,6 @@
 import de.ids_mannheim.korap.query.SpanWithAttributeQuery;
 import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
 import de.ids_mannheim.korap.response.Result;
-
 import de.ids_mannheim.korap.util.QueryException;
 
 public class TestAttributeIndex {
@@ -446,7 +444,7 @@
     /**
      * Arbitrary elements with only not attributes.
      */
-    @Test(expected = IllegalArgumentException.class)
+    @Test
     public void testCase8 () throws IOException {
         ki.addDoc(createFieldDoc2());
         ki.commit();
@@ -461,6 +459,11 @@
 
         SpanWithAttributeQuery swaq = new SpanWithAttributeQuery(sql, true);
         kr = ki.search(swaq, (short) 10);
+        
+        assertEquals(1, kr.getErrors().size());
+        assertEquals(104, kr.getError(0).getCode());
+        assertEquals("No (positive) attribute is defined.",kr.getError(0).getMessage());
+        
     }
 
     @Test
diff --git "a/src/test/resources/queries/collections/named-vcs/inval\041d-vc-id.jsonld" "b/src/test/resources/queries/collections/named-vcs/inval\041d-vc-id.jsonld"
new file mode 100644
index 0000000..7f19bb0
--- /dev/null
+++ "b/src/test/resources/queries/collections/named-vcs/inval\041d-vc-id.jsonld"
@@ -0,0 +1,11 @@
+{"collection": {
+    "@type": "koral:doc",
+    "key": "textSigle",
+    "match": "match:eq",
+    "type" : "type:string",
+    "value": [
+          "WPD/AAA/00001",
+          "WPD/AAA/00002",
+          "WPD/AAA/00003"
+    ]
+}}
diff --git a/src/test/resources/queries/collections/vc-ref/query-with-invalid-vc.jsonld b/src/test/resources/queries/collections/vc-ref/query-with-invalid-vc.jsonld
new file mode 100644
index 0000000..ec9c6a3
--- /dev/null
+++ b/src/test/resources/queries/collections/vc-ref/query-with-invalid-vc.jsonld
@@ -0,0 +1,14 @@
+{"query":{
+    "@type":"koral:token",
+    "wrap":{
+      "@type":"koral:term",
+      "layer":"orth",
+      "key":"der",
+      "match":"match:eq"
+    }
+  },
+  "collection": {
+    "@type": "koral:docGroupRef",
+    "ref": "inval!d-vc-id"
+  }
+}
diff --git a/src/test/resources/queries/collections/vc-ref/query-with-unknown-vc.jsonld b/src/test/resources/queries/collections/vc-ref/query-with-unknown-vc.jsonld
new file mode 100644
index 0000000..123fc2e
--- /dev/null
+++ b/src/test/resources/queries/collections/vc-ref/query-with-unknown-vc.jsonld
@@ -0,0 +1,14 @@
+{"query":{
+    "@type":"koral:token",
+    "wrap":{
+      "@type":"koral:term",
+      "layer":"orth",
+      "key":"der",
+      "match":"match:eq"
+    }
+  },
+  "collection": {
+    "@type": "koral:docGroupRef",
+    "ref": "unknown-vc"
+  }
+}