Support vector representation of terms in KoralQuery

Change-Id: Ia6b3ca682b52e797c589be921a3d5c5b93083502
diff --git a/Changes b/Changes
index dd69c3c..fbede7d 100644
--- a/Changes
+++ b/Changes
@@ -1,12 +1,14 @@
-0.59.0 2019-11-07
+0.59.0 2019-11-27
     - [bugfix] Fix offset retrieval in concurrent searches
       (diewald)
     - [cleanup] Removed deprecated numberOf() method from index
       (diewald)
     - [bugfix] Fix offset retrieval in concurrent getMatchInfo requests
       (diewald)
-    - Updated readme and the version of java and some plugins & libraries. 
-      (margaretha)
+    - [cleanup] Updated readme and the version of java and some plugins
+      & libraries (margaretha)
+    - [feature] Support for vector representation of terms in KoralQuery
+      (diewald)
 
 0.58.7 2019-09-16
     - [bugfix] Fix the behaviour of negative operands in virtual
diff --git a/src/main/java/de/ids_mannheim/korap/KrillQuery.java b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
index a5f6e31..791a15b 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
@@ -2,6 +2,7 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Iterator;
 
@@ -316,7 +317,7 @@
                 // Get wrapped token
                 return this._segFromJson(json.get("wrap"));
 
-            case "koral:span":
+            case "koral:span":                
                 // EM: what to do with empty koral:span? 
                 // it is allowed only in relation queries
                 if (isOperationRelation && !json.has("key") && !json.has("wrap") && !json.has("attr")) {
@@ -1123,15 +1124,31 @@
         Boolean isTerm = termType.equals("koral:term") ? true
                 : false;
         Boolean isCaseInsensitive = false;
-
-        if (!json.has("key") || json.get("key").asText().length() < 1) {
-            // why must it have an attr?
+       
+        if (!json.has("key") ||
+            (json.get("key").size() == 1 && json.get("key").asText().length() < 1)) {
+            
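+            // It may have no key but an attribute. NOTE(review): assuming Jackson semantics, size() is 0 for string nodes, so an empty string key skips this check while a one-element array enters it - confirm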
             if (!json.has("attr")) {
 //                return new SpanRepetitionQueryWrapper();
                 throw new QueryException(740,
                         "Key definition is missing in term or span");
             }
         };
+
+        
+        // Term is represented as a list of keys (push() prepends, so the list is in reverse input order)
+        LinkedList<String> keys = new LinkedList<String>();
+
+        if (json.has("key")) {
+            if (json.get("key").size() > 1) {
+                for (JsonNode value : json.get("key")) {
+                    keys.push(value.asText());
+                }
+            } else {
+                keys.push(json.get("key").asText());
+            }
+        }
         
         // Empty koral:span hack
         if (isSpan) {
@@ -1159,44 +1176,50 @@
             };
         };
 
-        StringBuilder value = new StringBuilder();
-
-        if (direction != null)
-            value.append(direction.value());
-
         // expect orth? expect lemma? 
         // s:den | i:den | cnx/l:die | mate/m:mood:ind | cnx/syn:@PREMOD |
         // mate/m:number:sg | opennlp/p:ART
 
-        if (json.has("foundry") && json.get("foundry").asText().length() > 0) {
+        StringBuilder value = new StringBuilder();
+        LinkedList<String> values = new LinkedList<String>();
+
+        if (direction != null)
+            value.append(direction.value());
+            
+        if (json.has("foundry") &&
+            json.get("foundry").asText().length() > 0) {
             value.append(json.get("foundry").asText()).append('/');
         };
 
+
         // No default foundry defined
-        if (json.has("layer") && json.get("layer").asText().length() > 0) {
+        if (json.has("layer") &&
+            json.get("layer").asText().length() > 0) {
             String layer = json.get("layer").asText();
             switch (layer) {
 
-                case "lemma":
-                    layer = "l";
-                    break;
+            case "lemma":
+                layer = "l";
+                break;
 
-                case "pos":
-                    layer = "p";
-                    break;
+            case "pos":
+                layer = "p";
+                break;
 
-                case "orth":
-                    // TODO: THIS IS AN UGLY HACK! AND SHOULD BE NAMED "SURFACE" or . OR *
-                    layer = ".";
-                    break;
+            case "orth":
+                // TODO:
+                //   THIS IS AN UGLY HACK!
+                //   AND SHOULD BE NAMED "SURFACE" or . OR *
+                layer = ".";
+                break;
 
-                case "struct":
-                    layer = "s";
-                    break;
+            case "struct":
+                layer = "s";
+                break;
 
-                case "const":
-                    layer = "c";
-                    break;
+            case "const":
+                layer = "c";
+                break;
             };
 
             if (isCaseInsensitive && isTerm) {
@@ -1204,7 +1227,7 @@
                     layer = "i";
                 else {
                     this.addWarning(767,
-                            "Case insensitivity is currently not supported for this layer");
+                                    "Case insensitivity is currently not supported for this layer");
                 };
             };
 
@@ -1220,17 +1243,33 @@
             value.append(layer).append(':');
         };
 
-        if (json.has("key") && json.get("key").asText().length() > 0) {
-            String key = json.get("key").asText();
-            value.append(isCaseInsensitive ? key.toLowerCase() : key);
-        };
+        // Remember the common prefix for all values
+        int offset = value.length();
 
-        if (json.has("value") && json.get("value").asText().length() > 0)
-            value.append(':').append(json.get("value").asText());
+        // Iterate over all keys
+        for (String key : keys) {
+
+            // Reset to common prefix
+            value.setLength(offset);
+
+            // Add key to value
+            value.append(isCaseInsensitive ? key.toLowerCase() : key);
+
+            // TODO:
+            //   This should iterate over all values as well
+            if (json.has("value") && json.get("value").asText().length() > 0)
+                value.append(':').append(json.get("value").asText());
+
+            // Add to value list (push() prepends, so the values end up in the input order of the keys)
+            values.push(value.toString());
+        };
 
         // Regular expression or wildcard
         if (isTerm) {
 
+            // Create alternation query that collects all values of the term
+            SpanAlterQueryWrapper saqw = new SpanAlterQueryWrapper(this.field);
+            
 			String match = "match:eq";
 			if (json.has("match")) {
 				match = json.get("match").asText();
@@ -1243,37 +1282,43 @@
 				switch (json.get("type").asText()) {
                 case "type:regex": {
 
-                    // The regex can be rewritten to an any token
-                    if (value.toString().matches("^[si]:\\.[\\+\\*]\\??$")) {
-                        return new SpanRepetitionQueryWrapper();
+                    for (String v : values) {
+                        
+                        // The regex can be rewritten to an any token
+                        if (v.matches("^[si]:\\.[\\+\\*]\\??$")) {
+                            return new SpanRepetitionQueryWrapper();
+                        };
+                        
+                        saqw.or(qb.re(v, isCaseInsensitive));
                     };
 
-					SpanRegexQueryWrapper srqw = qb.re(value.toString(), isCaseInsensitive);
-
 					if (match.equals("match:ne")) {
 						if (DEBUG)
 							log.trace("Term is negated");
-						// ssqw.makeNegative();
-						return this.builder().seg().without(srqw);
+                        saqw.setNegative(true);
+						return saqw;
 					}
 					else if (match.equals("match:eq")) {
-						return srqw;
+						return saqw;
 					}
 					throw new QueryException(741, "Match relation unknown");
                 }
                 case "type:wildcard": {
 
-					SpanWildcardQueryWrapper swcqw =
-						qb.wc(value.toString(), isCaseInsensitive);
+                    // Wildcard queries are deprecated in KoralQuery since 9/2017
+                    
+                    for (String v : values) {
+                        saqw.or(qb.wc(v, isCaseInsensitive));
+                    };
 
 					if (match.equals("match:ne")) {
 						if (DEBUG)
 							log.trace("Term is negated");
-						// ssqw.makeNegative();
-						return this.builder().seg().without(swcqw);
+                        saqw.setNegative(true);
+						return saqw;
 					}
 					else if (match.equals("match:eq")) {
-						return swcqw;
+						return saqw;
 					};
 					throw new QueryException(741, "Match relation unknown");
 				}
@@ -1286,21 +1331,39 @@
 				};
 			};
 
-			SpanSegmentQueryWrapper ssqw = this.builder().seg(value.toString());
+            // TODO:
+            //   This could alternatively use
+            //   https://github.com/tokuhirom/regexp-trie
+            for (String v : values) {
+                saqw.or(v);
+            };
+                
 			if (match.equals("match:ne")) {
 				if (DEBUG)
 					log.trace("Term is negated");
-				ssqw.makeNegative();
-				return this.builder().seg().without(ssqw);
+
+                // Segment "without" doesn't work in
+                // attribute contexts
+                saqw.setNegative(true);
+				return saqw;
 			}
 			else if (match.equals("match:eq")) {
-				return ssqw;
+				return saqw;
 			}
 			else {
 				throw new QueryException(741, "Match relation unknown");
 			}
 		};
 
+        if (values.size() > 1) {
+            throw new QueryException(
+                0,
+                "List representation for spans not yet supported"
+                );
+            
+        };
+
+        // Term has attribute
         if (json.has("attr")) {
             JsonNode attrNode = json.get("attr");
             if (!attrNode.has("@type")) {
@@ -1331,11 +1394,12 @@
     private SpanQueryWrapper _createElementAttrFromJson (
             SpanQueryWrapper elementWithIdWrapper, JsonNode json,
             JsonNode attrNode) throws QueryException {
-
+        
         if (attrNode.get("@type").asText().equals("koral:term")) {
             SpanQueryWrapper attrWrapper = _attrFromJson(json.get("attr"));
+
             if (attrWrapper != null) {
-                if (elementWithIdWrapper != null) {
+                if (elementWithIdWrapper != null) {                  
                     return new SpanWithAttributeQueryWrapper(
                             elementWithIdWrapper, attrWrapper);
                 }
@@ -1430,10 +1494,8 @@
             String rootValue = attrNode.get("root").asText();
             if (rootValue.equals("true") || rootValue.equals("false")) {
 
-                // TODO: Here do not refer to 'tokens'!!!
-                // EM: what should it be? property?
                 return new SpanAttributeQueryWrapper(new SpanSimpleQueryWrapper(
-                        "tokens", "@root", Boolean.valueOf(rootValue)));
+                        this.field, "@root", Boolean.valueOf(rootValue)));
             }
         }
         return null;
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAlterQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAlterQueryWrapper.java
index 4ed669f..00c2efd 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAlterQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAlterQueryWrapper.java
@@ -39,8 +39,14 @@
         };
     };
 
+    public SpanAlterQueryWrapper setNegative (Boolean neg) {
+        this.isNegative = neg;
+        return this;
+    };
 
     public SpanAlterQueryWrapper or (String term) {
+        // TODO:
+        //   Potentially optimizable by directly add()ing
         SpanQueryWrapper sqw = new SpanSimpleQueryWrapper(this.field, term);
         return this.or(sqw);
     };
@@ -50,6 +56,7 @@
         if (term.isNull())
             return this;
 
+        // TODO: Check! One negative alternative seems to render the whole group negative!
         if (term.isNegative())
             this.isNegative = true;
 
@@ -103,7 +110,7 @@
     public SpanQuery toFragmentQuery () throws QueryException {
         if (this.isNull || this.alternatives.size() == 0)
             return (SpanQuery) null;
-
+        
         if (this.alternatives.size() == 1) {
             return (SpanQuery) this.alternatives.get(0)
                     .retrieveNode(this.retrieveNode).toFragmentQuery();
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java
index 18752ce..4d1c712 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSegmentQueryWrapper.java
@@ -205,6 +205,7 @@
         else if (this.inclusive.size() == 0 && this.exclusive.size() >= 1) {
             return (SpanQuery) this._listToQuery(this.exclusive);
         }
+
         else if (this.inclusive.size() >= 1 && this.exclusive.size() == 0) {
             return (SpanQuery) this._listToQuery(this.inclusive);
         };
@@ -215,6 +216,11 @@
 
     private SpanQuery _listToQuery (ArrayList<SpanQueryWrapper> list)
             throws QueryException {
+
+        if (list.size() == 1) {
+            return (SpanQuery) list.get(0).toFragmentQuery();
+        };
+        
         SpanQuery query = list.get(0).toFragmentQuery();
 
         for (int i = 1; i < list.size(); i++) {
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
index 9627308..b4bcf5c 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
@@ -597,7 +597,91 @@
         };
     };
 
+    @Test
+    public void queryJSONtermVector () throws QueryException {
+        // orth=foo|orth=bar|orth=xyz|orth=abc
+        try {
+            String json = getJsonString(getClass()
+                    .getResource(
+                            "/queries/segment/vector.jsonld")
+                    .getFile());
+            KrillQuery kq = new KrillQuery("tokens");
 
+            assertEquals("spanOr([tokens:s:foo, tokens:s:bar, tokens:s:xyz, tokens:s:abc])",
+                         kq.fromKoral(json).toQuery().toString());
+        }
+        catch (QueryException e) {
+            fail(e.getMessage());
+        };
+    };
+
+    @Test
+    public void queryJSONtermVectorCaseInsensitive () throws QueryException {
+        // orth=fOo|orth=bAr|orth=xYz|orth=aBc (case-insensitive)
+        try {
+            String json = getJsonString(getClass()
+                    .getResource(
+                            "/queries/segment/vector-caseinsensitive.jsonld")
+                    .getFile());
+            KrillQuery kq = new KrillQuery("tokens");
+
+            assertEquals("spanOr([tokens:i:foo, tokens:i:bar, tokens:i:xyz, tokens:i:abc])",
+                         kq.fromKoral(json).toQuery().toString());
+        }
+        catch (QueryException e) {
+            fail(e.getMessage());
+        };
+    };
+
+    
+    @Test
+    public void queryJSONwildcardVector () throws QueryException {
+        // orth=f?o|orth=bar|orth=x*z|orth=abc (wildcards)
+        try {
+            String json = getJsonString(getClass()
+                    .getResource(
+                            "/queries/segment/vector-wildcards.jsonld")
+                    .getFile());
+            KrillQuery kq = new KrillQuery("tokens");
+
+            assertEquals("spanOr([" +
+                         "SpanMultiTermQueryWrapper(tokens:s:f?o), " +
+                         "SpanMultiTermQueryWrapper(tokens:s:bar), " +
+                         "SpanMultiTermQueryWrapper(tokens:s:x*z), " +
+                         "SpanMultiTermQueryWrapper(tokens:s:abc)" +
+                         "])",
+                         kq.fromKoral(json).toQuery().toString());
+        }
+        catch (QueryException e) {
+            fail(e.getMessage());
+        };
+    };    
+
+
+    @Test
+    public void queryJSONregexVector () throws QueryException {
+        // orth=f.?o|orth=b[au]r|orth=x(yz)*|orth=ab+c (regular expressions)
+        try {
+            String json = getJsonString(getClass()
+                    .getResource(
+                            "/queries/segment/vector-regex.jsonld")
+                    .getFile());
+            KrillQuery kq = new KrillQuery("tokens");
+
+            assertEquals("spanOr([" +
+                         "SpanMultiTermQueryWrapper(tokens:/s:f.?o/), " +
+                         "SpanMultiTermQueryWrapper(tokens:/s:b[au]r/), " +
+                         "SpanMultiTermQueryWrapper(tokens:/s:x(yz)*/), " +
+                         "SpanMultiTermQueryWrapper(tokens:/s:ab+c/)" +
+                         "])",
+                         kq.fromKoral(json).toQuery().toString());
+        }
+        catch (QueryException e) {
+            fail(e.getMessage());
+        };
+    };        
+    
+    
     @Test
     public void queryJSONregexRewrite1 () throws QueryException {
         // "der" [.+?]
@@ -612,6 +696,25 @@
 
 
     @Test
+    public void queryJSONregexVectorRewrite () throws QueryException {
+        // der [orth=f.?o|orth=b[au]r|orth=.*|orth=ab+c]
+        try {
+            String json = getJsonString(
+                getClass()
+                .getResource("/queries/sequence/regex-rewrite-vector.jsonld")
+                .getFile());
+            KrillQuery kq = new KrillQuery("tokens");
+
+            assertEquals("focus(254: spanContain(<tokens:base/s:t />, {254: spanExpansion(tokens:s:der, []{1, 1}, right)}))",
+                         kq.fromKoral(json).toQuery().toString());
+        }
+        catch (QueryException e) {
+            fail(e.getMessage());
+        };
+    };    
+
+
+    @Test
     public void queryJSONmerge () throws QueryException {
         // treat merging gracefully
         String json = getJsonString(getClass()
diff --git a/src/test/resources/queries/segment/vector-caseinsensitive.jsonld b/src/test/resources/queries/segment/vector-caseinsensitive.jsonld
new file mode 100644
index 0000000..505ff2e
--- /dev/null
+++ b/src/test/resources/queries/segment/vector-caseinsensitive.jsonld
@@ -0,0 +1,12 @@
+{
+  "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.3/context.jsonld",
+  "query": {
+	  "@type": "koral:token", 
+	  "wrap": {
+	    "@type": "koral:term", 
+	    "layer": "orth",
+	    "key": ["fOo","bAr","xYz","aBc"],
+      "flags":["flags:caseInsensitive"]
+	  }
+  }
+}
diff --git a/src/test/resources/queries/segment/vector-regex.jsonld b/src/test/resources/queries/segment/vector-regex.jsonld
new file mode 100644
index 0000000..d682722
--- /dev/null
+++ b/src/test/resources/queries/segment/vector-regex.jsonld
@@ -0,0 +1,12 @@
+{
+  "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.3/context.jsonld",
+  "query": {
+	  "@type": "koral:token", 
+	  "wrap": {
+	    "@type": "koral:term", 
+	    "layer": "orth",
+	    "key": ["f.?o","b[au]r","x(yz)*","ab+c"],
+      "type":"type:regex"
+	  }
+  }
+}
diff --git a/src/test/resources/queries/segment/vector-wildcards.jsonld b/src/test/resources/queries/segment/vector-wildcards.jsonld
new file mode 100644
index 0000000..1c718c1
--- /dev/null
+++ b/src/test/resources/queries/segment/vector-wildcards.jsonld
@@ -0,0 +1,12 @@
+{
+  "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.3/context.jsonld",
+  "query": {
+	  "@type": "koral:token", 
+	  "wrap": {
+	    "@type": "koral:term", 
+	    "layer": "orth",
+	    "key": ["f?o","bar","x*z","abc"],
+      "type":"type:wildcard"
+	  }
+  }
+}
diff --git a/src/test/resources/queries/segment/vector.jsonld b/src/test/resources/queries/segment/vector.jsonld
new file mode 100644
index 0000000..a813ef0
--- /dev/null
+++ b/src/test/resources/queries/segment/vector.jsonld
@@ -0,0 +1,11 @@
+{
+  "@context" : "http://ids-mannheim.de/ns/KorAP/json-ld/v0.3/context.jsonld",
+  "query": {
+	  "@type": "koral:token", 
+	  "wrap": {
+	    "@type": "koral:term", 
+	    "layer": "orth",
+	    "key": ["foo","bar","xyz","abc"]
+	  }
+  }
+}
diff --git a/src/test/resources/queries/sequence/regex-rewrite-vector.jsonld b/src/test/resources/queries/sequence/regex-rewrite-vector.jsonld
new file mode 100644
index 0000000..174ab19
--- /dev/null
+++ b/src/test/resources/queries/sequence/regex-rewrite-vector.jsonld
@@ -0,0 +1,29 @@
+{
+  "@context":"http://ids-mannheim.de/ns/KorAP/json-ld/v0.3/context.jsonld",
+  "collection":null,
+  "query":{
+    "@type": "koral:group",
+    "operation": "operation:sequence",
+    "operands": [
+      {
+        "@type": "koral:token",
+        "wrap": {
+          "@type": "koral:term",
+          "key": "der",
+          "layer": "orth",
+          "match": "match:eq"
+        }
+      },
+      {
+        "@type": "koral:token",
+        "wrap": {
+          "@type": "koral:term",
+          "type" : "type:regex",
+	        "key": ["f.?o","b[au]r",".*","ab+c"],
+          "layer": "orth",
+          "match": "match:eq"
+        }
+      }
+    ]
+  }
+}