Optimize regex queries that are equivalent to any tokens

Change-Id: I9934d851920eb20e4c097f268c6aa439a824797d
diff --git a/Changes b/Changes
index 2cbdcf7..6c9d48f 100644
--- a/Changes
+++ b/Changes
@@ -1,8 +1,10 @@
-0.55.7 2016-10-10
+0.55.7 2016-10-11
         - [bugfix] Throw error on optional operands in distance
           queries (diewald)
         - [performance] Remember solved problematic queries in the
           query planner (diewald)
+        - [performance] Treat term queries like ".+?", ".+", ".*?", and ".*"
+          as any-terms (diewald)
 
 0.55.6 2016-08-10
         - [bugfix] distance with key "t" uses default foundry (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/KrillQuery.java b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
index 3d70505..19fd398 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
@@ -1193,18 +1193,23 @@
 
             // Branch on type
             switch (json.get("type").asText()) {
-                case "type:regex":
-                    return qb.seg(qb.re(value.toString(), isCaseInsensitive));
+			case "type:regex": {
 
-                case "type:wildcard":
-                    return qb.seq(qb.wc(value.toString(), isCaseInsensitive));
+				// The regex can be rewritten to an any token
+				if (value.toString().matches("^[si]:\\.[\\+\\*]\\??$")) {
+					return new SpanRepetitionQueryWrapper();
+				};
+				return qb.seg(qb.re(value.toString(), isCaseInsensitive));
+			}
+			case "type:wildcard":
+				return qb.seq(qb.wc(value.toString(), isCaseInsensitive));
 
-                case "type:string":
-                    break;
+			case "type:string":
+				break;
 
-                default:
-                    this.addWarning(746,
-                            "Term type is not supported - treated as a string");
+			default:
+				this.addWarning(746,
+								"Term type is not supported - treated as a string");
             };
         };
 
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index f1f412d..19f1ec5 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -23,8 +23,14 @@
 import java.util.*;
 
 /*
-  Todo: Store primary data at base/cons field.
+  TODO: Store primary data at base/cons field.
   All other Termvectors should have no stored field!
+
+  TODO: Currently, character offsets are stored together with
+  positional information in the token stream. This is bad!
+  The character offsets may need a special encoding in Lucene
+  so they can be stored directly (not in the payloads),
+  which would make this less messy and speed things up.
 */
 
 /**
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
index f11a5da..09d690b 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
@@ -589,6 +589,30 @@
         };
     };
 
+	@Test
+    public void queryJSONregexRewrite1 () throws QueryException {
+        // "der" [.+?] - the regex token is expected to be rewritten to an any-token expansion ([]{1, 1}) of "der"
+		String json = getString(getClass().getResource(
+									"/queries/sequence/regex-rewrite-1.jsonld").getFile());
+		KrillQuery kq = new KrillQuery("tokens");
+
+		assertEquals(
+			kq.fromKoral(json).toQuery().toString(),
+			"focus(254: spanContain(<tokens:base/s:t />, {254: spanExpansion(tokens:s:der, []{1, 1}, right)}))");
+    };
+
+	@Test
+	@Ignore
+    public void queryJSONregexRewrite2 () throws QueryException {
+        // "der" [.*] [.*?] [.+] [.+?] - disabled for now; kept as @Test so runners report it as ignored
+		String json = getString(getClass().getResource(
+									"/queries/sequence/regex-rewrite-2.jsonld").getFile());
+		KrillQuery kq = new KrillQuery("tokens");
+		assertEquals(
+			kq.fromKoral(json).toQuery().toString(),
+			"focus(254: spanContain(<tokens:base/s:t />, {254: spanExpansion(tokens:s:der, []{1, 4}, right)}))");
+    };
+
 
     public static String getString (String path) {
         StringBuilder contentBuilder = new StringBuilder();
diff --git a/src/test/resources/queries/sequence/regex-rewrite-1.jsonld b/src/test/resources/queries/sequence/regex-rewrite-1.jsonld
new file mode 100644
index 0000000..e3cf3bf
--- /dev/null
+++ b/src/test/resources/queries/sequence/regex-rewrite-1.jsonld
@@ -0,0 +1,29 @@
+{
+  "@context":"http://ids-mannheim.de/ns/KorAP/json-ld/v0.3/context.jsonld",
+  "collection":null,
+  "query":{
+    "@type": "koral:group",
+    "operation": "operation:sequence",
+    "operands": [
+      {
+        "@type": "koral:token",
+        "wrap": {
+          "@type": "koral:term",
+          "key": "der",
+          "layer": "orth",
+          "match": "match:eq"
+        }
+      },
+      {
+        "@type": "koral:token",
+        "wrap": {
+          "@type": "koral:term",
+          "type" : "type:regex",
+          "key": ".+?",
+          "layer": "orth",
+          "match": "match:eq"
+        }
+      }
+    ]
+  }
+}
diff --git a/src/test/resources/queries/sequence/regex-rewrite-2.jsonld b/src/test/resources/queries/sequence/regex-rewrite-2.jsonld
new file mode 100644
index 0000000..a073d3f
--- /dev/null
+++ b/src/test/resources/queries/sequence/regex-rewrite-2.jsonld
@@ -0,0 +1,59 @@
+{
+  "@context":"http://ids-mannheim.de/ns/KorAP/json-ld/v0.3/context.jsonld",
+  "collection":null,
+  "query":{
+    "@type": "koral:group",
+    "operation": "operation:sequence",
+    "operands": [
+      {
+        "@type": "koral:token",
+        "wrap": {
+          "@type": "koral:term",
+          "key": "der",
+          "layer": "orth",
+          "match": "match:eq"
+        }
+      },
+      {
+        "@type": "koral:token",
+        "wrap": {
+          "@type": "koral:term",
+          "type" : "type:regex",
+          "key": ".*",
+          "layer": "orth",
+          "match": "match:eq"
+        }
+      },
+      {
+        "@type": "koral:token",
+        "wrap": {
+          "@type": "koral:term",
+          "type" : "type:regex",
+          "key": ".*?",
+          "layer": "orth",
+          "match": "match:eq"
+        }
+      },
+      {
+        "@type": "koral:token",
+        "wrap": {
+          "@type": "koral:term",
+          "type" : "type:regex",
+          "key": ".+",
+          "layer": "orth",
+          "match": "match:eq"
+        }
+      },
+      {
+        "@type": "koral:token",
+        "wrap": {
+          "@type": "koral:term",
+          "type" : "type:regex",
+          "key": ".+?",
+          "layer": "orth",
+          "match": "match:eq"
+        }
+      }
+    ]
+  }
+}