Added text search functionality to collection builder

Change-Id: I16d45bb7651763e6f41857c0839962bd14a1f0af
diff --git a/Changes b/Changes
index 1ed8d2e..7ed178a 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.57 2018-03-28
+        - [feature] Support text queries in metadata (diewald)
+
 0.56.2 2018-03-23
         - [feature] Introduce meta field retrieval method (diewald)
         - [cleanup] Rename KrillQuery's "_"-method to "nr" to improve
diff --git a/pom.xml b/pom.xml
index cb67ac4..1094693 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
 
 	<groupId>de.ids_mannheim.korap</groupId>
 	<artifactId>Krill</artifactId>
-	<version>0.56.2</version>
+	<version>0.57.0</version>
 	<packaging>jar</packaging>
 
 	<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollection.java b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
index b2d326e..5d405eb 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
@@ -227,7 +227,8 @@
                         return this.cb.term(key, json.get("value").asText())
                                 .not();
 
-                    // This may change - but for now it means the elements are lowercased
+					// TODO:
+					// This needs to change - but for now it means the elements are lowercased
                     case "match:contains":
                         return this.cb.term(key,
                                 json.get("value").asText().toLowerCase());
diff --git a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
index 4716804..b688aea 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
@@ -2,11 +2,16 @@
 
 import java.util.*;
 import java.io.IOException;
+import java.io.StringReader;
 
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.TermsFilter;
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.NumericRangeFilter;
+import org.apache.lucene.analysis.de.GermanAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
 import de.ids_mannheim.korap.util.KrillDate;
 
 import org.slf4j.Logger;
@@ -17,9 +22,12 @@
 
 /*
  * TODO: Optimize!
- * - Remove identical object in Boolean groups
- * - Flatten boolean groups
- * - create "between" ranges for multiple date objects
+ *   - Remove identical object in Boolean groups
+ *   - Flatten boolean groups
+ *   - create "between" ranges for multiple date objects
+ *
+ * TODO:
+ *   - Filters are deprecated, they should be ported to queries
  */
 
 public class CollectionBuilder {
@@ -42,6 +50,11 @@
     };
 
 
+    public CollectionBuilder.Interface text (String field, String text) {
+        return new CollectionBuilder.Text(field, text);
+    };
+	
+
     public CollectionBuilder.Interface since (String field, String date) {
         int since = new KrillDate(date).floor();
 
@@ -185,6 +198,66 @@
         };
     };
 
+
+    public class Text implements CollectionBuilder.Interface {
+        private boolean isNegative = false;
+        // private boolean regex = false;
+        private String field;
+        private String text;
+
+
+        public Text (String field, String text) {
+            this.field = field;
+            this.text = text;
+        };
+
+		// TODO:
+		//   Currently this treatment is language-specific (German) and
+		//   probably does too much (e.g. stemming, stop-word removal).
+        public Filter toFilter () {
+			// Analyze the text and wrap the resulting phrase query as a filter;
+			// try-with-resources closes analyzer and stream even on failure.
+			PhraseQuery pq = new PhraseQuery();
+			int pos = 0;
+			try (GermanAnalyzer ga = new GermanAnalyzer();
+				 TokenStream ts = ga.tokenStream(this.field, new StringReader(this.text))) {
+				CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
+				ts.reset();
+				while (ts.incrementToken()) {
+					pq.add(new org.apache.lucene.index.Term(this.field, term.toString()), pos++);
+				};
+				// Required by the TokenStream workflow before close()
+				ts.end();
+			}
+			catch (IOException ie) {
+				// Returns null on failure; toString() maps that to ""
+				System.err.println(ie);
+				return null;
+			};
+			return new QueryWrapperFilter(pq);
+        };
+
+
+        public String toString () {
+            Filter filter = this.toFilter();
+            if (filter == null)
+                return "";
+            return filter.toString();
+        };
+
+
+        public boolean isNegative () {
+            return this.isNegative;
+        };
+
+
+        public CollectionBuilder.Interface not () {
+            this.isNegative = true;
+            return this;
+        };
+    };
+
+	
     public class Group implements CollectionBuilder.Interface {
         private boolean isOptional = false;
         private boolean isNegative = true;
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
index 8108b30..4b5ae02 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
@@ -420,6 +420,23 @@
         assertEquals(3, kcn.docCount());
     };
 
+	@Test
+    public void testIndexWithTextStringQueries () throws IOException {
+		ki = new KrillIndex();
+		ki.addDoc(createDoc1());
+		ki.commit();
+
+        CollectionBuilder cb = new CollectionBuilder();
+        KrillCollection kcn = new KrillCollection(ki);
+
+		// Build a text query from a simple string
+        kcn.fromBuilder(cb.text("text", "Der alte Mann"));
+
+		// Uses the German analyzer for the moment (expected value comes first)
+		assertEquals("QueryWrapperFilter(text:\"alt mann\")", kcn.toString());
+        // assertEquals(3, kcn.docCount());
+	};
+
 
     @Test
     public void filterExampleFromLegacy () throws Exception {