Improve keyword indexing so that keyword fields are retrievable as individual keywords

Change-Id: Iad74b910ef66bbf684fa06ad1e6bac848a4da9ff
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index 81cb49f..b7cf517 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -67,11 +67,14 @@
         tvNoField.setStoreTermVectorPayloads(true);
         tvNoField.setStoreTermVectorOffsets(false);
 
+        keywords.setStoreTermVectors(false);
+		/*
         keywords.setStoreTermVectors(true);
         keywords.setStoreTermVectorPositions(false);
         keywords.setStoreTermVectorPayloads(false);
         keywords.setStoreTermVectorOffsets(false);
-        keywords.setIndexOptions(IndexOptions.DOCS);
+		*/
+        keywords.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
     };
 
 
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 009e18b..81d4c2e 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -91,7 +91,7 @@
 	private static final int CONTEXT = -99998;
 
     // This advices the java compiler to ignore all loggings
-    public static final boolean DEBUG = true;
+    public static final boolean DEBUG = false;
 
     // Mapper for JSON serialization
     ObjectMapper mapper = new ObjectMapper();
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaField.java b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
index 095dd89..e997313 100644
--- a/src/main/java/de/ids_mannheim/korap/response/MetaField.java
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
@@ -56,7 +56,7 @@
 			};
 		}
 
-		// Value is textual
+		// Value is textual or keywords
 		else {
 			// Value is a list
 			if (this.values.size() > 1) {
@@ -69,7 +69,7 @@
 			}
 
 			// Value is a single
-			else {
+			else if (this.values.size() > 0) {
 				json.put("value", this.values.get(0));
 			};
 		};
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaFields.java b/src/main/java/de/ids_mannheim/korap/response/MetaFields.java
index 8e0bd42..213d2d6 100644
--- a/src/main/java/de/ids_mannheim/korap/response/MetaFields.java
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaFields.java
@@ -11,6 +11,14 @@
 import com.fasterxml.jackson.databind.node.ArrayNode;
 import de.ids_mannheim.korap.index.AbstractDocument;
 
+import java.io.IOException;
+
+import de.ids_mannheim.korap.index.KeywordAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+import java.io.StringReader;
+
 import java.util.*;
 
 import org.apache.lucene.index.*;
@@ -77,15 +85,45 @@
 		// Field has a textual value
 		else if (s != null) {
 
-			// Field is not indexed
+			// Stored
 			if (iFieldType.indexOptions() == IndexOptions.NONE) {
 				mf.type = "type:store";
+				mf.values.add(s.toString());
 			}
+
+			// Keywords
+			else if (iFieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS) {
+				mf.type = "type:keywords";
+
+				// Analyze keywords
+				try {
+					StringReader reader = new StringReader(s.toString());
+					KeywordAnalyzer kwa = new KeywordAnalyzer();
+					TokenStream ts = kwa.tokenStream("-", reader);
+					CharTermAttribute term;
+					ts.reset();
+					while (ts.incrementToken()) {
+						term = ts.getAttribute(CharTermAttribute.class);
+						mf.values.add(term.toString());
+					};
+					ts.close();
+					reader.close();
+				}
+				catch (IOException e) {
+					log.error("Unable to split {}={}", iField.name(), s.toString());
+				}
+			}
+
+			// Text
 			else if (iFieldType.indexOptions() != IndexOptions.DOCS) {
 				mf.type = "type:text";
-			};
+				mf.values.add(s.toString());
+			}
 
-			mf.values.add(s.toString());
+			// String
+			else {
+				mf.values.add(s.toString());
+			};
 		}
 		
 		else {
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
index 610df0b..36360c5 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
@@ -17,7 +17,7 @@
     private final static Logger log = LoggerFactory.getLogger(Match.class);
 
     // This advices the java compiler to ignore all loggings
-    public static final boolean DEBUG = true;
+    public static final boolean DEBUG = false;
 
     private LinkedList<HighlightCombinatorElement> combine;
     private Stack<Integer> balanceStack = new Stack<>();
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
index 272a9f1..c39825a 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
@@ -34,7 +34,7 @@
 	private final static Logger log = LoggerFactory.getLogger(Match.class);
 
 	// This advices the java compiler to ignore all loggings
-    public static final boolean DEBUG = true;
+    public static final boolean DEBUG = false;
 
     // Constructor for highlighting elements
     public HighlightCombinatorElement (byte type, int number) {
diff --git a/src/main/resources/log4j2.xml b/src/main/resources/log4j2.xml
index 20f61ae..fa441c5 100644
--- a/src/main/resources/log4j2.xml
+++ b/src/main/resources/log4j2.xml
@@ -19,6 +19,11 @@
             level="trace">
       <AppenderRef ref="Console"/>
     </Logger>
+      <Logger name="de.ids_mannheim.korap.KrillIndex"
+            additivity="false"
+            level="trace">
+      <AppenderRef ref="Console"/>
+    </Logger>
     -->
   </Loggers>
 </Configuration>
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
index f15af52..3ca878d 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
@@ -242,7 +242,7 @@
 				break;
 
 			case "keyword":
-				assertEquals("type:string", field.at("/type").asText());
+				assertEquals("type:keywords", field.at("/type").asText());
 				assertEquals("koral:field", field.at("/@type").asText());
 				assertEquals("baum", field.at("/value/0").asText());
 				assertEquals("wald", field.at("/value/1").asText());
@@ -301,13 +301,10 @@
 				break;
 
 			case "foundries":
-				// TODO:
-				//   This should better be an array!
-				assertEquals("type:string", field.at("/type").asText());
-				assertEquals("dereko dereko/structure " +
-							 "dereko/structure/base-sentences-paragraphs-pagebreaks "+
-							 "lwc lwc/dependency treetagger treetagger/morpho",
-							 field.at("/value").asText());
+				assertEquals("type:keywords", field.at("/type").asText());
+				assertEquals("dereko", field.at("/value/0").asText());
+				assertEquals("dereko/structure", field.at("/value/1").asText());
+				assertEquals("dereko/structure/base-sentences-paragraphs-pagebreaks", field.at("/value/2").asText());
 				break;
 			};
 		};