Support for arbitrary metadata fields (fixes #47)

Change-Id: I3b02195699633665c3afb995dd6dc7a922e6cd45
diff --git a/Changes b/Changes
index 8fa53fd..8c82163 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.58.4 2019-01-16
+0.58.4 2019-01-17
     - [cleanup] Remove deprecated methods setLicense/getLicense,
       setTokenization/getTokenization, setLayerInfo/getLayerInfo,
       setField/getField (including json serialization)
@@ -7,6 +7,8 @@
       AbstractDocument and FieldDocument (diewald)
     - [cleanup] Remove hardwired (de)serialization of legacy metadata
       fields (diewald)
+    - [feature] Support for arbitrary metadata fields (fixes #47)
+      (diewald)
 
 0.58.3 2018-12-17
     - [feature] Introduced attachements as meta data fields
diff --git a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
index 444d862..8856d98 100644
--- a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
@@ -142,7 +142,6 @@
 
 
     public void populateFields (Document doc) {
-
         HashSet<String> fieldList = new HashSet<>(32);
         Iterator<IndexableField> fieldIterator = doc.getFields().iterator();
         while (fieldIterator.hasNext())
@@ -554,6 +553,10 @@
             return;
 
         KrillDate date = new KrillDate(value);
+
+        if (date == null)
+            return;
+        
         mFields.add(
             new MetaField(
                 key,
@@ -562,4 +565,9 @@
                 )
             );
     };
+
+    @JsonIgnore
+    public void addDate (String key, int value) { // int convenience overload (e.g. 20051210); delegates to addDate(String, String)
+        this.addDate(key, Integer.toString(value)); // static conversion instead of the deprecated Integer(int) constructor
+    };
 };
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index 4ca1ae6..de988ed 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -20,6 +20,7 @@
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.DoubleField;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.IndexOptions;
@@ -70,7 +71,7 @@
     private FieldType tvField = new FieldType(TextField.TYPE_STORED);
     private FieldType tvNoField = new FieldType(TextField.TYPE_NOT_STORED);
     private FieldType keywordField = new FieldType(TextField.TYPE_STORED);
-
+    
     {
         tvField.setStoreTermVectors(true);
         tvField.setStoreTermVectorPositions(true);
@@ -101,7 +102,7 @@
             case "type:integer":
                 try {
                     int val = Integer.parseInt(mf.values.get(0));
-                    doc.add(new IntField(mf.key, val, Field.Store.YES));
+                    doc.add(new DoubleField(mf.key, (double) val, Field.Store.YES));
                 }
                 catch (NumberFormatException ne) {
                     continue;
@@ -112,8 +113,7 @@
                 KrillDate date = new KrillDate(mf.values.get(0));
                 if (date != null) {
                     try {
-                        int dateInt = date.toInteger();
-                        doc.add(new IntField(mf.key, dateInt, Field.Store.YES));
+                        doc.add(new IntField(mf.key, date.toInteger(), Field.Store.YES));
                     }
                     catch (NumberFormatException ne) {
                         continue;
@@ -276,6 +276,9 @@
                         };
                         this.addKeywords(key, sb.toString());
                     }
+                    else if (type.equals("type:keywords")) {
+                        this.addKeywords(key, field.get("value").asText());
+                    }
                     else {
                         this.addString(key, field.get("value").asText());
                     };
@@ -291,6 +294,12 @@
                     this.addInt(key, field.get("value").asInt());
                 }
 
+                // Add store field
+                else if (type.equals("type:store")) {
+                    value = field.get("value").asText();
+                    this.addStored(key, value);
+                }
+
                 // Add attachement field
                 else if (type.equals("type:attachement")) {
                     value = field.get("value").asText();
@@ -302,12 +311,6 @@
                 // Add date field
                 else if (type.equals("type:date")) {
                     this.addDate(key, field.get("value").asText());
-                    /*
-                    KrillDate date = new KrillDate(field.get("value").asText());
-                    if (date != null) {
-                        this.addInt(key, date.toString());
-                    };
-                    */
                 }
 
                 // Unknown
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaField.java b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
index e6d6ca2..94052d7 100644
--- a/src/main/java/de/ids_mannheim/korap/response/MetaField.java
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
@@ -53,7 +53,7 @@
         ObjectNode json = mapper.createObjectNode();
 		json.put("@type", "koral:field");
 		json.put("type", this.type);
-		json.put("key", this.key);
+        json.put("key", this.key);
 
 		// Value is numerical
 		if (this.type.equals("type:integer")) {
@@ -74,9 +74,10 @@
 			};
 		}
 
+        
 		// Value is textual or keywords
 		else {
-			// Value is a list
+            // Value is a list
 			if (this.values.size() > 1) {
 				ArrayNode list = json.putArray("value");
 
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaFieldsObj.java b/src/main/java/de/ids_mannheim/korap/response/MetaFieldsObj.java
index 049fc3a..ea992b2 100644
--- a/src/main/java/de/ids_mannheim/korap/response/MetaFieldsObj.java
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaFieldsObj.java
@@ -25,6 +25,7 @@
 import java.util.regex.*;
 
 import org.apache.lucene.index.*;
+import org.apache.lucene.document.FieldType;
 
 public class MetaFieldsObj implements Iterable<MetaField> {
 
@@ -34,10 +35,7 @@
 	// This advices the java compiler to ignore all loggings
     public static final boolean DEBUG = false;
 
-	// TODO:
-	//   This is a temporary indicator to check
-	//   whether a date field is a date
-	private static final Pattern dateKeyPattern = Pattern.compile(".*Date$");
+    private static final Pattern dateValuePattern = Pattern.compile("^([0-9]{8})$");
 
 	// Mapper for JSON serialization
     ObjectMapper mapper = new ObjectMapper();
@@ -100,9 +98,10 @@
 		if (n != null) {
 
-			// Check if key indicates a date
+			// Check if the value looks like a date (exactly eight digits)
-			Matcher dateMatcher = dateKeyPattern.matcher(iField.name());
-			if (dateMatcher.matches()) {
+			Matcher dateMatcher = dateValuePattern.matcher(n.toString());
+            if (dateMatcher.matches()) {
                 mf.type = "type:date";
+                
                 KrillDate date = new KrillDate(n.toString());
 				if (date != null) {
 
@@ -114,7 +113,7 @@
 			// Field is a number
 			else {
                 mf.type = "type:integer";
-                mf.values.add(n.toString());
+                mf.values.add(new Integer(n.intValue()).toString());
 			};
 		}
 		
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
index def7b39..7cdfb85 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
@@ -1389,7 +1389,7 @@
         fd.addString("ID", "doc-1");
         fd.addString("author", "Frank");
         fd.addKeywords("textClass", "Nachricht Kultur Reisen");
-        fd.addInt("pubDate", 20051210);
+        fd.addDate("pubDate", 20051210);
         fd.addText("text", "Der alte  Mann ging über die Straße");
         fd.addTV("tokens", "a b c", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>3]"
 				 + "[(2-3)s:b|i:b|_1$<i>2<i>3]" + "[(4-5)s:c|i:c|_2$<i>4<i>5]");
@@ -1403,7 +1403,7 @@
 		fd.addString("ID", "doc-2");
         fd.addString("author", "Peter");
         fd.addKeywords("textClass", "Kultur Reisen");
-        fd.addInt("pubDate", 20051207);
+        fd.addDate("pubDate", 20051207);
         fd.addText("text", "Der junge Mann hatte keine andere Wahl");
         fd.addTV("tokens", "a c d", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>3]"
 				 + "[(2-3)s:c|i:c|_1$<i>2<i>3]" + "[(4-5)s:d|i:d|_2$<i>4<i>5]");
@@ -1417,7 +1417,7 @@
 		fd.addString("ID", "doc-3");
         fd.addString("author", "Sebastian");
         fd.addKeywords("textClass", "Reisen Finanzen");
-        fd.addInt("pubDate", 20051216);
+        fd.addDate("pubDate", 20051216);
         fd.addText("text", "Die Frau und der Mann küssten sich");
         fd.addTV("tokens", "a d e", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>3]"
 				 + "[(2-3)s:d|i:d|_1$<i>2<i>3]" + "[(4-5)s:e|i:e|_2$<i>4<i>5]");
@@ -1430,7 +1430,7 @@
 		fd.addString("ID", "doc-5000");
         fd.addString("author", "Sebastian");
         fd.addKeywords("textClass", "Kultur Finanzen");
-        fd.addInt("pubDate", 20180202);
+        fd.addDate("pubDate", 20180202);
         fd.addText("text", "Die Frau und der Mann küssten sich");
         fd.addTV("tokens", "a d e", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>3]"
 				 + "[(2-3)s:d|i:d|_1$<i>2<i>3]" + "[(4-5)s:e|i:e|_2$<i>4<i>5]");
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
index dcaa5c3..141e60b 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
@@ -26,6 +26,7 @@
 import de.ids_mannheim.korap.response.Match;
 import de.ids_mannheim.korap.response.Result;
 import de.ids_mannheim.korap.util.QueryException;
+import static de.ids_mannheim.korap.response.MetaFieldsObj.*;
 
 import org.apache.lucene.document.Document;
 
@@ -41,12 +42,13 @@
         fd.addString("ID", "WPD-AAA-00001");
         fd.addText("textClass", "music entertainment");
         fd.addText("author", "Peter Frankenfeld");
-        fd.addInt("pubDate", 20130617);
+        fd.addDate("pubDate", 20130617);
+        fd.addInt("justanumber", 12345678);
         fd.addText("title", "Wikipedia");
         fd.addText("subTitle", "Die freie Enzyklopädie");
         fd.addStored("layerInfo", "opennlp/p=pos");
         fd.addString("pubPlace", "Bochum");
-        fd.addInt("lastModified", 20130717);
+        fd.addDate("lastModified", 20130717);
         fd.addTV("tokens", "abc", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
                 + "[(1-2)s:b|i:b|_1$<i>1<i>2]" + "[(2-3)s:c|i:c|_2$<i>2<i>3]");
         fd.addAttachement("Wikilink", "data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel");
@@ -55,7 +57,7 @@
         
         assertEquals(doc.getField("title").name(), "title");
         assertEquals(doc.getField("title").stringValue(), "Wikipedia");
-
+       
         assertEquals(doc.getField("corpusID").name(), "corpusID");
         assertEquals(doc.getField("corpusID").stringValue(), "WPD");
 
@@ -90,6 +92,9 @@
         assertEquals(doc.getField("Wikilink").stringValue(),
                      "data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel"
             );
+
+        assertEquals(doc.getField("justanumber").numericValue().intValue(), 12345678);
+
     };
 
 
@@ -435,4 +440,187 @@
             };
         };
     };
+
+    @Test
+    public void indexArbitraryMetaData () throws Exception { // indexes a JSON document carrying string, integer, keywords, text, attachement, store and date meta fields and checks them
+        String json = new String(
+            "{"
+            + "  \"fields\" : ["
+            + "    { "
+            + "      \"primaryData\" : \"abc\""
+            + "    },"
+            + "    {"
+            + "      \"name\" : \"tokens\","
+            + "      \"data\" : ["
+            + "         [ \"s:a\", \"i:a\", \"_0$<i>0<i>1\", \"-:t$<i>3\"],"
+            + "         [ \"s:b\", \"i:b\", \"_1$<i>1<i>2\" ],"
+            + "         [ \"s:c\", \"i:c\", \"_2$<i>2<i>3\" ]"
+            + "      ]"
+            + "    }"
+            + "  ],"
+            + "  \"metaFields\" : ["
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:string\","
+            + "      \"key\" : \"textSigle\","
+            + "      \"value\" : \"aa/bb/cc\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:integer\","
+            + "      \"key\" : \"alter\","
+            + "      \"value\" : 40"
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:string\","
+            + "      \"key\" : \"name\","
+            + "      \"value\" : \"Frank\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:string\","
+            + "      \"key\" : \"name\","
+            + "      \"value\" : \"Julian\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:string\","
+            + "      \"key\" : \"schluesselwoerter\","
+            + "      \"value\" : [\"musik\",\"unterhaltung\"]"
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:keywords\","
+            + "      \"key\" : \"tags\","
+            + "      \"value\" : \"nachrichten feuilleton\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:keywords\","
+            + "      \"key\" : \"tags\","
+            + "      \"value\" : [\"sport\",\"raetsel\"]"
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:text\","
+            + "      \"key\" : \"titel\","
+            + "      \"value\" : \"Der alte Baum\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:attachement\","
+            + "      \"key\" : \"anhang\","
+            + "      \"value\" : \"data:application/x.korap-link,http://spiegel.de/\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:store\","
+            + "      \"key\" : \"referenz\","
+            + "      \"value\" : \"So war das\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:date\","
+            + "      \"key\" : \"datum\","
+            + "      \"value\" : \"2018-04-03\""
+            + "    }"
+            + "  ]"
+            + "}");
+        // Add the JSON document to a new index and commit it
+        KrillIndex ki = new KrillIndex();
+        FieldDocument fd = ki.addDoc(json);
+
+        ki.commit();
+        // Part 1: check the values directly on the document held by fd
+        assertEquals(fd.getPrimaryData(), "abc");
+        assertEquals(fd.doc.getField("alter").stringValue(), "40.0"); // type:integer is indexed as a DoubleField, hence "40.0"
+        assertEquals(fd.doc.getField("name").stringValue(), "Frank"); // NOTE(review): "Julian" is also given for "name"; presumably getField yields the first entry - confirm
+        assertEquals(fd.doc.getField("schluesselwoerter").stringValue(), "musik unterhaltung");
+        assertEquals(fd.doc.getField("tags").stringValue(), "nachrichten feuilleton sport raetsel"); // both "tags" entries are expected in a single value
+        assertEquals(fd.doc.getField("titel").stringValue(), "Der alte Baum");
+        assertEquals(fd.doc.getField("anhang").stringValue(), "data:application/x.korap-link,http://spiegel.de/");
+        assertEquals(fd.doc.getField("referenz").stringValue(), "So war das");
+        assertEquals(fd.doc.getField("datum").stringValue(), "20180403"); // type:date is indexed as an IntField from KrillDate.toInteger()
+        // Part 2: fetch the fields response for "aa/bb/cc" (the textSigle value above) as JSON
+        JsonNode res = ki.getFields("aa/bb/cc").toJsonNode();
+
+        Iterator fieldIter = res.at("/document/fields").elements();
+        // NOTE(review): checkC is incremented for every matched key below but never asserted - consider checking the final count
+        int checkC = 0;
+		while (fieldIter.hasNext()) {
+			JsonNode field = (JsonNode) fieldIter.next();
+
+			String key = field.at("/key").asText();
+
+			switch (key) {
+			case "textSigle":
+				assertEquals("type:string", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("aa/bb/cc", field.at("/value").asText());
+				checkC++;
+				break;
+
+			case "alter":
+				assertEquals("type:integer", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals(40, field.at("/value").asInt());
+				checkC++;
+				break;
+
+			case "name":
+				assertEquals("type:string", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("Frank", field.at("/value").asText());
+				checkC++;
+				break;
+
+			case "schluesselwoerter":
+				assertEquals("type:keywords", field.at("/type").asText()); // declared as type:string with a list value, expected back as type:keywords
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("musik", field.at("/value/0").asText());
+				assertEquals("unterhaltung", field.at("/value/1").asText());
+				checkC++;
+				break;
+
+            case "tags":
+				assertEquals("type:keywords", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("nachrichten", field.at("/value/0").asText());
+				assertEquals("feuilleton", field.at("/value/1").asText());
+				assertEquals("sport", field.at("/value/2").asText());
+				assertEquals("raetsel", field.at("/value/3").asText());
+				checkC++;
+				break;
+
+            case "titel":
+				assertEquals("type:text", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("Der alte Baum", field.at("/value").asText());
+				checkC++;
+				break;
+
+            case "anhang":
+				assertEquals("type:attachement", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("data:application/x.korap-link,http://spiegel.de/", field.at("/value").asText());
+				checkC++;
+				break;
+
+            case "referenz":
+				assertEquals("type:store", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("So war das", field.at("/value").asText());
+				checkC++;
+				break;
+
+            case "datum":
+				assertEquals("type:date", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("2018-04-03", field.at("/value").asText()); // the date is expected back in its dashed input form
+				checkC++;
+				break;
+            };
+        };
+    };
 };
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 7b2df00..a346f79 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -1219,7 +1219,7 @@
         fd.addString("corpusSigle", "c1");
         fd.addString("docSigle", "c1/d1");
         fd.addString("textSigle", "c1/d1/t1");
-        fd.addInt("UID", 1);
+        fd.setUID(1);
         fd.addTV("tokens", "abcabcabac",
                 "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
 				 + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
@@ -1239,7 +1239,7 @@
         fd.addString("corpusSigle", "c1");
         fd.addString("docSigle", "c1/d1");
         fd.addString("textSigle", "c1/d1/t1");
-        fd.addInt("UID", 2);
+        fd.setUID(2);
         fd.addTV("tokens", "abcabcabac",
                 "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
 				 + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
index edede59..c61a1f2 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
@@ -328,7 +328,7 @@
         fd.addText("title", "Die Wahlverwandtschaften");
         fd.addText("author", "Johann Wolfgang von Goethe");
         fd.addKeywords("textClass", "reisen wissenschaft");
-        fd.addInt("pubDate", 20130617);
+        fd.addDate("pubDate", 20130617);
         fd.addTV("tokens", "abc", "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
                 + "[(1-2)s:b|i:b|_1#1-2]" + "[(2-3)s:c|i:c|_2#2-3]");
         fd.addAttachement("WikiLink", "data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel");