Preliminary support of arbitrary meta data fields

Change-Id: I81b6a1fca2fe415af51c8693fb7d6a4edccab22c
diff --git a/Changes b/Changes
index 910ce3d..42755eb 100644
--- a/Changes
+++ b/Changes
@@ -1,11 +1,15 @@
 0.58.3 2018-12-12
-    - [feature] Introduced attachements as meta data fields (diewald).
+    - [feature] Introduced attachments as meta data fields
+      (fixes #49) (diewald).
+    - [feature] Introduced preliminary support of arbitrary
+      metadata fields (see #47) (diewald)
 
 0.58.2 2018-12-05
     - [bugfix] Fixed the candidate list in NextSpans, see de.ids_mannheim.
       korap.index.TestNextIndex.testNextExpansionBug() (margaretha)  
     - [bugfix] Fixed left expansion match order (margaretha)
-    - [bugfix] Fixed right expansion match order & expansion over start (margaretha)
+    - [bugfix] Fixed right expansion match order & expansion over start
+      (margaretha)
     - [feature] Added opt() method to QueryBuilder (diewald)
     - [bugfix] Improved FocusSpans sorting (fixes #7) (margaretha)
     - [bugfix] Adopt sorting for FocusSpans in SpanQueryWrappers (diewald)
diff --git a/pom.xml b/pom.xml
index 81d2921..3cb6196 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
 
 	<groupId>de.ids_mannheim.korap</groupId>
 	<artifactId>Krill</artifactId>
-	<version>0.58.2</version>
+	<version>0.58.3</version>
 	<packaging>jar</packaging>
 
 	<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index a244282..0d0f64f 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -6,6 +6,9 @@
 import de.ids_mannheim.korap.util.KrillDate;
 import de.ids_mannheim.korap.util.CorpusDataException;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.fasterxml.jackson.annotation.*;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.JsonNode;
@@ -53,7 +56,14 @@
 @JsonIgnoreProperties(ignoreUnknown = true)
 public class FieldDocument extends AbstractDocument {
     ObjectMapper mapper = new ObjectMapper();
-	
+
+
+    // Logger
+	private final static Logger log = LoggerFactory.getLogger(FieldDocument.class);
+
+	// This advises the Java compiler to ignore all logging
+    public static final boolean DEBUG = false;
+    
     @JsonIgnore
     public Document doc = new Document();
     private FieldType tvField = new FieldType(TextField.TYPE_STORED);
@@ -209,6 +219,75 @@
             this.setTokenSource((String) node.get("tokenSource"));
     };
 
+    
+    /**
+     * Deserialize koral:field types for meta data.
+     * Entries lacking "type", "key" or "value" are logged and skipped.
+     *
+     * @param fields list of koral:field objects; null is ignored
+     */
+    public void setMetaFields (ArrayList<Map<String, JsonNode>> fields) {
+        if (fields == null) return;
+
+        StringBuilder sb = new StringBuilder();
+        for (Map<String, JsonNode> field : fields) {
+            JsonNode node = field.get("@type");
+            if (node == null || !node.asText().equals("koral:field"))
+                continue;
+
+            JsonNode typeNode = field.get("type");
+            JsonNode keyNode = field.get("key");
+            JsonNode valueNode = field.get("value");
+
+            // Map.get() returns null for absent members
+            if (typeNode == null || keyNode == null || valueNode == null) {
+                log.error("Incomplete meta data field {}", field);
+                continue;
+            };
+
+            String type = typeNode.asText();
+            String key = keyNode.asText();
+
+            // Add string field
+            if (type.equals("type:string") || type.equals("type:keywords")) {
+                // Array values are joined with single spaces for addKeyword
+                if (valueNode.isArray()) {
+                    Iterator<JsonNode> i = valueNode.elements();
+                    sb.setLength(0);
+                    while (i.hasNext()) {
+                        sb.append(i.next().asText());
+                        if (i.hasNext()) sb.append(' ');
+                    };
+                    this.addKeyword(key, sb.toString());
+                }
+                else {
+                    this.addString(key, valueNode.asText());
+                };
+            }
+            // Add text field
+            else if (type.equals("type:text")) {
+                this.addText(key, valueNode.asText());
+            }
+            // Add integer field
+            else if (type.equals("type:integer")) {
+                this.addInt(key, valueNode.asInt());
+            }
+            // Add attachement field (data URIs only)
+            else if (type.equals("type:attachement")) {
+                if (valueNode.asText().startsWith("data:"))
+                    this.addAttachement(key, valueNode.asText());
+            }
+            // Add date field
+            else if (type.equals("type:date")) {
+                this.addInt(key, new KrillDate(valueNode.asText()).toString());
+            }
+            // Unknown
+            else {
+                log.error("Unknown field type {}", type);
+            };
+        }
+    };
+    
 
     /**
      * Deserialize token stream data (LEGACY).
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaField.java b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
index e997313..bcedd95 100644
--- a/src/main/java/de/ids_mannheim/korap/response/MetaField.java
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
@@ -1,8 +1,5 @@
 package de.ids_mannheim.korap.response;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.fasterxml.jackson.databind.node.ArrayNode;
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
index 513185e..02c9b8e 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
@@ -3,6 +3,7 @@
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
 
+import java.util.*;
 import java.io.BufferedReader;
 import java.io.FileReader;
 import java.io.IOException;
@@ -13,6 +14,9 @@
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
 
+import com.fasterxml.jackson.databind.JsonNode;
+
+import static de.ids_mannheim.korap.TestSimple.*;
 import de.ids_mannheim.korap.Krill;
 import de.ids_mannheim.korap.KrillIndex;
 import de.ids_mannheim.korap.KrillMeta;
@@ -197,7 +201,7 @@
                 "UTF-8");
 
         // {1:der} \w0:5 nicht
-        SpanQueryWrapper sqwi = jsonQuery(jsonPath);
+        SpanQueryWrapper sqwi = getJsonQuery(jsonPath);
 
         Result kr = ki.search(sqwi.toQuery(), 0, (short) 5, true, (short) 2,
                 false, (short) 5);
@@ -242,36 +246,188 @@
         assertEquals(fd.getPubPlace(), "Bochum");
         assertEquals(fd.getPubDate().toDisplay(), "");
 	};
-	
-    public static String getString (String path) {
-        StringBuilder contentBuilder = new StringBuilder();
-        try {
-			BufferedReader in = new BufferedReader(new FileReader(path));
 
-            String str;
-            while ((str = in.readLine()) != null) {
-                contentBuilder.append(str);
+    @Test
+    public void indexNewMetaData () throws Exception {
+
+        String json = new String(
+            "{"
+            + "  \"fields\" : ["
+            + "    { "
+            + "      \"primaryData\" : \"abc\""
+            + "    },"
+            + "    {"
+            + "      \"name\" : \"tokens\","
+            + "      \"data\" : ["
+            + "         [ \"s:a\", \"i:a\", \"_0$<i>0<i>1\", \"-:t$<i>3\"],"
+            + "         [ \"s:b\", \"i:b\", \"_1$<i>1<i>2\" ],"
+            + "         [ \"s:c\", \"i:c\", \"_2$<i>2<i>3\" ]"
+            + "      ]"
+            + "    }"
+            + "  ],"
+            + "  \"metaFields\" : ["
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:string\","
+            + "      \"key\" : \"corpusID\","
+            + "      \"value\" : \"WPD\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:string\","
+            + "      \"key\" : \"textSigle\","
+            + "      \"value\" : \"x/y/z\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:string\","
+            + "      \"key\" : \"ID\","
+            + "      \"value\" : \"WPD-AAA-00001\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:string\","
+            + "      \"key\" : \"textClass\","
+            + "      \"value\" : [\"music\",\"entertainment\"]"
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:text\","
+            + "      \"key\" : \"author\","
+            + "      \"value\" : \"Peter Frankenfeld\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:date\","
+            + "      \"key\" : \"pubDate\","
+            + "      \"value\" : \"2015-05-01\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:text\","
+            + "      \"key\" : \"title\","
+            + "      \"value\" : \"Wikipedia\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:text\","
+            + "      \"key\" : \"subTitle\","
+            + "      \"value\" : \"Die freie Enzyklopädie\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:string\","
+            + "      \"key\" : \"pubPlace\","
+            + "      \"value\" : \"Bochum\""
+            + "    },"
+            + "    {"
+            + "      \"@type\" : \"koral:field\","
+            + "      \"type\" : \"type:attachement\","
+            + "      \"key\" : \"link\","
+            + "      \"value\" : \"data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel\""
+            + "    }"
+            + "  ]"
+            + "}");
+
+        KrillIndex ki = new KrillIndex();
+        FieldDocument fd = ki.addDoc(json);
+
+        ki.commit();
+
+        assertEquals(fd.getPrimaryData(), "abc");
+        assertEquals(fd.doc.getField("corpusID").stringValue(), "WPD");
+        assertEquals(fd.doc.getField("textSigle").stringValue(), "x/y/z");
+        assertEquals(fd.doc.getField("ID").stringValue(), "WPD-AAA-00001");
+        assertEquals(fd.doc.getField("textClass").stringValue(), "music entertainment");
+        assertEquals(fd.doc.getField("author").stringValue(), "Peter Frankenfeld");
+        assertEquals(fd.doc.getField("title").stringValue(), "Wikipedia");
+        assertEquals(fd.doc.getField("subTitle").stringValue(), "Die freie Enzyklopädie");
+        assertEquals(fd.doc.getField("pubPlace").stringValue(), "Bochum");
+        assertEquals(fd.doc.getField("pubDate").stringValue(), "20150501");
+        assertEquals(fd.doc.getField("link").stringValue(), "data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel");
+
+        JsonNode res = ki.getFields("x/y/z").toJsonNode();
+
+        Iterator fieldIter = res.at("/document/fields").elements();
+
+        		int checkC = 0;
+		while (fieldIter.hasNext()) {
+			JsonNode field = (JsonNode) fieldIter.next();
+
+			String key = field.at("/key").asText();
+
+			switch (key) {
+			case "corpusID":
+				assertEquals("type:string", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("WPD", field.at("/value").asText());
+				checkC++;
+				break;
+
+			case "textSigle":
+				assertEquals("type:string", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("x/y/z", field.at("/value").asText());
+				checkC++;
+				break;
+
+			case "ID":
+				assertEquals("type:string", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("WPD-AAA-00001", field.at("/value").asText());
+				checkC++;
+				break;
+
+			case "textClass":
+				assertEquals("type:keywords", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("music", field.at("/value/0").asText());
+				assertEquals("entertainment", field.at("/value/1").asText());
+				checkC++;
+				break;
+
+            case "author":
+				assertEquals("type:text", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("Peter Frankenfeld", field.at("/value").asText());
+				checkC++;
+				break;
+
+            case "title":
+				assertEquals("type:text", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("Wikipedia", field.at("/value").asText());
+				checkC++;
+				break;
+
+            case "subTitle":
+				assertEquals("type:text", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("Die freie Enzyklopädie", field.at("/value").asText());
+				checkC++;
+				break;
+
+            case "pubPlace":
+				assertEquals("type:string", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("Bochum", field.at("/value").asText());
+				checkC++;
+				break;
+
+            case "pubDate":
+				assertEquals("type:date", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("2015-05-01", field.at("/value").asText());
+				checkC++;
+				break;
+
+            case "link":
+				assertEquals("type:attachement", field.at("/type").asText());
+				assertEquals("koral:field", field.at("/@type").asText());
+				assertEquals("data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel", field.at("/value").asText());
+				checkC++;
+				break;
             };
-            in.close();
-        }
-        catch (IOException e) {
-            fail(e.getMessage());
-        }
-        return contentBuilder.toString();
-    };
-
-
-    public static SpanQueryWrapper jsonQuery (String jsonFile) {
-        SpanQueryWrapper sqwi;
-
-        try {
-            String json = getString(jsonFile);
-            sqwi = new KrillQuery("tokens").fromKoral(json);
-        }
-        catch (QueryException e) {
-            fail(e.getMessage());
-            sqwi = new QueryBuilder("tokens").seg("???");
         };
-        return sqwi;
     };
 };