Populate documents with metadata fields based on index types

Change-Id: I813dfabd1b8dc2a51986fc35f4601f211fe0b663
diff --git a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
index 05f8e6a..9b8f395 100644
--- a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
@@ -40,9 +40,9 @@
 // @JsonIgnoreProperties(ignoreUnknown = true)
 public abstract class AbstractDocument extends Response {
     ObjectMapper mapper = new ObjectMapper();
-
+    
     private String primaryData;
-
+    
     private static HashSet<String> legacyStringFields =
         new HashSet<String>(Arrays.asList(
                                 "pubPlace",
@@ -135,62 +135,34 @@
     };
 
 
-    public void populateFields (Document doc, Collection<String> fields) {
-        // Remember - never serialize "tokens"
-
-        // TODO:
-        //   Pupulate based on field types!
-
+    public void populateFields (Document doc, Collection<String> fields) {
         if (fields.contains("UID"))
             this.setUID(doc.get("UID"));
 
-        String field;
-        Iterator<String> i = legacyTextFields.iterator();
-        while (i.hasNext()) {
-            field = i.next();
-            if (fields.contains(field)) {
-                this.addText(field, doc.get(field));
-            };
-        };
+        Iterator<String> fieldsIter = fields.iterator();
 
-        i = legacyKeywordsFields.iterator();
-        while (i.hasNext()) {
-            field = i.next();
-            if (fields.contains(field)) {
-                this.addKeywords(field, doc.get(field));
-            };
-        };
+        while (fieldsIter.hasNext()) {
+            String name = fieldsIter.next();
 
-        i = legacyStoredFields.iterator();
-        while (i.hasNext()) {
-            field = i.next();
-            if (fields.contains(field)) {
-                this.addStored(field, doc.get(field));
-            };
-        };
+            // Never serialize "tokens"; "UID" was already handled above
+            if ("tokens".equals(name) || "UID".equals(name))
+                continue;
 
-        i = legacyStringFields.iterator();
-        while (i.hasNext()) {
-            field = i.next();
-            if (fields.contains(field)) {
-                this.addString(field, doc.get(field));
-            };
-        };
+            IndexableField iField = doc.getField(name);
+            // Skip requested fields the document does not provide
+            if (iField == null)
+                continue;
 
-        i = legacyDateFields.iterator();
-        while (i.hasNext()) {
-            field = i.next();
-            if (fields.contains(field)) {
-                this.addDate(field, doc.get(field));
-            };
-        };
-        
-        // Legacy
-        if (fields.contains("license"))
-            this.addString("availability", doc.get("license"));
+            // Register the field as a meta field (return value not needed)
+            mFields.add(iField);
 
+            // Legacy: expose "license" as "availability" as well
+            if ("license".equals(name))
+                this.addString("availability", doc.get("license"));
+
+        };
     };
-
+    
 
     /**
      * Populate document meta information with information coming from
@@ -368,82 +340,24 @@
 
     @JsonAnyGetter
     public Map<String, JsonNode> getLegacyMetaFields () {
-        Iterator mfIterator = mFields.iterator();
+
+        Iterator<MetaField> mfIterator = mFields.iterator();
 
         HashMap<String, JsonNode> map = new HashMap<>();
 
-        String field;
-        Iterator<String> i = legacyDateFields.iterator();
-        while (i.hasNext()) {
-            field = i.next();
-            if (mFields.contains(field)) {
-                KrillDate date = this.getFieldValueAsDate(field);
-                if (date != null) {
-                    String dateStr = date.toDisplay();
-                    if (dateStr.length() != 0) {
-                        map.put(
-                            field,
-                            new TextNode(dateStr)
-                            );
-                    };
-                };
-            };
-        };
-
-        i = legacyStoredFields.iterator();
-        while (i.hasNext()) {
-            field = i.next();
-            if (mFields.contains(field)) {
-                String value = this.getFieldValue(field);
-                if (value != null) {
-                    map.put(
-                        field,
-                        new TextNode(this.getFieldValue(field))
-                        );
-                };
-            };
-        };
-
-        i = legacyTextFields.iterator();
-        while (i.hasNext()) {
-            field = i.next();
-            if (mFields.contains(field)) {
-                String value = this.getFieldValue(field);
-                if (value != null) {
-                    map.put(
-                        field,
-                        new TextNode(value)
-                        );
-                };
-            };
-        };
-
-        i = legacyStringFields.iterator();
-        while (i.hasNext()) {
-            field = i.next();
-            if (mFields.contains(field)) {
-                String value = this.getFieldValue(field);
-                if (value != null) {
-                    map.put(
-                        field,
-                        new TextNode(value)
-                        );
-                };
-            };
-        };
-
-        i = legacyKeywordsFields.iterator();
-        while (i.hasNext()) {
-            field = i.next();
-            if (mFields.contains(field)) {
-                String value = this.getFieldValue(field);
-                if (value != null) {
-                    map.put(
-                        field,
-                        new TextNode(value)
-                        );
-                };
-            };
+        // Expose legacy fields only; fields without a value are left out
+        while (mfIterator.hasNext()) {
+            String mfs = mfIterator.next().key;
+            if (legacyDateFields.contains(mfs) ||
+                legacyStoredFields.contains(mfs) ||
+                legacyTextFields.contains(mfs) ||
+                legacyStringFields.contains(mfs) ||
+                legacyKeywordsFields.contains(mfs)
+                ) {
+                String value = this.getFieldValue(mfs);
+                if (value != null)
+                    map.put(mfs, new TextNode(value));
+            }
         };
         
         return map;
@@ -513,8 +427,11 @@
     public String getFieldValue (String field) {
         MetaField mf = mFields.get(field);
 
-        if (mf != null) {
-            return mFields.get(field).values.get(0);
+        if (mf != null && mf.values.size() > 0) {
+            return String.join(
+                " ",
+                mf.values
+                );
         };
 
         return null;
@@ -533,8 +450,10 @@
 
     @JsonIgnore
     public void addString (String key, String value) {
+        if (value == null)
+            return;
+        
         mFields.add(
-            key,
             new MetaField(
                 key,
                 "type:string",
@@ -545,8 +464,10 @@
     
     @JsonIgnore
     public void addStored (String key, String value) {
+        if (value == null)
+            return;
+
         mFields.add(
-            key,
             new MetaField(
                 key,
                 "type:store",
@@ -554,11 +475,13 @@
                 )
             );
     };
-
+    
     @JsonIgnore
     public void addKeywords (String key, String value) {
+        if (value == null)
+            return;
+
         mFields.add(
-            key,
             new MetaField(
                 key,
                 "type:keywords",
@@ -569,8 +492,10 @@
 
     @JsonIgnore
     public void addText (String key, String value) {
+        if (value == null)
+            return;
+
         mFields.add(
-            key,
             new MetaField(
                 key,
                 "type:text",
@@ -581,9 +506,11 @@
 
     @JsonIgnore
     public void addDate (String key, String value) {
+        if (value == null)
+            return;
+
         KrillDate date = new KrillDate(value);
         mFields.add(
-            key,
             new MetaField(
                 key,
                 "type:date",
@@ -591,5 +518,4 @@
                 )
             );
     };
-
 };
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index 656f77b..b6b3292 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -100,12 +100,14 @@
 
     @Override
     public void addDate (String key, String value) {
+        if (value == null)
+            return;
+
         KrillDate date = new KrillDate(value);
 		if (date != null) {
 			this.addInt(key, date.toString());
 		};
         mFields.add(
-            key,
             new MetaField(
                 key,
                 "type:date",
@@ -116,8 +118,10 @@
 
     @Override
     public void addText (String key, String value) {
+        if (value == null)
+            return;
+
         mFields.add(
-            key,
             new MetaField(
                 key,
                 "type:text",
@@ -130,8 +134,10 @@
 
     @Override
     public void addKeywords (String key, String value) {
+        if (value == null)
+            return;
+
         mFields.add(
-            key,
             new MetaField(
                 key,
                 "type:keywords",
@@ -144,8 +150,10 @@
 
     @Override
     public void addString (String key, String value) {
+        if (value == null)
+            return;
+        
         mFields.add(
-            key,
             new MetaField(
                 key,
                 "type:string",
@@ -156,8 +164,10 @@
     };
 
     public void addAttachement (String key, String value) {
+        if (value == null)
+            return;
+
         mFields.add(
-            key,
             new MetaField(
                 key,
                 "type:attachement",
@@ -169,8 +179,10 @@
 
     @Override
     public void addStored (String key, String value) {
+        if (value == null)
+            return;
+
         mFields.add(
-            key,
             new MetaField(
                 key,
                 "type:store",
@@ -183,7 +195,6 @@
 
     public void addStored (String key, int value) {
         mFields.add(
-            key,
             new MetaField(
                 key,
                 "type:store",
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaField.java b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
index 270d0f9..e6d6ca2 100644
--- a/src/main/java/de/ids_mannheim/korap/response/MetaField.java
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
@@ -26,14 +26,26 @@
 		this.key = key;
 	};
 
+    public MetaField (String key, String type) { // typed field; no value is added here
+        this.key = key;
+        this.type = type;
+	};
 
     public MetaField (String key, String type, String value) {
         this.key = key;
         this.type = type;
         this.values.add(value);
     };
-    
 
+    /**
+     * Append a value and return this meta field to allow chaining.
+     */
+    public MetaField addValue (String value) {
+        this.values.add(value);
+        return this;
+    };
+
+    
 	/**
 	 * Create JsonNode
 	 */
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaFieldsExt.java b/src/main/java/de/ids_mannheim/korap/response/MetaFieldsExt.java
index 91dab5a..460ae15 100644
--- a/src/main/java/de/ids_mannheim/korap/response/MetaFieldsExt.java
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaFieldsExt.java
@@ -51,8 +51,34 @@
 	/**
 	 * Add field to collection
 	 */
-	public void add (IndexableField iField) {
-					
+	public MetaField add (IndexableField iField) {
+		// The conversion yields null for non-stored fields
+		final MetaField converted =
+			metaFieldFromIndexableField(iField);
+
+		if (converted != null) {
+			fieldsMap.put(converted.key, converted);
+		};
+		return converted;
+	};
+
+
+	/**
+	 * Register a meta field under its key
+	 */
+	public MetaField add (MetaField mf) {
+
+		// A null field is handed back without being stored
+		if (mf != null) {
+			fieldsMap.put(mf.key, mf);
+		};
+		return mf;
+	};
+
+    
+    // Field type needs to be restored heuristically
+    // - though that's not very elegant
+    public static MetaField metaFieldFromIndexableField (IndexableField iField) {
 		IndexableFieldType iFieldType = iField.fieldType();
 
 		// Field type needs to be restored heuristically
@@ -60,19 +86,9 @@
 
 		// Ignore non-stored fields
 		if (!iFieldType.stored())
-			return;
+			return null;
 
 		MetaField mf = new MetaField(iField.name());
-
-		// Reuse existing metafield
-		if (fieldsMap.containsKey(mf.key)) {
-			mf = fieldsMap.get(mf.key);
-		}
-
-		// Add new field
-		else {
-			fieldsMap.put(mf.key, mf);
-		};
 		
 		// TODO: Check if metaField exists for that field
 
@@ -83,31 +99,29 @@
 		if (n != null) {
 
 			// Check if key indicates a date
-			Matcher dateMatcher = dateKeyPattern.matcher(mf.key);
+			Matcher dateMatcher = dateKeyPattern.matcher(iField.name());
 			if (dateMatcher.matches()) {
-				mf.type = "type:date";
-
-				// Check structure with KrillDate
-				KrillDate date = new KrillDate(n.toString());
+                mf.type = "type:date";
+                KrillDate date = new KrillDate(n.toString());
 				if (date != null) {
 
 					// Serialize withz dash separation
 					mf.values.add(date.toDisplay());
 				};
-			}
+            }
 
 			// Field is a number
 			else {
-				mf.type = "type:number";
-				mf.values.add(n.toString());
+                mf.values.add(n.toString());
 			};
 		}
 		
 		// Field has a textual value
 		else if (s != null) {
 
-			// Stored
+            // Stored
 			if (iFieldType.indexOptions() == IndexOptions.NONE) {
+
                 String value = s.toString();
                 if (value.startsWith("data:")) {
                     mf.type = "type:attachement";
@@ -116,6 +130,7 @@
                     mf.type = "type:store";
                 };
 				mf.values.add(value);
+                return mf;
 			}
 
 			// Keywords
@@ -147,6 +162,12 @@
 				mf.values.add(s.toString());
 			}
 
+            // Special treatment for legacy indices
+            else if (mf.key.equals("UID")) {
+				mf.type = "type:integer";
+				mf.values.add(s.toString());
+            }
+
 			// String
 			else {
 				mf.values.add(s.toString());
@@ -156,17 +177,10 @@
 		else {
 			log.error("Unknown field type {}", iField.name());
 		};
-	};
 
+        mf.values.removeAll(Collections.singleton(null));
 
-    /**
-	 * Add field to collection
-     *
-     * @param key
-     *        The key of the field
-     */
-    public void add (String key, MetaField mf) {
-        fieldsMap.put(key, mf);
+        return mf;
     };
 
 
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
index 59887bd..edede59 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
@@ -172,6 +172,7 @@
         assertEquals(
                 "base base/paragraphs base/sentences connexor connexor/morpho connexor/phrase connexor/sentences connexor/syntax corenlp corenlp/constituency corenlp/morpho corenlp/namedentities corenlp/sentences glemm glemm/morpho mate mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences xip xip/constituency xip/morpho xip/sentences",
                 res.at("/matches/0/foundries").asText());
+
         assertEquals("Goethe-Korpus",
                 res.at("/matches/0/corpusTitle").asText());
         assertEquals("QAO-NC", res.at("/matches/0/availability").asText());