Serialize field value lists (fixes #81)

Change-Id: I7901e679f9168668025730ce6b460c52a076f5e3
diff --git a/Changes b/Changes
index 17f1622..48d360c 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,6 @@
 0.60.2 2022-01-03
     - [security] More log4j updates (diewald)
+    - [feature] Support for field value vector method (fixes #81; diewald)
 
 0.60.1 2021-12-17
     - [feature] Added vc loading from classpath (margaretha)
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 819a713..40addf9 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -28,6 +28,7 @@
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
@@ -1875,5 +1876,74 @@
         }
         return fingerprints;
     }
-    
+
+
+    /**
+     * Collect the values of a field for the documents of a collection.
+     * The result holds one entry per visited document with a non-empty
+     * value; values are not de-duplicated. This is a simplified "group"
+     * API and should in the future be succeeded by group.
+     *
+     * On an IOException or QueryException the message is logged and the
+     * values collected so far are returned.
+     *
+     * @param field The name of the field to read from each document.
+     * @param collection The collection restricting the documents. If it
+     *        yields no filter, all live documents of the index are read.
+     * @return The collected field values, possibly empty.
+     */
+    public List<String> getFieldVector (String field, KrillCollection collection) {
+        collection.setIndex(this);
+
+        final List<String> fieldValues = new ArrayList<>();
+        String fieldValue;
+
+        try {
+            final Filter filter = collection.toFilter();
+
+            // Get from filtered index
+            if (filter != null) {
+                // Iterate over all atomic readers and collect occurrences
+                for (LeafReaderContext atomic : this.reader().leaves()) {
+                    LeafReader lreader = atomic.reader();
+
+                    // NOTE(review): no acceptDocs are passed here, so skipping
+                    // deleted documents is left to the filter - confirm.
+                    DocIdSet docids = filter.getDocIdSet(atomic, null);
+                    DocIdSetIterator docs = (docids == null) ? null : docids.iterator();
+                    if (docs == null)
+                        continue;
+
+                    while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+                        fieldValue = lreader.document(docs.docID()).get(field);
+                        // Compare by content: "!=" on strings only checks identity
+                        if (fieldValue != null && !fieldValue.isEmpty())
+                            fieldValues.add(fieldValue);
+                    }
+                }
+            } else { // Get from unfiltered index
+                // Iterate over all atomic readers and collect occurrences
+                for (LeafReaderContext atomic : this.reader().leaves()) {
+                    LeafReader lreader = atomic.reader();
+                    Bits live = lreader.getLiveDocs();
+
+                    for (int i = 0; i < lreader.maxDoc(); i++) {
+                        // Without live-docs bits there are no deletions to skip
+                        if (live != null && !live.get(i))
+                            continue;
+                        fieldValue = lreader.document(i).get(field);
+                        if (fieldValue != null && !fieldValue.isEmpty())
+                            fieldValues.add(fieldValue);
+                    }
+                }
+            }
+        }
+
+        // Something went wrong, e.g. reference corpus not found
+        catch (IOException | QueryException e) {
+            log.warn(e.getLocalizedMessage());
+        }
+
+        return fieldValues;
+    }
 };
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
index 0e44573..2b224c3 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
@@ -20,6 +20,8 @@
 import de.ids_mannheim.korap.index.MultiTermTokenStream;
 import de.ids_mannheim.korap.response.Result;
 import de.ids_mannheim.korap.util.QueryException;
+import de.ids_mannheim.korap.collection.CollectionBuilder;
+import de.ids_mannheim.korap.KrillCollection;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.JsonNode;
@@ -375,4 +377,57 @@
         assertEquals(1, checkC);
         
     }
+
+    // getFieldVector() lists the values of a field per collection
+    @Test
+    public void indexFieldVector () throws IOException {
+        KrillIndex ki = new KrillIndex();
+        FieldDocument fd = new FieldDocument();
+        fd.addString("textSigle", "aaaa");
+        ki.addDoc(fd);
+
+        fd = new FieldDocument();
+        fd.addString("textSigle", "bbbb");
+        fd.setUID("05678");
+        ki.addDoc(fd);
+
+        ki.commit();
+
+        CollectionBuilder cb = new CollectionBuilder();
+        KrillCollection kcn = new KrillCollection(ki);
+        // A collection without constraints covers both documents
+        List<String> fieldValues = ki.getFieldVector("textSigle", kcn);
+        assertEquals(2, fieldValues.size());
+        assertEquals("aaaa", fieldValues.get(0));
+        assertEquals("bbbb", fieldValues.get(1));
+        // Only the second document was given a UID
+        fieldValues = ki.getFieldVector("UID", kcn);
+        assertEquals(1, fieldValues.size());
+        assertEquals("5678", fieldValues.get(0));
+        // Restrict the collection to a single document
+        kcn.fromBuilder(cb.term("textSigle","bbbb"));
+        fieldValues = ki.getFieldVector("textSigle", kcn);
+        assertEquals(1, fieldValues.size());
+        assertEquals("bbbb", fieldValues.get(0));
+
+        // Add a third document
+        fd = new FieldDocument();
+        fd.addString("textSigle", "cccc");
+        ki.addDoc(fd);
+
+        ki.commit();
+        // Without a builder, all three documents are expected again
+        kcn.fromBuilder(null);
+        fieldValues = ki.getFieldVector("textSigle", kcn);
+        assertEquals(3, fieldValues.size());
+        assertEquals("aaaa", fieldValues.get(0));
+        assertEquals("bbbb", fieldValues.get(1));
+        assertEquals("cccc", fieldValues.get(2));
+        // Restrict to a disjunction of the first and the third document
+        kcn.fromBuilder(cb.orGroup().with(cb.term("textSigle","aaaa")).with(cb.term("textSigle","cccc")));
+        fieldValues = ki.getFieldVector("textSigle", kcn);
+        assertEquals(2, fieldValues.size());
+        assertEquals("aaaa", fieldValues.get(0));
+        assertEquals("cccc", fieldValues.get(1));
+    }
 };