Added getDoc() method to KrillIndex for field retrieval

Change-Id: I17628024f74081e86f400d9fc52c031fbb0df815
diff --git a/.gitignore b/.gitignore
index 27469c6..b855d70 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@
 /.classpath
 /todo.org
 /wiki.org
+/misc/web-api.md
 *~
 .*
 !.gitignore
diff --git a/Changes b/Changes
index 64087df..3b39a83 100644
--- a/Changes
+++ b/Changes
@@ -8,6 +8,7 @@
         - [documentation] Refer to KoralQuery instead of Koral (diewald)
 	- [cleanup] Removed deprecated method in KrillCollection:
 	  getCount() (diewald)
+	- [feature] Added getDoc() method to KrillIndex (diewald)
 
 0.55.5 2016-05-02
 	- [performance] Changed to a dynamic window for sorting in FocusSpans (margaretha)
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index c603f6d..2706d8c 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -774,6 +774,68 @@
     };
 
 
+    /**
+     * Retrieve the stored fields of the document with the given UID.
+     *
+     * @param uid
+     *            Value of the "UID" field of the requested document.
+     * @return A {@link Text} populated from the first matching document.
+     *         It carries error 830 if no document matches and
+     *         error 600 if the index can't be read.
+     */
+    public Text getDoc (String uid) {
+        // This is very similar to getMatchInfo
+        Text text = new Text();
+
+        Filter filter = new QueryWrapperFilter(
+                new TermQuery(new Term("UID", uid)));
+
+        try {
+            // Iterate over all atomic indices and find the matching document
+            for (LeafReaderContext atomic : this.reader().leaves()) {
+                // Retrieve the single document of interest
+                DocIdSet filterSet = filter.getDocIdSet(atomic, atomic.reader()
+                        .getLiveDocs());
+
+                if (DEBUG)
+                    log.trace("Checking document in {} with {}", filterSet,
+                            filterSet.bits());
+
+                DocIdSetIterator filterIterator = filterSet.iterator();
+
+                // No document found
+                if (filterIterator == null)
+                    continue;
+
+                // Go to the matching doc - and remember its ID
+                int localDocID = filterIterator.nextDoc();
+
+                if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
+                    continue;
+
+                // We've found the correct document! Hurray!
+                if (DEBUG)
+                    log.trace("We've found a matching document");
+
+                // Load the stored fields of the document
+                // TODO: Probably use
+                // document(int docID, StoredFieldVisitor visitor)
+                Document doc = atomic.reader().document(localDocID);
+                text.populateFields(doc);
+
+                return text;
+            };
+        }
+        catch (IOException e) {
+            text.addError(600, "Unable to read index", e.getLocalizedMessage());
+            log.warn(e.getLocalizedMessage());
+            // Reading failed - don't additionally report an empty filter
+            return text;
+        };
+
+        text.addError(830, "Filter was empty");
+        return text;
+    };
 
     public String getMatchIDWithContext (String id) {
         /* No includeHighlights */
diff --git a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
index 2d3053b..e7712c8 100644
--- a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
@@ -6,6 +6,9 @@
 import de.ids_mannheim.korap.index.FieldDocument;
 import de.ids_mannheim.korap.response.Response;
 
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexableField;
+
 import com.fasterxml.jackson.annotation.*;
 import com.fasterxml.jackson.annotation.JsonInclude.Include;
 import com.fasterxml.jackson.databind.JsonNode;
@@ -72,6 +75,151 @@
             // Meta information regarding annotations
             tokenSource, layerInfos;
 
+    /**
+     * Fill in the document's primary data and meta information
+     * from an index document, using every field the document has.
+     * 
+     * @param doc
+     *            Document object.
+     * @param field
+     *            Primary data field.
+     */
+    public void populateDocument (Document doc, String field) {
+        // Collect the names of all fields of the index document
+        HashSet<String> storedNames = new HashSet<>(32);
+        for (IndexableField stored : doc.getFields())
+            storedNames.add(stored.name());
+
+        this.populateDocument(doc, field, storedNames);
+    };
+
+    public void populateFields (Document doc) {
+        // Populate from every field the index document has:
+        // collect all field names first, then delegate
+        HashSet<String> storedNames = new HashSet<>(32);
+        for (IndexableField stored : doc.getFields())
+            storedNames.add(stored.name());
+
+        this.populateFields(doc, storedNames);
+    };
+
+    /**
+     * Populate the meta fields named in <code>fields</code> from the index document.
+     */
+    public void populateFields (Document doc, Collection<String> fields) {
+        // Remember - never serialize "tokens"
+
+        // LEGACY
+        if (fields.contains("corpusID"))
+            this.setCorpusID(doc.get("corpusID"));
+        if (fields.contains("ID"))
+            this.setID(doc.get("ID"));
+        if (fields.contains("tokenization"))
+            this.setTokenization(doc.get("tokenization"));
+        if (fields.contains("layerInfo"))
+            this.setLayerInfo(doc.get("layerInfo"));
+
+        // valid
+        if (fields.contains("UID"))
+            this.setUID(doc.get("UID"));
+        if (fields.contains("author"))
+            this.setAuthor(doc.get("author"));
+        if (fields.contains("textClass"))
+            this.setTextClass(doc.get("textClass"));
+        if (fields.contains("title"))
+            this.setTitle(doc.get("title"));
+        if (fields.contains("subTitle"))
+            this.setSubTitle(doc.get("subTitle"));
+        if (fields.contains("pubDate"))
+            this.setPubDate(doc.get("pubDate"));
+        if (fields.contains("pubPlace"))
+            this.setPubPlace(doc.get("pubPlace"));
+
+        // Temporary (later meta fields in term vector)
+        if (fields.contains("foundries"))
+            this.setFoundries(doc.get("foundries"));
+
+        // New fields
+        if (fields.contains("textSigle"))
+            this.setTextSigle(doc.get("textSigle"));
+        if (fields.contains("docSigle"))
+            this.setDocSigle(doc.get("docSigle"));
+        if (fields.contains("corpusSigle"))
+            this.setCorpusSigle(doc.get("corpusSigle"));
+        if (fields.contains("layerInfos"))
+            this.setLayerInfos(doc.get("layerInfos"));
+        if (fields.contains("tokenSource"))
+            this.setTokenSource(doc.get("tokenSource"));
+        if (fields.contains("editor"))
+            this.setEditor(doc.get("editor"));
+
+        if (fields.contains("corpusAuthor"))
+            this.setCorpusAuthor(doc.get("corpusAuthor"));
+        if (fields.contains("corpusEditor"))
+            this.setCorpusEditor(doc.get("corpusEditor"));
+        if (fields.contains("corpusTitle"))
+            this.setCorpusTitle(doc.get("corpusTitle"));
+        if (fields.contains("corpusSubTitle"))
+            this.setCorpusSubTitle(doc.get("corpusSubTitle"));
+
+        if (fields.contains("docAuthor"))
+            this.setDocAuthor(doc.get("docAuthor"));
+        if (fields.contains("docEditor"))
+            this.setDocEditor(doc.get("docEditor"));
+        if (fields.contains("docTitle"))
+            this.setDocTitle(doc.get("docTitle"));
+        if (fields.contains("docSubTitle"))
+            this.setDocSubTitle(doc.get("docSubTitle"));
+
+        if (fields.contains("publisher"))
+            this.setPublisher(doc.get("publisher"));
+        if (fields.contains("reference"))
+            this.setReference(doc.get("reference"));
+        if (fields.contains("creationDate"))
+            this.setCreationDate(doc.get("creationDate"));
+        if (fields.contains("keywords"))
+            this.setKeywords(doc.get("keywords"));
+        if (fields.contains("textColumn"))
+            this.setTextColumn(doc.get("textColumn"));
+        if (fields.contains("textDomain"))
+            this.setTextDomain(doc.get("textDomain"));
+        if (fields.contains("textType"))
+            this.setTextType(doc.get("textType"));
+        if (fields.contains("textTypeArt"))
+            this.setTextTypeArt(doc.get("textTypeArt"));
+        if (fields.contains("textTypeRef"))
+            this.setTextTypeRef(doc.get("textTypeRef"));
+        if (fields.contains("language"))
+            this.setLanguage(doc.get("language"));
+        if (fields.contains("license"))
+            this.setLicense(doc.get("license"));
+        if (fields.contains("pages"))
+            this.setPages(doc.get("pages"));
+
+        if (fields.contains("biblEditionStatement"))
+            this.setBiblEditionStatement(doc.get("biblEditionStatement"));
+        if (fields.contains("fileEditionStatement"))
+            this.setFileEditionStatement(doc.get("fileEditionStatement"));
+    };
+
+    /**
+     * Set the primary data and populate the meta information from the index.
+     * 
+     * @param doc
+     *            Document object.
+     * @param field
+     *            Primary data field.
+     * @param fields
+     *            Names of the meta fields to populate.
+     */
+    public void populateDocument (Document doc, String field,
+            Collection<String> fields) {
+        setField(field);
+        String primaryData = doc.get(field);
+        setPrimaryData(primaryData);
+        populateFields(doc, fields);
+    };
+
 
     /**
      * Get the publication date of the document
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 47806af..a3aeee7 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -4,14 +4,12 @@
 import java.nio.ByteBuffer;
 import java.util.*;
 
-import org.apache.lucene.document.Document;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
 import org.apache.lucene.search.spans.Spans;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.index.IndexableField;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -46,7 +44,7 @@
 
 /**
  * Representation of Matches in a Result.
- * <strong>Warning:</strong> This is currently highliy dependent
+ * <strong>Warning:</strong> This is currently highly dependent
  * on DeReKo data and will change in the future.
  * 
  * @author Nils Diewald
@@ -392,139 +390,6 @@
 
 
     /**
-     * Populate document meta information with information coming from
-     * the index.
-     * 
-     * @param doc
-     *            Document object.
-     * @param field
-     *            Primary data field.
-     */
-    public void populateDocument (Document doc, String field) {
-        HashSet<String> fieldList = new HashSet<>(32);
-        Iterator<IndexableField> fieldIterator = doc.getFields().iterator();
-        while (fieldIterator.hasNext())
-            fieldList.add(fieldIterator.next().name());
-
-        this.populateDocument(doc, field, fieldList);
-    };
-
-
-    /**
-     * Populate document meta information with information coming from
-     * the index.
-     * 
-     * @param doc
-     *            Document object.
-     * @param field
-     *            Primary data field.
-     * @param fields
-     *            Hash object with all supported fields.
-     */
-    public void populateDocument (Document doc, String field,
-            Collection<String> fields) {
-        this.setField(field);
-        this.setPrimaryData(doc.get(field));
-
-        // Remember - never serialize "tokens"
-
-        // LEGACY
-        if (fields.contains("corpusID"))
-            this.setCorpusID(doc.get("corpusID"));
-        if (fields.contains("ID"))
-            this.setDocID(doc.get("ID"));
-        if (fields.contains("tokenization"))
-            this.setTokenization(doc.get("tokenization"));
-        if (fields.contains("layerInfo"))
-            this.setLayerInfo(doc.get("layerInfo"));
-
-        // valid
-        if (fields.contains("UID"))
-            this.setUID(doc.get("UID"));
-        if (fields.contains("author"))
-            this.setAuthor(doc.get("author"));
-        if (fields.contains("textClass"))
-            this.setTextClass(doc.get("textClass"));
-        if (fields.contains("title"))
-            this.setTitle(doc.get("title"));
-        if (fields.contains("subTitle"))
-            this.setSubTitle(doc.get("subTitle"));
-        if (fields.contains("pubDate"))
-            this.setPubDate(doc.get("pubDate"));
-        if (fields.contains("pubPlace"))
-            this.setPubPlace(doc.get("pubPlace"));
-
-        // Temporary (later meta fields in term vector)
-        if (fields.contains("foundries"))
-            this.setFoundries(doc.get("foundries"));
-
-        // New fields
-        if (fields.contains("textSigle"))
-            this.setTextSigle(doc.get("textSigle"));
-        if (fields.contains("docSigle"))
-            this.setDocSigle(doc.get("docSigle"));
-        if (fields.contains("corpusSigle"))
-            this.setCorpusSigle(doc.get("corpusSigle"));
-        if (fields.contains("layerInfos"))
-            this.setLayerInfos(doc.get("layerInfos"));
-        if (fields.contains("tokenSource"))
-            this.setTokenSource(doc.get("tokenSource"));
-        if (fields.contains("editor"))
-            this.setEditor(doc.get("editor"));
-
-        if (fields.contains("corpusAuthor"))
-            this.setCorpusAuthor(doc.get("corpusAuthor"));
-        if (fields.contains("corpusEditor"))
-            this.setCorpusEditor(doc.get("corpusEditor"));
-        if (fields.contains("corpusTitle"))
-            this.setCorpusTitle(doc.get("corpusTitle"));
-        if (fields.contains("corpusSubTitle"))
-            this.setCorpusSubTitle(doc.get("corpusSubTitle"));
-
-        if (fields.contains("docAuthor"))
-            this.setDocAuthor(doc.get("docAuthor"));
-        if (fields.contains("docEditor"))
-            this.setDocEditor(doc.get("docEditor"));
-        if (fields.contains("docTitle"))
-            this.setDocTitle(doc.get("docTitle"));
-        if (fields.contains("docSubTitle"))
-            this.setDocSubTitle(doc.get("docSubTitle"));
-
-        if (fields.contains("publisher"))
-            this.setPublisher(doc.get("publisher"));
-        if (fields.contains("reference"))
-            this.setReference(doc.get("reference"));
-        if (fields.contains("creationDate"))
-            this.setCreationDate(doc.get("creationDate"));
-        if (fields.contains("keywords"))
-            this.setKeywords(doc.get("keywords"));
-        if (fields.contains("textClass"))
-            this.setTextClass(doc.get("textClass"));
-        if (fields.contains("textColumn"))
-            this.setTextColumn(doc.get("textColumn"));
-        if (fields.contains("textDomain"))
-            this.setTextDomain(doc.get("textDomain"));
-        if (fields.contains("textType"))
-            this.setTextType(doc.get("textType"));
-        if (fields.contains("textTypeArt"))
-            this.setTextTypeArt(doc.get("textTypeArt"));
-        if (fields.contains("textTypeRef"))
-            this.setTextTypeRef(doc.get("textTypeRef"));
-        if (fields.contains("language"))
-            this.setLanguage(doc.get("language"));
-        if (fields.contains("license"))
-            this.setLicense(doc.get("license"));
-        if (fields.contains("pages"))
-            this.setPages(doc.get("pages"));
-
-        if (fields.contains("biblEditionStatement"))
-            this.setBiblEditionStatement(doc.get("biblEditionStatement"));
-        if (fields.contains("fileEditionStatement"))
-            this.setFileEditionStatement(doc.get("fileEditionStatement"));
-    };
-
-
-    /**
      * Get document id.
      */
     @JsonProperty("docID")
diff --git a/src/main/java/de/ids_mannheim/korap/response/Text.java b/src/main/java/de/ids_mannheim/korap/response/Text.java
new file mode 100644
index 0000000..437f24c
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/response/Text.java
@@ -0,0 +1,53 @@
+package de.ids_mannheim.korap.response;
+
+import java.util.*;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+
+import de.ids_mannheim.korap.index.AbstractDocument;
+
+/**
+ * Representation of a text, i.e. of the fields of a single document.
+ * <strong>Warning:</strong> This is currently highly dependent
+ * on DeReKo data and will change in the future.
+ * 
+ * @author Nils Diewald
+ * @see Result
+ */
+@JsonInclude(Include.NON_NULL)
+public class Text extends AbstractDocument {
+
+    private final static Logger log = LoggerFactory.getLogger(Text.class);
+
+    // This advises the java compiler to ignore all loggings
+    public static final boolean DEBUG = false;
+
+    // Mapper for JSON serialization, shared as instantiation is costly
+    final static ObjectMapper mapper = new ObjectMapper();
+
+    public Text () {};
+
+    /**
+     * Serialize the text as a JSON string - "{}" in case there is
+     * nothing to serialize or the serialization fails.
+     */
+    public String toJsonString () {
+        JsonNode json = (JsonNode) this.toJsonNode();
+        if (json.size() == 0)
+            return "{}";
+        try {
+            return mapper.writeValueAsString(json);
+        }
+        catch (Exception e) {
+            log.warn(e.getLocalizedMessage());
+        };
+        return "{}";
+    };
+};
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 7e03f21..b46d82c 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,4 +1,4 @@
-# log4j.rootLogger = ERROR, stdout
+log4j.rootLogger = ERROR, stdout
 
 # Queries:
 # log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
@@ -17,6 +17,8 @@
 # log4j.logger.de.ids_mannheim.korap.query.spans.FocusSpans = TRACE, stdout
 # log4j.logger.de.ids_mannheim.korap.query.spans.MatchSpans = TRACE, stdout
 
+# log4j.logger.org.glassfish.grizzly.http.server.NetworkListener = TRACE, stdout
+
 # Wrappers:
 # log4j.logger.de.ids_mannheim.korap.KrillQuery = TRACE, stdout
 # log4j.logger.de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
index 653549a..15079c2 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
@@ -106,4 +106,20 @@
 
         // hasDeletions, hasPendingMerges
     };
+
+    @Test
+    public void indexFieldInfo () throws IOException {
+        // Index a single document carrying a title and a UID
+        FieldDocument fd = new FieldDocument();
+        fd.setTitle("Peter");
+        fd.setUID(22);
+        KrillIndex ki = new KrillIndex();
+        ki.addDoc(fd);
+        ki.commit();
+        assertEquals(1, ki.numberOf("base", "documents"));
+
+        // The stored fields are retrievable by the UID of the document
+        assertEquals("Peter", ki.getDoc("22").getTitle());
+        assertEquals(22, ki.getDoc("22").getUID());
+    };
 };
diff --git a/src/test/java/de/ids_mannheim/korap/server/TestResource.java b/src/test/java/de/ids_mannheim/korap/server/TestResource.java
index 2f1c128..3181245 100644
--- a/src/test/java/de/ids_mannheim/korap/server/TestResource.java
+++ b/src/test/java/de/ids_mannheim/korap/server/TestResource.java
@@ -76,15 +76,9 @@
         Node.closeDBPool();
         t4 = System.nanoTime();
 
-        double startup = (double) (t2 - t1) / 1000000000.0;
-        double action = (double) (t3 - t2) / 1000000000.0;
+        double startup  = (double) (t2 - t1) / 1000000000.0;
+        double action   = (double) (t3 - t2) / 1000000000.0;
         double shutdown = (double) (t4 - t3) / 1000000000.0;
-
-        /*
-        System.err.println("Startup:  " + startup + ", " +
-                           "Action:   " + action  + ", " +
-                           "Shutdown: " + shutdown);
-        */
     };
 
 
@@ -129,6 +123,7 @@
 
                 res = mapper.readTree(resp);
                 assertEquals("milena", res.at("/meta/node").asText());
+                assertEquals(681, res.at("/messages/0/0").asInt());
             }
             catch (Exception e) {
                 fail("Server response failed " + e.getMessage()
@@ -150,6 +145,7 @@
             // Check mirroring
             assertEquals(2439, res.at("/text/UID").asInt());
             assertEquals("milena", res.at("/meta/node").asText());
+            assertEquals(681, res.at("/messages/0/0").asInt());
         }
         catch (Exception e) {
             fail("Server response failed " + e.getMessage() + " (Known issue)");
@@ -160,9 +156,18 @@
                 .post(Entity.text(""), String.class);
         res = mapper.readTree(resp);
         assertEquals("milena", res.at("/meta/node").asText());
+
+        // Staged data committed
         assertEquals(683, res.at("/messages/0/0").asInt());
     };
 
+    /*
+    @Test
+    public void testRemoving () throws IOException {
+                resp = target.path("/index/" + i).request("application/json")
+                        .put(jsonE, String.class);
+    };
+    */
 
     @Test
     public void testCollection () throws IOException {