Added meta field retrieval method
Change-Id: Ic8adfd48098001cbf13ede3d468dd989a1413b94
diff --git a/Changes b/Changes
index 7191cff..4e95d44 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.56.2 2018-02-07
+ - [feature] Introduce meta field retrieval method (diewald)
+
0.56.1 2018-01-31
- [bugfix] Changed relation serialization in snippet to work
with segmented target anchors (diewald)
diff --git a/pom.xml b/pom.xml
index 78279d8..cb67ac4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
<groupId>de.ids_mannheim.korap</groupId>
<artifactId>Krill</artifactId>
- <version>0.56.1</version>
+ <version>0.56.2</version>
<packaging>jar</packaging>
<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 3eb34cb..ad46ec3 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -795,14 +795,15 @@
DocIdSet filterSet = filter.getDocIdSet(atomic,
atomic.reader().getLiveDocs());
- // Create a bitset for the correct document
- Bits bitset = filterSet.bits();
-
DocIdSetIterator filterIterator = filterSet.iterator();
- if (DEBUG)
+ if (DEBUG) {
+ // Create a bitset for the correct document
+ Bits bitset = filterSet.bits();
+
log.trace("Checking document in {} with {}", filterSet,
- bitset);
+ bitset);
+ };
// No document found
if (filterIterator == null)
@@ -1043,15 +1044,15 @@
DocIdSet filterSet = filter.getDocIdSet(atomic,
atomic.reader().getLiveDocs());
-
- // Create a bitset for the correct document
- Bits bitset = filterSet.bits();
-
DocIdSetIterator filterIterator = filterSet.iterator();
- if (DEBUG)
+ if (DEBUG) {
+ // Create a bitset for the correct document
+ Bits bitset = filterSet.bits();
+
log.trace("Checking document in {} with {}", filterSet,
- bitset);
+ bitset);
+ };
// No document found
if (filterIterator == null)
@@ -1545,16 +1546,68 @@
};
- public void getFields () {
- /*
- * Return a map of key, value pairs:
- *
- * keywords => keywords (contains)
- * author => text (contains)
- */
+ // Return field values
+ public MetaFields getFields (String textSigle) {
+ // , HashSet<String> fields) {
+
+ // Create TermQuery for document
+ TermQuery textSigleQuery = new TermQuery(new Term("textSigle", textSigle));
+
+ Filter filter = (Filter) new QueryWrapperFilter(textSigleQuery);
+
+ /*
+ if (fields.contain("@all"))
+ fields = null;
+ */
+
+ MetaFields metaFields = new MetaFields(textSigle);
+
+ try {
+
+ // Iterate over all atomic indices and find the matching document
+ for (LeafReaderContext atomic : this.reader().leaves()) {
+
+ // Retrieve the single document of interest
+ DocIdSet filterSet = filter.getDocIdSet(atomic, atomic.reader().getLiveDocs());
+
+ DocIdSetIterator filterIterator = filterSet.iterator();
+
+ // No document found
+ if (filterIterator == null)
+ continue;
+
+
+ // Go to the matching doc - and remember its ID
+ int localDocID = filterIterator.nextDoc();
+
+ if (localDocID == DocIdSetIterator.NO_MORE_DOCS)
+ continue;
+
+ Document doc = atomic.reader().document(localDocID);
+
+ Iterator<IndexableField> fieldIterator = doc.getFields().iterator();
+ while (fieldIterator.hasNext()) {
+ IndexableField iField = fieldIterator.next();
+
+ // Add field
+ metaFields.add(iField);
+ };
+
+ return metaFields;
+ };
+ }
+ catch (IOException e) {
+ metaFields.addError(600, "Unable to read index", e.getLocalizedMessage());
+ log.warn(e.getLocalizedMessage());
+ };
+
+ metaFields.addError(630, "Document not found");
+
+ return metaFields;
};
+
public void getValues (String field) {
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index 8c0fb14..81cb49f 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -33,6 +33,12 @@
to make this less messy and speed things up.
*/
+/*
+ * Currently the FieldDocument is not ready to be used for KQ
+ * serialization of fields - this is currently done in
+ * response/MetaFields.
+*/
+
/**
* FieldDocument represents a simple API to create documents
* for storing with KrillIndex. <i>Field</i> in the name resembles
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaField.java b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
new file mode 100644
index 0000000..bd065a5
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
@@ -0,0 +1,83 @@
+package de.ids_mannheim.korap.response;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.JsonNode;
+
+import java.util.*;
+
+import org.apache.lucene.index.*;
+
+/**
+ * Class representing a meta field.
+ */
+public class MetaField {
+
+ // Mapper for JSON serialization
+ ObjectMapper mapper = new ObjectMapper();
+
+ public String type = "type:string";
+ public String key;
+ public List<String> values = new ArrayList<>();
+ public Boolean retrieveOnly = false;
+
+ public MetaField (String key) {
+ this.key = key;
+ };
+
+ /**
+ * Create JsonNode
+ */
+ public JsonNode toJsonNode () {
+ ObjectNode json = mapper.createObjectNode();
+ json.put("@type", "koral:field");
+ json.put("type", this.type);
+ json.put("key", this.key);
+
+ if (this.retrieveOnly)
+ json.put("retrieveOnly", true);
+
+ // Value is numerical
+ if (this.type.equals("type:number")) {
+
+ // Value is a list
+ if (this.values.size() > 1) {
+ ArrayNode list = json.putArray("value");
+
+ Iterator vIter = this.values.iterator();
+ while (vIter.hasNext()) {
+ list.add((int) Integer.parseInt((String) vIter.next()));
+ };
+ }
+
+ // Value is a single item
+ else {
+ json.put("value", Integer.parseInt(this.values.get(0)));
+ };
+ }
+
+ // Value is textual
+ else {
+ // Value is a list
+ if (this.values.size() > 1) {
+ ArrayNode list = json.putArray("value");
+
+ Iterator vIter = this.values.iterator();
+ while (vIter.hasNext()) {
+ list.add((String) vIter.next());
+ };
+ }
+
+ // Value is a single item
+ else {
+ json.put("value", this.values.get(0));
+ };
+ };
+
+ return (JsonNode) json;
+ };
+};
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaFields.java b/src/main/java/de/ids_mannheim/korap/response/MetaFields.java
new file mode 100644
index 0000000..5c5a8e5
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaFields.java
@@ -0,0 +1,133 @@
+package de.ids_mannheim.korap.response;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.annotation.*;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import de.ids_mannheim.korap.index.AbstractDocument;
+
+import java.util.*;
+
+import org.apache.lucene.index.*;
+
+@JsonInclude(Include.NON_NULL)
+public class MetaFields extends AbstractDocument {
+
+ // Logger
+ private final static Logger log = LoggerFactory.getLogger(MetaFields.class);
+
+ // This advises the Java compiler to ignore all logging
+ public static final boolean DEBUG = false;
+
+ // Mapper for JSON serialization
+ ObjectMapper mapper = new ObjectMapper();
+
+ private Map<String, MetaField> fieldsMap = new HashMap<>();
+
+ public MetaFields (String id) {
+ this.addMessage(0, "Response format is temporary");
+ };
+
+
+ /**
+ * Add field to collection
+ */
+ public void add (IndexableField iField) {
+
+ IndexableFieldType iFieldType = iField.fieldType();
+
+ // Field type needs to be restored heuristically
+ // - though that's not very elegant
+
+ // Ignore non-stored fields
+ if (!iFieldType.stored())
+ return;
+
+ MetaField mf = new MetaField(iField.name());
+
+ // Reuse existing metafield
+ if (fieldsMap.containsKey(mf.key)) {
+ mf = fieldsMap.get(mf.key);
+ }
+
+ // Add new field
+ else {
+ fieldsMap.put(mf.key, mf);
+ };
+
+ // TODO: Check if metaField exists for that field
+
+ Number n = iField.numericValue();
+ String s = iField.stringValue();
+
+ // Field has numeric value (possibly a date)
+ if (n != null) {
+ // System.err.print("-num");
+
+ // TODO:
+ // check if the number is a date!
+ mf.type = "type:number";
+
+ mf.values.add(n.toString());
+ // System.err.println(" = " + n.toString());
+ }
+
+ // Field has a textual value
+ else if (s != null) {
+ if (iFieldType.indexOptions() == IndexOptions.NONE) {
+ // System.err.print("-readonly");
+ mf.retrieveOnly = true;
+ // System.err.print("-string");
+ }
+ else if (iFieldType.indexOptions() != IndexOptions.DOCS) {
+ // System.err.print("-string");
+ /*
+ }
+ else {
+ System.err.print("-text");
+ */
+ mf.type = "type:text";
+ };
+
+ mf.values.add(s.toString());
+ // System.err.println(" = " + s);
+ }
+
+ else {
+ log.error("Unknown field type {}", iField.name());
+ };
+ };
+
+
+ /**
+ * Serialize response as a {@link JsonNode}.
+ *
+ * @return {@link JsonNode} representation of the response
+ */
+ public JsonNode toJsonNode () {
+
+ // Get notifications
+ ObjectNode json = (ObjectNode) super.toJsonNode();
+
+ ObjectNode doc = json.putObject("document");
+ doc.put("@type", "koral:document");
+
+ ArrayNode fields = doc.putArray("fields");
+
+ // Iterate over all fields
+ Iterator fIter = fieldsMap.keySet().iterator();
+ while (fIter.hasNext()) {
+ // System.err.println(fIter.next());
+ MetaField mf = fieldsMap.get(fIter.next());
+ // System.err.println(mf.type);
+ fields.add(mf.toJsonNode());
+ };
+
+ return json;
+ };
+};
diff --git a/src/main/java/de/ids_mannheim/korap/response/Text.java b/src/main/java/de/ids_mannheim/korap/response/Text.java
index 3e6647a..2cf3e6d 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Text.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Text.java
@@ -9,6 +9,7 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import de.ids_mannheim.korap.index.AbstractDocument;
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
index 024ea72..d28c0ab 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
@@ -21,6 +21,9 @@
import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.util.QueryException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
+
@RunWith(JUnit4.class)
public class TestKrillIndex {
@@ -181,4 +184,64 @@
assertEquals("Akron", ki.getDoc("05678").getTitle());
assertEquals(5678, ki.getDoc("05678").getUID());
};
+
+
+ @Test
+ public void indexRetrieveFieldInfo () throws IOException {
+ KrillIndex ki = new KrillIndex();
+
+ FieldDocument fd = new FieldDocument();
+
+ fd.addString("name", "Peter");
+ fd.addString("textSigle", "a/b/c");
+ fd.addInt("zahl1", 56);
+ fd.addStored("ref", "My reference");
+
+ fd.addKeyword("keyword", "baum");
+ fd.addKeyword("keyword", "wald");
+
+ fd.addText("title", "Der Name der Rose");
+
+ ki.addDoc(fd);
+
+ /* Save documents */
+ ki.commit();
+
+ JsonNode res = ki.getFields("a/b/c").toJsonNode();
+
+ // TODO: Field order follows HashMap iteration order - check if the sorting is always identical!
+
+ assertEquals("ref", res.at("/document/fields/0/key").asText());
+ assertEquals("type:string", res.at("/document/fields/0/type").asText());
+ assertEquals("koral:field", res.at("/document/fields/0/@type").asText());
+ assertEquals(true, res.at("/document/fields/0/retrieveOnly").asBoolean());
+ assertEquals("My reference", res.at("/document/fields/0/value").asText());
+
+ assertEquals("title", res.at("/document/fields/1/key").asText());
+ assertEquals("type:text", res.at("/document/fields/1/type").asText());
+ assertEquals("koral:field", res.at("/document/fields/1/@type").asText());
+ assertEquals("Der Name der Rose", res.at("/document/fields/1/value").asText());
+
+ assertEquals("textSigle", res.at("/document/fields/2/key").asText());
+ assertEquals("type:string", res.at("/document/fields/2/type").asText());
+ assertEquals("koral:field", res.at("/document/fields/2/@type").asText());
+ assertEquals("a/b/c", res.at("/document/fields/2/value").asText());
+
+ assertEquals("keyword", res.at("/document/fields/3/key").asText());
+ assertEquals("type:string", res.at("/document/fields/3/type").asText());
+ assertEquals("koral:field", res.at("/document/fields/3/@type").asText());
+ assertEquals("baum", res.at("/document/fields/3/value/0").asText());
+ assertEquals("wald", res.at("/document/fields/3/value/1").asText());
+
+ assertEquals("zahl1", res.at("/document/fields/4/key").asText());
+ assertEquals("type:number", res.at("/document/fields/4/type").asText());
+ assertEquals("koral:field", res.at("/document/fields/4/@type").asText());
+ assertEquals(56, res.at("/document/fields/4/value").asInt());
+
+ assertEquals("name", res.at("/document/fields/5/key").asText());
+ assertEquals("type:string", res.at("/document/fields/5/type").asText());
+ assertEquals("koral:field", res.at("/document/fields/5/@type").asText());
+ assertEquals("Peter", res.at("/document/fields/5/value").asText());
+
+ };
};