Preliminary support of arbitrary meta data fields
Change-Id: I81b6a1fca2fe415af51c8693fb7d6a4edccab22c
diff --git a/Changes b/Changes
index 910ce3d..42755eb 100644
--- a/Changes
+++ b/Changes
@@ -1,11 +1,15 @@
0.58.3 2018-12-12
- - [feature] Introduced attachements as meta data fields (diewald).
+ - [feature] Introduced attachments as meta data fields
+ (fixes #49) (diewald).
+ - [feature] Introduced preliminary support of arbitrary
+ meta data fields (see #47) (diewald).
0.58.2 2018-12-05
- [bugfix] Fixed the candidate list in NextSpans, see de.ids_mannheim.
korap.index.TestNextIndex.testNextExpansionBug() (margaretha)
- [bugfix] Fixed left expansion match order (margaretha)
- - [bugfix] Fixed right expansion match order & expansion over start (margaretha)
+ - [bugfix] Fixed right expansion match order & expansion over start
+ (margaretha)
- [feature] Added opt() method to QueryBuilder (diewald)
- [bugfix] Improved FocusSpans sorting (fixes #7) (margaretha)
- [bugfix] Adopt sorting for FocusSpans in SpanQueryWrappers (diewald)
diff --git a/pom.xml b/pom.xml
index 81d2921..3cb6196 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
<groupId>de.ids_mannheim.korap</groupId>
<artifactId>Krill</artifactId>
- <version>0.58.2</version>
+ <version>0.58.3</version>
<packaging>jar</packaging>
<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index a244282..0d0f64f 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -6,6 +6,9 @@
import de.ids_mannheim.korap.util.KrillDate;
import de.ids_mannheim.korap.util.CorpusDataException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import com.fasterxml.jackson.annotation.*;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
@@ -53,7 +56,14 @@
@JsonIgnoreProperties(ignoreUnknown = true)
public class FieldDocument extends AbstractDocument {
ObjectMapper mapper = new ObjectMapper();
-
+
+
+ // Logger
+ private final static Logger log = LoggerFactory.getLogger(FieldDocument.class);
+
+ // This advises the Java compiler to ignore all logging
+ public static final boolean DEBUG = false;
+
@JsonIgnore
public Document doc = new Document();
private FieldType tvField = new FieldType(TextField.TYPE_STORED);
@@ -209,6 +219,75 @@
this.setTokenSource((String) node.get("tokenSource"));
};
+
+ /**
+ * Deserialize koral:field types for meta data
+ */
+ public void setMetaFields (ArrayList<Map<String, JsonNode>> fields) {
+ String type, key, value;
+ StringBuffer sb = new StringBuffer();
+ Iterator<JsonNode> i;
+
+ for (Map<String, JsonNode> field : fields) {
+ if (field.get("@type").asText().equals("koral:field")) {
+ type = (String) field.get("type").asText();
+ key = (String) field.get("key").asText();
+
+ // Add string field
+ if (type.equals("type:string") || type.equals("type:keywords")) {
+
+ // Field is an array
+ if (field.get("value").isArray()) {
+ i = field.get("value").elements();
+
+ sb.setLength(0);
+ while (i.hasNext()) {
+ sb.append(i.next().asText()).append(" ");
+ };
+ if (sb.length() > 1) {
+ sb.setLength(sb.length() - 1);
+ };
+ this.addKeyword(key, sb.toString());
+ }
+ else {
+ this.addString(key, field.get("value").asText());
+ };
+ }
+
+ // Add text field
+ else if (type.equals("type:text")) {
+ this.addText(key, field.get("value").asText());
+ }
+
+ // Add integer field
+ else if (type.equals("type:integer")) {
+ this.addInt(key, field.get("value").asInt());
+ }
+
+ // Add attachement field
+ else if (type.equals("type:attachement")) {
+ value = field.get("value").asText();
+ if (value.startsWith("data:")) {
+ this.addAttachement(key, value);
+ };
+ }
+
+ // Add date field
+ else if (type.equals("type:date")) {
+ KrillDate date = new KrillDate(field.get("value").asText());
+ if (date != null) {
+ this.addInt(key, date.toString());
+ };
+ }
+
+ // Unknown
+ else {
+ log.error("Unknown field type {}", type);
+ };
+ };
+ }
+ };
+
/**
* Deserialize token stream data (LEGACY).
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaField.java b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
index e997313..bcedd95 100644
--- a/src/main/java/de/ids_mannheim/korap/response/MetaField.java
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
@@ -1,8 +1,5 @@
package de.ids_mannheim.korap.response;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
index 513185e..02c9b8e 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
@@ -3,6 +3,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
+import java.util.*;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
@@ -13,6 +14,9 @@
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
+import com.fasterxml.jackson.databind.JsonNode;
+
+import static de.ids_mannheim.korap.TestSimple.*;
import de.ids_mannheim.korap.Krill;
import de.ids_mannheim.korap.KrillIndex;
import de.ids_mannheim.korap.KrillMeta;
@@ -197,7 +201,7 @@
"UTF-8");
// {1:der} \w0:5 nicht
- SpanQueryWrapper sqwi = jsonQuery(jsonPath);
+ SpanQueryWrapper sqwi = getJsonQuery(jsonPath);
Result kr = ki.search(sqwi.toQuery(), 0, (short) 5, true, (short) 2,
false, (short) 5);
@@ -242,36 +246,188 @@
assertEquals(fd.getPubPlace(), "Bochum");
assertEquals(fd.getPubDate().toDisplay(), "");
};
-
- public static String getString (String path) {
- StringBuilder contentBuilder = new StringBuilder();
- try {
- BufferedReader in = new BufferedReader(new FileReader(path));
- String str;
- while ((str = in.readLine()) != null) {
- contentBuilder.append(str);
+ @Test
+ public void indexNewMetaData () throws Exception {
+ // Index a JSON document whose meta data is given as koral:field objects in "metaFields"
+ String json = new String(
+ "{"
+ + " \"fields\" : ["
+ + " { "
+ + " \"primaryData\" : \"abc\""
+ + " },"
+ + " {"
+ + " \"name\" : \"tokens\","
+ + " \"data\" : ["
+ + " [ \"s:a\", \"i:a\", \"_0$<i>0<i>1\", \"-:t$<i>3\"],"
+ + " [ \"s:b\", \"i:b\", \"_1$<i>1<i>2\" ],"
+ + " [ \"s:c\", \"i:c\", \"_2$<i>2<i>3\" ]"
+ + " ]"
+ + " }"
+ + " ],"
+ + " \"metaFields\" : ["
+ + " {"
+ + " \"@type\" : \"koral:field\","
+ + " \"type\" : \"type:string\","
+ + " \"key\" : \"corpusID\","
+ + " \"value\" : \"WPD\""
+ + " },"
+ + " {"
+ + " \"@type\" : \"koral:field\","
+ + " \"type\" : \"type:string\","
+ + " \"key\" : \"textSigle\","
+ + " \"value\" : \"x/y/z\""
+ + " },"
+ + " {"
+ + " \"@type\" : \"koral:field\","
+ + " \"type\" : \"type:string\","
+ + " \"key\" : \"ID\","
+ + " \"value\" : \"WPD-AAA-00001\""
+ + " },"
+ + " {"
+ + " \"@type\" : \"koral:field\","
+ + " \"type\" : \"type:string\","
+ + " \"key\" : \"textClass\","
+ + " \"value\" : [\"music\",\"entertainment\"]"
+ + " },"
+ + " {"
+ + " \"@type\" : \"koral:field\","
+ + " \"type\" : \"type:text\","
+ + " \"key\" : \"author\","
+ + " \"value\" : \"Peter Frankenfeld\""
+ + " },"
+ + " {"
+ + " \"@type\" : \"koral:field\","
+ + " \"type\" : \"type:date\","
+ + " \"key\" : \"pubDate\","
+ + " \"value\" : \"2015-05-01\""
+ + " },"
+ + " {"
+ + " \"@type\" : \"koral:field\","
+ + " \"type\" : \"type:text\","
+ + " \"key\" : \"title\","
+ + " \"value\" : \"Wikipedia\""
+ + " },"
+ + " {"
+ + " \"@type\" : \"koral:field\","
+ + " \"type\" : \"type:text\","
+ + " \"key\" : \"subTitle\","
+ + " \"value\" : \"Die freie Enzyklopädie\""
+ + " },"
+ + " {"
+ + " \"@type\" : \"koral:field\","
+ + " \"type\" : \"type:string\","
+ + " \"key\" : \"pubPlace\","
+ + " \"value\" : \"Bochum\""
+ + " },"
+ + " {"
+ + " \"@type\" : \"koral:field\","
+ + " \"type\" : \"type:attachement\","
+ + " \"key\" : \"link\","
+ + " \"value\" : \"data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel\""
+ + " }"
+ + " ]"
+ + "}");
+ // Add the JSON document to a fresh index; addDoc hands back the FieldDocument
+ KrillIndex ki = new KrillIndex();
+ FieldDocument fd = ki.addDoc(json);
+
+ ki.commit();
+ // Check the values on the document's fields directly (the date is expected without dashes)
+ assertEquals(fd.getPrimaryData(), "abc");
+ assertEquals(fd.doc.getField("corpusID").stringValue(), "WPD");
+ assertEquals(fd.doc.getField("textSigle").stringValue(), "x/y/z");
+ assertEquals(fd.doc.getField("ID").stringValue(), "WPD-AAA-00001");
+ assertEquals(fd.doc.getField("textClass").stringValue(), "music entertainment");
+ assertEquals(fd.doc.getField("author").stringValue(), "Peter Frankenfeld");
+ assertEquals(fd.doc.getField("title").stringValue(), "Wikipedia");
+ assertEquals(fd.doc.getField("subTitle").stringValue(), "Die freie Enzyklopädie");
+ assertEquals(fd.doc.getField("pubPlace").stringValue(), "Bochum");
+ assertEquals(fd.doc.getField("pubDate").stringValue(), "20150501");
+ assertEquals(fd.doc.getField("link").stringValue(), "data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel");
+ // Fetch the fields for "x/y/z" (presumably looked up by textSigle - confirm) as JSON
+ JsonNode res = ki.getFields("x/y/z").toJsonNode();
+
+ Iterator fieldIter = res.at("/document/fields").elements();
+ // NOTE(review): checkC counts the matched keys but is never asserted after the loop - presumably 10 was intended, confirm
+ int checkC = 0;
+ while (fieldIter.hasNext()) {
+ JsonNode field = (JsonNode) fieldIter.next();
+
+ String key = field.at("/key").asText();
+ // Each known key is checked for its type, @type and value
+ switch (key) {
+ case "corpusID":
+ assertEquals("type:string", field.at("/type").asText());
+ assertEquals("koral:field", field.at("/@type").asText());
+ assertEquals("WPD", field.at("/value").asText());
+ checkC++;
+ break;
+
+ case "textSigle":
+ assertEquals("type:string", field.at("/type").asText());
+ assertEquals("koral:field", field.at("/@type").asText());
+ assertEquals("x/y/z", field.at("/value").asText());
+ checkC++;
+ break;
+
+ case "ID":
+ assertEquals("type:string", field.at("/type").asText());
+ assertEquals("koral:field", field.at("/@type").asText());
+ assertEquals("WPD-AAA-00001", field.at("/value").asText());
+ checkC++;
+ break;
+ // The array value sent as type:string is expected back as type:keywords
+ case "textClass":
+ assertEquals("type:keywords", field.at("/type").asText());
+ assertEquals("koral:field", field.at("/@type").asText());
+ assertEquals("music", field.at("/value/0").asText());
+ assertEquals("entertainment", field.at("/value/1").asText());
+ checkC++;
+ break;
+
+ case "author":
+ assertEquals("type:text", field.at("/type").asText());
+ assertEquals("koral:field", field.at("/@type").asText());
+ assertEquals("Peter Frankenfeld", field.at("/value").asText());
+ checkC++;
+ break;
+
+ case "title":
+ assertEquals("type:text", field.at("/type").asText());
+ assertEquals("koral:field", field.at("/@type").asText());
+ assertEquals("Wikipedia", field.at("/value").asText());
+ checkC++;
+ break;
+
+ case "subTitle":
+ assertEquals("type:text", field.at("/type").asText());
+ assertEquals("koral:field", field.at("/@type").asText());
+ assertEquals("Die freie Enzyklopädie", field.at("/value").asText());
+ checkC++;
+ break;
+
+ case "pubPlace":
+ assertEquals("type:string", field.at("/type").asText());
+ assertEquals("koral:field", field.at("/@type").asText());
+ assertEquals("Bochum", field.at("/value").asText());
+ checkC++;
+ break;
+ // The date is expected back in its dashed input form
+ case "pubDate":
+ assertEquals("type:date", field.at("/type").asText());
+ assertEquals("koral:field", field.at("/@type").asText());
+ assertEquals("2015-05-01", field.at("/value").asText());
+ checkC++;
+ break;
+
+ case "link":
+ assertEquals("type:attachement", field.at("/type").asText());
+ assertEquals("koral:field", field.at("/@type").asText());
+ assertEquals("data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel", field.at("/value").asText());
+ checkC++;
+ break;
};
- in.close();
- }
- catch (IOException e) {
- fail(e.getMessage());
- }
- return contentBuilder.toString();
- };
-
-
- public static SpanQueryWrapper jsonQuery (String jsonFile) {
- SpanQueryWrapper sqwi;
-
- try {
- String json = getString(jsonFile);
- sqwi = new KrillQuery("tokens").fromKoral(json);
- }
- catch (QueryException e) {
- fail(e.getMessage());
- sqwi = new QueryBuilder("tokens").seg("???");
};
- return sqwi;
};
};