Compile document fields in a second phase for adding to an index
Change-Id: I5ec958a8b5ce16881044dcb8b09a68561f80e05c
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 7829387..6a7e495 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -412,7 +412,7 @@
try {
// Add document to writer
- this.writer().addDocument(doc.doc);
+ this.writer().addDocument(doc.compile());
if (++commitCounter > autoCommit) {
this.commit();
commitCounter = 0;
diff --git a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
index e579a16..fb467ff 100644
--- a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
@@ -460,6 +460,27 @@
)
);
};
+
+
+ /**
+  * Register an integer meta field for the document.
+  * The value is recorded as a string with the type
+  * "type:integer"; a null value is ignored.
+  *
+  * @param key The name of the field.
+  * @param value The integer value as a string.
+  */
+ @JsonIgnore
+ public void addInt (String key, String value) {
+     if (value != null)
+         mFields.add(new MetaField(key, "type:integer", value));
+ };
+
+ /** Integer overload: converts the value to a string and delegates to the String variant. */
+ @JsonIgnore public void addInt (String key, int value) {
+     this.addInt(key, Integer.toString(value));
+ };
+
@JsonIgnore
public void addStored (String key, String value) {
@@ -474,6 +495,22 @@
)
);
};
+
+
+ /**
+  * Register an attachment meta field for the document.
+  * The value is recorded with the type
+  * "type:attachement"; a null value is ignored.
+  *
+  * @param key The name of the field.
+  * @param value The value of the attachment.
+  */
+ @JsonIgnore
+ public void addAttachement (String key, String value) {
+     if (value != null)
+         mFields.add(new MetaField(key, "type:attachement", value));
+ };
+
@JsonIgnore
public void addKeywords (String key, String value) {
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index b6b3292..4ca1ae6 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -87,123 +87,74 @@
};
- // see http://www.cowtowncoder.com/blog/archives/2011/07/entry_457.html
+ /**
+  * Compile all collected meta fields into index fields
+  * and add them to the document. Fields of unknown type
+  * and integer or date fields without a valid integer
+  * value are skipped.
+  *
+  * NOTE(review): Every call adds the fields to the same
+  * document again - presumably meant to be called once.
+  *
+  * @return The document including all compiled fields.
+  */
+ public Document compile () {
- public void addInt (String key, int value) {
- doc.add(new IntField(key, value, Field.Store.YES));
+     // Iterate over all fields
+     Iterator<MetaField> fIter = mFields.iterator();
+     while (fIter.hasNext()) {
+         MetaField mf = fIter.next();
+         switch (mf.type) {
+         case "type:integer":
+             try {
+                 int val = Integer.parseInt(mf.values.get(0));
+                 doc.add(new IntField(mf.key, val, Field.Store.YES));
+             }
+             catch (NumberFormatException ignored) {
+                 // Best effort: skip values that are no integers
+             };
+             break;
+
+         case "type:date":
+             // A constructed object is never null, so the
+             // date can be used without a null check
+             KrillDate date = new KrillDate(mf.values.get(0));
+             try {
+                 int dateInt = date.toInteger();
+                 doc.add(new IntField(mf.key, dateInt, Field.Store.YES));
+             }
+             catch (NumberFormatException ignored) {
+                 // Best effort: skip dates without an integer form
+             };
+             break;
+
+         case "type:string":
+             doc.add(
+                 new StringField(mf.key, mf.values.get(0), Field.Store.YES)
+             );
+             break;
+
+         case "type:keywords":
+             // All values are indexed as one space separated string
+             doc.add(
+                 new Field(mf.key, String.join(" ", mf.values), keywordField)
+             );
+             break;
+
+         case "type:text":
+             doc.add(new TextPrependedField(mf.key, mf.values.get(0)));
+             break;
+
+         case "type:attachement":
+         case "type:store":
+             doc.add(new StoredField(mf.key, mf.values.get(0)));
+             break;
+         };
+     };
+
+     return doc;
+ };
-
- public void addInt (String key, String value) {
- if (value != null)
- this.addInt(key, Integer.parseInt(value));
- };
-
- @Override
- public void addDate (String key, String value) {
- if (value == null)
- return;
-
- KrillDate date = new KrillDate(value);
- if (date != null) {
- this.addInt(key, date.toString());
- };
- mFields.add(
- new MetaField(
- key,
- "type:date",
- date.toDisplay()
- )
- );
- }
-
- @Override
- public void addText (String key, String value) {
- if (value == null)
- return;
-
- mFields.add(
- new MetaField(
- key,
- "type:text",
- value
- )
- );
- doc.add(new TextPrependedField(key, value));
- };
-
-
- @Override
- public void addKeywords (String key, String value) {
- if (value == null)
- return;
-
- mFields.add(
- new MetaField(
- key,
- "type:keywords",
- value
- )
- );
-
- doc.add(new Field(key, value, keywordField));
- };
-
- @Override
- public void addString (String key, String value) {
- if (value == null)
- return;
-
- mFields.add(
- new MetaField(
- key,
- "type:string",
- value
- )
- );
- doc.add(new StringField(key, value, Field.Store.YES));
- };
-
- public void addAttachement (String key, String value) {
- if (value == null)
- return;
-
- mFields.add(
- new MetaField(
- key,
- "type:attachement",
- value
- )
- );
- doc.add(new StoredField(key, value));
- };
-
- @Override
- public void addStored (String key, String value) {
- if (value == null)
- return;
-
- mFields.add(
- new MetaField(
- key,
- "type:store",
- value
- )
- );
- doc.add(new StoredField(key, value));
- };
-
-
- public void addStored (String key, int value) {
- mFields.add(
- new MetaField(
- key,
- "type:store",
- new Integer(value).toString()
- )
- );
- doc.add(new StoredField(key, value));
- };
-
+
public void addTV (String key, String value, String tsString) {
this.addTV(key, value, new MultiTermTokenStream(tsString));
@@ -298,6 +249,7 @@
/**
* Deserialize koral:field types for meta data
*/
+ // Temporarily this needs to be in a "metaFields" parameter
public void setMetaFields (ArrayList<Map<String, JsonNode>> fields) {
String type, key, value;
StringBuffer sb = new StringBuffer();
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaFieldsObj.java b/src/main/java/de/ids_mannheim/korap/response/MetaFieldsObj.java
index 82534ba..51131e3 100644
--- a/src/main/java/de/ids_mannheim/korap/response/MetaFieldsObj.java
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaFieldsObj.java
@@ -52,14 +52,12 @@
* Add field to collection
*/
public MetaField add (IndexableField iField) {
- MetaField mf = metaFieldFromIndexableField(iField);
-
- // Ignore non-stored fields
- if (mf == null)
- return null;
-
- fieldsMap.put(mf.key, mf);
- return mf;
+ MetaField mf = metaFieldFromIndexableField(
+     iField, new MetaField(iField.name())
+ );
+
+ // Non-stored fields yield null and must not be added
+ return (mf == null) ? null : this.add(mf);
};
@@ -78,7 +76,7 @@
// Field type needs to be restored heuristically
// - though that's not very elegant
- public static MetaField metaFieldFromIndexableField (IndexableField iField) {
+ public static MetaField metaFieldFromIndexableField (IndexableField iField, MetaField mf) {
IndexableFieldType iFieldType = iField.fieldType();
// Field type needs to be restored heuristically
@@ -87,8 +85,6 @@
// Ignore non-stored fields
if (!iFieldType.stored())
return null;
-
- MetaField mf = new MetaField(iField.name());
// TODO: Check if metaField exists for that field
diff --git a/src/main/java/de/ids_mannheim/korap/util/KrillDate.java b/src/main/java/de/ids_mannheim/korap/util/KrillDate.java
index a578fdd..3e5f3c1 100644
--- a/src/main/java/de/ids_mannheim/korap/util/KrillDate.java
+++ b/src/main/java/de/ids_mannheim/korap/util/KrillDate.java
@@ -164,6 +164,16 @@
/**
+  * Return the date as an integer by parsing its stringification.
+  * @return The date as an integer.
+  * @throws NumberFormatException if the stringification is no valid integer.
+  */
+ public int toInteger () throws NumberFormatException {
+     final String stringified = this.toString();
+     return Integer.parseInt(stringified);
+ };
+
+ /**
* Serialize ceiled date to string.
*
* @return The date as a string.
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
index faf739c..dcaa5c3 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFieldDocument.java
@@ -27,6 +27,9 @@
import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.util.QueryException;
+import org.apache.lucene.document.Document;
+
+
@RunWith(JUnit4.class)
public class TestFieldDocument {
@@ -48,41 +51,43 @@
+ "[(1-2)s:b|i:b|_1$<i>1<i>2]" + "[(2-3)s:c|i:c|_2$<i>2<i>3]");
fd.addAttachement("Wikilink", "data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel");
- assertEquals(fd.doc.getField("title").name(), "title");
- assertEquals(fd.doc.getField("title").stringValue(), "Wikipedia");
+ Document doc = fd.compile();
+
+ assertEquals(doc.getField("title").name(), "title");
+ assertEquals(doc.getField("title").stringValue(), "Wikipedia");
- assertEquals(fd.doc.getField("corpusID").name(), "corpusID");
- assertEquals(fd.doc.getField("corpusID").stringValue(), "WPD");
+ assertEquals(doc.getField("corpusID").name(), "corpusID");
+ assertEquals(doc.getField("corpusID").stringValue(), "WPD");
- assertEquals(fd.doc.getField("ID").name(), "ID");
- assertEquals(fd.doc.getField("ID").stringValue(), "WPD-AAA-00001");
+ assertEquals(doc.getField("ID").name(), "ID");
+ assertEquals(doc.getField("ID").stringValue(), "WPD-AAA-00001");
- assertEquals(fd.doc.getField("subTitle").name(), "subTitle");
- assertEquals(fd.doc.getField("subTitle").stringValue(),
+ assertEquals(doc.getField("subTitle").name(), "subTitle");
+ assertEquals(doc.getField("subTitle").stringValue(),
"Die freie Enzyklopädie");
- assertEquals(fd.doc.getField("pubPlace").name(), "pubPlace");
- assertEquals(fd.doc.getField("pubPlace").stringValue(), "Bochum");
+ assertEquals(doc.getField("pubPlace").name(), "pubPlace");
+ assertEquals(doc.getField("pubPlace").stringValue(), "Bochum");
- assertEquals(fd.doc.getField("lastModified").name(), "lastModified");
- assertEquals(fd.doc.getField("lastModified").stringValue(), "20130717");
+ assertEquals(doc.getField("lastModified").name(), "lastModified");
+ assertEquals(doc.getField("lastModified").stringValue(), "20130717");
- assertEquals(fd.doc.getField("tokens").name(), "tokens");
- assertEquals(fd.doc.getField("tokens").stringValue(), "abc");
+ assertEquals(doc.getField("tokens").name(), "tokens");
+ assertEquals(doc.getField("tokens").stringValue(), "abc");
- assertEquals(fd.doc.getField("author").name(), "author");
- assertEquals(fd.doc.getField("author").stringValue(),
+ assertEquals(doc.getField("author").name(), "author");
+ assertEquals(doc.getField("author").stringValue(),
"Peter Frankenfeld");
- assertEquals(fd.doc.getField("layerInfo").name(), "layerInfo");
- assertEquals(fd.doc.getField("layerInfo").stringValue(),
+ assertEquals(doc.getField("layerInfo").name(), "layerInfo");
+ assertEquals(doc.getField("layerInfo").stringValue(),
"opennlp/p=pos");
- assertEquals(fd.doc.getField("textClass").name(), "textClass");
- assertEquals(fd.doc.getField("textClass").stringValue(),
+ assertEquals(doc.getField("textClass").name(), "textClass");
+ assertEquals(doc.getField("textClass").stringValue(),
"music entertainment");
- assertEquals(fd.doc.getField("Wikilink").name(), "Wikilink");
- assertEquals(fd.doc.getField("Wikilink").stringValue(),
+ assertEquals(doc.getField("Wikilink").name(), "Wikilink");
+ assertEquals(doc.getField("Wikilink").stringValue(),
"data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel"
);
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
index 8518f90..934ac83 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
@@ -198,11 +198,12 @@
fd.addStored("ref", "My reference");
fd.addAttachement("ref2", "data:text/plain;charset=UTF-8,My reference2");
-/*
+ /*
fd.addKeywords("keyword", "baum");
fd.addKeywords("keyword", "wald");
-*/
- fd.addKeywords("keyword", "baum wald");
+ */
+
+ fd.addKeywords("keyword", "baum wald");
fd.addText("title", "Der Name der Rose");