Populate documents with metadata fields based on index types
Change-Id: I813dfabd1b8dc2a51986fc35f4601f211fe0b663
diff --git a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
index 05f8e6a..9b8f395 100644
--- a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
@@ -40,9 +40,9 @@
// @JsonIgnoreProperties(ignoreUnknown = true)
public abstract class AbstractDocument extends Response {
ObjectMapper mapper = new ObjectMapper();
-
+
private String primaryData;
-
+
private static HashSet<String> legacyStringFields =
new HashSet<String>(Arrays.asList(
"pubPlace",
@@ -135,62 +135,34 @@
};
- public void populateFields (Document doc, Collection<String> fields) {
- // Remember - never serialize "tokens"
-
- // TODO:
- // Pupulate based on field types!
-
+ public void populateFields (Document doc, Collection<String> fields) {
if (fields.contains("UID"))
this.setUID(doc.get("UID"));
- String field;
- Iterator<String> i = legacyTextFields.iterator();
- while (i.hasNext()) {
- field = i.next();
- if (fields.contains(field)) {
- this.addText(field, doc.get(field));
- };
- };
+ Iterator<String> fieldsIter = fields.iterator();
- i = legacyKeywordsFields.iterator();
- while (i.hasNext()) {
- field = i.next();
- if (fields.contains(field)) {
- this.addKeywords(field, doc.get(field));
- };
- };
+ while (fieldsIter.hasNext()) {
+ String name = fieldsIter.next();
- i = legacyStoredFields.iterator();
- while (i.hasNext()) {
- field = i.next();
- if (fields.contains(field)) {
- this.addStored(field, doc.get(field));
- };
- };
+ // Remember - never serialize "tokens"; "UID" was already set above
+ if ("tokens".equals(name) || "UID".equals(name))
+ continue;
- i = legacyStringFields.iterator();
- while (i.hasNext()) {
- field = i.next();
- if (fields.contains(field)) {
- this.addString(field, doc.get(field));
- };
- };
+ IndexableField iField = doc.getField(name);
+ // Requested fields that are missing from the document are skipped
+ if (iField == null)
+ continue;
- i = legacyDateFields.iterator();
- while (i.hasNext()) {
- field = i.next();
- if (fields.contains(field)) {
- this.addDate(field, doc.get(field));
- };
- };
-
- // Legacy
- if (fields.contains("license"))
- this.addString("availability", doc.get("license"));
+ // The meta field (including its type) is built from the indexed field
+ mFields.add(iField);
+ // Legacy: expose "license" under the key "availability" as well
+ if ("license".equals(name))
+ this.addString("availability", doc.get("license"));
+
+ };
};
-
+
/**
* Populate document meta information with information coming from
@@ -368,82 +340,24 @@
@JsonAnyGetter
public Map<String, JsonNode> getLegacyMetaFields () {
- Iterator mfIterator = mFields.iterator();
+
+ Iterator<MetaField> mfIterator = mFields.iterator();
HashMap<String, JsonNode> map = new HashMap<>();
- String field;
- Iterator<String> i = legacyDateFields.iterator();
- while (i.hasNext()) {
- field = i.next();
- if (mFields.contains(field)) {
- KrillDate date = this.getFieldValueAsDate(field);
- if (date != null) {
- String dateStr = date.toDisplay();
- if (dateStr.length() != 0) {
- map.put(
- field,
- new TextNode(dateStr)
- );
- };
- };
- };
- };
-
- i = legacyStoredFields.iterator();
- while (i.hasNext()) {
- field = i.next();
- if (mFields.contains(field)) {
- String value = this.getFieldValue(field);
- if (value != null) {
- map.put(
- field,
- new TextNode(this.getFieldValue(field))
- );
- };
- };
- };
-
- i = legacyTextFields.iterator();
- while (i.hasNext()) {
- field = i.next();
- if (mFields.contains(field)) {
- String value = this.getFieldValue(field);
- if (value != null) {
- map.put(
- field,
- new TextNode(value)
- );
- };
- };
- };
-
- i = legacyStringFields.iterator();
- while (i.hasNext()) {
- field = i.next();
- if (mFields.contains(field)) {
- String value = this.getFieldValue(field);
- if (value != null) {
- map.put(
- field,
- new TextNode(value)
- );
- };
- };
- };
-
- i = legacyKeywordsFields.iterator();
- while (i.hasNext()) {
- field = i.next();
- if (mFields.contains(field)) {
- String value = this.getFieldValue(field);
- if (value != null) {
- map.put(
- field,
- new TextNode(value)
- );
- };
- };
+ while (mfIterator.hasNext()) {
+ String mfs = mfIterator.next().key;
+ if (legacyDateFields.contains(mfs) ||
+ legacyStoredFields.contains(mfs) ||
+ legacyTextFields.contains(mfs) ||
+ legacyStringFields.contains(mfs) ||
+ legacyKeywordsFields.contains(mfs)
+ ) {
+ String value = this.getFieldValue(mfs);
+ // Dates are covered by the check above; fields without a value are left out
+ if (value != null)
+ map.put(mfs, new TextNode(value));
+ }
+ };
return map;
@@ -513,8 +427,11 @@
public String getFieldValue (String field) {
MetaField mf = mFields.get(field);
- if (mf != null) {
- return mFields.get(field).values.get(0);
+ if (mf != null && mf.values.size() > 0) {
+ return String.join(
+ " ",
+ mf.values
+ );
};
return null;
@@ -533,8 +450,10 @@
@JsonIgnore
public void addString (String key, String value) {
+ if (value == null)
+ return;
+
mFields.add(
- key,
new MetaField(
key,
"type:string",
@@ -545,8 +464,10 @@
@JsonIgnore
public void addStored (String key, String value) {
+ if (value == null)
+ return;
+
mFields.add(
- key,
new MetaField(
key,
"type:store",
@@ -554,11 +475,13 @@
)
);
};
-
+
@JsonIgnore
public void addKeywords (String key, String value) {
+ if (value == null)
+ return;
+
mFields.add(
- key,
new MetaField(
key,
"type:keywords",
@@ -569,8 +492,10 @@
@JsonIgnore
public void addText (String key, String value) {
+ if (value == null)
+ return;
+
mFields.add(
- key,
new MetaField(
key,
"type:text",
@@ -581,9 +506,11 @@
@JsonIgnore
public void addDate (String key, String value) {
+ if (value == null)
+ return;
+
KrillDate date = new KrillDate(value);
mFields.add(
- key,
new MetaField(
key,
"type:date",
@@ -591,5 +518,4 @@
)
);
};
-
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index 656f77b..b6b3292 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -100,12 +100,14 @@
@Override
public void addDate (String key, String value) {
+ if (value == null)
+ return;
+
KrillDate date = new KrillDate(value);
if (date != null) {
this.addInt(key, date.toString());
};
mFields.add(
- key,
new MetaField(
key,
"type:date",
@@ -116,8 +118,10 @@
@Override
public void addText (String key, String value) {
+ if (value == null)
+ return;
+
mFields.add(
- key,
new MetaField(
key,
"type:text",
@@ -130,8 +134,10 @@
@Override
public void addKeywords (String key, String value) {
+ if (value == null)
+ return;
+
mFields.add(
- key,
new MetaField(
key,
"type:keywords",
@@ -144,8 +150,10 @@
@Override
public void addString (String key, String value) {
+ if (value == null)
+ return;
+
mFields.add(
- key,
new MetaField(
key,
"type:string",
@@ -156,8 +164,10 @@
};
public void addAttachement (String key, String value) {
+ if (value == null)
+ return;
+
mFields.add(
- key,
new MetaField(
key,
"type:attachement",
@@ -169,8 +179,10 @@
@Override
public void addStored (String key, String value) {
+ if (value == null)
+ return;
+
mFields.add(
- key,
new MetaField(
key,
"type:store",
@@ -183,7 +195,6 @@
public void addStored (String key, int value) {
mFields.add(
- key,
new MetaField(
key,
"type:store",
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaField.java b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
index 270d0f9..e6d6ca2 100644
--- a/src/main/java/de/ids_mannheim/korap/response/MetaField.java
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
@@ -26,14 +26,26 @@
this.key = key;
};
+ public MetaField (String key, String type) {
+ this.key = key;
+ this.type = type;
+ };
public MetaField (String key, String type, String value) {
this.key = key;
this.type = type;
this.values.add(value);
};
-
+ /**
+ * Append a value to this meta field and return the field itself, so calls can be chained.
+ */
+ public MetaField addValue (String value) {
+ this.values.add(value);
+ return this;
+ };
+
+
/**
* Create JsonNode
*/
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaFieldsExt.java b/src/main/java/de/ids_mannheim/korap/response/MetaFieldsExt.java
index 91dab5a..460ae15 100644
--- a/src/main/java/de/ids_mannheim/korap/response/MetaFieldsExt.java
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaFieldsExt.java
@@ -51,8 +51,34 @@
/**
* Add field to collection
*/
- public void add (IndexableField iField) {
-
+ public MetaField add (IndexableField iField) {
+ MetaField mf = metaFieldFromIndexableField(iField);
+
+ // Ignore non-stored fields
+ if (mf == null)
+ return null;
+
+ fieldsMap.put(mf.key, mf);
+ return mf;
+ };
+
+
+ /**
+ * Add a prepared meta field to the collection under its own key; returns the field, or null if none was given.
+ */
+ public MetaField add (MetaField mf) {
+ // Nothing to add
+ if (mf == null)
+ return null;
+
+ fieldsMap.put(mf.key, mf);
+ return mf;
+ };
+
+
+ // Build a MetaField from an indexable field; returns null for non-stored fields.
+ // The field type needs to be restored heuristically - though that's not very elegant
+ public static MetaField metaFieldFromIndexableField (IndexableField iField) {
IndexableFieldType iFieldType = iField.fieldType();
// Field type needs to be restored heuristically
@@ -60,19 +86,9 @@
// Ignore non-stored fields
if (!iFieldType.stored())
- return;
+ return null;
MetaField mf = new MetaField(iField.name());
-
- // Reuse existing metafield
- if (fieldsMap.containsKey(mf.key)) {
- mf = fieldsMap.get(mf.key);
- }
-
- // Add new field
- else {
- fieldsMap.put(mf.key, mf);
- };
// TODO: Check if metaField exists for that field
@@ -83,31 +99,29 @@
if (n != null) {
// Check if key indicates a date
- Matcher dateMatcher = dateKeyPattern.matcher(mf.key);
+ Matcher dateMatcher = dateKeyPattern.matcher(iField.name());
if (dateMatcher.matches()) {
- mf.type = "type:date";
-
- // Check structure with KrillDate
- KrillDate date = new KrillDate(n.toString());
+ mf.type = "type:date";
+ KrillDate date = new KrillDate(n.toString());
if (date != null) {
// Serialize withz dash separation
mf.values.add(date.toDisplay());
};
- }
+ }
// Field is a number
else {
- mf.type = "type:number";
- mf.values.add(n.toString());
+ mf.values.add(n.toString());
};
}
// Field has a textual value
else if (s != null) {
- // Stored
+ // Stored
if (iFieldType.indexOptions() == IndexOptions.NONE) {
+
String value = s.toString();
if (value.startsWith("data:")) {
mf.type = "type:attachement";
@@ -116,6 +130,7 @@
mf.type = "type:store";
};
mf.values.add(value);
+ return mf;
}
// Keywords
@@ -147,6 +162,12 @@
mf.values.add(s.toString());
}
+ // Special treatment for legacy indices
+ else if (mf.key.equals("UID")) {
+ mf.type = "type:integer";
+ mf.values.add(s.toString());
+ }
+
// String
else {
mf.values.add(s.toString());
@@ -156,17 +177,10 @@
else {
log.error("Unknown field type {}", iField.name());
};
- };
+ mf.values.removeAll(Collections.singleton(null));
- /**
- * Add field to collection
- *
- * @param key
- * The key of the field
- */
- public void add (String key, MetaField mf) {
- fieldsMap.put(key, mf);
+ return mf;
};
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
index 59887bd..edede59 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
@@ -172,6 +172,7 @@
assertEquals(
"base base/paragraphs base/sentences connexor connexor/morpho connexor/phrase connexor/sentences connexor/syntax corenlp corenlp/constituency corenlp/morpho corenlp/namedentities corenlp/sentences glemm glemm/morpho mate mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences xip xip/constituency xip/morpho xip/sentences",
res.at("/matches/0/foundries").asText());
+
assertEquals("Goethe-Korpus",
res.at("/matches/0/corpusTitle").asText());
assertEquals("QAO-NC", res.at("/matches/0/availability").asText());