blob: 2979822bed17e4a33b11019a2d396266e719a431 [file] [log] [blame]
package de.ids_mannheim.korap.index;
import java.util.*;
import de.ids_mannheim.korap.util.KrillDate;
import static de.ids_mannheim.korap.util.KrillString.*;
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.response.Response;
import de.ids_mannheim.korap.response.MetaField;
import de.ids_mannheim.korap.response.MetaFieldsObj;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import com.fasterxml.jackson.annotation.*;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.TextNode;
/*
* TODO: Author and textClass may be arrays!
*/
/**
* Abstract class representing a document in the
* Krill index.
*
* This model is rather specific to DeReKo data and
* should be considered experimental. It will be replaced
* by a more agnostic model, in which fields (e.g. string
* fields) will be combined with a prefix.
* For example, d:pubDate will mean: a field with the key
* "pubDate" of type date.
*
* @author diewald
*/
@JsonInclude(Include.NON_EMPTY)
// @JsonIgnoreProperties(ignoreUnknown = true)
public abstract class AbstractDocument extends Response {
// Jackson mapper used by toJsonNode() to convert this object into a JSON tree.
ObjectMapper mapper = new ObjectMapper();
// Primary data of the document; stays null until setPrimaryData() is called.
private String primaryData;
// Lookup tables of legacy meta data keys, grouped by the type of meta
// field they are turned into by setLegacyMetaField(). The tables are
// only read after initialisation, hence they are declared final.

// Keys added as "type:string" fields.
private static final Set<String> legacyStringFields =
    new HashSet<>(Arrays.asList(
        "pubPlace",
        "textSigle",
        "docSigle",
        "corpusSigle",
        "textType",
        "textTypeArt",
        "textTypeRef",
        "textColumn",
        "textDomain",
        "availability",
        "language",
        "corpusID", // Deprecated!
        "ID" // Deprecated!
    ));

// Keys added as "type:text" fields.
private static final Set<String> legacyTextFields =
    new HashSet<>(Arrays.asList(
        "author",
        "title",
        "subTitle",
        "corpusTitle",
        "corpusSubTitle",
        "corpusAuthor",
        "docTitle",
        "docSubTitle",
        "docAuthor"
    ));

// Keys added as "type:keywords" fields.
private static final Set<String> legacyKeywordsFields =
    new HashSet<>(Arrays.asList(
        "textClass",
        "foundries",
        "keywords"
    ));

// Keys added as "type:store" fields.
private static final Set<String> legacyStoredFields =
    new HashSet<>(Arrays.asList(
        "docEditor",
        "tokenSource",
        "layerInfos",
        "publisher",
        "editor",
        "fileEditionStatement",
        "biblEditionStatement",
        "reference",
        "corpusEditor"
    ));

// Keys added as "type:date" fields.
private static final Set<String> legacyDateFields =
    new HashSet<>(Arrays.asList(
        "pubDate",
        "creationDate"
    ));
// Numeric identifiers of the document. The fields themselves are hidden
// from Jackson; UID is exposed through getUID() instead.
// NOTE(review): internalDocID and localDocID are not used in this class -
// presumably index-internal document ids; confirm against callers.
@JsonIgnore
public int internalDocID, localDocID, UID;
// Container for all meta data fields of the document, filled by
// populateFields() and the add*() methods. Hidden from Jackson.
@JsonIgnore
public MetaFieldsObj mFields = new MetaFieldsObj();
/**
 * Populate the document with the primary data and the meta
 * information of an index document.
 *
 * The names of all fields of the index document are collected
 * and passed on to
 * {@link #populateDocument(Document, String, List)}.
 *
 * @param doc
 *        The index document to read from.
 * @param field
 *        Name of the primary data field.
 */
public void populateDocument (Document doc, String field) {
    List<String> names = new ArrayList<>(32);
    for (IndexableField indexed : doc.getFields()) {
        names.add(indexed.name());
    }
    this.populateDocument(doc, field, names);
};
/**
 * Populate the document with the primary data and a selection
 * of meta information of an index document.
 *
 * @param doc
 *        The index document to read from.
 * @param field
 *        Name of the primary data field. If null, the primary
 *        data is left untouched.
 * @param fields
 *        Names of the fields to take over from the index
 *        document.
 */
public void populateDocument (Document doc, String field,
                              List<String> fields) {
    // The primary data is optional - only read it when a field is named
    if (field != null) {
        String primary = doc.get(field);
        this.setPrimaryData(primary);
    }
    this.populateFields(doc, fields);
};
/**
 * Populate the meta fields of the document with all fields
 * of an index document.
 *
 * The field names are passed on in the order the index
 * document reports them.
 *
 * @param doc
 *        The index document to read from.
 */
public void populateFields (Document doc) {
    ArrayList<String> names = new ArrayList<>(32);
    for (IndexableField indexed : doc.getFields()) {
        names.add(indexed.name());
    }
    // TODO: Sort alphabetically!
    this.populateFields(doc, names);
};
/**
 * Populate the meta fields of the document with the named
 * fields of an index document.
 *
 * The UID (if requested) is set via {@link #setUID(String)}.
 * The field order is reset and rebuilt from the given names;
 * "tokens" and "UID" are never part of it. Fields that are
 * already set, or that do not exist in the index document,
 * are not added again. A "license" field is additionally
 * mirrored as the legacy field "availability".
 *
 * @param doc
 *        The index document to read from.
 * @param fields
 *        Names of the fields to take over.
 */
public void populateFields (Document doc, List<String> fields) {
    if (fields.contains("UID")) {
        this.setUID(doc.get("UID"));
    }

    mFields.fieldsOrder = new ArrayList<>(16);

    for (String name : fields) {

        // Remember - never serialize "tokens"
        if (name.equals("tokens") || name.equals("UID"))
            continue;

        // The name is recorded in the order even if the field
        // is skipped below
        mFields.fieldsOrder.add(name);

        // Ignore fields already set
        if (mFields.contains(name))
            continue;

        IndexableField iField = doc.getField(name);
        if (iField == null)
            continue;

        mFields.add(iField);

        // Legacy
        if (name.equals("license"))
            this.addString("availability", doc.get("license"));
    }
};
/**
 * Get the unique identifier of the document.
 *
 * Serialized under the JSON key "UID".
 *
 * @return The unique identifier as an integer
 *         (0 if it was never set).
 */
@JsonProperty("UID")
public int getUID () {
    return UID;
};
/**
 * Set the unique identifier of the document.
 *
 * A value of 0 is ignored. Any other value is stored and
 * additionally added as a string meta field "UID".
 *
 * @param UID
 *        The unique identifier of the document as an integer.
 */
public void setUID (int UID) {
    // 0 means "no UID"
    if (UID == 0)
        return;

    this.UID = UID;
    this.addString("UID", Integer.toString(UID));
};
/**
 * Set the unique identifier of the document.
 *
 * A null value is ignored. Any other value is parsed, stored
 * and additionally added as a string meta field "UID" in its
 * normalized integer form. Unlike {@link #setUID(int)}, a
 * parsed value of 0 is not filtered out.
 *
 * @param UID
 *        The unique identifier of the document as a
 *        string representing an integer.
 * @throws NumberFormatException
 *         if the string does not represent an integer.
 */
public void setUID (String UID) throws NumberFormatException {
    if (UID == null)
        return;

    int parsed = Integer.parseInt(UID);
    this.UID = parsed;
    this.addString("UID", Integer.toString(parsed));
};
/**
 * Get the primary data of the document.
 *
 * @return The primary data of the document as a string,
 *         or the empty string if none is set.
 */
@JsonIgnore
public String getPrimaryData () {
    return (this.primaryData != null) ? this.primaryData : "";
};
/**
 * Get the primary data of the document,
 * starting with a given offset.
 *
 * The stored primary data is handed to codePointSubstring()
 * as is, i.e. without the null-to-empty-string replacement
 * done by {@link #getPrimaryData()}.
 *
 * @param startOffset
 *        The starting offset.
 *        NOTE(review): presumably counted in code points, as the
 *        helper name suggests - confirm against KrillString.
 * @return The substring of the primary data of the document
 *         as a string.
 */
@JsonIgnore
public String getPrimaryData (int startOffset) {
    final String data = this.primaryData;
    return codePointSubstring(data, startOffset);
};
/**
 * Get the primary data of the document between
 * two given offsets.
 *
 * The stored primary data is handed to codePointSubstring()
 * as is, i.e. without the null-to-empty-string replacement
 * done by {@link #getPrimaryData()}.
 *
 * @param startOffset
 *        The starting offset.
 * @param endOffset
 *        The ending offset.
 *        NOTE(review): both offsets are presumably counted in
 *        code points, as the helper name suggests - confirm
 *        against KrillString.
 * @return The substring of the primary data of the document
 *         as a string.
 */
@JsonIgnore
public String getPrimaryData (int startOffset, int endOffset) {
    final String data = this.primaryData;
    return codePointSubstring(data, startOffset, endOffset);
};
/**
 * Set the primary data of the document.
 *
 * The value is stored as given; no copy or check is made.
 *
 * @param primary
 *        The primary data of the document as a string.
 */
public void setPrimaryData (String primary) {
    primaryData = primary;
};
/**
 * Get the length of the primary data of the document.
 *
 * The length is the value of {@link String#length()}, i.e. it
 * is counted in UTF-16 code units. A document without primary
 * data has length 0, in line with {@link #getPrimaryData()}
 * returning the empty string in that case.
 *
 * @return The length of the primary data of the document as an
 *         integer.
 */
@JsonIgnore
public int getPrimaryDataLength () {
    // No primary data set - treat as empty instead of
    // failing with a NullPointerException
    if (this.primaryData == null)
        return 0;

    return this.primaryData.length();
};
/**
 * Get the text sigle of the document.
 *
 * @return The value of the meta field "textSigle" as a string,
 *         or null if the field is not set.
 */
@JsonIgnore
public String getTextSigle () {
    return getFieldValue("textSigle");
};
/**
 * Get the document sigle of the document.
 *
 * @return The value of the meta field "docSigle" as a string,
 *         or null if the field is not set.
 */
@JsonIgnore
public String getDocSigle () {
    return getFieldValue("docSigle");
};
/**
 * Get the corpus sigle of the document.
 *
 * @return The value of the meta field "corpusSigle" as a string,
 *         or null if the field is not set.
 */
@JsonIgnore
public String getCorpusSigle () {
    return getFieldValue("corpusSigle");
};
/**
 * Get the availability of the document.
 *
 * @return The value of the meta field "availability" as a
 *         string, or null if the field is not set.
 * @deprecated Legacy accessor; read the field via
 *             {@link #getFieldValue(String)} instead.
 */
@Deprecated
@JsonIgnore
public String getAvailability () {
    return getFieldValue("availability");
};
/**
 * Get the corpus ID of the document.
 *
 * Serialized under the JSON key "corpusID".
 *
 * @return The value of the meta field "corpusID" as a string,
 *         or null if the field is not set.
 * @deprecated The field "corpusID" is a deprecated legacy field.
 */
@Deprecated
@JsonProperty("corpusID")
public String getCorpusID () {
    return getFieldValue("corpusID");
};
/**
 * Get the ID of the document.
 *
 * Serialized under the JSON key "ID".
 *
 * @return The value of the meta field "ID" as a string,
 *         or null if the field is not set.
 * @deprecated The field "ID" is a deprecated legacy field.
 */
@Deprecated
@JsonProperty("ID")
public String getID () {
    return getFieldValue("ID");
};
/**
 * Get all legacy meta fields of the document as a map
 * from field key to a JSON text node.
 *
 * A meta field is part of the result if its key is one of
 * the known legacy keys (string, text, keywords, stored or
 * date fields) and {@link #getFieldValue(String)} yields a
 * value for it.
 *
 * @return A new map of the legacy meta fields; empty if
 *         there are none.
 * @deprecated Only kept for the legacy serialization.
 */
//@JsonAnyGetter
@Deprecated
@JsonIgnore
public Map<String, JsonNode> getLegacyMetaFields () {
    Map<String, JsonNode> legacy = new HashMap<>();

    Iterator<MetaField> metaIter = mFields.iterator();
    while (metaIter.hasNext()) {
        MetaField mf = metaIter.next();
        if (mf == null)
            continue;

        String key = mf.key;
        String value = this.getFieldValue(key);
        if (value == null)
            continue;

        // Each legacy table is consulted exactly once
        boolean isLegacy =
            legacyDateFields.contains(key) ||
            legacyStoredFields.contains(key) ||
            legacyTextFields.contains(key) ||
            legacyStringFields.contains(key) ||
            legacyKeywordsFields.contains(key);

        if (isLegacy)
            legacy.put(key, new TextNode(value));
    }

    return legacy;
}
/**
 * Set a legacy meta field from a JSON property that has no
 * dedicated setter.
 *
 * The key decides on the type of the meta field that is added
 * (string, text, keywords, stored or date). The key "license"
 * is added as the string field "availability". All other
 * keys are silently ignored.
 *
 * @param name
 *        The key of the JSON property.
 * @param value
 *        The value of the JSON property; its text
 *        representation is used.
 */
@JsonAnySetter
public void setLegacyMetaField (String name, JsonNode value) {

    // Treat legacy string fields
    if (legacyStringFields.contains(name)) {
        this.addString(name, value.asText());
        return;
    }

    // Treat legacy text fields
    if (legacyTextFields.contains(name)) {
        this.addText(name, value.asText());
        return;
    }

    // Treat legacy keyword fields
    if (legacyKeywordsFields.contains(name)) {
        this.addKeywords(name, value.asText());
        return;
    }

    // Treat legacy stored fields
    if (legacyStoredFields.contains(name)) {
        this.addStored(name, value.asText());
        return;
    }

    // Treat legacy date fields
    if (legacyDateFields.contains(name)) {
        this.addDate(name, value.asText());
        return;
    }

    if (name.equals("license")) {
        this.addString("availability", value.asText());
        return;
    }

    // Temporarily - legacy "store" values introduced for Sgbr
    // are accepted but not stored yet.
    // TODO: Store all values

    // Unknown fields are ignored without any notice.
};
/**
 * Serialize the document as a {@link JsonNode}.
 *
 * The serialization of the super class is extended by (and,
 * for equal keys, overwritten with) the Jackson serialization
 * of this object. An unset UID (0) is removed from the result.
 *
 * @return {@link JsonNode} representation of the document
 */
@Override
public JsonNode toJsonNode () {
    ObjectNode json = (ObjectNode) super.toJsonNode();

    ObjectNode own = (ObjectNode) mapper.valueToTree(this);
    json.setAll(own);

    // A UID of 0 means "not set" and is not serialized
    if (this.getUID() == 0) {
        json.remove("UID");
    }

    return json;
};
/**
 * Get the value of a meta field as a single string.
 *
 * Multiple values of the field are joined with a single
 * space.
 *
 * @param field
 *        The key of the meta field.
 * @return The joined values of the field, or null if the
 *         field is not set or has no values.
 */
@JsonIgnore
public String getFieldValue (String field) {
    MetaField mf = mFields.get(field);

    // Unknown or empty fields have no value
    if (mf == null || mf.values.size() <= 0)
        return null;

    return String.join(" ", mf.values);
};
/**
 * Get the value of a meta field as a date.
 *
 * @param field
 *        The key of the meta field.
 * @return A {@link KrillDate} constructed from the string value
 *         of the field, or null if the field has no value.
 */
@JsonIgnore
public KrillDate getFieldValueAsDate (String field) {
    String date = this.getFieldValue(field);
    return (date == null) ? null : new KrillDate(date);
};
/**
 * Add a meta field of type "type:string" to the document.
 *
 * @param key
 *        The key of the meta field.
 * @param value
 *        The value of the meta field; null values are ignored.
 */
@JsonIgnore
public void addString (String key, String value) {
    if (value != null) {
        MetaField field = new MetaField(key, "type:string", value);
        mFields.add(field);
    }
};
/**
 * Add a meta field of type "type:integer" to the document.
 *
 * The value is stored as given; it is not checked to be
 * numeric.
 *
 * @param key
 *        The key of the meta field.
 * @param value
 *        The value of the meta field as a string;
 *        null values are ignored.
 */
@JsonIgnore
public void addInt (String key, String value) {
    if (value != null) {
        MetaField field = new MetaField(key, "type:integer", value);
        mFields.add(field);
    }
};
/**
 * Add a meta field of type "type:integer" to the document.
 *
 * @param key
 *        The key of the meta field.
 * @param value
 *        The value of the meta field as an integer.
 */
@JsonIgnore
public void addInt (String key, int value) {
    String asString = Integer.toString(value);
    this.addInt(key, asString);
};
/**
 * Add a meta field of type "type:store" to the document.
 *
 * @param key
 *        The key of the meta field.
 * @param value
 *        The value of the meta field; null values are ignored.
 */
@JsonIgnore
public void addStored (String key, String value) {
    if (value != null) {
        MetaField field = new MetaField(key, "type:store", value);
        mFields.add(field);
    }
};
/**
 * Add a meta field of type "type:attachement" to the document.
 *
 * The spelling "attachement" is part of both the method name
 * and the type identifier and is kept as is.
 *
 * @param key
 *        The key of the meta field.
 * @param value
 *        The value of the meta field; null values are ignored.
 */
@JsonIgnore
public void addAttachement (String key, String value) {
    if (value != null) {
        MetaField field = new MetaField(key, "type:attachement", value);
        mFields.add(field);
    }
};
/**
 * Add a meta field of type "type:keywords" to the document.
 *
 * @param key
 *        The key of the meta field.
 * @param value
 *        The value of the meta field; null values are ignored.
 */
@JsonIgnore
public void addKeywords (String key, String value) {
    if (value != null) {
        MetaField field = new MetaField(key, "type:keywords", value);
        mFields.add(field);
    }
};
/**
 * Add a meta field of type "type:text" to the document.
 *
 * @param key
 *        The key of the meta field.
 * @param value
 *        The value of the meta field; null values are ignored.
 */
@JsonIgnore
public void addText (String key, String value) {
    if (value != null) {
        MetaField field = new MetaField(key, "type:text", value);
        mFields.add(field);
    }
};
/**
 * Add a meta field of type "type:date" to the document.
 *
 * The value is parsed as a {@link KrillDate} and stored in
 * the form returned by its toDisplay() method.
 *
 * @param key
 *        The key of the meta field.
 * @param value
 *        The date as a string; null values are ignored.
 */
@JsonIgnore
public void addDate (String key, String value) {
    if (value == null)
        return;

    // A constructor call never yields null, so the created
    // date needs no further null check.
    KrillDate date = new KrillDate(value);

    mFields.add(
        new MetaField(
            key,
            "type:date",
            date.toDisplay()
        )
    );
};
/**
 * Add a meta field of type "type:date" to the document.
 *
 * The integer is converted to its decimal string form and
 * handled like a date string.
 *
 * @param key
 *        The key of the meta field.
 * @param value
 *        The date as an integer.
 */
@JsonIgnore
public void addDate (String key, int value) {
    String asString = Integer.toString(value);
    this.addDate(key, asString);
};
};