Remove variable dependency for metadata field names and store all
in a MetaFields object. Also remove deprecated methods for
metadata field access.
Change-Id: I55f4ab7ada7c94a7b6c5211a43a6e5a9dd6c2260
diff --git a/Changes b/Changes
index 8a293e9..5f36b1d 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,9 @@
+0.58.4 2019-01-08
+ - [cleanup] Remove deprecated methods setLicense/getLicense,
+ setTokenization/getTokenization, setLayerInfo/getLayerInfo,
+ setField/getField (including json serialization)
+ (diewald)
+
0.58.3 2018-12-17
- [feature] Introduced attachements as meta data fields
(fixes #49) (diewald).
diff --git a/Readme.md b/Readme.md
index f8e95fd..47ddad2 100644
--- a/Readme.md
+++ b/Readme.md
@@ -98,7 +98,7 @@
**Authors**: [Nils Diewald](http://nils-diewald.de/),
[Eliza Margaretha](http://www1.ids-mannheim.de/direktion/personal/margaretha.html)
-Copyright (c) 2013-2018, [IDS Mannheim](http://ids-mannheim.de/), Germany
+Copyright (c) 2013-2019, [IDS Mannheim](http://ids-mannheim.de/), Germany
Krill is developed as part of the [KorAP](http://korap.ids-mannheim.de/)
Corpus Analysis Platform at the Institute for German Language
diff --git a/pom.xml b/pom.xml
index 3cb6196..9dc796b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
<groupId>de.ids_mannheim.korap</groupId>
<artifactId>Krill</artifactId>
- <version>0.58.3</version>
+ <version>0.58.4</version>
<packaging>jar</packaging>
<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollection.java b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
index 3b0bf20..3d8a3a6 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
@@ -300,7 +300,9 @@
// Filter based on string
else if (valtype.equals("type:string")) {
if (json.get("value").size() > 1){
- log.debug("koral:doc size " + json.get("value").size());
+ if (DEBUG) {
+ log.debug("koral:doc size " + json.get("value").size());
+ };
if (json.has("match")) {
match = json.get("match").asText();
}
diff --git a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
index 40547e3..07c4d8e 100644
--- a/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/AbstractDocument.java
@@ -5,6 +5,8 @@
import de.ids_mannheim.korap.util.KrillDate;
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.response.Response;
+import de.ids_mannheim.korap.response.MetaField;
+import de.ids_mannheim.korap.response.MetaFieldsExt;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
@@ -43,32 +45,12 @@
@JsonIgnore
public int internalDocID, localDocID, UID;
- private KrillDate pubDate,
- // newly added
- creationDate;
+ // private HashMap<String, String> fieldMap;
- private HashMap<String, String> fieldMap;
+ private MetaFieldsExt metaFields = new MetaFieldsExt();
- private String
-
- // No longer supported
- ID, corpusID, field, layerInfo, tokenization,
-
- // Still supported
- foundries, textClass, pubPlace,
-
- // Newly added for the corpus/doc/text distinction of DeReKo
- textSigle, docSigle, corpusSigle, title, subTitle, author, editor,
- docTitle, docSubTitle, docAuthor, docEditor, corpusTitle,
- corpusSubTitle, corpusAuthor, corpusEditor, textType, textTypeArt,
- textTypeRef, textColumn, textDomain, fileEditionStatement,
- biblEditionStatement, publisher, reference, language, availability,
- // pages,
- keywords,
-
- // Meta information regarding annotations
- tokenSource, layerInfos;
-
+ // Deprecated
+ private String ID, corpusID;
/**
* Populate document meta information with information coming from
@@ -108,10 +90,6 @@
this.setCorpusID(doc.get("corpusID"));
if (fields.contains("ID"))
this.setID(doc.get("ID"));
- if (fields.contains("tokenization"))
- this.setTokenization(doc.get("tokenization"));
- if (fields.contains("layerInfo"))
- this.setLayerInfo(doc.get("layerInfo"));
// valid
if (fields.contains("UID"))
@@ -188,19 +166,17 @@
if (fields.contains("language"))
this.setLanguage(doc.get("language"));
- // Deprecated
- if (fields.contains("license"))
- this.setAvailability(doc.get("license"));
- else if (fields.contains("availability"))
- this.setAvailability(doc.get("availability"));
- /*
- if (fields.contains("pages"))
- this.setPages(doc.get("pages"));
- */
if (fields.contains("biblEditionStatement"))
this.setBiblEditionStatement(doc.get("biblEditionStatement"));
if (fields.contains("fileEditionStatement"))
this.setFileEditionStatement(doc.get("fileEditionStatement"));
+
+ // Legacy
+ if (fields.contains("license"))
+ this.setAvailability(doc.get("license"));
+ else if (fields.contains("availability"))
+ this.setAvailability(doc.get("availability"));
+
};
@@ -217,7 +193,7 @@
*/
public void populateDocument (Document doc, String field,
Collection<String> fields) {
- this.setField(field);
+ // this.setField(field);
this.setPrimaryData(doc.get(field));
this.populateFields(doc, fields);
};
@@ -231,7 +207,10 @@
*/
@JsonIgnore
public KrillDate getPubDate () {
- return this.pubDate;
+ String pubDate = this.getFieldValue("pubDate");
+ if (pubDate == null)
+ return null;
+ return new KrillDate(pubDate);
};
@@ -243,8 +222,10 @@
*/
@JsonProperty("pubDate")
public String getPubDateString () {
- if (this.pubDate != null) {
- String date = this.pubDate.toDisplay();
+ KrillDate pubDate = this.getPubDate();
+
+ if (pubDate != null) {
+ String date = pubDate.toDisplay();
if (date.length() == 0)
return null;
return date;
@@ -261,21 +242,8 @@
* representation.
* @return A {@link KrillDate} object for chaining.
*/
- public KrillDate setPubDate (String date) {
- this.pubDate = new KrillDate(date);
- return this.pubDate;
- };
-
-
- /**
- * Set the publication date of the document.
- *
- * @param date
- * The date as a {@link KrillDate} object.
- * @return A {@link KrillDate} object for chaining.
- */
- public KrillDate setPubDate (KrillDate date) {
- return (this.pubDate = date);
+ public void setPubDate (String pubDate) {
+ this.addDateX("pubDate", pubDate);
};
@@ -287,7 +255,10 @@
*/
@JsonIgnore
public KrillDate getCreationDate () {
- return this.creationDate;
+ String creationDate = this.getFieldValue("creationDate");
+ if (creationDate == null)
+ return null;
+ return new KrillDate(creationDate);
};
@@ -299,8 +270,14 @@
*/
@JsonProperty("creationDate")
public String getCreationDateString () {
- if (this.creationDate != null)
- return this.creationDate.toDisplay();
+ KrillDate creationDate = this.getCreationDate();
+
+ if (creationDate != null) {
+ String date = creationDate.toDisplay();
+ if (date.length() == 0)
+ return null;
+ return date;
+ };
return null;
};
@@ -313,9 +290,8 @@
* representation.
* @return A {@link KrillDate} object for chaining.
*/
- public KrillDate setCreationDate (String date) {
- this.creationDate = new KrillDate(date);
- return this.creationDate;
+ public void setCreationDate (String creationDate) {
+ this.addDateX("creationDate", creationDate);
};
@@ -326,10 +302,11 @@
* The date as a {@link KrillDate} object.
* @return A {@link KrillDate} object for chaining.
*/
+ /*
public KrillDate setCreationDate (KrillDate date) {
return (this.creationDate = date);
};
-
+ */
/**
* Get the name of the author of the document.
@@ -337,7 +314,7 @@
* @return The name of the author as a string.
*/
public String getAuthor () {
- return this.author;
+ return this.getFieldValue("author");
};
@@ -348,7 +325,7 @@
* The name of the author as a string.
*/
public void setAuthor (String author) {
- this.author = author;
+ this.addTextX("author", author);
};
@@ -358,7 +335,7 @@
* @return The text class of the document as a string.
*/
public String getTextClass () {
- return this.textClass;
+ return this.getFieldValue("textClass");
};
@@ -369,7 +346,7 @@
* The text class of the document as a string.
*/
public void setTextClass (String textClass) {
- this.textClass = textClass;
+ this.addKeywordsX("textClass", textClass);
};
@@ -379,7 +356,7 @@
* @return The publication place of the document as a string.
*/
public String getPubPlace () {
- return this.pubPlace;
+ return this.getFieldValue("pubPlace");
};
@@ -390,7 +367,7 @@
* The publication place of the document as a string.
*/
public void setPubPlace (String pubPlace) {
- this.pubPlace = pubPlace;
+ this.addStringX("pubPlace", pubPlace);
};
@@ -439,7 +416,7 @@
* @return The title of the document as a string.
*/
public String getTitle () {
- return this.title;
+ return this.getFieldValue("title");
};
@@ -450,7 +427,7 @@
* The title of the document as a string.
*/
public void setTitle (String title) {
- this.title = title;
+ this.addTextX("title", title);
};
@@ -460,7 +437,7 @@
* @return The subtitle of the document as a string.
*/
public String getSubTitle () {
- return this.subTitle;
+ return this.getFieldValue("subTitle");
};
@@ -471,7 +448,7 @@
* The subtitle of the document as a string.
*/
public void setSubTitle (String subTitle) {
- this.subTitle = subTitle;
+ this.addTextX("subTitle", subTitle);
};
@@ -553,7 +530,7 @@
* @return The foundry information string.
*/
public String getFoundries () {
- return this.foundries;
+ return this.getFieldValue("foundries");
};
@@ -565,7 +542,7 @@
* The foundry information string.
*/
public void setFoundries (String foundries) {
- this.foundries = foundries;
+ this.addKeywordsX("foundries", foundries);
};
@@ -576,7 +553,7 @@
* @return The layer information string.
*/
public String getLayerInfos () {
- return this.layerInfos;
+ return this.getFieldValue("layerInfos");
};
@@ -588,7 +565,7 @@
* The layer information string.
*/
public void setLayerInfos (String layerInfos) {
- this.layerInfos = layerInfos;
+ this.addStoredX("layerInfos", layerInfos);
};
@@ -599,7 +576,7 @@
* @return The text sigle as a string.
*/
public String getTextSigle () {
- return this.textSigle;
+ return this.getFieldValue("textSigle");
};
@@ -611,7 +588,7 @@
* The text sigle as a string.
*/
public void setTextSigle (String textSigle) {
- this.textSigle = textSigle;
+ this.addStringX("textSigle", textSigle);
};
@@ -622,7 +599,7 @@
* @return The corpus sigle as a string.
*/
public String getCorpusSigle () {
- return this.corpusSigle;
+ return this.getFieldValue("corpusSigle");
};
@@ -634,7 +611,7 @@
* The corpus sigle as a string.
*/
public void setCorpusSigle (String corpusSigle) {
- this.corpusSigle = corpusSigle;
+ this.addStringX("corpusSigle", corpusSigle);
};
@@ -644,7 +621,7 @@
* @return The document sigle as a string.
*/
public String getDocSigle () {
- return this.docSigle;
+ return this.getFieldValue("docSigle");
};
@@ -655,7 +632,7 @@
* The document sigle as a string.
*/
public void setDocSigle (String docSigle) {
- this.docSigle = docSigle;
+ this.addStringX("docSigle", docSigle);
};
@@ -665,7 +642,7 @@
* @return The name of the publisher as a string.
*/
public String getPublisher () {
- return this.publisher;
+ return this.getFieldValue("publisher");
};
@@ -676,7 +653,7 @@
* The name of the publisher as a string.
*/
public void setPublisher (String publisher) {
- this.publisher = publisher;
+ this.addStoredX("publisher", publisher);
};
@@ -686,7 +663,7 @@
* @return The name of the editor as a string.
*/
public String getEditor () {
- return this.editor;
+ return this.getFieldValue("editor");
};
@@ -697,17 +674,17 @@
* The name of the editor as a string.
*/
public void setEditor (String editor) {
- this.editor = editor;
+ this.addStoredX("editor", editor);
};
-
+
/**
* Get the type of the text as a string.
*
* @return The type of the text as a string.
*/
public String getTextType () {
- return this.textType;
+ return this.getFieldValue("textType");
};
@@ -718,7 +695,7 @@
* The type of the text as a string.
*/
public void setTextType (String textType) {
- this.textType = textType;
+ this.addStringX("textType", textType);
};
@@ -728,7 +705,7 @@
* @return The type art of the text as a string.
*/
public String getTextTypeArt () {
- return this.textTypeArt;
+ return this.getFieldValue("textTypeArt");
};
@@ -739,7 +716,7 @@
* The type art of the text as a string.
*/
public void setTextTypeArt (String textTypeArt) {
- this.textTypeArt = textTypeArt;
+ this.addStringX("textTypeArt", textTypeArt);
};
@@ -750,7 +727,7 @@
* The type reference of the text as a string.
*/
public void setTextTypeRef (String textTypeRef) {
- this.textTypeRef = textTypeRef;
+ this.addStringX("textTypeRef", textTypeRef);
};
@@ -760,7 +737,7 @@
* @return The type reference of the text as a string.
*/
public String getTextTypeRef () {
- return this.textTypeRef;
+ return this.getFieldValue("textTypeRef");
};
@@ -770,7 +747,7 @@
* @return The column of the text as a string.
*/
public String getTextColumn () {
- return this.textColumn;
+ return this.getFieldValue("textColumn");
};
@@ -781,7 +758,7 @@
* The column of the text as a string.
*/
public void setTextColumn (String textColumn) {
- this.textColumn = textColumn;
+ this.addStringX("textColumn", textColumn);
};
@@ -791,7 +768,7 @@
* @return The domain of the text as a string.
*/
public String getTextDomain () {
- return this.textDomain;
+ return this.getFieldValue("textDomain");
};
@@ -802,39 +779,17 @@
* The domain of the text as a string.
*/
public void setTextDomain (String textDomain) {
- this.textDomain = textDomain;
+ this.addStringX("textDomain", textDomain);
};
- /**
- * Get the license of the text as a string.
- *
- * @return The license of the text as a string.
- */
- @Deprecated
- public String getLicense () {
- return this.availability;
- };
-
-
- /**
- * Set the license of the text as a string.
- *
- * @param license
- * The license of the text as a string.
- */
- @Deprecated
- public void setLicense (String license) {
- this.availability = license;
- };
-
/**
* Get the availability of the text as a string.
*
* @return The availability of the text as a string.
*/
public String getAvailability () {
- return this.availability;
+ return this.getFieldValue("availability");
};
@@ -845,38 +800,17 @@
* The availability of the text as a string.
*/
public void setAvailability (String availability) {
- this.availability = availability;
+ this.addStringX("availability", availability);
};
- /*
- * Get the page numbers of the text as a string.
- *
- * @return The page numbers of the text as a string.
- public String getPages () {
- return this.pages;
- };
- */
-
-
- /*
- * Set the page numbers of the text as a string.
- *
- * @param pages
- * The page numbers of the text as a string.
- *
- public void setPages (String pages) {
- this.pages = pages;
- };
- */
-
/**
* Get the file edition statement of the text as a string.
*
* @return The file edition statement of the text as a string.
*/
public String getFileEditionStatement () {
- return this.fileEditionStatement;
+ return this.getFieldValue("fileEditionStatement");
};
@@ -888,7 +822,7 @@
* of the text as a string.
*/
public void setFileEditionStatement (String fileEditionStatement) {
- this.fileEditionStatement = fileEditionStatement;
+ this.addStoredX("fileEditionStatement", fileEditionStatement);
};
@@ -899,7 +833,7 @@
* string.
*/
public String getBiblEditionStatement () {
- return this.biblEditionStatement;
+ return this.getFieldValue("biblEditionStatement");
};
@@ -911,7 +845,7 @@
* of the text as a string.
*/
public void setBiblEditionStatement (String biblEditionStatement) {
- this.biblEditionStatement = biblEditionStatement;
+ this.addStoredX("biblEditionStatement", biblEditionStatement);
};
@@ -921,7 +855,7 @@
* @return The reference of the text as a string.
*/
public String getReference () {
- return this.reference;
+ return this.getFieldValue("reference");
};
@@ -932,7 +866,7 @@
* The reference of the text as a string.
*/
public void setReference (String reference) {
- this.reference = reference;
+ this.addStoredX("reference", reference);
};
@@ -942,7 +876,7 @@
* @return The language of the text as a string.
*/
public String getLanguage () {
- return this.language;
+ return this.getFieldValue("language");
};
@@ -953,7 +887,7 @@
* The language of the text as a string.
*/
public void setLanguage (String language) {
- this.language = language;
+ this.addStringX("language", language);
};
@@ -963,7 +897,7 @@
* @return The corpus title of the text as a string.
*/
public String getCorpusTitle () {
- return this.corpusTitle;
+ return this.getFieldValue("corpusTitle");
};
@@ -974,7 +908,7 @@
* The corpus title of the text as a string.
*/
public void setCorpusTitle (String corpusTitle) {
- this.corpusTitle = corpusTitle;
+ this.addTextX("corpusTitle", corpusTitle);
};
@@ -984,7 +918,7 @@
* @return The corpus subtitle of the text as a string.
*/
public String getCorpusSubTitle () {
- return this.corpusSubTitle;
+ return this.getFieldValue("corpusSubTitle");
};
@@ -996,7 +930,7 @@
* text as a string.
*/
public void setCorpusSubTitle (String corpusSubTitle) {
- this.corpusSubTitle = corpusSubTitle;
+ this.addTextX("corpusSubTitle", corpusSubTitle);
};
@@ -1006,7 +940,7 @@
* @return The corpus author of the text as a string.
*/
public String getCorpusAuthor () {
- return this.corpusAuthor;
+ return this.getFieldValue("corpusAuthor");
};
@@ -1016,7 +950,7 @@
* @return The corpus author of the text as a string.
*/
public void setCorpusAuthor (String corpusAuthor) {
- this.corpusAuthor = corpusAuthor;
+ this.addTextX("corpusAuthor", corpusAuthor);
};
@@ -1026,7 +960,7 @@
* @return The corpus editor of the text as a string.
*/
public String getCorpusEditor () {
- return this.corpusEditor;
+ return this.getFieldValue("corpusEditor");
};
@@ -1037,7 +971,7 @@
* The corpus editor of the text as a string.
*/
public void setCorpusEditor (String corpusEditor) {
- this.corpusEditor = corpusEditor;
+ this.addStoredX("corpusEditor", corpusEditor);
};
@@ -1047,7 +981,7 @@
* @return The document title of the text as a string.
*/
public String getDocTitle () {
- return this.docTitle;
+ return this.getFieldValue("docTitle");
};
@@ -1058,7 +992,7 @@
* The document title of the text as a string.
*/
public void setDocTitle (String docTitle) {
- this.docTitle = docTitle;
+ this.addTextX("docTitle", docTitle);
};
@@ -1068,7 +1002,7 @@
* @return The subtitle of the document of the text as a string.
*/
public String getDocSubTitle () {
- return this.docSubTitle;
+ return this.getFieldValue("docSubTitle");
};
@@ -1080,7 +1014,7 @@
* text as a string.
*/
public void setDocSubTitle (String docSubTitle) {
- this.docSubTitle = docSubTitle;
+ this.addTextX("docSubTitle", docSubTitle);
};
@@ -1090,7 +1024,7 @@
* @return The author of the document of the text as a string.
*/
public String getDocAuthor () {
- return this.docAuthor;
+ return this.getFieldValue("docAuthor");
};
@@ -1101,7 +1035,7 @@
* The author of the document of the text as a string.
*/
public void setDocAuthor (String docAuthor) {
- this.docAuthor = docAuthor;
+ this.addTextX("docAuthor", docAuthor);
};
@@ -1111,7 +1045,7 @@
* @return The editor of the document of the text as a string.
*/
public String getDocEditor () {
- return this.docEditor;
+ return this.getFieldValue("docEditor");
};
@@ -1122,7 +1056,7 @@
* The editor of the document of the text as a string.
*/
public void setDocEditor (String docEditor) {
- this.docEditor = docEditor;
+ this.addStoredX("docEditor", docEditor);
};
@@ -1132,7 +1066,7 @@
* @return The keywords of the text as a string.
*/
public String getKeywords () {
- return this.keywords;
+ return this.getFieldValue("keywords");
};
@@ -1143,7 +1077,7 @@
* The keywords of the text as a string.
*/
public void setKeywords (String keywords) {
- this.keywords = keywords;
+ this.addKeywordsX("keywords", keywords);
};
@@ -1154,7 +1088,7 @@
* @return The tokenization information as a string.
*/
public String getTokenSource () {
- return this.tokenSource;
+ return this.getFieldValue("tokenSource");
};
@@ -1166,43 +1100,7 @@
* The tokenization information as a string.
*/
public void setTokenSource (String tokenSource) {
- this.tokenSource = tokenSource;
- };
-
-
- @Deprecated
- public void setTokenization (String tokenization) {
- this.tokenization = tokenization;
- };
-
-
- @Deprecated
- public String getTokenization () {
- return this.tokenization;
- };
-
-
- @Deprecated
- public void setLayerInfo (String layerInfo) {
- this.layerInfo = layerInfo;
- };
-
-
- @Deprecated
- public String getLayerInfo () {
- return this.layerInfo;
- };
-
-
- @Deprecated
- public void setField (String field) {
- this.field = field;
- };
-
-
- @Deprecated
- public String getField () {
- return this.field;
+ this.addStoredX("tokenSource", tokenSource);
};
@@ -1247,4 +1145,76 @@
return json;
};
+
+ @JsonIgnore
+ private String getFieldValue (String field) {
+ MetaField mf = metaFields.get(field);
+
+ if (mf != null) {
+ return metaFields.get(field).values.get(0);
+ };
+
+ return null;
+ };
+
+ @JsonIgnore
+ private void addStringX (String key, String value) {
+ metaFields.add(
+ key,
+ new MetaField(
+ key,
+ "type:string",
+ value
+ )
+ );
+ };
+
+ @JsonIgnore
+ private void addStoredX (String key, String value) {
+ metaFields.add(
+ key,
+ new MetaField(
+ key,
+ "type:attachement",
+ value
+ )
+ );
+ };
+
+ @JsonIgnore
+ private void addKeywordsX (String key, String value) {
+ metaFields.add(
+ key,
+ new MetaField(
+ key,
+ "type:keywords",
+ value
+ )
+ );
+ };
+
+ @JsonIgnore
+ private void addTextX (String key, String value) {
+ metaFields.add(
+ key,
+ new MetaField(
+ key,
+ "type:text",
+ value
+ )
+ );
+ };
+
+ @JsonIgnore
+ private void addDateX (String key, String value) {
+ metaFields.add(
+ key,
+ new MetaField(
+ key,
+ "type:date",
+ value
+ )
+ );
+ };
+
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
index 0d0f64f..b2c4e98 100644
--- a/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
+++ b/src/main/java/de/ids_mannheim/korap/index/FieldDocument.java
@@ -328,11 +328,13 @@
this.addKeyword("foundries", foundries);
super.setFoundries(foundries);
};
+ /*
if (field.containsKey("tokenization")) {
String tokenization = (String) field.get("tokenization");
this.addString("tokenization", tokenization);
super.setTokenization(tokenization);
};
+ */
this.addTV(fieldName, this.getPrimaryData(), mtts);
};
@@ -376,23 +378,23 @@
@JsonProperty("pubDate")
@Override
- public KrillDate setPubDate (String pubDate) {
- KrillDate date = super.setPubDate(pubDate);
+ public void setPubDate (String pubDate) {
+ super.setPubDate(pubDate);
+ KrillDate date = new KrillDate(pubDate);
if (date != null) {
this.addInt("pubDate", date.toString());
};
- return date;
};
@JsonProperty("creationDate")
@Override
- public KrillDate setCreationDate (String creationDate) {
- KrillDate date = super.setCreationDate(creationDate);
+ public void setCreationDate (String creationDate) {
+ super.setCreationDate(creationDate);
+ KrillDate date = new KrillDate(creationDate);
if (date != null) {
this.addInt("creationDate", date.toString());
};
- return date;
};
@@ -430,15 +432,6 @@
};
};
-
- // No longer supported
- @Override
- public void setLayerInfo (String layerInfo) {
- super.setLayerInfo(layerInfo);
- this.addStored("layerInfo", layerInfo);
- };
-
-
@Override
public void setLayerInfos (String layerInfos) {
super.setLayerInfos(layerInfos);
@@ -516,7 +509,6 @@
};
- @Override
@Deprecated
public void setLicense (String license) {
super.setAvailability(license);
@@ -530,15 +522,6 @@
this.addString("availability", availability);
};
- /*
- @Override
- public void setPages (String pages) {
- super.setPages(pages);
- this.addStored("pages", pages);
- };
- */
-
-
@Override
public void setFileEditionStatement (String fileEditionStatement) {
super.setFileEditionStatement(fileEditionStatement);
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaField.java b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
index bcedd95..270d0f9 100644
--- a/src/main/java/de/ids_mannheim/korap/response/MetaField.java
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaField.java
@@ -21,10 +21,19 @@
public String key;
public List<String> values = new ArrayList<>();
- public MetaField (String key) {
+
+ public MetaField (String key) {
this.key = key;
};
+
+ public MetaField (String key, String type, String value) {
+ this.key = key;
+ this.type = type;
+ this.values.add(value);
+ };
+
+
/**
* Create JsonNode
*/
@@ -35,7 +44,7 @@
json.put("key", this.key);
// Value is numerical
- if (this.type.equals("type:number")) {
+ if (this.type.equals("type:integer")) {
// Value is a list
if (this.values.size() > 1) {
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaFields.java b/src/main/java/de/ids_mannheim/korap/response/MetaFields.java
index 86607fd..e2c6f0e 100644
--- a/src/main/java/de/ids_mannheim/korap/response/MetaFields.java
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaFields.java
@@ -45,6 +45,10 @@
private Map<String, MetaField> fieldsMap = new HashMap<>();
+
+ public MetaFields () {};
+
+
public MetaFields (String id) {
this.addMessage(0, "Response format is temporary");
};
@@ -100,7 +104,7 @@
// Field is a number
else {
- mf.type = "type:number";
+ mf.type = "type:integer";
mf.values.add(n.toString());
};
}
@@ -149,6 +153,12 @@
mf.values.add(s.toString());
}
+ // Special treatment for legacy indices
+ else if (mf.key.equals("UID")) {
+ mf.type = "type:integer";
+ mf.values.add(s.toString());
+ }
+
// String
else {
mf.values.add(s.toString());
@@ -161,6 +171,22 @@
};
+ /**
+ * Add field to collection
+ */
+ public void add (String key, MetaField mf) {
+ fieldsMap.put(key, mf);
+ };
+
+
+ /**
+ * Get field from collection
+ */
+ public MetaField get (String key) {
+ return fieldsMap.get(key);
+ };
+
+
/**
* Serialize response as a {@link JsonNode}.
*
diff --git a/src/main/java/de/ids_mannheim/korap/response/MetaFieldsExt.java b/src/main/java/de/ids_mannheim/korap/response/MetaFieldsExt.java
new file mode 100644
index 0000000..0df6a7d
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/response/MetaFieldsExt.java
@@ -0,0 +1,176 @@
+package de.ids_mannheim.korap.response;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.annotation.*;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+
+import de.ids_mannheim.korap.index.AbstractDocument;
+import de.ids_mannheim.korap.util.KrillDate;
+
+import java.io.IOException;
+
+import de.ids_mannheim.korap.index.KeywordAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+import java.io.StringReader;
+
+import java.util.*;
+import java.util.regex.*;
+
+import org.apache.lucene.index.*;
+
+public class MetaFieldsExt {
+
+ // Logger
+ private final static Logger log = LoggerFactory.getLogger(MetaFields.class);
+
+ // This advises the Java compiler to ignore all logging calls
+ public static final boolean DEBUG = false;
+
+ // TODO:
+ // This is a temporary indicator to check
+ // whether a date field is a date
+ private static final Pattern dateKeyPattern = Pattern.compile(".*Date$");
+
+ // Mapper for JSON serialization
+ ObjectMapper mapper = new ObjectMapper();
+
+ private Map<String, MetaField> fieldsMap = new HashMap<>();
+
+
+ public MetaFieldsExt () {};
+
+
+ /**
+ * Add field to collection
+ */
+ public void add (IndexableField iField) {
+
+ IndexableFieldType iFieldType = iField.fieldType();
+
+ // Field type needs to be restored heuristically
+ // - though that's not very elegant
+
+ // Ignore non-stored fields
+ if (!iFieldType.stored())
+ return;
+
+ MetaField mf = new MetaField(iField.name());
+
+ // Reuse existing metafield
+ if (fieldsMap.containsKey(mf.key)) {
+ mf = fieldsMap.get(mf.key);
+ }
+
+ // Add new field
+ else {
+ fieldsMap.put(mf.key, mf);
+ };
+
+ // TODO: Check if metaField exists for that field
+
+ Number n = iField.numericValue();
+ String s = iField.stringValue();
+
+ // Field has numeric value (possibly a date)
+ if (n != null) {
+
+ // Check if key indicates a date
+ Matcher dateMatcher = dateKeyPattern.matcher(mf.key);
+ if (dateMatcher.matches()) {
+ mf.type = "type:date";
+
+ // Check structure with KrillDate
+ KrillDate date = new KrillDate(n.toString());
+ if (date != null) {
+
+ // Serialize with dash separation
+ mf.values.add(date.toDisplay());
+ };
+ }
+
+ // Field is a number
+ else {
+ mf.type = "type:number";
+ mf.values.add(n.toString());
+ };
+ }
+
+ // Field has a textual value
+ else if (s != null) {
+
+ // Stored
+ if (iFieldType.indexOptions() == IndexOptions.NONE) {
+ String value = s.toString();
+ if (value.startsWith("data:")) {
+ mf.type = "type:attachement";
+ }
+ else {
+ mf.type = "type:store";
+ };
+ mf.values.add(value);
+ }
+
+ // Keywords
+ else if (iFieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS) {
+ mf.type = "type:keywords";
+
+ // Analyze keywords
+ try {
+ StringReader reader = new StringReader(s.toString());
+ KeywordAnalyzer kwa = new KeywordAnalyzer();
+ TokenStream ts = kwa.tokenStream("-", reader);
+ CharTermAttribute term;
+ ts.reset();
+ while (ts.incrementToken()) {
+ term = ts.getAttribute(CharTermAttribute.class);
+ mf.values.add(term.toString());
+ };
+ ts.close();
+ reader.close();
+ }
+ catch (IOException e) {
+ log.error("Unable to split {}={}", iField.name(), s.toString());
+ }
+ }
+
+ // Text
+ else if (iFieldType.indexOptions() != IndexOptions.DOCS) {
+ mf.type = "type:text";
+ mf.values.add(s.toString());
+ }
+
+ // String
+ else {
+ mf.values.add(s.toString());
+ };
+ }
+
+ else {
+ log.error("Unknown field type {}", iField.name());
+ };
+ };
+
+
+ /**
+ * Add field to collection
+ */
+ public void add (String key, MetaField mf) {
+ fieldsMap.put(key, mf);
+ };
+
+
+ /**
+ * Get field from collection
+ */
+ public MetaField get (String key) {
+ return fieldsMap.get(key);
+ };
+};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestKrillDocument.java b/src/test/java/de/ids_mannheim/korap/index/TestKrillDocument.java
index 26f2202..b81c489 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestKrillDocument.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestKrillDocument.java
@@ -99,8 +99,8 @@
krd.setTextDomain("Comment");
assertEquals("Comment", krd.getTextDomain());
- krd.setLicense("cc");
- assertEquals("cc", krd.getLicense());
+ krd.setAvailability("cc");
+ assertEquals("cc", krd.getAvailability());
/*
krd.setPages("56-78");
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
index f643944..2c86d10 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
@@ -258,7 +258,7 @@
break;
case "zahl1":
- assertEquals("type:number", field.at("/type").asText());
+ assertEquals("type:integer", field.at("/type").asText());
assertEquals("koral:field", field.at("/@type").asText());
assertEquals(56, field.at("/value").asInt());
checkC++;
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index f87f4b1..1d6bfaf 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -221,7 +221,7 @@
km.getSnippetHTML());
res = mapper.readTree(km.toJsonString());
- assertEquals("tokens", res.at("/field").asText());
+ // assertEquals("tokens", res.at("/field").asText());
assertTrue(res.at("/startMore").asBoolean());
assertTrue(res.at("/endMore").asBoolean());
assertEquals("c1", res.at("/corpusID").asText());
@@ -294,7 +294,7 @@
true, (String) null, (String) null, true, true, true);
JsonNode res = mapper.readTree(km.toJsonString());
- assertEquals("tokens", res.at("/field").asText());
+ // assertEquals("tokens", res.at("/field").asText());
assertEquals("GOE_AGX.00002", res.at("/textSigle").asText());
assertEquals("Goethe, Johann Wolfgang von", res.at("/author").asText());
};
@@ -923,7 +923,7 @@
Match km = ki.getMatchInfo("match-c1!d4-p3-9", "tokens", null, null,
false, false);
JsonNode res = mapper.readTree(km.toJsonString());
- assertEquals("tokens", res.at("/field").asText());
+ // assertEquals("tokens", res.at("/field").asText());
assertTrue(res.at("/startMore").asBoolean());
assertTrue(res.at("/endMore").asBoolean());
assertEquals("c1", res.at("/corpusID").asText());
@@ -945,7 +945,7 @@
false, false);
JsonNode res = mapper.readTree(km.toJsonString());
- assertEquals("tokens", res.at("/field").asText());
+ // assertEquals("tokens", res.at("/field").asText());
assertTrue(res.at("/startMore").asBoolean());
assertTrue(res.at("/endMore").asBoolean());
assertEquals("c1", res.at("/corpusSigle").asText());
@@ -1064,7 +1064,7 @@
Match km = ki.getMatchInfo("match-ca1!da1-p7-10", "tokens", null, null,
false, false);
JsonNode res = mapper.readTree(km.toJsonString());
- assertEquals("tokens", res.at("/field").asText());
+ // assertEquals("tokens", res.at("/field").asText());
assertTrue(res.at("/startMore").asBoolean());
assertTrue(res.at("/endMore").asBoolean());
assertEquals("ca1", res.at("/corpusID").asText());
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
index bc12a15..87eb365 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
@@ -516,7 +516,7 @@
assertNull(fd.getTextColumn());
assertNull(fd.getTextDomain());
// assertEquals(fd.getPages(), "529-547");
- assertEquals(fd.getLicense(), "QAO-NC");
+ assertEquals(fd.getAvailability(), "QAO-NC");
assertEquals(fd.getCreationDate().toString(), "18200000");
assertEquals(fd.getPubDate().toString(), "19820000");
assertEquals(fd.getAuthor(), "Goethe, Johann Wolfgang von");
@@ -673,7 +673,7 @@
assertEquals(fd.getTextTypeRef(), "Tageszeitung");
assertEquals(fd.getTextDomain(), "Politik");
assertEquals(fd.getCreationDate().toString(), "19590219");
- assertEquals(fd.getLicense(), "ACA-NC-LC");
+ assertEquals(fd.getAvailability(), "ACA-NC-LC");
assertEquals(fd.getTextColumn(), "POLITIK");
// assertNull(fd.getPages());
assertEquals(fd.getTextClass(), "politik ausland");
@@ -1273,7 +1273,7 @@
assertNull(fd.getTextColumn());
assertNull(fd.getTextDomain());
// assertEquals(fd.getPages(), "529-547");
- assertEquals(fd.getLicense(), "QAO-NC");
+ assertEquals(fd.getAvailability(), "QAO-NC");
assertEquals(fd.getCreationDate().toString(), "18200000");
assertEquals(fd.getPubDate().toString(), "19820000");
assertEquals(fd.getAuthor(), "Goethe, Johann Wolfgang von");
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
index 4bf61c7..6a0f710 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestMetaFields.java
@@ -59,7 +59,6 @@
ObjectMapper mapper = new ObjectMapper();
JsonNode res = mapper.readTree(kr.toJsonString());
- // System.err.println(res.toString());
// mirror fields
assertEquals(9, res.at("/meta/count").asInt());
@@ -207,7 +206,7 @@
res.at("/matches/0/foundries").asText());
assertEquals("Goethe-Korpus",
res.at("/matches/0/corpusTitle").asText());
- assertEquals("QAO-NC", res.at("/matches/0/license").asText());
+ assertEquals("QAO-NC", res.at("/matches/0/availability").asText());
assertEquals("Goethe: Maximen und Reflexionen, (1827-1842)",
res.at("/matches/0/docTitle").asText());
assertEquals("1827", res.at("/matches/0/creationDate").asText());
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestResult.java b/src/test/java/de/ids_mannheim/korap/search/TestResult.java
index ac4f775..506d246 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestResult.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestResult.java
@@ -68,7 +68,7 @@
assertEquals("token", res.at("/meta/context/right/0").asText());
assertEquals(6, res.at("/meta/context/right/1").asInt());
- assertEquals("base", res.at("/matches/0/field").asText());
+ // assertEquals("base", res.at("/matches/0/field").asText());
/*
Probably a Jackson bug
assertTrue(res.at("/matches/0/startMore").asBoolean());
@@ -82,7 +82,7 @@
"<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-1 level-0\">a</mark></mark></span><span class=\"context-right\">bab</span>",
res.at("/matches/0/snippet").asText());
- assertEquals("base", res.at("/matches/6/field").asText());
+ // assertEquals("base", res.at("/matches/6/field").asText());
/*
Probably a Jackson bug
assertEquals(true, res.at("/matches/6/startMore").asBoolean());