| Nils Diewald | e4986d7 | 2015-02-27 17:35:00 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.index; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 2 | |
| 3 | import java.util.*; |
| 4 | |
| Nils Diewald | c383ed0 | 2015-02-26 21:35:22 +0000 | [diff] [blame] | 5 | import de.ids_mannheim.korap.util.KrillDate; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 6 | import de.ids_mannheim.korap.index.FieldDocument; |
| Nils Diewald | 0881e24 | 2015-02-27 17:31:01 +0000 | [diff] [blame] | 7 | import de.ids_mannheim.korap.response.Response; |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 8 | import de.ids_mannheim.korap.response.MetaField; |
| Akron | 50e5f61 | 2019-01-16 12:52:39 +0100 | [diff] [blame] | 9 | import de.ids_mannheim.korap.response.MetaFieldsObj; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 10 | |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 11 | import org.apache.lucene.document.Document; |
| 12 | import org.apache.lucene.index.IndexableField; |
| 13 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 14 | import com.fasterxml.jackson.annotation.*; |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 15 | import com.fasterxml.jackson.annotation.JsonInclude.Include; |
| 16 | import com.fasterxml.jackson.databind.JsonNode; |
| 17 | import com.fasterxml.jackson.databind.ObjectMapper; |
| 18 | import com.fasterxml.jackson.databind.node.ObjectNode; |
| 19 | |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 20 | import com.fasterxml.jackson.databind.node.TextNode; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 21 | |
/*
 * TODO: Author and textClass may be arrays!
 */
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 25 | |
/**
 * Abstract class representing a document in the
 * Krill index.
 *
 * This model is rather specific to DeReKo data and
 * should be considered experimental. It will be replaced
 * by a more agnostic model.
 * In that model, field keys (e.g. of string fields) will be
 * combined with a prefix denoting the field type.
 * For example, d:pubDate will mean: a field with the key "pubDate"
 * of type date.
 *
 * @author diewald
 */
| Akron | d7d7b1f | 2016-06-25 00:31:16 +0200 | [diff] [blame] | 39 | @JsonInclude(Include.NON_EMPTY) |
| Akron | 7410746 | 2019-01-10 11:32:54 +0100 | [diff] [blame] | 40 | // @JsonIgnoreProperties(ignoreUnknown = true) |
| Akron | be9638d | 2019-02-07 17:09:42 +0100 | [diff] [blame] | 41 | |
| Nils Diewald | 0881e24 | 2015-02-27 17:31:01 +0000 | [diff] [blame] | 42 | public abstract class AbstractDocument extends Response { |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 43 | ObjectMapper mapper = new ObjectMapper(); |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 44 | |
| Nils Diewald | 3e3cbf3 | 2015-02-06 21:30:49 +0000 | [diff] [blame] | 45 | private String primaryData; |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 46 | |
| Akron | 7410746 | 2019-01-10 11:32:54 +0100 | [diff] [blame] | 47 | private static HashSet<String> legacyStringFields = |
| 48 | new HashSet<String>(Arrays.asList( |
| 49 | "pubPlace", |
| 50 | "textSigle", |
| 51 | "docSigle", |
| 52 | "corpusSigle", |
| 53 | "textType", |
| 54 | "textTypeArt", |
| 55 | "textTypeRef", |
| 56 | "textColumn", |
| 57 | "textDomain", |
| 58 | "availability", |
| 59 | "language", |
| 60 | "corpusID", // Deprecated! |
| 61 | "ID" // Deprecated! |
| 62 | )); |
| 63 | |
| 64 | private static HashSet<String> legacyTextFields = |
| 65 | new HashSet<String>(Arrays.asList( |
| 66 | "author", |
| 67 | "title", |
| 68 | "subTitle", |
| 69 | "corpusTitle", |
| 70 | "corpusSubTitle", |
| 71 | "corpusAuthor", |
| 72 | "docTitle", |
| 73 | "docSubTitle", |
| 74 | "docAuthor" |
| 75 | )); |
| 76 | |
| 77 | private static HashSet<String> legacyKeywordsFields = |
| 78 | new HashSet<String>(Arrays.asList( |
| 79 | "textClass", |
| 80 | "foundries", |
| 81 | "keywords" |
| 82 | )); |
| 83 | |
| 84 | private static HashSet<String> legacyStoredFields = |
| 85 | new HashSet<String>(Arrays.asList( |
| 86 | "docEditor", |
| 87 | "tokenSource", |
| 88 | "layerInfos", |
| 89 | "publisher", |
| 90 | "editor", |
| 91 | "fileEditionStatement", |
| 92 | "biblEditionStatement", |
| 93 | "reference", |
| 94 | "corpusEditor" |
| 95 | )); |
| 96 | |
| 97 | private static HashSet<String> legacyDateFields = |
| 98 | new HashSet<String>(Arrays.asList( |
| 99 | "pubDate", |
| 100 | "creationDate" |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 101 | )); |
| Akron | 7410746 | 2019-01-10 11:32:54 +0100 | [diff] [blame] | 102 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 103 | @JsonIgnore |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 104 | public int internalDocID, localDocID, UID; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 105 | |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 106 | @JsonIgnore |
| Akron | 50e5f61 | 2019-01-16 12:52:39 +0100 | [diff] [blame] | 107 | public MetaFieldsObj mFields = new MetaFieldsObj(); |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 108 | |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 109 | /** |
| 110 | * Populate document meta information with information coming from |
| 111 | * the index. |
| 112 | * |
| 113 | * @param doc |
| 114 | * Document object. |
| 115 | * @param field |
| 116 | * Primary data field. |
| 117 | */ |
| 118 | public void populateDocument (Document doc, String field) { |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 119 | List<String> fieldList = new ArrayList<>(32); |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 120 | Iterator<IndexableField> fieldIterator = doc.getFields().iterator(); |
| 121 | while (fieldIterator.hasNext()) |
| 122 | fieldList.add(fieldIterator.next().name()); |
| 123 | |
| 124 | this.populateDocument(doc, field, fieldList); |
| 125 | }; |
| 126 | |
| Akron | 50e5f61 | 2019-01-16 12:52:39 +0100 | [diff] [blame] | 127 | /** |
| 128 | * Populate document meta information with information coming from |
| 129 | * the index. |
| 130 | * |
| 131 | * @param doc |
| 132 | * Document object. |
| 133 | * @param field |
| 134 | * Primary data field. |
| 135 | * @param fields |
| 136 | * Hash object with all supported fields. |
| 137 | */ |
| 138 | public void populateDocument (Document doc, String field, |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 139 | List<String> fields) { |
| Akron | fbc7616 | 2019-06-04 15:51:09 +0200 | [diff] [blame] | 140 | if (field != null) |
| 141 | this.setPrimaryData(doc.get(field)); |
| Akron | 50e5f61 | 2019-01-16 12:52:39 +0100 | [diff] [blame] | 142 | this.populateFields(doc, fields); |
| 143 | }; |
| 144 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 145 | |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 146 | public void populateFields (Document doc) { |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 147 | ArrayList<String> fieldList = new ArrayList<>(32); |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 148 | Iterator<IndexableField> fieldIterator = doc.getFields().iterator(); |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 149 | while (fieldIterator.hasNext()) { |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 150 | fieldList.add(fieldIterator.next().name()); |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 151 | }; |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 152 | |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 153 | // TODO: Sort alphabetically! |
| 154 | |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 155 | this.populateFields(doc, fieldList); |
| 156 | }; |
| 157 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 158 | |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 159 | public void populateFields (Document doc, List<String> fields) { |
| Akron | de4f085 | 2019-01-16 16:29:44 +0100 | [diff] [blame] | 160 | Iterator<String> fieldsIter = fields.iterator(); |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 161 | |
| 162 | if (fields.contains("UID")) { |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 163 | this.setUID(doc.get("UID")); |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 164 | }; |
| Akron | de4f085 | 2019-01-16 16:29:44 +0100 | [diff] [blame] | 165 | |
| Akron | be9638d | 2019-02-07 17:09:42 +0100 | [diff] [blame] | 166 | // fieldsIter = fields.iterator(); |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 167 | mFields.fieldsOrder = new ArrayList<>(16); |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 168 | |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 169 | while (fieldsIter.hasNext()) { |
| 170 | String name = fieldsIter.next(); |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 171 | |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 172 | // Remember - never serialize "tokens" |
| Akron | 1a975d1 | 2019-02-05 13:13:06 +0100 | [diff] [blame] | 173 | if (name.equals("tokens") || name.equals("UID")) |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 174 | continue; |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 175 | |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 176 | mFields.fieldsOrder.add(name); |
| 177 | |
| Akron | be9638d | 2019-02-07 17:09:42 +0100 | [diff] [blame] | 178 | // Ignore fields already set |
| 179 | if (mFields.contains(name)) { |
| 180 | continue; |
| 181 | }; |
| 182 | |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 183 | IndexableField iField = doc.getField(name); |
| 184 | |
| 185 | if (iField == null) |
| 186 | continue; |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 187 | |
| 188 | MetaField mf = mFields.add(iField); |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 189 | |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 190 | // Legacy |
| Akron | 1a975d1 | 2019-02-05 13:13:06 +0100 | [diff] [blame] | 191 | if (name.equals("license")) |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 192 | this.addString("availability", doc.get("license")); |
| 193 | |
| 194 | }; |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 195 | }; |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 196 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 197 | |
| 198 | /** |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 199 | * Get the unique identifier of the document. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 200 | * |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 201 | * @return The unique identifier of the document as an integer. |
| 202 | */ |
| Nils Diewald | ff6f766 | 2014-09-21 15:08:52 +0000 | [diff] [blame] | 203 | @JsonProperty("UID") |
| 204 | public int getUID () { |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 205 | return this.UID; |
| 206 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 207 | |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 208 | |
| 209 | /** |
| 210 | * Set the unique identifier of the document. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 211 | * |
| 212 | * @param UID |
| 213 | * The unique identifier of the document as an integer. |
| Nils Diewald | d37f7e4 | 2015-02-27 21:08:22 +0000 | [diff] [blame] | 214 | * @return The invocant for chaining. |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 215 | */ |
| 216 | public void setUID (int UID) { |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 217 | if (UID != 0) { |
| 218 | this.UID = UID; |
| 219 | this.addString("UID", new Integer(UID).toString()); |
| 220 | } |
| Nils Diewald | ff6f766 | 2014-09-21 15:08:52 +0000 | [diff] [blame] | 221 | }; |
| 222 | |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 223 | |
| 224 | /** |
| 225 | * Set the unique identifier of the document. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 226 | * |
| 227 | * @param UID |
| 228 | * The unique identifier of the document as a |
| 229 | * string representing an integer. |
| Nils Diewald | d37f7e4 | 2015-02-27 21:08:22 +0000 | [diff] [blame] | 230 | * @return The invocant for chaining. |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 231 | * @throws NumberFormatException |
| 232 | */ |
| 233 | public void setUID (String UID) throws NumberFormatException { |
| Akron | d7d7b1f | 2016-06-25 00:31:16 +0200 | [diff] [blame] | 234 | if (UID != null) { |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 235 | this.UID = Integer.parseInt(UID); |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 236 | this.addString("UID", new Integer(this.UID).toString()); |
| Akron | d7d7b1f | 2016-06-25 00:31:16 +0200 | [diff] [blame] | 237 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 238 | }; |
| 239 | |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 240 | |
| 241 | /** |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 242 | * Get the primary data of the document. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 243 | * |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 244 | * @return The primary data of the document as a string. |
| 245 | */ |
| Akron | d504f21 | 2015-06-20 00:27:54 +0200 | [diff] [blame] | 246 | @JsonIgnore |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 247 | public String getPrimaryData () { |
| 248 | if (this.primaryData == null) |
| 249 | return ""; |
| Nils Diewald | 3e3cbf3 | 2015-02-06 21:30:49 +0000 | [diff] [blame] | 250 | return this.primaryData; |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 251 | }; |
| 252 | |
| 253 | |
| 254 | /** |
| 255 | * Get the primary data of the document, |
| 256 | * starting with a given character offset. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 257 | * |
| 258 | * @param startOffset |
| 259 | * The starting character offset. |
| 260 | * @return The substring of primary data of the document as a |
| 261 | * string. |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 262 | */ |
| Akron | d504f21 | 2015-06-20 00:27:54 +0200 | [diff] [blame] | 263 | @JsonIgnore |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 264 | public String getPrimaryData (int startOffset) { |
| 265 | return this.primaryData.substring(startOffset); |
| 266 | }; |
| 267 | |
| 268 | |
| 269 | /** |
| 270 | * Get the primary data of the document, |
| 271 | * starting with a given character offset and ending |
| 272 | * with a given character offset. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 273 | * |
| 274 | * @param startOffset |
| 275 | * The starting character offset. |
| 276 | * @param endOffset |
| 277 | * The ending character offset. |
| 278 | * @return The substring of the primary data of the document as a |
| 279 | * string. |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 280 | */ |
| Akron | d504f21 | 2015-06-20 00:27:54 +0200 | [diff] [blame] | 281 | @JsonIgnore |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 282 | public String getPrimaryData (int startOffset, int endOffset) { |
| 283 | return this.primaryData.substring(startOffset, endOffset); |
| 284 | }; |
| 285 | |
| 286 | |
| 287 | /** |
| 288 | * Set the primary data of the document. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 289 | * |
| 290 | * @param primary |
| 291 | * The primary data of the document |
| 292 | * as a string. |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 293 | */ |
| Nils Diewald | 3e3cbf3 | 2015-02-06 21:30:49 +0000 | [diff] [blame] | 294 | public void setPrimaryData (String primary) { |
| Akron | 75d0f38 | 2019-03-15 14:56:03 +0100 | [diff] [blame] | 295 | // Java can't work with utf-8 substrings as defined in the input data, |
| 296 | // That's why substringing fails on surrogates. This is a workaround |
| 297 | // to remove surrogates to make substringing work again. |
| 298 | // It would probably be better to fix this before the data hits the index, |
| 299 | // but we have to work with old indices as well. |
| 300 | this.primaryData = primary.replaceAll("[^\u0000-\uffff]", "?"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 301 | }; |
| 302 | |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 303 | /** |
| 304 | * Get the length of the primary data of the document |
| 305 | * (i.e. the number of characters). |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 306 | * |
| 307 | * @return The length of the primary data of the document as an |
| 308 | * integer. |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 309 | */ |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 310 | @JsonIgnore |
| 311 | public int getPrimaryDataLength () { |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 312 | return this.primaryData.length(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 313 | }; |
| Nils Diewald | 4972994 | 2013-11-27 20:30:07 +0000 | [diff] [blame] | 314 | |
| Nils Diewald | 4972994 | 2013-11-27 20:30:07 +0000 | [diff] [blame] | 315 | |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 316 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 317 | * Get the text sigle as a string. |
| 318 | * |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 319 | * @return The text sigle as a string. |
| 320 | */ |
| Akron | be9638d | 2019-02-07 17:09:42 +0100 | [diff] [blame] | 321 | @JsonIgnore |
| Nils Diewald | ba197f2 | 2014-11-01 17:21:46 +0000 | [diff] [blame] | 322 | public String getTextSigle () { |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 323 | return this.getFieldValue("textSigle"); |
| Nils Diewald | ba197f2 | 2014-11-01 17:21:46 +0000 | [diff] [blame] | 324 | }; |
| 325 | |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 326 | |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 327 | /** |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 328 | * Get the document sigle as a string. |
| 329 | * |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 330 | * @return The document sigle as a string. |
| 331 | */ |
| Akron | be9638d | 2019-02-07 17:09:42 +0100 | [diff] [blame] | 332 | @JsonIgnore |
| Nils Diewald | ba197f2 | 2014-11-01 17:21:46 +0000 | [diff] [blame] | 333 | public String getDocSigle () { |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 334 | return this.getFieldValue("docSigle"); |
| Nils Diewald | ba197f2 | 2014-11-01 17:21:46 +0000 | [diff] [blame] | 335 | }; |
| 336 | |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 337 | |
| 338 | /** |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 339 | * Get the corpus sigle as a string. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 340 | * |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 341 | * @return The corpus sigle as a string. |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 342 | */ |
| Akron | be9638d | 2019-02-07 17:09:42 +0100 | [diff] [blame] | 343 | @JsonIgnore |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 344 | public String getCorpusSigle () { |
| 345 | return this.getFieldValue("corpusSigle"); |
| Nils Diewald | ba197f2 | 2014-11-01 17:21:46 +0000 | [diff] [blame] | 346 | }; |
| 347 | |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 348 | |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 349 | @Deprecated |
| Akron | be9638d | 2019-02-07 17:09:42 +0100 | [diff] [blame] | 350 | @JsonIgnore |
| Akron | bb117b3 | 2019-01-21 13:57:55 +0100 | [diff] [blame] | 351 | public String getAvailability () { |
| 352 | return this.getFieldValue("availability"); |
| 353 | }; |
| 354 | |
| 355 | |
| 356 | @Deprecated |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 357 | @JsonProperty("corpusID") |
| 358 | public String getCorpusID () { |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 359 | return this.getFieldValue("corpusID"); |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 360 | }; |
| 361 | |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 362 | @Deprecated |
| 363 | @JsonProperty("ID") |
| 364 | public String getID () { |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 365 | return this.getFieldValue("ID"); |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 366 | }; |
| 367 | |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 368 | @JsonAnyGetter |
| 369 | public Map<String, JsonNode> getLegacyMetaFields () { |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 370 | |
| 371 | Iterator<MetaField> mfIterator = mFields.iterator(); |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 372 | |
| 373 | HashMap<String, JsonNode> map = new HashMap<>(); |
| 374 | |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 375 | while (mfIterator.hasNext()) { |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 376 | MetaField mf = mfIterator.next(); |
| 377 | if (mf == null) |
| 378 | continue; |
| 379 | String mfs = mf.key; |
| Akron | be9638d | 2019-02-07 17:09:42 +0100 | [diff] [blame] | 380 | |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 381 | String value = this.getFieldValue(mfs); |
| 382 | if (value != null && ( |
| 383 | legacyDateFields.contains(mfs) || |
| 384 | legacyStoredFields.contains(mfs) || |
| 385 | legacyTextFields.contains(mfs) || |
| 386 | legacyStringFields.contains(mfs) || |
| 387 | legacyKeywordsFields.contains(mfs) || |
| 388 | legacyDateFields.contains(mfs) |
| 389 | ) |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 390 | ) { |
| Akron | 1a8bb76 | 2019-01-18 15:48:59 +0100 | [diff] [blame] | 391 | map.put(mfs, new TextNode(value)); |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 392 | } |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 393 | }; |
| 394 | |
| 395 | return map; |
| 396 | } |
| 397 | |
| 398 | |
| Akron | 7410746 | 2019-01-10 11:32:54 +0100 | [diff] [blame] | 399 | @JsonAnySetter |
| 400 | public void setLegacyMetaField (String name, JsonNode value) { |
| 401 | |
| 402 | // Treat legacy string fields |
| 403 | if (legacyStringFields.contains(name)) { |
| 404 | this.addString(name, value.asText()); |
| 405 | } |
| Nils Diewald | 44d5fa1 | 2015-01-15 21:31:52 +0000 | [diff] [blame] | 406 | |
| Akron | 7410746 | 2019-01-10 11:32:54 +0100 | [diff] [blame] | 407 | // Treat legacy text fields |
| 408 | else if (legacyTextFields.contains(name)) { |
| 409 | this.addText(name, value.asText()); |
| 410 | } |
| 411 | |
| 412 | // Treat legacy keyword fields |
| 413 | else if (legacyKeywordsFields.contains(name)) { |
| 414 | this.addKeywords(name, value.asText()); |
| 415 | } |
| 416 | |
| 417 | // Treat legacy stored fields |
| 418 | else if (legacyStoredFields.contains(name)) { |
| 419 | this.addStored(name, value.asText()); |
| 420 | } |
| 421 | |
| 422 | // Treat legacy date fields |
| 423 | else if (legacyDateFields.contains(name)) { |
| 424 | this.addDate(name, value.asText()); |
| 425 | } |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 426 | |
| Akron | 7410746 | 2019-01-10 11:32:54 +0100 | [diff] [blame] | 427 | else if (name.equals("license")) { |
| 428 | this.addString("availability", value.asText()); |
| 429 | } |
| 430 | |
| 431 | // Temporarily - treat legacy store values introduced for Sgbr |
| 432 | else if (name.equals("store")) { |
| 433 | // TODO: Store all values |
| 434 | }; |
| 435 | // |
| 436 | // else { |
| 437 | // System.err.println("Unknown field: " + name); |
| 438 | // }; |
| Nils Diewald | ba197f2 | 2014-11-01 17:21:46 +0000 | [diff] [blame] | 439 | }; |
| Akron | 7410746 | 2019-01-10 11:32:54 +0100 | [diff] [blame] | 440 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 441 | |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 442 | /** |
| 443 | * Serialize response as a {@link JsonNode}. |
| 444 | * |
| 445 | * @return {@link JsonNode} representation of the response |
| 446 | */ |
| 447 | @Override |
| 448 | public JsonNode toJsonNode () { |
| 449 | ObjectNode json = (ObjectNode) super.toJsonNode(); |
| 450 | json.putAll((ObjectNode) mapper.valueToTree(this)); |
| Akron | d7d7b1f | 2016-06-25 00:31:16 +0200 | [diff] [blame] | 451 | |
| 452 | if (this.getUID() == 0) |
| 453 | json.remove("UID"); |
| 454 | |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 455 | return json; |
| 456 | }; |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 457 | |
| 458 | @JsonIgnore |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 459 | public String getFieldValue (String field) { |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 460 | MetaField mf = mFields.get(field); |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 461 | |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 462 | if (mf != null && mf.values.size() > 0) { |
| 463 | return String.join( |
| 464 | " ", |
| 465 | mf.values |
| 466 | ); |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 467 | }; |
| 468 | |
| 469 | return null; |
| 470 | }; |
| 471 | |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 472 | |
| 473 | @JsonIgnore |
| 474 | public KrillDate getFieldValueAsDate (String field) { |
| 475 | String date = this.getFieldValue(field); |
| 476 | |
| 477 | if (date == null) |
| 478 | return null; |
| 479 | |
| 480 | return new KrillDate(date); |
| 481 | }; |
| 482 | |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 483 | @JsonIgnore |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 484 | public void addString (String key, String value) { |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 485 | if (value == null) |
| 486 | return; |
| 487 | |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 488 | mFields.add( |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 489 | new MetaField( |
| 490 | key, |
| 491 | "type:string", |
| 492 | value |
| 493 | ) |
| 494 | ); |
| 495 | }; |
| Akron | 4376e74 | 2019-01-16 15:02:30 +0100 | [diff] [blame] | 496 | |
| 497 | |
| 498 | @JsonIgnore |
| 499 | public void addInt (String key, String value) { |
| 500 | if (value == null) |
| 501 | return; |
| 502 | |
| 503 | mFields.add( |
| 504 | new MetaField( |
| 505 | key, |
| 506 | "type:integer", |
| 507 | value |
| 508 | ) |
| 509 | ); |
| 510 | }; |
| 511 | |
| 512 | @JsonIgnore |
| 513 | public void addInt (String key, int value) { |
| 514 | this.addInt(key, new Integer(value).toString()); |
| 515 | }; |
| 516 | |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 517 | |
| 518 | @JsonIgnore |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 519 | public void addStored (String key, String value) { |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 520 | if (value == null) |
| 521 | return; |
| 522 | |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 523 | mFields.add( |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 524 | new MetaField( |
| 525 | key, |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 526 | "type:store", |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 527 | value |
| 528 | ) |
| 529 | ); |
| 530 | }; |
| Akron | 4376e74 | 2019-01-16 15:02:30 +0100 | [diff] [blame] | 531 | |
| 532 | |
| 533 | @JsonIgnore |
| 534 | public void addAttachement (String key, String value) { |
| 535 | if (value == null) |
| 536 | return; |
| 537 | |
| 538 | mFields.add( |
| 539 | new MetaField( |
| 540 | key, |
| 541 | "type:attachement", |
| 542 | value |
| 543 | ) |
| 544 | ); |
| 545 | }; |
| 546 | |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 547 | |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 548 | @JsonIgnore |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 549 | public void addKeywords (String key, String value) { |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 550 | if (value == null) |
| 551 | return; |
| 552 | |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 553 | mFields.add( |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 554 | new MetaField( |
| 555 | key, |
| 556 | "type:keywords", |
| 557 | value |
| 558 | ) |
| 559 | ); |
| 560 | }; |
| 561 | |
| 562 | @JsonIgnore |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 563 | public void addText (String key, String value) { |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 564 | if (value == null) |
| 565 | return; |
| 566 | |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 567 | mFields.add( |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 568 | new MetaField( |
| 569 | key, |
| 570 | "type:text", |
| 571 | value |
| 572 | ) |
| 573 | ); |
| 574 | }; |
| 575 | |
| 576 | @JsonIgnore |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 577 | public void addDate (String key, String value) { |
| Akron | 2b921a6 | 2019-01-14 18:52:45 +0100 | [diff] [blame] | 578 | if (value == null) |
| 579 | return; |
| 580 | |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 581 | KrillDate date = new KrillDate(value); |
| Akron | c7a2abc | 2019-01-17 14:21:34 +0100 | [diff] [blame] | 582 | |
| 583 | if (date == null) |
| 584 | return; |
| 585 | |
| Akron | a6dabb7 | 2019-01-09 13:09:41 +0100 | [diff] [blame] | 586 | mFields.add( |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 587 | new MetaField( |
| 588 | key, |
| 589 | "type:date", |
| Akron | 32b9519 | 2019-01-11 13:58:55 +0100 | [diff] [blame] | 590 | date.toDisplay() |
| Akron | e64cc16 | 2019-01-08 18:40:37 +0100 | [diff] [blame] | 591 | ) |
| 592 | ); |
| 593 | }; |
| Akron | c7a2abc | 2019-01-17 14:21:34 +0100 | [diff] [blame] | 594 | |
| 595 | @JsonIgnore |
| 596 | public void addDate (String key, int value) { |
| 597 | this.addDate(key, new Integer(value).toString()); |
| 598 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 599 | }; |