| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap; |
| 2 | |
| 3 | import java.io.*; |
| 4 | import java.util.*; |
| 5 | |
| 6 | import com.fasterxml.jackson.databind.JsonNode; |
| 7 | import com.fasterxml.jackson.databind.ObjectMapper; |
| 8 | import com.fasterxml.jackson.databind.node.*; |
| 9 | |
| Nils Diewald | 65449ff | 2015-02-27 17:57:29 +0000 | [diff] [blame] | 10 | import de.ids_mannheim.korap.response.SearchContext; |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 11 | import de.ids_mannheim.korap.util.QueryException; |
| 12 | import de.ids_mannheim.korap.response.Notifications; |
| 13 | |
| 14 | import org.slf4j.Logger; |
| 15 | import org.slf4j.LoggerFactory; |
| 16 | |
| 17 | // Todo: Set timeout default value per config file |
| Akron | 98b7854 | 2015-08-06 21:43:08 +0200 | [diff] [blame] | 18 | public final class KrillMeta extends Notifications { |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 19 | // <legacy> |
| 20 | private boolean cutOff = false; |
| 21 | // </legacy> |
| 22 | |
| 23 | private int limit = 0; |
| 24 | private short count = 25, countMax = 50; |
| 25 | private int startIndex = 0; |
| 26 | private short itemsPerResource = 0; |
| 27 | private SearchContext context; |
| 28 | |
| 29 | private HashSet<String> fields; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 30 | HashSet<Integer> highlights; |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 31 | |
| 32 | // Timeout search after milliseconds |
| 33 | private long timeout = (long) 120_000; |
| 34 | // private long timeoutStart = Long.MIN_VALUE; |
| 35 | |
| 36 | // Logger |
| 37 | private final static Logger log = LoggerFactory.getLogger(Krill.class); |
| 38 | |
| 39 | { |
| 40 | fields = new HashSet<String>(16); |
| 41 | |
| Akron | 12f1f5b | 2015-06-24 15:56:52 +0200 | [diff] [blame] | 42 | // TODO: Support @all |
| 43 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 44 | // Lift following fields per default |
| 45 | // These fields are chosen for <legacy /> reasons |
| Akron | 394607a | 2017-05-29 13:27:37 +0200 | [diff] [blame] | 46 | for (String field : new String[] { |
| 47 | "ID", |
| 48 | "UID", |
| 49 | "textSigle", |
| 50 | "corpusID", |
| 51 | "author", |
| 52 | "title", |
| 53 | "subTitle", |
| 54 | "textClass", |
| 55 | "pubPlace", |
| 56 | "pubDate", |
| 57 | // "foundries", |
| Akron | 3e0403f | 2015-06-24 20:59:13 +0200 | [diff] [blame] | 58 | // "tokenization", |
| Akron | 12f1f5b | 2015-06-24 15:56:52 +0200 | [diff] [blame] | 59 | // New: |
| Akron | 394607a | 2017-05-29 13:27:37 +0200 | [diff] [blame] | 60 | "availability", |
| 61 | "layerInfos", |
| 62 | "docSigle", |
| 63 | "corpusSigle" |
| 64 | }) { |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 65 | fields.add(field); |
| 66 | }; |
| 67 | |
| 68 | // Classes used for highlights |
| 69 | highlights = new HashSet<Integer>(3); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 70 | context = new SearchContext(); |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 71 | }; |
| 72 | |
| 73 | |
| 74 | public KrillMeta () {}; |
| 75 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 76 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 77 | public KrillMeta (JsonNode json) { |
| 78 | this.fromJson(json); |
| 79 | }; |
| 80 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 81 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 82 | public KrillMeta (String json) { |
| 83 | try { |
| 84 | this.fromJson(json); |
| 85 | } |
| 86 | catch (QueryException q) { |
| 87 | this.addError(q.getErrorCode(), q.getMessage()); |
| 88 | }; |
| 89 | }; |
| 90 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 91 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 92 | public KrillMeta fromJson (String json) throws QueryException { |
| 93 | JsonNode jsonN; |
| 94 | try { |
| 95 | // Read Json string |
| 96 | jsonN = new ObjectMapper().readValue(json, JsonNode.class); |
| 97 | } |
| 98 | |
| 99 | // Something went wrong |
| 100 | catch (IOException e) { |
| 101 | String msg = e.getMessage(); |
| 102 | log.warn("Unable to parse JSON: " + msg.split("\n")[0]); |
| 103 | throw new QueryException(621, "Unable to parse JSON"); |
| 104 | }; |
| 105 | |
| 106 | // Deserialize from node |
| 107 | return this.fromJson(jsonN); |
| 108 | }; |
| 109 | |
| 110 | |
| 111 | public KrillMeta fromJson (JsonNode json) { |
| 112 | // The object type of meta is undefined in KoralQuery, |
| 113 | // so it may or may have no @type |
| 114 | |
| 115 | // The query is nested in a parent query |
| 116 | if (!json.has("@type") && json.has("meta")) |
| 117 | json = json.get("meta"); |
| 118 | |
| 119 | // Defined cutOff |
| 120 | // <legacy> |
| 121 | if (json.has("cutOff")) |
| 122 | this.setCutOff(json.get("cutOff").asBoolean()); |
| 123 | // </legacy> |
| 124 | |
| 125 | // Defined count |
| 126 | if (json.has("count")) |
| 127 | this.setCount(json.get("count").asInt()); |
| 128 | |
| 129 | // Defined startIndex |
| 130 | if (json.has("startIndex")) |
| 131 | this.setStartIndex(json.get("startIndex").asInt()); |
| 132 | |
| 133 | // Defined startPage |
| 134 | if (json.has("startPage")) |
| 135 | this.setStartPage(json.get("startPage").asInt()); |
| 136 | |
| 137 | // Defined timeout |
| 138 | if (json.has("timeout")) |
| 139 | this.setTimeOut(json.get("timeout").asLong()); |
| 140 | |
| 141 | // Defined resource count |
| 142 | if (json.has("itemsPerResource")) |
| 143 | this.setItemsPerResource(json.get("itemsPerResource").asInt()); |
| 144 | |
| 145 | // Defined context |
| 146 | if (json.has("context")) |
| 147 | this.context.fromJson(json.get("context")); |
| 148 | |
| 149 | // Defined highlights |
| 150 | if (json.has("highlight")) { |
| 151 | |
| 152 | // Add highlights |
| 153 | if (json.get("highlight").isArray()) { |
| 154 | for (JsonNode highlight : (JsonNode) json.get("highlight")) { |
| 155 | this.addHighlight(highlight.asInt()); |
| 156 | }; |
| 157 | } |
| 158 | else |
| 159 | this.addHighlight(json.get("highlight").asInt()); |
| 160 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 161 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 162 | // Defined fields to lift from the index |
| 163 | if (json.has("fields")) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 164 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 165 | // Remove default fields |
| 166 | this.fields.clear(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 167 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 168 | // Add fields |
| 169 | if (json.get("fields").isArray()) { |
| 170 | for (JsonNode field : (JsonNode) json.get("fields")) { |
| 171 | this.addField(field.asText()); |
| 172 | }; |
| 173 | } |
| 174 | else |
| 175 | this.addField(json.get("fields").asText()); |
| 176 | }; |
| 177 | |
| 178 | return this; |
| 179 | }; |
| 180 | |
| 181 | |
| 182 | public short getCount () { |
| 183 | return this.count; |
| 184 | }; |
| 185 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 186 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 187 | public KrillMeta setCount (int value) { |
| 188 | // Todo: Maybe update startIndex with known startPage! |
| 189 | this.setCount((short) value); |
| 190 | return this; |
| 191 | }; |
| 192 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 193 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 194 | public KrillMeta setCount (short value) { |
| 195 | if (value > 0) |
| 196 | this.count = (value <= this.countMax) ? value : this.countMax; |
| 197 | return this; |
| 198 | }; |
| 199 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 200 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 201 | public short getCountMax () { |
| 202 | return this.countMax; |
| 203 | }; |
| 204 | |
| 205 | |
| 206 | public int getStartIndex () { |
| 207 | return this.startIndex; |
| 208 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 209 | |
| 210 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 211 | public KrillMeta setStartIndex (int value) { |
| 212 | this.startIndex = (value >= 0) ? value : 0; |
| 213 | return this; |
| 214 | }; |
| 215 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 216 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 217 | public KrillMeta setStartPage (int value) { |
| 218 | if (value >= 0) |
| 219 | this.setStartIndex((value * this.getCount()) - this.getCount()); |
| 220 | else |
| 221 | this.startIndex = 0; |
| 222 | return this; |
| 223 | }; |
| 224 | |
| 225 | |
| 226 | public long getTimeOut () { |
| 227 | return this.timeout; |
| 228 | }; |
| 229 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 230 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 231 | public void setTimeOut (long timeout) { |
| 232 | this.timeout = timeout; |
| 233 | }; |
| 234 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 235 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 236 | public KrillMeta setItemsPerResource (short value) { |
| 237 | if (value >= 0) |
| 238 | this.itemsPerResource = value; |
| 239 | return this; |
| 240 | }; |
| 241 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 242 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 243 | public KrillMeta setItemsPerResource (int value) { |
| 244 | return this.setItemsPerResource((short) value); |
| 245 | }; |
| 246 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 247 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 248 | public short getItemsPerResource () { |
| 249 | return this.itemsPerResource; |
| 250 | }; |
| 251 | |
| 252 | |
| 253 | public SearchContext getContext () { |
| 254 | return this.context; |
| 255 | }; |
| 256 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 257 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 258 | public KrillMeta setContext (SearchContext context) { |
| 259 | this.context = context; |
| 260 | return this; |
| 261 | }; |
| 262 | |
| 263 | |
| 264 | // Get set of fields |
| 265 | /** |
| 266 | * Get the fields as a set |
| 267 | */ |
| 268 | public HashSet<String> getFields () { |
| 269 | return this.fields; |
| 270 | }; |
| 271 | |
| 272 | |
| 273 | /** |
| 274 | * Add a field to the set of fields to retrieve. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 275 | * |
| 276 | * @param field |
| 277 | * The field to retrieve. |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 278 | * @return The {@link Krill} object for chaining. |
| 279 | */ |
| 280 | public KrillMeta addField (String field) { |
| 281 | this.fields.add(field); |
| 282 | return this; |
| 283 | }; |
| 284 | |
| 285 | |
| 286 | /** |
| 287 | * Add class numbers to highlight in KWIC view. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 288 | * |
| 289 | * @param classNumber |
| 290 | * The number of a class to highlight. |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 291 | * @return The {@link Krill} object for chaining. |
| 292 | */ |
| 293 | public KrillMeta addHighlight (int classNumber) { |
| 294 | this.highlights.add(classNumber); |
| 295 | return this; |
| 296 | }; |
| 297 | |
| 298 | |
| 299 | @Deprecated |
| 300 | public boolean doCutOff () { |
| 301 | return this.cutOff; |
| 302 | }; |
| 303 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 304 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 305 | @Deprecated |
| 306 | public KrillMeta setCutOff (boolean cutOff) { |
| 307 | this.cutOff = cutOff; |
| 308 | return this; |
| 309 | }; |
| 310 | |
| 311 | |
| 312 | // TODO: |
| 313 | // This limits the search results with offset |
| 314 | // Maybe can be deprecated! |
| 315 | @Deprecated |
| 316 | public int getLimit () { |
| 317 | return this.limit; |
| 318 | }; |
| 319 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 320 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 321 | // TODO: |
| 322 | // This limits the search results with offset |
| 323 | // Maybe can be deprecated! |
| 324 | @Deprecated |
| 325 | public KrillMeta setLimit (int limit) { |
| 326 | if (limit > 0) |
| 327 | this.limit = limit; |
| 328 | return this; |
| 329 | }; |
| 330 | |
| 331 | |
| 332 | @Override |
| 333 | public JsonNode toJsonNode () { |
| 334 | ObjectMapper mapper = new ObjectMapper(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 335 | ObjectNode json = mapper.createObjectNode(); |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 336 | // json.put("@type", "koral:meta"); |
| 337 | |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 338 | // <legacy> |
| 339 | // Add cutOff attribute |
| 340 | if (this.cutOff) |
| 341 | json.put("cutOff", this.doCutOff()); |
| 342 | |
| 343 | // Add limit attribute |
| 344 | if (this.limit > 0) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 345 | json.put("limit", this.getLimit()); |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 346 | // </legacy> |
| 347 | |
| 348 | // Add count attribute |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 349 | json.put("count", this.getCount()); |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 350 | |
| 351 | // Add startindex attribute |
| 352 | json.put("startIndex", this.getStartIndex()); |
| 353 | |
| 354 | // Add timeout attribute |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 355 | json.put("timeout", this.getTimeOut()); |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 356 | |
| 357 | // Add context attribute |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 358 | json.put("context", this.getContext().toJsonNode()); |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 359 | |
| 360 | // Add fields attribute |
| Akron | b116644 | 2015-06-27 00:34:19 +0200 | [diff] [blame] | 361 | ArrayNode fieldNode = mapper.createArrayNode(); |
| 362 | Iterator<String> field = this.fields.iterator(); |
| 363 | while (field.hasNext()) |
| 364 | fieldNode.add(field.next()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 365 | json.put("fields", fieldNode); |
| Nils Diewald | f5ab4b2 | 2015-02-25 20:55:16 +0000 | [diff] [blame] | 366 | |
| 367 | // Add itemsPerResource attribute |
| 368 | if (this.itemsPerResource > 0) |
| 369 | json.put("itemsPerResource", (int) this.getItemsPerResource()); |
| 370 | |
| 371 | // Add highlight attribute |
| 372 | if (!this.highlights.isEmpty()) { |
| 373 | ArrayNode highlightNode = mapper.createArrayNode(); |
| 374 | highlightNode.addPOJO(this.highlights); |
| 375 | json.put("highlight", highlightNode); |
| 376 | }; |
| 377 | |
| 378 | return json; |
| 379 | }; |
| 380 | }; |