blob: c014620b91922a40bbb9026fad867ee72cf89c33 [file] [log] [blame]
Nils Diewaldf399a672013-11-18 17:55:22 +00001package de.ids_mannheim.korap;
2import java.util.*;
Nils Diewald1e5d5942014-05-20 13:29:53 +00003import java.io.*;
4
Nils Diewalda115a332014-01-07 13:59:09 +00005import java.lang.StringBuffer;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +00006import java.nio.ByteBuffer;
Nils Diewaldf399a672013-11-18 17:55:22 +00007
8import com.fasterxml.jackson.annotation.*;
Nils Diewaldcde69082014-01-16 15:46:48 +00009import com.fasterxml.jackson.annotation.JsonInclude.Include;
Nils Diewaldf399a672013-11-18 17:55:22 +000010import com.fasterxml.jackson.databind.ObjectMapper;
Nils Diewaldbfe554b2014-01-09 19:35:05 +000011import com.fasterxml.jackson.databind.JsonNode;
12import com.fasterxml.jackson.databind.node.*;
Nils Diewaldf399a672013-11-18 17:55:22 +000013
14import de.ids_mannheim.korap.index.PositionsToOffset;
Nils Diewald1e5d5942014-05-20 13:29:53 +000015import de.ids_mannheim.korap.index.SearchContext;
Nils Diewaldbfe554b2014-01-09 19:35:05 +000016import de.ids_mannheim.korap.document.KorapPrimaryData;
17
Nils Diewaldf399a672013-11-18 17:55:22 +000018import static de.ids_mannheim.korap.util.KorapHTML.*;
Nils Diewaldcde69082014-01-16 15:46:48 +000019import de.ids_mannheim.korap.index.MatchIdentifier;
Nils Diewald345bdc02014-01-21 21:48:57 +000020import de.ids_mannheim.korap.index.PosIdentifier;
Nils Diewald1e5d5942014-05-20 13:29:53 +000021import de.ids_mannheim.korap.query.SpanElementQuery;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +000022
Nils Diewaldf399a672013-11-18 17:55:22 +000023import org.slf4j.Logger;
24import org.slf4j.LoggerFactory;
25
Nils Diewald1e5d5942014-05-20 13:29:53 +000026import org.apache.lucene.index.AtomicReaderContext;
27import org.apache.lucene.index.Term;
28import org.apache.lucene.index.TermContext;
Nils Diewald8c221782013-12-13 19:52:58 +000029import org.apache.lucene.util.FixedBitSet;
Nils Diewald1e5d5942014-05-20 13:29:53 +000030import org.apache.lucene.util.Bits;
Nils Diewaldbfe554b2014-01-09 19:35:05 +000031import org.apache.lucene.document.Document;
Nils Diewald1e5d5942014-05-20 13:29:53 +000032import org.apache.lucene.search.spans.Spans;
Nils Diewald8c221782013-12-13 19:52:58 +000033
Nils Diewaldf399a672013-11-18 17:55:22 +000034/*
35 Todo: The implemented classes and private names are horrible!
36 Refactor, future-me!
Nils Diewald345bdc02014-01-21 21:48:57 +000037
38 The number based Highlighttype is ugly - UGLY!
Nils Diewaldf399a672013-11-18 17:55:22 +000039*/
40
41/**
42 * Representation of Matches in a KorapResult.
43 *
Nils Diewald498d5982014-03-03 20:09:22 +000044 * @author Nils Diewald
Nils Diewaldf399a672013-11-18 17:55:22 +000045 * @see KorapResult
Nils Diewaldf399a672013-11-18 17:55:22 +000046 */
Nils Diewaldcde69082014-01-16 15:46:48 +000047@JsonInclude(Include.NON_NULL)
Nils Diewaldf399a672013-11-18 17:55:22 +000048public class KorapMatch extends KorapDocument {
Nils Diewald82a4b862014-02-20 21:17:41 +000049
Nils Diewald498d5982014-03-03 20:09:22 +000050 // Logger
51 private final static Logger log = LoggerFactory.getLogger(KorapMatch.class);
52
53 // This advices the java compiler to ignore all loggings
Nils Diewald84934372014-05-20 13:48:18 +000054 public static final boolean DEBUG = false;
Nils Diewald498d5982014-03-03 20:09:22 +000055
56 // Mapper for JSON serialization
Nils Diewaldf399a672013-11-18 17:55:22 +000057 ObjectMapper mapper = new ObjectMapper();
58
59 // Snippet information
60 @JsonIgnore
Nils Diewald1e5d5942014-05-20 13:29:53 +000061 public SearchContext context;
Nils Diewaldf399a672013-11-18 17:55:22 +000062
Nils Diewaldbfe554b2014-01-09 19:35:05 +000063 // Should be deprecated, but used wildly in tests!
Nils Diewaldf399a672013-11-18 17:55:22 +000064 @JsonIgnore
Nils Diewald498d5982014-03-03 20:09:22 +000065 public int startPos, endPos;
Nils Diewaldf399a672013-11-18 17:55:22 +000066
67 @JsonIgnore
Nils Diewald833fe7e2013-12-14 16:06:33 +000068 public int potentialStartPosChar = -1,
69 potentialEndPosChar = -1;
Nils Diewaldf399a672013-11-18 17:55:22 +000070
Nils Diewaldcde69082014-01-16 15:46:48 +000071 private String error = null;
Nils Diewaldcdd465b2014-02-24 18:47:38 +000072 private String version;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +000073
Nils Diewald1e5d5942014-05-20 13:29:53 +000074 // TEMPORARILY
Nils Diewaldcde69082014-01-16 15:46:48 +000075 @JsonIgnore
76 public int localDocID = -1;
77
Nils Diewald345bdc02014-01-21 21:48:57 +000078 HashMap<Integer, String> annotationNumber = new HashMap<>(16);
79 HashMap<Integer, Relation> relationNumber = new HashMap<>(16);
80 HashMap<Integer, Integer> identifierNumber = new HashMap<>(16);
81
82 // -1 is match highlight
Nils Diewaldcde69082014-01-16 15:46:48 +000083 int annotationNumberCounter = 256;
Nils Diewald345bdc02014-01-21 21:48:57 +000084 int relationNumberCounter = 2048;
85 int identifierNumberCounter = -2;
Nils Diewaldbfe554b2014-01-09 19:35:05 +000086
Nils Diewald833fe7e2013-12-14 16:06:33 +000087 private String tempSnippet,
88 snippetHTML,
Nils Diewald2cd1c3d2014-01-08 22:53:08 +000089 snippetBrackets,
90 identifier;
Nils Diewald833fe7e2013-12-14 16:06:33 +000091
Nils Diewaldf399a672013-11-18 17:55:22 +000092 private HighlightCombinator snippetStack;
Nils Diewald833fe7e2013-12-14 16:06:33 +000093
Nils Diewald1e5d5942014-05-20 13:29:53 +000094 public boolean startMore = true,
95 endMore = true;
Nils Diewaldf399a672013-11-18 17:55:22 +000096
97 private Collection<byte[]> payload;
Nils Diewaldcde69082014-01-16 15:46:48 +000098 private ArrayList<Highlight> highlight;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +000099 private LinkedList<int[]> span;
Nils Diewaldf399a672013-11-18 17:55:22 +0000100
Nils Diewald833fe7e2013-12-14 16:06:33 +0000101 private PositionsToOffset positionsToOffset;
Nils Diewald3caa00d2013-12-13 02:24:04 +0000102 private boolean processed = false;
103
/**
 * Constructs a new KorapMatch object for a positional span
 * in a single document.
 * Todo: Maybe that's not necessary!
 *
 * @param pto The PositionsToOffset object, containing relevant
 *            positional information for highlighting
 * @param localDocID Document ID based on the atomic reader.
 * @param startPos Start position of the match in the document.
 * @param endPos End position of the match in the document.
 *
 * @see #snippetHTML()
 * @see #snippetBrackets()
 * @see PositionsToOffset
 */
public KorapMatch (PositionsToOffset pto, int localDocID, int startPos, int endPos) {
    this.startPos = startPos;
    this.endPos = endPos;
    this.localDocID = localDocID;
    this.positionsToOffset = pto;
}
124
Nils Diewald498d5982014-03-03 20:09:22 +0000125
Nils Diewaldf399a672013-11-18 17:55:22 +0000126 /**
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000127 * Constructs a new KorapMatch object.
128 */
129 public KorapMatch () {};
130
Nils Diewald498d5982014-03-03 20:09:22 +0000131
/**
 * Constructs a new KorapMatch object from a match identifier string.
 * Corpus ID, document ID and the match boundaries are taken from
 * the parsed identifier.
 *
 * @param idString Match identifier string as provided by KorapResult.
 * @param includeHighlights If true, the highlights given in the
 *        identifier are added to the match, as long as they lie
 *        within the match boundaries. If false, they are ignored.
 */
public KorapMatch (String idString, boolean includeHighlights) {
    MatchIdentifier id = new MatchIdentifier(idString);

    this.setCorpusID(id.getCorpusID());
    this.setDocID(id.getDocID());
    this.setStartPos(id.getStartPos());
    this.setEndPos(id.getEndPos());

    // Highlights are not requested
    if (!includeHighlights)
        return;

    for (int[] pos : id.getPos()) {

        // Only accept highlights inside of the match
        boolean insideMatch =
            pos[0] >= id.getStartPos() && pos[1] <= id.getEndPos();

        if (insideMatch)
            this.addHighlight(pos[0], pos[1], pos[2]);
    }
}
154
Nils Diewald498d5982014-03-03 20:09:22 +0000155
156 /**
157 * Private class of highlights.
158 */
Nils Diewaldcde69082014-01-16 15:46:48 +0000159 private class Highlight {
160 public int start, end;
161 public int number = -1;
162
Nils Diewald345bdc02014-01-21 21:48:57 +0000163 // Relational highlight
164 public Highlight (int start, int end, String annotation, int ref) {
165 this.start = start;
Nils Diewald498d5982014-03-03 20:09:22 +0000166 this.end = end;
Nils Diewald345bdc02014-01-21 21:48:57 +0000167 // TODO: This can overflow!
168 this.number = relationNumberCounter++;
169 relationNumber.put(this.number, new Relation(annotation, ref));
170 };
171
172 // Span highlight
Nils Diewaldcde69082014-01-16 15:46:48 +0000173 public Highlight (int start, int end, String annotation) {
174 this.start = start;
Nils Diewald498d5982014-03-03 20:09:22 +0000175 this.end = end;
Nils Diewaldcde69082014-01-16 15:46:48 +0000176 // TODO: This can overflow!
Nils Diewald345bdc02014-01-21 21:48:57 +0000177 if (annotationNumberCounter < 2048) {
178 this.number = annotationNumberCounter++;
179 annotationNumber.put(this.number, annotation);
180 };
Nils Diewaldcde69082014-01-16 15:46:48 +0000181 };
182
Nils Diewald345bdc02014-01-21 21:48:57 +0000183 // Simple highlight
Nils Diewaldcde69082014-01-16 15:46:48 +0000184 public Highlight (int start, int end, int number) {
Nils Diewald498d5982014-03-03 20:09:22 +0000185 this.start = start;
186 this.end = end;
Nils Diewaldcde69082014-01-16 15:46:48 +0000187 this.number = number;
188 };
Nils Diewald345bdc02014-01-21 21:48:57 +0000189 };
190
Nils Diewald498d5982014-03-03 20:09:22 +0000191
192 /**
193 * Private class of relations.
194 */
Nils Diewald345bdc02014-01-21 21:48:57 +0000195 private class Relation {
196 public int ref;
197 public String annotation;
198 public Relation (String annotation, int ref) {
199 this.annotation = annotation;
200 this.ref = ref;
201 };
202 };
203
Nils Diewaldcde69082014-01-16 15:46:48 +0000204
205 /**
Nils Diewaldf399a672013-11-18 17:55:22 +0000206 * Insert a highlight for the snippet view by means of positional
207 * offsets and an optional class number.
208 *
209 * @param start Integer value of a span's positional start offset.
210 * @param end Integer value of a span's positional end offset.
211 * @param number Optional class number of the highlight.
212 */
Nils Diewaldcde69082014-01-16 15:46:48 +0000213 public void addHighlight (int start, int end) {
214 this.addHighlight(new Highlight(start, end, (int) 0));
215 };
216
Nils Diewaldf399a672013-11-18 17:55:22 +0000217 public void addHighlight (int start, int end, byte number) {
Nils Diewaldcde69082014-01-16 15:46:48 +0000218 this.addHighlight(new Highlight(start, end, (int) number));
Nils Diewaldf399a672013-11-18 17:55:22 +0000219 };
220
221 public void addHighlight (int start, int end, short number) {
Nils Diewaldcde69082014-01-16 15:46:48 +0000222 this.addHighlight(new Highlight(start, end, (int) number));
Nils Diewaldf399a672013-11-18 17:55:22 +0000223 };
224
225 public void addHighlight (int start, int end, int number) {
Nils Diewaldcde69082014-01-16 15:46:48 +0000226 this.addHighlight(new Highlight(start, end, number));
227 };
228
Nils Diewald498d5982014-03-03 20:09:22 +0000229
230 /**
231 * Insert a highlight for the snippet view.
232 *
233 * @param hl A highlight object to add to the match.
234 */
Nils Diewaldcde69082014-01-16 15:46:48 +0000235 public void addHighlight (Highlight hl) {
236
Nils Diewaldf399a672013-11-18 17:55:22 +0000237 if (this.highlight == null)
Nils Diewaldcde69082014-01-16 15:46:48 +0000238 this.highlight = new ArrayList<Highlight>(16);
Nils Diewald82a4b862014-02-20 21:17:41 +0000239
240 if (DEBUG)
Nils Diewald498d5982014-03-03 20:09:22 +0000241 log.trace("Add highlight from pos {}-{} of class {}",
242 hl.start, hl.end, hl.number);
Nils Diewaldf399a672013-11-18 17:55:22 +0000243
Nils Diewald498d5982014-03-03 20:09:22 +0000244 // Reset the fetched match data
Nils Diewald833fe7e2013-12-14 16:06:33 +0000245 this._reset();
246
Nils Diewaldcde69082014-01-16 15:46:48 +0000247 this.highlight.add(hl);
Nils Diewaldf399a672013-11-18 17:55:22 +0000248 };
249
Nils Diewaldcde69082014-01-16 15:46:48 +0000250
Nils Diewald498d5982014-03-03 20:09:22 +0000251 /**
252 * Insert a textual annotation for the snippet view by
253 * means of positional offsets and an annotation string.
254 *
255 * @param start Integer value of a span's positional start offset.
256 * @param end Integer value of a span's positional end offset.
257 * @param annotation Annotation string.
258 */
Nils Diewaldcde69082014-01-16 15:46:48 +0000259 public void addAnnotation (int start, int end, String annotation) {
260 this.addHighlight(new Highlight(start, end, annotation));
261 };
262
Nils Diewald498d5982014-03-03 20:09:22 +0000263
264 /**
265 * Insert an annotated relation for the snippet view by
266 * means of relational participant positions and an annotation string.
267 *
268 * @param src Integer value of a span's positional source object.
269 * @param target Integer value of a span's positional target object.
270 * @param annotation Annotation string.
271 */
Nils Diewald345bdc02014-01-21 21:48:57 +0000272 public void addRelation (int src, int target, String annotation) {
273 this.addHighlight(new Highlight(src, src, annotation, target));
274 int id = identifierNumberCounter--;
275 identifierNumber.put(id, target);
276 this.addHighlight(new Highlight(target, target, id));
277 };
278
Nils Diewaldcde69082014-01-16 15:46:48 +0000279
Nils Diewald498d5982014-03-03 20:09:22 +0000280 /**
281 * Populate document meta information with information coming from the index.
282 *
283 * @param doc Document object.
284 * @param field Primary data field.
285 * @param fields Hash object with all supported fields.
286 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000287 public void populateDocument (Document doc, String field, HashSet<String> fields) {
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000288 this.setField(field);
Nils Diewald498d5982014-03-03 20:09:22 +0000289 this.setPrimaryData( new KorapPrimaryData(doc.get(field)) );
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000290 if (fields.contains("corpusID"))
291 this.setCorpusID(doc.get("corpusID"));
292 if (fields.contains("ID"))
293 this.setDocID(doc.get("ID"));
294 if (fields.contains("author"))
295 this.setAuthor(doc.get("author"));
296 if (fields.contains("textClass"))
297 this.setTextClass(doc.get("textClass"));
298 if (fields.contains("title"))
299 this.setTitle(doc.get("title"));
300 if (fields.contains("subTitle"))
301 this.setSubTitle(doc.get("subTitle"));
302 if (fields.contains("pubDate"))
303 this.setPubDate(doc.get("pubDate"));
304 if (fields.contains("pubPlace"))
305 this.setPubPlace(doc.get("pubPlace"));
306
307 // Temporary (later meta fields in term vector)
308 if (fields.contains("foundries"))
309 this.setFoundries(doc.get("foundries"));
310 if (fields.contains("tokenization"))
311 this.setTokenization(doc.get("tokenization"));
312 if (fields.contains("layerInfo"))
313 this.setLayerInfo(doc.get("layerInfo"));
314 };
315
Nils Diewald498d5982014-03-03 20:09:22 +0000316
317 /**
318 * Get document id.
319 */
Nils Diewald010c10f2013-12-17 01:58:31 +0000320 @JsonProperty("docID")
321 public String getDocID () {
322 return super.getID();
323 };
324
Nils Diewald498d5982014-03-03 20:09:22 +0000325
326 /**
327 * Set document id.
328 *
329 * @param id String representation of document ID.
330 */
Nils Diewald364eb642013-12-22 15:03:01 +0000331 public void setDocID (String id) {
332 super.setID(id);
333 };
334
Nils Diewald498d5982014-03-03 20:09:22 +0000335
336 /**
337 * Set version of the index
338 */
339 @JsonIgnore
340 public String getVersion () {
341 if (this.version == null)
342 return null;
343 StringBuilder sb = new StringBuilder("lucene-backend-");
344 return sb.append(this.version).toString();
345 };
346
347
348 /**
349 * Set version number.
350 *
351 * @param version The version number of the index as
352 * a string representation.
353 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000354 @JsonIgnore
Nils Diewaldcdd465b2014-02-24 18:47:38 +0000355 public void setVersion (String version) {
356 this.version = version;
357 };
358
Nils Diewaldcdd465b2014-02-24 18:47:38 +0000359
Nils Diewald498d5982014-03-03 20:09:22 +0000360 /**
361 * Get the positional start offset of the match.
362 */
Nils Diewaldcdd465b2014-02-24 18:47:38 +0000363 @JsonIgnore
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000364 public int getStartPos() {
365 return this.startPos;
366 };
367
Nils Diewald498d5982014-03-03 20:09:22 +0000368
369 /**
370 * Set the positional start offset of the match.
371 *
372 * @param pos The positional offset.
373 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000374 @JsonIgnore
375 public void setStartPos(int pos) {
376 this.startPos = pos;
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000377 };
378
Nils Diewald498d5982014-03-03 20:09:22 +0000379
380 /**
381 * Get the positional end offset of the match.
382 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000383 @JsonIgnore
384 public int getEndPos() {
385 return this.endPos;
386 };
387
Nils Diewald498d5982014-03-03 20:09:22 +0000388
389 /**
390 * Set the positional end offset of the match.
391 *
392 * @param pos The positional offset.
393 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000394 @JsonIgnore
395 public void setEndPos(int pos) {
396 this.endPos = pos;
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000397 };
398
Nils Diewald498d5982014-03-03 20:09:22 +0000399
400 /**
401 * Get the local (i.e. Lucene given) ID of the document.
402 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000403 @JsonIgnore
404 public int getLocalDocID () {
405 return this.localDocID;
406 };
407
Nils Diewald498d5982014-03-03 20:09:22 +0000408
409 /**
410 * Set the local (i.e. Lucene given) ID of the document.
411 *
412 * @param id The id of the document.
413 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000414 @JsonIgnore
415 public void setLocalDocID (int id) {
416 this.localDocID = id;
417 };
418
Nils Diewald498d5982014-03-03 20:09:22 +0000419
420 /**
421 * Get the PositionsToOffset object.
422 *
423 * @see PositionsToOffset
424 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000425 @JsonIgnore
426 public PositionsToOffset getPositionsToOffset () {
427 return this.positionsToOffset;
428 };
429
Nils Diewald498d5982014-03-03 20:09:22 +0000430
431 /**
432 * Set the PositionsToOffset object.
433 *
434 * @param pto The PositionsToOffset object
435 * @see PositionsToOffset
436 */
437 @JsonIgnore
438 public void setPositionsToOffset (PositionsToOffset pto) {
439 this.positionsToOffset = pto;
440 };
441
442
443 /**
444 * Get match ID (for later retrieval).
445 *
446 * @see MatchIdentifier
447 */
Nils Diewald010c10f2013-12-17 01:58:31 +0000448 @Override
449 @JsonProperty("ID")
450 public String getID () {
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000451
Nils Diewald498d5982014-03-03 20:09:22 +0000452 // Identifier already given
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000453 if (this.identifier != null)
454 return this.identifier;
455
Nils Diewald498d5982014-03-03 20:09:22 +0000456 // No, nada, nix
Nils Diewaldcde69082014-01-16 15:46:48 +0000457 if (this.localDocID == -1)
458 return null;
459
460 MatchIdentifier id = new MatchIdentifier();
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000461
462 // Get prefix string corpus/doc
Nils Diewaldcde69082014-01-16 15:46:48 +0000463 id.setCorpusID(this.getCorpusID());
464 id.setDocID(this.getDocID());
465 id.setStartPos(startPos);
466 id.setEndPos(endPos);
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000467
Nils Diewald498d5982014-03-03 20:09:22 +0000468 // There are highlights to integrate
Nils Diewalda115a332014-01-07 13:59:09 +0000469 if (this.highlight != null) {
Nils Diewaldcde69082014-01-16 15:46:48 +0000470 for (Highlight h : this.highlight) {
471 if (h.number >= 256)
472 continue;
473
Nils Diewald498d5982014-03-03 20:09:22 +0000474 // Add highlight to the snippet
Nils Diewaldcde69082014-01-16 15:46:48 +0000475 id.addPos(h.start, h.end, h.number);
Nils Diewalda115a332014-01-07 13:59:09 +0000476 };
477 };
478
Nils Diewaldcde69082014-01-16 15:46:48 +0000479 return (this.identifier = id.toString());
Nils Diewald010c10f2013-12-17 01:58:31 +0000480 };
481
Nils Diewald498d5982014-03-03 20:09:22 +0000482
483 /**
484 * Get identifier for a specific position.
485 *
486 * @param int Position to get identifier on.
487 */
Nils Diewald345bdc02014-01-21 21:48:57 +0000488 @JsonIgnore
489 public String getPosID (int pos) {
Nils Diewald498d5982014-03-03 20:09:22 +0000490
491 // Identifier already given
Nils Diewald345bdc02014-01-21 21:48:57 +0000492 if (this.identifier != null)
493 return this.identifier;
494
Nils Diewald498d5982014-03-03 20:09:22 +0000495 // Nothing here
Nils Diewald345bdc02014-01-21 21:48:57 +0000496 if (this.localDocID == -1)
497 return null;
498
499 PosIdentifier id = new PosIdentifier();
500
501 // Get prefix string corpus/doc
502 id.setCorpusID(this.getCorpusID());
503 id.setDocID(this.getDocID());
504 id.setPos(pos);
505
506 return id.toString();
507 };
508
Nils Diewald498d5982014-03-03 20:09:22 +0000509 /**
510 * Get possible error message.
511 */
512 // Identical to KorapResult
513 public String getError () {
514 return this.error;
515 };
516
517 /**
518 * Set error message.
519 *
520 * @param msg The error message.
521 */
522 public void setError (String msg) {
523 this.error = msg;
524 };
525
526
Nils Diewald1e5d5942014-05-20 13:29:53 +0000527 public KorapMatch setContext (SearchContext context) {
528 this.context = context;
529 return this;
530 };
531
532 @JsonIgnore
533 public SearchContext getContext () {
534 if (this.context == null)
535 this.context = new SearchContext();
536 return this.context;
537 };
538
539
540 // Expand the context to a span
541 public int[] expandContextToSpan (String element) {
542
543 // TODO: THE BITS HAVE TO BE SET!
544
545 if (this.positionsToOffset != null)
546 return this.expandContextToSpan(
547 this.positionsToOffset.getAtomicReader(),
548 (Bits) null,
549 "tokens",
550 element
551 );
552 return new int[]{0,0,0,0};
553 };
554
555 // Expand the context to a span
Nils Diewald84934372014-05-20 13:48:18 +0000556 // THIS IS NOT VERY CLEVER - MAKE IT MORE CLEVER!
Nils Diewald1e5d5942014-05-20 13:29:53 +0000557 public int[] expandContextToSpan (AtomicReaderContext atomic,
558 Bits bitset,
559 String field,
560 String element) {
561
562 try {
563 // Store character offsets in ByteBuffer
564 ByteBuffer bb = ByteBuffer.allocate(8);
565
566 SpanElementQuery cquery =
567 new SpanElementQuery(field, element);
568
569 Spans contextSpans = cquery.getSpans(
570 atomic,
571 bitset,
572 new HashMap<Term, TermContext>()
573 );
574
575 int newStart = -1,
576 newEnd = -1;
577 int newStartChar = -1,
578 newEndChar = -1;
579
580 if (DEBUG)
581 log.trace("Extend match to context boundary with {} in {}",
582 cquery.toString(),
583 this.localDocID);
584
585 while (true) {
586
587 // Game over
588 if (contextSpans.next() != true)
589 break;
590
591 if (contextSpans.doc() != this.localDocID) {
592 contextSpans.skipTo(this.localDocID);
593 if (contextSpans.doc() != this.localDocID)
594 break;
595 };
596
597 // There's a <context> found -- I'm curious,
598 // if it's closer to the match than everything before
599 if (contextSpans.start() <= this.getStartPos() &&
600 contextSpans.end() >= this.getStartPos()) {
601
602 // Set as newStart
603 newStart = contextSpans.start() > newStart ?
604 contextSpans.start() : newStart;
605
Nils Diewald84934372014-05-20 13:48:18 +0000606 if (DEBUG)
607 log.trace("NewStart is at {}", newStart);
608
Nils Diewald1e5d5942014-05-20 13:29:53 +0000609 // Get character offset (start)
610 if (contextSpans.isPayloadAvailable()) {
611 try {
612 bb.rewind();
613 for (byte[] b : contextSpans.getPayload()) {
614
615 // Not an element span
616 if (b.length != 8)
617 continue;
618
619 bb.put(b);
620 bb.rewind();
621 newStartChar = bb.getInt();
622 newEndChar = bb.getInt();
623 break;
624 };
625 }
626 catch (Exception e) {
627 log.warn(e.getMessage());
628 };
629 };
630 }
631 else {
632 // Has to be resettet to avoid multiple readings of the payload
633 newEndChar = 0;
634 };
635
636 // There's an s found, that ends after the match
637 if (contextSpans.end() >= this.getEndPos()) {
638 newEnd = contextSpans.end();
639
640 // Get character offset (end)
641 if (newEndChar == 0 && contextSpans.isPayloadAvailable()) {
642 try {
643 bb.rewind();
644 for (byte[] b : contextSpans.getPayload()) {
645
646 // Not an element span
647 if (b.length != 8)
648 continue;
649
650 bb.put(b);
651 bb.rewind();
652 newEndChar = bb.getInt(1);
653 break;
654 };
655 }
656 catch (Exception e) {
657 log.warn(e.getMessage());
658 };
659 };
660 break;
661 };
662 };
663
664 // We have a new match surrounding
665 if (DEBUG)
666 log.trace("New match spans from {}-{}/{}-{}", newStart, newEnd, newStartChar, newEndChar);
667
668 return new int[]{newStart, newEnd, newStartChar, newEndChar};
669 }
670 catch (IOException e) {
671 log.error(e.getMessage());
672 };
673
674 return new int[]{-1,-1,-1,-1};
675 };
676
Nils Diewald498d5982014-03-03 20:09:22 +0000677
678 // Reset all internal data
Nils Diewald833fe7e2013-12-14 16:06:33 +0000679 private void _reset () {
Nils Diewald498d5982014-03-03 20:09:22 +0000680 this.processed = false;
681 this.snippetHTML = null;
Nils Diewald833fe7e2013-12-14 16:06:33 +0000682 this.snippetBrackets = null;
Nils Diewald498d5982014-03-03 20:09:22 +0000683 this.identifier = null;
684
685 // Delete all spans
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000686 if (this.span != null)
687 this.span.clear();
Nils Diewaldf399a672013-11-18 17:55:22 +0000688 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000689
Nils Diewald498d5982014-03-03 20:09:22 +0000690
Nils Diewald833fe7e2013-12-14 16:06:33 +0000691 // Start building highlighted snippets
Nils Diewaldcde69082014-01-16 15:46:48 +0000692 private boolean _processHighlight () {
Nils Diewald3caa00d2013-12-13 02:24:04 +0000693 if (processed)
Nils Diewaldcde69082014-01-16 15:46:48 +0000694 return true;
695
Nils Diewald498d5982014-03-03 20:09:22 +0000696 // Relevant details are missing
Nils Diewaldcde69082014-01-16 15:46:48 +0000697 if (this.positionsToOffset == null || this.localDocID == -1) {
698 log.warn("You have to define " +
699 "positionsToOffset and localDocID first " +
700 "before");
701 return false;
702 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000703
Nils Diewald82a4b862014-02-20 21:17:41 +0000704 if (DEBUG)
Nils Diewald498d5982014-03-03 20:09:22 +0000705 log.trace("--- Start highlight processing ...");
Nils Diewaldcde69082014-01-16 15:46:48 +0000706
Nils Diewald498d5982014-03-03 20:09:22 +0000707 // Get pto object
Nils Diewaldcde69082014-01-16 15:46:48 +0000708 PositionsToOffset pto = this.positionsToOffset;
709 pto.add(this.localDocID, this.getStartPos());
710 pto.add(this.localDocID, this.getEndPos() - 1);
711
Nils Diewald82a4b862014-02-20 21:17:41 +0000712 if (DEBUG)
Nils Diewald498d5982014-03-03 20:09:22 +0000713 log.trace("PTO will retrieve {} & {} (Match boundary)",
714 this.getStartPos(),
715 this.getEndPos());
Nils Diewaldcde69082014-01-16 15:46:48 +0000716
Nils Diewald498d5982014-03-03 20:09:22 +0000717 // Add all highlights for character retrieval
Nils Diewaldcde69082014-01-16 15:46:48 +0000718 if (this.highlight != null) {
719 for (Highlight hl : this.highlight) {
720 pto.add(this.localDocID, hl.start);
721 pto.add(this.localDocID, hl.end);
Nils Diewald498d5982014-03-03 20:09:22 +0000722
723 if (DEBUG)
724 log.trace("PTO will retrieve {} & {} (Highlight boundary)",
725 hl.start, hl.end);
Nils Diewaldcde69082014-01-16 15:46:48 +0000726 };
727 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000728
729 // Get the list of spans for matches and highlighting
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000730 if (this.span == null || this.span.size() == 0) {
Nils Diewald1e5d5942014-05-20 13:29:53 +0000731 if (!this._processHighlightSpans())
Nils Diewaldcde69082014-01-16 15:46:48 +0000732 return false;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000733 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000734
Nils Diewald498d5982014-03-03 20:09:22 +0000735 // Create a stack for highlighted elements
736 // (opening and closing elements)
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000737 ArrayList<int[]> stack = this._processHighlightStack();
Nils Diewaldf399a672013-11-18 17:55:22 +0000738
Nils Diewald1e5d5942014-05-20 13:29:53 +0000739 if (DEBUG)
740 log.trace("The snippet is {}", this.tempSnippet);
741
742
Nils Diewald498d5982014-03-03 20:09:22 +0000743 // The temporary snippet is empty, nothing to do
Nils Diewald3caa00d2013-12-13 02:24:04 +0000744 if (this.tempSnippet == null) {
745 processed = true;
Nils Diewaldcde69082014-01-16 15:46:48 +0000746 return false;
Nils Diewald3caa00d2013-12-13 02:24:04 +0000747 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000748
Nils Diewald833fe7e2013-12-14 16:06:33 +0000749 // Merge the element stack with the primary textual data
Nils Diewaldf399a672013-11-18 17:55:22 +0000750 this._processHighlightSnippet(this.tempSnippet, stack);
751
Nils Diewald833fe7e2013-12-14 16:06:33 +0000752 // Match is processed - done
Nils Diewaldcde69082014-01-16 15:46:48 +0000753 return (processed = true);
Nils Diewaldf399a672013-11-18 17:55:22 +0000754 };
755
Nils Diewald498d5982014-03-03 20:09:22 +0000756
Nils Diewald833fe7e2013-12-14 16:06:33 +0000757 /*
758 Comparator class for opening tags
759 */
Nils Diewaldf399a672013-11-18 17:55:22 +0000760 private class OpeningTagComparator implements Comparator<int[]> {
761 @Override
762 public int compare (int[] arg0, int[] arg1) {
Nils Diewald833fe7e2013-12-14 16:06:33 +0000763 // Check start positions
Nils Diewaldf399a672013-11-18 17:55:22 +0000764 if (arg0[0] > arg1[0]) {
765 return 1;
766 }
767 else if (arg0[0] == arg1[0]) {
Nils Diewald833fe7e2013-12-14 16:06:33 +0000768 // Check endpositions
Nils Diewaldf399a672013-11-18 17:55:22 +0000769 if (arg0[1] > arg1[1])
770 return -1;
771 return 1;
772 };
773 return -1;
774 };
775 };
776
Nils Diewald833fe7e2013-12-14 16:06:33 +0000777 /*
778 Comparator class for closing tags
779 */
Nils Diewaldf399a672013-11-18 17:55:22 +0000780 private class ClosingTagComparator implements Comparator<int[]> {
781 @Override
782 public int compare (int[] arg0, int[] arg1) {
Nils Diewald833fe7e2013-12-14 16:06:33 +0000783 // Check end positions
Nils Diewaldf399a672013-11-18 17:55:22 +0000784 if (arg0[1] > arg1[1]) {
785 return 1;
786 }
787 else if (arg0[1] == arg1[1]) {
Nils Diewald833fe7e2013-12-14 16:06:33 +0000788 // Check start positions
Nils Diewaldf399a672013-11-18 17:55:22 +0000789 if (arg0[0] < arg1[0])
790 return 1;
791 return -1;
792 };
793 return -1;
794 };
795 };
796
Nils Diewald833fe7e2013-12-14 16:06:33 +0000797 /*
798 Private class for elements with highlighting information
799 */
Nils Diewaldf399a672013-11-18 17:55:22 +0000800 private class HighlightCombinatorElement {
Nils Diewald833fe7e2013-12-14 16:06:33 +0000801
802 // Type 0: Textual data
803 // Type 1: Opening
804 // Type 2: Closing
805 private byte type;
806
807 private int number = 0;
Nils Diewald833fe7e2013-12-14 16:06:33 +0000808
Nils Diewaldf399a672013-11-18 17:55:22 +0000809 private String characters;
Nils Diewald8c221782013-12-13 19:52:58 +0000810 private boolean terminal = true;
Nils Diewaldf399a672013-11-18 17:55:22 +0000811
Nils Diewald833fe7e2013-12-14 16:06:33 +0000812 // Constructor for highlighting elements
813 public HighlightCombinatorElement (byte type, int number) {
Nils Diewaldf399a672013-11-18 17:55:22 +0000814 this.type = type;
815 this.number = number;
816 };
817
Nils Diewald833fe7e2013-12-14 16:06:33 +0000818 // Constructor for highlighting elements,
819 // that may not be terminal, i.e. they were closed and will
820 // be reopened for overlapping issues.
821 public HighlightCombinatorElement (byte type, int number, boolean terminal) {
822 this.type = type;
823 this.number = number;
Nils Diewald8c221782013-12-13 19:52:58 +0000824 this.terminal = terminal;
825 };
826
Nils Diewald833fe7e2013-12-14 16:06:33 +0000827 // Constructor for textual data
Nils Diewaldf399a672013-11-18 17:55:22 +0000828 public HighlightCombinatorElement (String characters) {
Nils Diewald833fe7e2013-12-14 16:06:33 +0000829 this.type = (byte) 0;
Nils Diewaldf399a672013-11-18 17:55:22 +0000830 this.characters = characters;
831 };
832
Nils Diewald833fe7e2013-12-14 16:06:33 +0000833 // Return html fragment for this combinator element
Nils Diewald345bdc02014-01-21 21:48:57 +0000834 public String toHTML (KorapMatch match, FixedBitSet level, byte[] levelCache) {
Nils Diewald8c221782013-12-13 19:52:58 +0000835 // Opening
Nils Diewaldf399a672013-11-18 17:55:22 +0000836 if (this.type == 1) {
837 StringBuilder sb = new StringBuilder();
Nils Diewaldf399a672013-11-18 17:55:22 +0000838 if (this.number == -1) {
Nils Diewald3caa00d2013-12-13 02:24:04 +0000839 sb.append("<span class=\"match\">");
Nils Diewaldf399a672013-11-18 17:55:22 +0000840 }
Nils Diewald345bdc02014-01-21 21:48:57 +0000841
842 else if (this.number < -1) {
843 sb.append("<span xml:id=\"")
844 .append(match.getPosID(
845 identifierNumber.get(this.number)))
Nils Diewaldcde69082014-01-16 15:46:48 +0000846 .append("\">");
847 }
Nils Diewald345bdc02014-01-21 21:48:57 +0000848
849 else if (this.number >= 256) {
850 sb.append("<span ");
851 if (this.number < 2048) {
852 sb.append("title=\"")
853 .append(annotationNumber.get(this.number))
854 .append('"');
855 }
856 else {
857 Relation rel = relationNumber.get(this.number);
858 sb.append("xlink:title=\"")
859 .append(rel.annotation)
860 .append('"');
861 sb.append(" xlink:type=\"simple\"");
862 sb.append(" xlink:href=\"#");
863 sb.append(match.getPosID(rel.ref));
864 sb.append('"');
865 };
866 sb.append('>');
867 }
Nils Diewaldf399a672013-11-18 17:55:22 +0000868 else {
Nils Diewald8c221782013-12-13 19:52:58 +0000869 // Get the first free level slot
870 byte pos;
871 if (levelCache[this.number] != '\0') {
872 pos = levelCache[this.number];
873 }
874 else {
875 pos = (byte) level.nextSetBit(0);
876 level.clear(pos);
877 levelCache[this.number] = pos;
Nils Diewald3caa00d2013-12-13 02:24:04 +0000878 };
879 sb.append("<em class=\"class-")
880 .append(this.number)
881 .append(" level-")
Nils Diewald8c221782013-12-13 19:52:58 +0000882 .append(pos)
Nils Diewald3caa00d2013-12-13 02:24:04 +0000883 .append("\">");
Nils Diewaldf399a672013-11-18 17:55:22 +0000884 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000885 return sb.toString();
886 }
Nils Diewald8c221782013-12-13 19:52:58 +0000887 // Closing
Nils Diewaldf399a672013-11-18 17:55:22 +0000888 else if (this.type == 2) {
Nils Diewald345bdc02014-01-21 21:48:57 +0000889 if (this.number <= -1 || this.number >= 256)
Nils Diewald3caa00d2013-12-13 02:24:04 +0000890 return "</span>";
Nils Diewald8c221782013-12-13 19:52:58 +0000891
892 if (this.terminal)
893 level.set((int) levelCache[this.number]);
Nils Diewald3caa00d2013-12-13 02:24:04 +0000894 return "</em>";
Nils Diewaldf399a672013-11-18 17:55:22 +0000895 };
Nils Diewald833fe7e2013-12-14 16:06:33 +0000896
897 // HTML encode primary data
Nils Diewaldf399a672013-11-18 17:55:22 +0000898 return encodeHTML(this.characters);
899 };
900
Nils Diewald833fe7e2013-12-14 16:06:33 +0000901 // Return bracket fragment for this combinator element
Nils Diewaldf399a672013-11-18 17:55:22 +0000902 public String toBrackets () {
903 if (this.type == 1) {
904 StringBuilder sb = new StringBuilder();
Nils Diewald345bdc02014-01-21 21:48:57 +0000905
906 // Match
Nils Diewaldf399a672013-11-18 17:55:22 +0000907 if (this.number == -1) {
908 sb.append("[");
909 }
Nils Diewald345bdc02014-01-21 21:48:57 +0000910
911 // Identifier
912 else if (this.number < -1) {
913 sb.append("{#");
914 sb.append(identifierNumber.get(this.number));
915 sb.append(':');
916 }
917
918 // Highlight, Relation, Span
Nils Diewaldf399a672013-11-18 17:55:22 +0000919 else {
920 sb.append("{");
Nils Diewald345bdc02014-01-21 21:48:57 +0000921 if (this.number >= 256) {
922 if (this.number < 2048)
923 sb.append(annotationNumber.get(this.number));
924 else {
925 Relation rel = relationNumber.get(this.number);
926 sb.append(rel.annotation);
927 sb.append('>').append(rel.ref);
928 };
929 sb.append(':');
930 }
Nils Diewaldcde69082014-01-16 15:46:48 +0000931 else if (this.number != 0)
Nils Diewaldf399a672013-11-18 17:55:22 +0000932 sb.append(this.number).append(':');
933 };
934 return sb.toString();
935 }
936 else if (this.type == 2) {
Nils Diewald3caa00d2013-12-13 02:24:04 +0000937 if (this.number == -1)
Nils Diewaldf399a672013-11-18 17:55:22 +0000938 return "]";
Nils Diewaldf399a672013-11-18 17:55:22 +0000939 return "}";
940 };
941 return this.characters;
942 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000943 };
944
Nils Diewald833fe7e2013-12-14 16:06:33 +0000945 /*
946 Private class for combining highlighting elements
947 */
Nils Diewaldf399a672013-11-18 17:55:22 +0000948 private class HighlightCombinator {
949 private LinkedList<HighlightCombinatorElement> combine;
950 private LinkedList<Integer> balanceStack = new LinkedList<>();
951 private ArrayList<Integer> tempStack = new ArrayList<>(32);
952
Nils Diewald833fe7e2013-12-14 16:06:33 +0000953 // Empty constructor
Nils Diewaldf399a672013-11-18 17:55:22 +0000954 public HighlightCombinator () {
955 this.combine = new LinkedList<>();
956 };
957
Nils Diewald833fe7e2013-12-14 16:06:33 +0000958 // Return the combination stack
Nils Diewaldf399a672013-11-18 17:55:22 +0000959 public LinkedList<HighlightCombinatorElement> stack () {
960 return this.combine;
961 };
962
Nils Diewald833fe7e2013-12-14 16:06:33 +0000963 // get the first element (without removing)
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000964 public HighlightCombinatorElement getFirst () {
965 return this.combine.getFirst();
966 };
967
Nils Diewald833fe7e2013-12-14 16:06:33 +0000968 // get the last element (without removing)
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000969 public HighlightCombinatorElement getLast () {
970 return this.combine.getLast();
971 };
972
Nils Diewald833fe7e2013-12-14 16:06:33 +0000973 // get an element by index (without removing)
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000974 public HighlightCombinatorElement get (int index) {
975 return this.combine.get(index);
976 };
977
Nils Diewald833fe7e2013-12-14 16:06:33 +0000978 // Get the size of te combinator stack
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000979 public short size () {
980 return (short) this.combine.size();
981 };
982
Nils Diewald833fe7e2013-12-14 16:06:33 +0000983 // Add primary data to the stack
Nils Diewaldf399a672013-11-18 17:55:22 +0000984 public void addString (String characters) {
985 this.combine.add(new HighlightCombinatorElement(characters));
986 };
987
Nils Diewald833fe7e2013-12-14 16:06:33 +0000988 // Add opening highlight combinator to the stack
Nils Diewaldf399a672013-11-18 17:55:22 +0000989 public void addOpen (int number) {
Nils Diewald833fe7e2013-12-14 16:06:33 +0000990 this.combine.add(new HighlightCombinatorElement((byte) 1, number));
Nils Diewaldf399a672013-11-18 17:55:22 +0000991 this.balanceStack.add(number);
992 };
993
Nils Diewald833fe7e2013-12-14 16:06:33 +0000994 // Add closing highlight combinator to the stack
Nils Diewaldf399a672013-11-18 17:55:22 +0000995 public void addClose (int number) {
996 HighlightCombinatorElement lastComb;
997 this.tempStack.clear();
Nils Diewald4fca3ff2013-12-29 22:59:13 +0000998
Nils Diewald20607ab2014-03-20 23:28:36 +0000999 // Shouldn't happen
1000 if (this.balanceStack.size() == 0) {
1001 if (DEBUG)
1002 log.trace("The balance stack is empty");
1003 return;
Nils Diewald4fca3ff2013-12-29 22:59:13 +00001004 };
Nils Diewald20607ab2014-03-20 23:28:36 +00001005
1006 if (DEBUG) {
1007 StringBuilder sb = new StringBuilder(
1008 "Stack for checking with class "
1009 );
1010 sb.append(number).append(" is ");
1011 for (int s : this.balanceStack) {
1012 sb.append('[').append(s).append(']');
1013 };
Nils Diewald82a4b862014-02-20 21:17:41 +00001014 log.trace(sb.toString());
Nils Diewald20607ab2014-03-20 23:28:36 +00001015 };
Nils Diewald4fca3ff2013-12-29 22:59:13 +00001016
1017 // class number of the last element
Nils Diewaldf399a672013-11-18 17:55:22 +00001018 int eold = this.balanceStack.removeLast();
Nils Diewald8c221782013-12-13 19:52:58 +00001019
1020 // the closing element is not balanced
Nils Diewaldf399a672013-11-18 17:55:22 +00001021 while (eold != number) {
Nils Diewald8c221782013-12-13 19:52:58 +00001022
1023 // Retrieve last combinator on stack
Nils Diewaldf399a672013-11-18 17:55:22 +00001024 lastComb = this.combine.peekLast();
Nils Diewald8c221782013-12-13 19:52:58 +00001025
Nils Diewald82a4b862014-02-20 21:17:41 +00001026 if (DEBUG)
1027 log.trace("Closing element is unbalanced - {} " +
1028 "!= {} with lastComb {}|{}|{}",
1029 eold,
1030 number,
1031 lastComb.type,
1032 lastComb.number,
1033 lastComb.characters);
Nils Diewald010c10f2013-12-17 01:58:31 +00001034
Nils Diewald8c221782013-12-13 19:52:58 +00001035 // combinator is opening and the number is not equal to the last
1036 // element on the balanceStack
Nils Diewald4fca3ff2013-12-29 22:59:13 +00001037 if (lastComb.type == 1 && lastComb.number == eold) {
1038
Nils Diewald8c221782013-12-13 19:52:58 +00001039 // Remove the last element - it's empty and uninteresting!
Nils Diewaldf399a672013-11-18 17:55:22 +00001040 this.combine.removeLast();
1041 }
Nils Diewald8c221782013-12-13 19:52:58 +00001042
1043 // combinator is either closing (??) or another opener
Nils Diewaldf399a672013-11-18 17:55:22 +00001044 else {
Nils Diewald8c221782013-12-13 19:52:58 +00001045
Nils Diewald82a4b862014-02-20 21:17:41 +00001046 if (DEBUG)
1047 log.trace("close element a) {}", eold);
Nils Diewald4fca3ff2013-12-29 22:59:13 +00001048
Nils Diewald8c221782013-12-13 19:52:58 +00001049 // Add a closer for the old element (this has following elements)
Nils Diewald833fe7e2013-12-14 16:06:33 +00001050 this.combine.add(new HighlightCombinatorElement((byte) 2, eold, false));
Nils Diewaldf399a672013-11-18 17:55:22 +00001051 };
Nils Diewald8c221782013-12-13 19:52:58 +00001052
1053 // add this element number temporarily on the stack
Nils Diewaldf399a672013-11-18 17:55:22 +00001054 tempStack.add(eold);
Nils Diewald8c221782013-12-13 19:52:58 +00001055
1056 // Check next element
Nils Diewaldf399a672013-11-18 17:55:22 +00001057 eold = this.balanceStack.removeLast();
1058 };
Nils Diewald8c221782013-12-13 19:52:58 +00001059
1060 // Get last combinator on the stack
Nils Diewaldf399a672013-11-18 17:55:22 +00001061 lastComb = this.combine.peekLast();
Nils Diewald8c221782013-12-13 19:52:58 +00001062
Nils Diewald82a4b862014-02-20 21:17:41 +00001063 if (DEBUG) {
1064 log.trace("LastComb: " + lastComb.type + '|' + lastComb.number + '|' + lastComb.characters + " for " + number);
1065 log.trace("Stack for checking 2: {}|{}|{}|{}", lastComb.type, lastComb.number, lastComb.characters, number);
1066 };
Nils Diewald010c10f2013-12-17 01:58:31 +00001067
1068 if (lastComb.type == 1 && lastComb.number == number) {
1069 while (lastComb.type == 1 && lastComb.number == number) {
1070 // Remove the damn thing - It's empty and uninteresting!
1071 this.combine.removeLast();
1072 lastComb = this.combine.peekLast();
1073 };
1074 }
1075 else {
Nils Diewald82a4b862014-02-20 21:17:41 +00001076 if (DEBUG)
1077 log.trace("close element b) {}", number);
Nils Diewald4fca3ff2013-12-29 22:59:13 +00001078
Nils Diewald010c10f2013-12-17 01:58:31 +00001079 // Add a closer
1080 this.combine.add(new HighlightCombinatorElement((byte) 2, number));
1081 };
1082
1083
Nils Diewald8c221782013-12-13 19:52:58 +00001084 // Fetch everything from the tempstack and reopen it
Nils Diewaldf399a672013-11-18 17:55:22 +00001085 for (int e : tempStack) {
Nils Diewald82a4b862014-02-20 21:17:41 +00001086 if (DEBUG)
1087 log.trace("Reopen element {}", e);
Nils Diewald833fe7e2013-12-14 16:06:33 +00001088 combine.add(new HighlightCombinatorElement((byte) 1, e));
Nils Diewaldf399a672013-11-18 17:55:22 +00001089 balanceStack.add(e);
1090 };
1091 };
1092
Nils Diewald833fe7e2013-12-14 16:06:33 +00001093 // Get all combined elements as a string
Nils Diewaldf399a672013-11-18 17:55:22 +00001094 public String toString () {
1095 StringBuilder sb = new StringBuilder();
1096 for (HighlightCombinatorElement e : combine) {
1097 sb.append(e.toString()).append("\n");
1098 };
1099 return sb.toString();
1100 };
1101 };
1102
Nils Diewald498d5982014-03-03 20:09:22 +00001103 private void _processHighlightSnippet (String clean,
1104 ArrayList<int[]> stack) {
Nils Diewaldf399a672013-11-18 17:55:22 +00001105
Nils Diewald82a4b862014-02-20 21:17:41 +00001106 if (DEBUG)
Nils Diewald498d5982014-03-03 20:09:22 +00001107 log.trace("--- Process Highlight snippet");
1108
1109 int pos = 0,
1110 oldPos = 0;
Nils Diewaldf399a672013-11-18 17:55:22 +00001111
1112 this.snippetStack = new HighlightCombinator();
1113
1114 for (int[] element : stack) {
1115 pos = element[3] != 0 ? element[0] : element[1];
1116
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001117 if (pos > oldPos) {
Nils Diewaldda1722b2014-02-17 00:12:05 +00001118
Nils Diewald23bf4602014-02-18 15:47:20 +00001119 if (pos > clean.length()) {
Nils Diewaldda1722b2014-02-17 00:12:05 +00001120 pos = clean.length() - 1;
1121 };
1122
Nils Diewaldf399a672013-11-18 17:55:22 +00001123 snippetStack.addString(clean.substring(oldPos, pos));
1124
1125 oldPos = pos;
1126 };
1127
1128 if (element[3] != 0) {
1129 snippetStack.addOpen(element[2]);
1130 }
1131 else {
1132 snippetStack.addClose(element[2]);
1133 };
1134 };
1135
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001136 if (clean.length() > pos) {
1137 snippetStack.addString(clean.substring(pos));
1138 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001139 };
1140
1141 @Deprecated
1142 public String snippetHTML () {
1143 return this.getSnippetHTML();
1144 };
1145
1146 @JsonProperty("snippet")
1147 public String getSnippetHTML () {
Nils Diewaldcde69082014-01-16 15:46:48 +00001148
1149 if (!this._processHighlight())
1150 return null;
Nils Diewald3caa00d2013-12-13 02:24:04 +00001151
Nils Diewald833fe7e2013-12-14 16:06:33 +00001152 if (this.processed && this.snippetHTML != null)
Nils Diewaldf399a672013-11-18 17:55:22 +00001153 return this.snippetHTML;
1154
Nils Diewald82a4b862014-02-20 21:17:41 +00001155 if (DEBUG)
1156 log.trace("Create HTML Snippet");
Nils Diewald833fe7e2013-12-14 16:06:33 +00001157
Nils Diewaldf399a672013-11-18 17:55:22 +00001158 StringBuilder sb = new StringBuilder();
Nils Diewaldf399a672013-11-18 17:55:22 +00001159
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001160 short start = (short) 0;
Nils Diewald3caa00d2013-12-13 02:24:04 +00001161 short end = this.snippetStack.size();
Nils Diewald8c221782013-12-13 19:52:58 +00001162 FixedBitSet level = new FixedBitSet(16);
1163 level.set(0, 15);
1164 byte[] levelCache = new byte[16];
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001165
1166 HighlightCombinatorElement elem = this.snippetStack.getFirst();
1167
Nils Diewald3caa00d2013-12-13 02:24:04 +00001168 // Create context
1169 sb.append("<span class=\"context-left\">");
1170 if (startMore)
1171 sb.append("<span class=\"more\"></span>");
1172
1173 if (elem.type == 0) {
Nils Diewald345bdc02014-01-21 21:48:57 +00001174 sb.append(elem.toHTML(this, level, levelCache));
Nils Diewald3caa00d2013-12-13 02:24:04 +00001175 start++;
Nils Diewaldf399a672013-11-18 17:55:22 +00001176 };
Nils Diewald3caa00d2013-12-13 02:24:04 +00001177 sb.append("</span>");
Nils Diewaldf399a672013-11-18 17:55:22 +00001178
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001179 elem = this.snippetStack.getLast();
1180
1181 StringBuilder rightContext = new StringBuilder();
1182
1183 // Create context, if trhere is any
Nils Diewald3caa00d2013-12-13 02:24:04 +00001184 rightContext.append("<span class=\"context-right\">");
1185 if (elem != null && elem.type == 0) {
Nils Diewald345bdc02014-01-21 21:48:57 +00001186 rightContext.append(elem.toHTML(this, level, levelCache));
Nils Diewald3caa00d2013-12-13 02:24:04 +00001187 end--;
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001188 };
Nils Diewald3caa00d2013-12-13 02:24:04 +00001189 if (endMore)
1190 rightContext.append("<span class=\"more\"></span>");
1191 rightContext.append("</span>");
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001192
1193 for (short i = start; i < end; i++) {
Nils Diewald345bdc02014-01-21 21:48:57 +00001194 sb.append(this.snippetStack.get(i).toHTML(this, level,levelCache));
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001195 };
1196
Nils Diewald3caa00d2013-12-13 02:24:04 +00001197 sb.append(rightContext);
Nils Diewaldf399a672013-11-18 17:55:22 +00001198
1199 return (this.snippetHTML = sb.toString());
1200 };
1201
1202 @Deprecated
1203 public String snippetBrackets () {
1204 return this.getSnippetBrackets();
1205 };
1206
1207 @JsonIgnore
1208 public String getSnippetBrackets () {
Nils Diewald3caa00d2013-12-13 02:24:04 +00001209
Nils Diewaldcde69082014-01-16 15:46:48 +00001210 if (!this._processHighlight())
1211 return null;
Nils Diewald3caa00d2013-12-13 02:24:04 +00001212
Nils Diewald833fe7e2013-12-14 16:06:33 +00001213 if (this.processed && this.snippetBrackets != null)
Nils Diewaldf399a672013-11-18 17:55:22 +00001214 return this.snippetBrackets;
1215
1216 StringBuilder sb = new StringBuilder();
1217
1218 if (startMore)
1219 sb.append("... ");
1220
1221 for (HighlightCombinatorElement hce : this.snippetStack.stack()) {
1222 sb.append(hce.toBrackets());
1223 };
1224
1225 if (endMore)
1226 sb.append(" ...");
1227
1228 return (this.snippetBrackets = sb.toString());
1229 };
1230
1231
Nils Diewald3caa00d2013-12-13 02:24:04 +00001232 // This sorts all highlight and match spans to make them nesting correctly,
1233 // even in case they overlap
1234 // TODO: Not very fast - improve!
Nils Diewald2cd1c3d2014-01-08 22:53:08 +00001235 private ArrayList<int[]> _processHighlightStack () {
Nils Diewald82a4b862014-02-20 21:17:41 +00001236 if (DEBUG)
Nils Diewald498d5982014-03-03 20:09:22 +00001237 log.trace("--- Process Highlight stack");
Nils Diewaldf399a672013-11-18 17:55:22 +00001238
Nils Diewaldf399a672013-11-18 17:55:22 +00001239 LinkedList<int[]> openList = new LinkedList<int[]>();
1240 LinkedList<int[]> closeList = new LinkedList<int[]>();
1241
Nils Diewaldd216a032014-04-30 17:40:19 +00001242 // Filter multiple identifiers, that may be introduced and would
1243 // result in invalid xml
Nils Diewald50389b02014-04-11 16:27:52 +00001244 this._filterMultipleIdentifiers();
1245
Nils Diewald498d5982014-03-03 20:09:22 +00001246 // Add highlight spans to balance lists
1247 openList.addAll(this.span);
1248 closeList.addAll(this.span);
Nils Diewaldf399a672013-11-18 17:55:22 +00001249
Nils Diewald498d5982014-03-03 20:09:22 +00001250 // Sort balance lists
Nils Diewaldf399a672013-11-18 17:55:22 +00001251 Collections.sort(openList, new OpeningTagComparator());
1252 Collections.sort(closeList, new ClosingTagComparator());
1253
Nils Diewald498d5982014-03-03 20:09:22 +00001254 // New stack array
Nils Diewaldf399a672013-11-18 17:55:22 +00001255 ArrayList<int[]> stack = new ArrayList<>(openList.size() * 2);
1256
Nils Diewald3caa00d2013-12-13 02:24:04 +00001257 // Create stack unless both lists are empty
Nils Diewaldf399a672013-11-18 17:55:22 +00001258 while (!openList.isEmpty() || !closeList.isEmpty()) {
1259
1260 if (openList.isEmpty()) {
1261 stack.addAll(closeList);
1262 break;
Nils Diewald20607ab2014-03-20 23:28:36 +00001263 }
1264
1265 // Not sure about this, but it can happen
1266 else if (closeList.isEmpty()) {
1267 break;
Nils Diewaldf399a672013-11-18 17:55:22 +00001268 };
1269
1270 if (openList.peekFirst()[0] < closeList.peekFirst()[1]) {
1271 int[] e = openList.removeFirst().clone();
1272 e[3] = 1;
1273 stack.add(e);
1274 }
1275 else {
1276 stack.add(closeList.removeFirst());
1277 };
1278 };
1279 return stack;
1280 };
1281
Nils Diewald498d5982014-03-03 20:09:22 +00001282 /**
1283 * This will retrieve character offsets for all spans.
1284 */
Nils Diewald1e5d5942014-05-20 13:29:53 +00001285 private boolean _processHighlightSpans () {
Nils Diewald498d5982014-03-03 20:09:22 +00001286
1287 if (DEBUG)
1288 log.trace("--- Process Highlight spans");
1289
Nils Diewald498d5982014-03-03 20:09:22 +00001290 // Local document ID
Nils Diewaldf399a672013-11-18 17:55:22 +00001291 int ldid = this.localDocID;
1292
Nils Diewald1e5d5942014-05-20 13:29:53 +00001293 int startPosChar = -1, endPosChar = -1;
1294
Nils Diewald498d5982014-03-03 20:09:22 +00001295 // No positionsToOffset object found
Nils Diewaldcde69082014-01-16 15:46:48 +00001296 if (this.positionsToOffset == null)
1297 return false;
1298
Nils Diewaldf399a672013-11-18 17:55:22 +00001299 // Match position
Nils Diewald3caa00d2013-12-13 02:24:04 +00001300 startPosChar = this.positionsToOffset.start(ldid, this.startPos);
Nils Diewald498d5982014-03-03 20:09:22 +00001301
Nils Diewald20607ab2014-03-20 23:28:36 +00001302 if (DEBUG)
1303 log.trace("Unaltered startPosChar is {}", startPosChar);
1304
Nils Diewaldf399a672013-11-18 17:55:22 +00001305 // Check potential differing start characters
1306 // e.g. from element spans
Nils Diewald498d5982014-03-03 20:09:22 +00001307 if (potentialStartPosChar != -1 &&
Nils Diewald1e5d5942014-05-20 13:29:53 +00001308 (startPosChar > this.potentialStartPosChar))
1309 startPosChar = this.potentialStartPosChar;
Nils Diewaldf399a672013-11-18 17:55:22 +00001310
Nils Diewald3caa00d2013-12-13 02:24:04 +00001311 endPosChar = this.positionsToOffset.end(ldid, this.endPos - 1);
Nils Diewald20607ab2014-03-20 23:28:36 +00001312
Nils Diewald498d5982014-03-03 20:09:22 +00001313 if (DEBUG)
Nils Diewald20607ab2014-03-20 23:28:36 +00001314 log.trace("Unaltered endPosChar is {}", endPosChar);
1315
1316 // Potential end characters may come from spans with
1317 // defined character offsets like sentences including .", ... etc.
1318 if (endPosChar < potentialEndPosChar)
1319 endPosChar = potentialEndPosChar;
1320
1321 if (DEBUG)
1322 log.trace("Refined: Match offset is pos {}-{} (chars {}-{})",
Nils Diewald498d5982014-03-03 20:09:22 +00001323 this.startPos,
1324 this.endPos,
1325 startPosChar,
1326 endPosChar);
Nils Diewaldcde69082014-01-16 15:46:48 +00001327
Nils Diewald1e5d5942014-05-20 13:29:53 +00001328 this.identifier = null;
Nils Diewald498d5982014-03-03 20:09:22 +00001329
1330 // No spans yet
Nils Diewald2cd1c3d2014-01-08 22:53:08 +00001331 if (this.span == null)
1332 this.span = new LinkedList<int[]>();
1333
Nils Diewald1e5d5942014-05-20 13:29:53 +00001334 // Process offset char findings
1335 int[] intArray = this._processOffsetChars(ldid, startPosChar, endPosChar);
Nils Diewaldf399a672013-11-18 17:55:22 +00001336
Nils Diewald1e5d5942014-05-20 13:29:53 +00001337 // Recalculate startOffsetChar
1338 int startOffsetChar = startPosChar - intArray[0];
Nils Diewald20607ab2014-03-20 23:28:36 +00001339
Nils Diewald498d5982014-03-03 20:09:22 +00001340 // Add match span
Nils Diewald2cd1c3d2014-01-08 22:53:08 +00001341 this.span.add(intArray);
Nils Diewaldf399a672013-11-18 17:55:22 +00001342
1343 // highlights
Nils Diewald3caa00d2013-12-13 02:24:04 +00001344 // -- I'm not sure about this.
Nils Diewaldf399a672013-11-18 17:55:22 +00001345 if (this.highlight != null) {
Nils Diewald20607ab2014-03-20 23:28:36 +00001346 if (DEBUG)
1347 log.trace("There are highlights!");
Nils Diewald50389b02014-04-11 16:27:52 +00001348
Nils Diewaldcde69082014-01-16 15:46:48 +00001349 for (Highlight highlight : this.highlight) {
Nils Diewald498d5982014-03-03 20:09:22 +00001350 int start = this.positionsToOffset.start(
1351 ldid, highlight.start
1352 );
1353
1354 int end = this.positionsToOffset.end(
1355 ldid,
1356 highlight.end
1357 );
Nils Diewaldf399a672013-11-18 17:55:22 +00001358
Nils Diewald498d5982014-03-03 20:09:22 +00001359 if (DEBUG)
1360 log.trace("PTO has retrieved {}-{} for class {}",
1361 start,
1362 end,
1363 highlight.number);
1364
1365 start -= startOffsetChar;
1366 end -= startOffsetChar;
1367
Nils Diewald3ef9a472013-12-02 16:06:09 +00001368 if (start < 0 || end < 0)
Nils Diewaldf399a672013-11-18 17:55:22 +00001369 continue;
1370
Nils Diewald498d5982014-03-03 20:09:22 +00001371 // Create intArray for highlight
Nils Diewald3ef9a472013-12-02 16:06:09 +00001372 intArray = new int[]{
1373 start,
1374 end,
Nils Diewaldcde69082014-01-16 15:46:48 +00001375 highlight.number,
Nils Diewald3ef9a472013-12-02 16:06:09 +00001376 0 // Dummy value for later
1377 };
1378
Nils Diewald2cd1c3d2014-01-08 22:53:08 +00001379 this.span.add(intArray);
Nils Diewaldf399a672013-11-18 17:55:22 +00001380 };
1381 };
Nils Diewaldcde69082014-01-16 15:46:48 +00001382 return true;
1383 };
1384
Nils Diewaldbfe554b2014-01-09 19:35:05 +00001385
Nils Diewald1e5d5942014-05-20 13:29:53 +00001386 // Pass the local docid to retrieve character positions for the offset
1387 private int[] _processOffsetChars (int ldid, int startPosChar, int endPosChar) {
1388
1389 int startOffsetChar = -1, endOffsetChar = -1;
1390 int startOffset = -1, endOffset = -1;
1391
1392 // The offset is defined by a span
1393 if (this.getContext().isSpanDefined()) {
1394
1395 if (DEBUG)
1396 log.trace("Try to expand to <{}>",
1397 this.context.getSpanContext());
1398
1399 this.startMore = false;
1400 this.endMore = false;
1401
1402 int [] spanContext = this.expandContextToSpan(
1403 this.positionsToOffset.getAtomicReader(),
1404 (Bits) null,
1405 "tokens",
1406 this.context.getSpanContext()
1407 );
1408 startOffset = spanContext[0];
1409 endOffset = spanContext[1];
1410 startOffsetChar = spanContext[2];
1411 endOffsetChar = spanContext[3];
1412 if (DEBUG)
1413 log.trace("Got context is based from span {}-{}/{}-{}",
1414 startOffset, endOffset, startOffsetChar, endOffsetChar);
1415 };
1416
1417 // The offset is defined by tokens or characters
1418 if (endOffset == -1) {
1419
1420 PositionsToOffset pto = this.positionsToOffset;
1421
1422 // The left offset is defined by tokens
1423 if (this.context.left.isToken()) {
1424 startOffset = this.startPos - this.context.left.getLength();
1425 if (DEBUG)
1426 log.trace("PTO will retrieve {} (Left context)", startOffset);
1427 pto.add(ldid, startOffset);
1428 }
1429
1430 // The left offset is defined by characters
1431 else {
1432 startOffsetChar = startPosChar - this.context.left.getLength();
1433 };
1434
1435 // The right context is defined by tokens
1436 if (this.context.right.isToken()) {
1437 endOffset = this.endPos + this.context.right.getLength() -1;
1438 if (DEBUG)
1439 log.trace("PTO will retrieve {} (Right context)", endOffset);
1440 pto.add(ldid, endOffset);
1441
1442 }
1443
1444 // The right context is defined by characters
1445 else {
1446 endOffsetChar = (endPosChar == -1) ? -1 :
1447 endPosChar + this.context.right.getLength();
1448 };
1449
1450 if (startOffset != -1)
1451 startOffsetChar = pto.start(ldid, startOffset);
1452
1453 if (endOffset != -1)
1454 endOffsetChar = pto.end(ldid, endOffset);
1455 };
1456
1457 if (DEBUG)
1458 log.trace("Premature found offsets at {}-{}",
1459 startOffsetChar,
1460 endOffsetChar);
1461
1462
1463 // This can happen in case of non-token characters
1464 // in the match and null offsets
1465 if (startOffsetChar > startPosChar)
1466 startOffsetChar = startPosChar;
1467 else if (startOffsetChar < 0)
1468 startOffsetChar = 0;
1469
1470 // No "..." at the beginning
1471 if (startOffsetChar == 0)
1472 this.startMore = false;
1473
1474 if (endOffsetChar != -1 && endOffsetChar < endPosChar)
1475 endOffsetChar = endPosChar;
1476
1477 if (DEBUG)
1478 log.trace("The context spans from chars {}-{}",
1479 startOffsetChar, endOffsetChar);
1480
1481 // Get snippet information from the primary data
1482 if (endOffsetChar > -1 &&
1483 (endOffsetChar < this.getPrimaryDataLength())) {
1484 this.tempSnippet = this.getPrimaryData(
1485 startOffsetChar,
1486 endOffsetChar
1487 );
1488 }
1489 else {
1490 this.tempSnippet = this.getPrimaryData(startOffsetChar);
1491 this.endMore = false;
1492 };
1493
1494 if (DEBUG)
1495 log.trace("Snippet: '" + this.tempSnippet + "'");
1496
1497 if (DEBUG)
1498 log.trace("The match entry is {}-{} ({}-{}) with absolute offsetChars {}-{}",
1499 startPosChar - startOffsetChar,
1500 endPosChar - startOffsetChar,
1501 startPosChar,
1502 endPosChar,
1503 startOffsetChar,
1504 endOffsetChar);
1505
1506 // TODO: Simplify
1507 return new int[]{
1508 startPosChar - startOffsetChar,
1509 endPosChar - startOffsetChar,
1510 -1,
1511 0};
1512 };
1513
1514
Nils Diewaldbfe554b2014-01-09 19:35:05 +00001515 // Identical to KorapResult!
1516 public String toJSON () {
1517 ObjectNode json = (ObjectNode) mapper.valueToTree(this);
1518
Nils Diewaldcde69082014-01-16 15:46:48 +00001519 // Match was no match
1520 if (json.size() == 0)
1521 return "{}";
1522
Nils Diewald1e5d5942014-05-20 13:29:53 +00001523 json.put("context", this.getContext().toJSON());
Nils Diewaldbfe554b2014-01-09 19:35:05 +00001524
Nils Diewaldcdd465b2014-02-24 18:47:38 +00001525 if (this.version != null)
1526 json.put("version", this.getVersion());
1527
Nils Diewaldbfe554b2014-01-09 19:35:05 +00001528 try {
1529 return mapper.writeValueAsString(json);
1530 }
1531 catch (Exception e) {
1532 log.warn(e.getLocalizedMessage());
1533 };
1534
1535 return "{}";
1536 };
Nils Diewald50389b02014-04-11 16:27:52 +00001537
1538
1539 // Remove duplicate identifiers
1540 // Yeah ... I mean ... why not?
1541 private void _filterMultipleIdentifiers () {
1542 ArrayList<Integer> removeDuplicate = new ArrayList<>(10);
1543 HashSet<Integer> identifiers = new HashSet<>(20);
1544 for (int i = 0; i < this.span.size(); i++) {
1545 // span is an int array: [Start, End, Number, Dummy]
1546 int highlightNumber = this.span.get(i)[2];
1547
Nils Diewaldd216a032014-04-30 17:40:19 +00001548 // Number is an identifier
Nils Diewald50389b02014-04-11 16:27:52 +00001549 if (highlightNumber < -1) {
Nils Diewaldd216a032014-04-30 17:40:19 +00001550
1551 // Get the real identifier
Nils Diewald50389b02014-04-11 16:27:52 +00001552 int idNumber = identifierNumber.get(highlightNumber);
1553 if (identifiers.contains(idNumber)) {
1554 removeDuplicate.add(i);
1555 }
1556 else {
1557 identifiers.add(idNumber);
1558 };
1559 };
1560 };
1561
Nils Diewaldd216a032014-04-30 17:40:19 +00001562 // Order the duplicates to filter from the tail
Nils Diewald50389b02014-04-11 16:27:52 +00001563 Collections.sort(removeDuplicate);
1564 Collections.reverse(removeDuplicate);
1565
1566 // Delete all duplicate identifiers
1567 for (int delete : removeDuplicate) {
1568 this.span.remove(delete);
1569 };
1570 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001571};