blob: 993912c4c05f102d2c009d2511bf6f3b1bcbfdb5 [file] [log] [blame]
Nils Diewaldf399a672013-11-18 17:55:22 +00001package de.ids_mannheim.korap;
2import java.util.*;
Nils Diewald1e5d5942014-05-20 13:29:53 +00003import java.io.*;
4
Nils Diewalda115a332014-01-07 13:59:09 +00005import java.lang.StringBuffer;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +00006import java.nio.ByteBuffer;
Nils Diewaldf399a672013-11-18 17:55:22 +00007
8import com.fasterxml.jackson.annotation.*;
Nils Diewaldcde69082014-01-16 15:46:48 +00009import com.fasterxml.jackson.annotation.JsonInclude.Include;
Nils Diewaldf399a672013-11-18 17:55:22 +000010import com.fasterxml.jackson.databind.ObjectMapper;
Nils Diewaldbfe554b2014-01-09 19:35:05 +000011import com.fasterxml.jackson.databind.JsonNode;
12import com.fasterxml.jackson.databind.node.*;
Nils Diewaldf399a672013-11-18 17:55:22 +000013
14import de.ids_mannheim.korap.index.PositionsToOffset;
Nils Diewald1e5d5942014-05-20 13:29:53 +000015import de.ids_mannheim.korap.index.SearchContext;
Nils Diewaldbfe554b2014-01-09 19:35:05 +000016import de.ids_mannheim.korap.document.KorapPrimaryData;
17
Nils Diewaldf399a672013-11-18 17:55:22 +000018import static de.ids_mannheim.korap.util.KorapHTML.*;
Nils Diewaldcde69082014-01-16 15:46:48 +000019import de.ids_mannheim.korap.index.MatchIdentifier;
Nils Diewald345bdc02014-01-21 21:48:57 +000020import de.ids_mannheim.korap.index.PosIdentifier;
Nils Diewald1e5d5942014-05-20 13:29:53 +000021import de.ids_mannheim.korap.query.SpanElementQuery;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +000022
Nils Diewaldf399a672013-11-18 17:55:22 +000023import org.slf4j.Logger;
24import org.slf4j.LoggerFactory;
25
Nils Diewald1e5d5942014-05-20 13:29:53 +000026import org.apache.lucene.index.AtomicReaderContext;
27import org.apache.lucene.index.Term;
28import org.apache.lucene.index.TermContext;
Nils Diewald8c221782013-12-13 19:52:58 +000029import org.apache.lucene.util.FixedBitSet;
Nils Diewald1e5d5942014-05-20 13:29:53 +000030import org.apache.lucene.util.Bits;
Nils Diewaldbfe554b2014-01-09 19:35:05 +000031import org.apache.lucene.document.Document;
Nils Diewald1e5d5942014-05-20 13:29:53 +000032import org.apache.lucene.search.spans.Spans;
Nils Diewald8c221782013-12-13 19:52:58 +000033
/*
 * TODO: The implemented classes and private names are horrible!
 * Refactor, future-me!
 *
 * The number-based highlight type is ugly - UGLY!
 */
40
41/**
42 * Representation of Matches in a KorapResult.
43 *
Nils Diewald498d5982014-03-03 20:09:22 +000044 * @author Nils Diewald
Nils Diewaldf399a672013-11-18 17:55:22 +000045 * @see KorapResult
Nils Diewaldf399a672013-11-18 17:55:22 +000046 */
Nils Diewaldcde69082014-01-16 15:46:48 +000047@JsonInclude(Include.NON_NULL)
Nils Diewaldf399a672013-11-18 17:55:22 +000048public class KorapMatch extends KorapDocument {
Nils Diewald82a4b862014-02-20 21:17:41 +000049
Nils Diewald498d5982014-03-03 20:09:22 +000050 // Logger
51 private final static Logger log = LoggerFactory.getLogger(KorapMatch.class);
52
53 // This advices the java compiler to ignore all loggings
Nils Diewald0b809f82014-06-16 17:05:16 +000054 public static final boolean DEBUG = false;
Nils Diewald498d5982014-03-03 20:09:22 +000055
56 // Mapper for JSON serialization
Nils Diewaldf399a672013-11-18 17:55:22 +000057 ObjectMapper mapper = new ObjectMapper();
58
59 // Snippet information
60 @JsonIgnore
Nils Diewald1e5d5942014-05-20 13:29:53 +000061 public SearchContext context;
Nils Diewaldf399a672013-11-18 17:55:22 +000062
Nils Diewaldbfe554b2014-01-09 19:35:05 +000063 // Should be deprecated, but used wildly in tests!
Nils Diewaldf399a672013-11-18 17:55:22 +000064 @JsonIgnore
Nils Diewald66b8b7a2014-06-16 17:17:46 +000065 public int startPos, endPos = -1;
Nils Diewaldf399a672013-11-18 17:55:22 +000066
67 @JsonIgnore
Nils Diewald833fe7e2013-12-14 16:06:33 +000068 public int potentialStartPosChar = -1,
69 potentialEndPosChar = -1;
Nils Diewaldf399a672013-11-18 17:55:22 +000070
Nils Diewaldcde69082014-01-16 15:46:48 +000071 private String error = null;
Nils Diewaldcdd465b2014-02-24 18:47:38 +000072 private String version;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +000073
Nils Diewald1e5d5942014-05-20 13:29:53 +000074 // TEMPORARILY
Nils Diewaldcde69082014-01-16 15:46:48 +000075 @JsonIgnore
76 public int localDocID = -1;
77
Nils Diewald345bdc02014-01-21 21:48:57 +000078 HashMap<Integer, String> annotationNumber = new HashMap<>(16);
79 HashMap<Integer, Relation> relationNumber = new HashMap<>(16);
80 HashMap<Integer, Integer> identifierNumber = new HashMap<>(16);
81
82 // -1 is match highlight
Nils Diewaldcde69082014-01-16 15:46:48 +000083 int annotationNumberCounter = 256;
Nils Diewald345bdc02014-01-21 21:48:57 +000084 int relationNumberCounter = 2048;
85 int identifierNumberCounter = -2;
Nils Diewaldbfe554b2014-01-09 19:35:05 +000086
Nils Diewald833fe7e2013-12-14 16:06:33 +000087 private String tempSnippet,
88 snippetHTML,
Nils Diewald2cd1c3d2014-01-08 22:53:08 +000089 snippetBrackets,
90 identifier;
Nils Diewald833fe7e2013-12-14 16:06:33 +000091
Nils Diewaldf399a672013-11-18 17:55:22 +000092 private HighlightCombinator snippetStack;
Nils Diewald833fe7e2013-12-14 16:06:33 +000093
Nils Diewald1e5d5942014-05-20 13:29:53 +000094 public boolean startMore = true,
95 endMore = true;
Nils Diewaldf399a672013-11-18 17:55:22 +000096
97 private Collection<byte[]> payload;
Nils Diewaldcde69082014-01-16 15:46:48 +000098 private ArrayList<Highlight> highlight;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +000099 private LinkedList<int[]> span;
Nils Diewaldf399a672013-11-18 17:55:22 +0000100
Nils Diewald833fe7e2013-12-14 16:06:33 +0000101 private PositionsToOffset positionsToOffset;
Nils Diewald3caa00d2013-12-13 02:24:04 +0000102 private boolean processed = false;
103
/**
 * Constructs a new KorapMatch object bound to a document
 * and a positional range.
 * Todo: Maybe that's not necessary!
 *
 * @param pto The PositionsToOffset object, containing relevant
 *            positional information for highlighting
 * @param localDocID Document ID based on the atomic reader.
 * @param startPos Start position of the match in the document.
 * @param endPos End position of the match in the document.
 *
 * @see #snippetHTML()
 * @see #snippetBrackets()
 * @see PositionsToOffset
 */
public KorapMatch (PositionsToOffset pto, int localDocID, int startPos, int endPos) {
    // Positional range of the match
    this.startPos = startPos;
    this.endPos = endPos;

    // Document the match belongs to and its offset resolver
    this.localDocID = localDocID;
    this.positionsToOffset = pto;
}
124
Nils Diewald498d5982014-03-03 20:09:22 +0000125
Nils Diewaldf399a672013-11-18 17:55:22 +0000126 /**
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000127 * Constructs a new KorapMatch object.
128 */
129 public KorapMatch () {};
130
Nils Diewald498d5982014-03-03 20:09:22 +0000131
/**
 * Constructs a new KorapMatch object from a match identifier string.
 *
 * If the parsed identifier does not report a start position
 * greater than -1, the match is left untouched.
 *
 * @param idString Match identifier string as provided by KorapResult.
 * @param includeHighlights If true, the highlight positions given in
 *        the identifier are added to the match (as long as they lie
 *        inside the match boundaries); if false, they are ignored.
 */
public KorapMatch (String idString, boolean includeHighlights) {
    MatchIdentifier matchID = new MatchIdentifier(idString);

    // No usable start position in the identifier
    if (!(matchID.getStartPos() > -1)) {
        return;
    }

    this.setCorpusID(matchID.getCorpusID());
    this.setDocID(matchID.getDocID());
    this.setStartPos(matchID.getStartPos());
    this.setEndPos(matchID.getEndPos());

    // Highlights are not requested
    if (!includeHighlights) {
        return;
    }

    for (int[] pos : matchID.getPos()) {
        // Only respect highlights inside of the match
        boolean inside = pos[0] >= matchID.getStartPos()
            && pos[1] <= matchID.getEndPos();

        if (inside) {
            this.addHighlight(pos[0], pos[1], pos[2]);
        }
    }
}
156
Nils Diewald498d5982014-03-03 20:09:22 +0000157
158 /**
159 * Private class of highlights.
160 */
Nils Diewaldcde69082014-01-16 15:46:48 +0000161 private class Highlight {
162 public int start, end;
163 public int number = -1;
164
Nils Diewald345bdc02014-01-21 21:48:57 +0000165 // Relational highlight
166 public Highlight (int start, int end, String annotation, int ref) {
167 this.start = start;
Nils Diewald498d5982014-03-03 20:09:22 +0000168 this.end = end;
Nils Diewald345bdc02014-01-21 21:48:57 +0000169 // TODO: This can overflow!
170 this.number = relationNumberCounter++;
171 relationNumber.put(this.number, new Relation(annotation, ref));
172 };
173
174 // Span highlight
Nils Diewaldcde69082014-01-16 15:46:48 +0000175 public Highlight (int start, int end, String annotation) {
176 this.start = start;
Nils Diewald498d5982014-03-03 20:09:22 +0000177 this.end = end;
Nils Diewaldcde69082014-01-16 15:46:48 +0000178 // TODO: This can overflow!
Nils Diewald345bdc02014-01-21 21:48:57 +0000179 if (annotationNumberCounter < 2048) {
180 this.number = annotationNumberCounter++;
181 annotationNumber.put(this.number, annotation);
182 };
Nils Diewaldcde69082014-01-16 15:46:48 +0000183 };
184
Nils Diewald345bdc02014-01-21 21:48:57 +0000185 // Simple highlight
Nils Diewaldcde69082014-01-16 15:46:48 +0000186 public Highlight (int start, int end, int number) {
Nils Diewald498d5982014-03-03 20:09:22 +0000187 this.start = start;
188 this.end = end;
Nils Diewaldcde69082014-01-16 15:46:48 +0000189 this.number = number;
190 };
Nils Diewald345bdc02014-01-21 21:48:57 +0000191 };
192
Nils Diewald498d5982014-03-03 20:09:22 +0000193
194 /**
195 * Private class of relations.
196 */
Nils Diewald345bdc02014-01-21 21:48:57 +0000197 private class Relation {
198 public int ref;
199 public String annotation;
200 public Relation (String annotation, int ref) {
201 this.annotation = annotation;
202 this.ref = ref;
203 };
204 };
205
Nils Diewaldcde69082014-01-16 15:46:48 +0000206
207 /**
Nils Diewaldf399a672013-11-18 17:55:22 +0000208 * Insert a highlight for the snippet view by means of positional
209 * offsets and an optional class number.
210 *
211 * @param start Integer value of a span's positional start offset.
212 * @param end Integer value of a span's positional end offset.
213 * @param number Optional class number of the highlight.
214 */
Nils Diewaldcde69082014-01-16 15:46:48 +0000215 public void addHighlight (int start, int end) {
216 this.addHighlight(new Highlight(start, end, (int) 0));
217 };
218
Nils Diewaldf399a672013-11-18 17:55:22 +0000219 public void addHighlight (int start, int end, byte number) {
Nils Diewaldcde69082014-01-16 15:46:48 +0000220 this.addHighlight(new Highlight(start, end, (int) number));
Nils Diewaldf399a672013-11-18 17:55:22 +0000221 };
222
223 public void addHighlight (int start, int end, short number) {
Nils Diewaldcde69082014-01-16 15:46:48 +0000224 this.addHighlight(new Highlight(start, end, (int) number));
Nils Diewaldf399a672013-11-18 17:55:22 +0000225 };
226
227 public void addHighlight (int start, int end, int number) {
Nils Diewaldcde69082014-01-16 15:46:48 +0000228 this.addHighlight(new Highlight(start, end, number));
229 };
230
Nils Diewald498d5982014-03-03 20:09:22 +0000231
232 /**
233 * Insert a highlight for the snippet view.
234 *
235 * @param hl A highlight object to add to the match.
236 */
Nils Diewaldcde69082014-01-16 15:46:48 +0000237 public void addHighlight (Highlight hl) {
238
Nils Diewaldf399a672013-11-18 17:55:22 +0000239 if (this.highlight == null)
Nils Diewaldcde69082014-01-16 15:46:48 +0000240 this.highlight = new ArrayList<Highlight>(16);
Nils Diewald82a4b862014-02-20 21:17:41 +0000241
242 if (DEBUG)
Nils Diewald498d5982014-03-03 20:09:22 +0000243 log.trace("Add highlight from pos {}-{} of class {}",
244 hl.start, hl.end, hl.number);
Nils Diewaldf399a672013-11-18 17:55:22 +0000245
Nils Diewald498d5982014-03-03 20:09:22 +0000246 // Reset the fetched match data
Nils Diewald833fe7e2013-12-14 16:06:33 +0000247 this._reset();
248
Nils Diewaldcde69082014-01-16 15:46:48 +0000249 this.highlight.add(hl);
Nils Diewaldf399a672013-11-18 17:55:22 +0000250 };
251
Nils Diewaldcde69082014-01-16 15:46:48 +0000252
Nils Diewald498d5982014-03-03 20:09:22 +0000253 /**
254 * Insert a textual annotation for the snippet view by
255 * means of positional offsets and an annotation string.
256 *
257 * @param start Integer value of a span's positional start offset.
258 * @param end Integer value of a span's positional end offset.
259 * @param annotation Annotation string.
260 */
Nils Diewaldcde69082014-01-16 15:46:48 +0000261 public void addAnnotation (int start, int end, String annotation) {
262 this.addHighlight(new Highlight(start, end, annotation));
263 };
264
Nils Diewald498d5982014-03-03 20:09:22 +0000265
266 /**
267 * Insert an annotated relation for the snippet view by
268 * means of relational participant positions and an annotation string.
269 *
270 * @param src Integer value of a span's positional source object.
271 * @param target Integer value of a span's positional target object.
272 * @param annotation Annotation string.
273 */
Nils Diewald345bdc02014-01-21 21:48:57 +0000274 public void addRelation (int src, int target, String annotation) {
275 this.addHighlight(new Highlight(src, src, annotation, target));
276 int id = identifierNumberCounter--;
277 identifierNumber.put(id, target);
278 this.addHighlight(new Highlight(target, target, id));
279 };
280
Nils Diewaldcde69082014-01-16 15:46:48 +0000281
Nils Diewald498d5982014-03-03 20:09:22 +0000282 /**
283 * Populate document meta information with information coming from the index.
284 *
285 * @param doc Document object.
286 * @param field Primary data field.
287 * @param fields Hash object with all supported fields.
288 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000289 public void populateDocument (Document doc, String field, HashSet<String> fields) {
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000290 this.setField(field);
Nils Diewald498d5982014-03-03 20:09:22 +0000291 this.setPrimaryData( new KorapPrimaryData(doc.get(field)) );
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000292 if (fields.contains("corpusID"))
293 this.setCorpusID(doc.get("corpusID"));
294 if (fields.contains("ID"))
295 this.setDocID(doc.get("ID"));
296 if (fields.contains("author"))
297 this.setAuthor(doc.get("author"));
298 if (fields.contains("textClass"))
299 this.setTextClass(doc.get("textClass"));
300 if (fields.contains("title"))
301 this.setTitle(doc.get("title"));
302 if (fields.contains("subTitle"))
303 this.setSubTitle(doc.get("subTitle"));
304 if (fields.contains("pubDate"))
305 this.setPubDate(doc.get("pubDate"));
306 if (fields.contains("pubPlace"))
307 this.setPubPlace(doc.get("pubPlace"));
308
309 // Temporary (later meta fields in term vector)
310 if (fields.contains("foundries"))
311 this.setFoundries(doc.get("foundries"));
312 if (fields.contains("tokenization"))
313 this.setTokenization(doc.get("tokenization"));
314 if (fields.contains("layerInfo"))
315 this.setLayerInfo(doc.get("layerInfo"));
316 };
317
Nils Diewald498d5982014-03-03 20:09:22 +0000318
319 /**
320 * Get document id.
321 */
Nils Diewald010c10f2013-12-17 01:58:31 +0000322 @JsonProperty("docID")
323 public String getDocID () {
324 return super.getID();
325 };
326
Nils Diewald498d5982014-03-03 20:09:22 +0000327
328 /**
329 * Set document id.
330 *
331 * @param id String representation of document ID.
332 */
Nils Diewald364eb642013-12-22 15:03:01 +0000333 public void setDocID (String id) {
334 super.setID(id);
335 };
336
Nils Diewald498d5982014-03-03 20:09:22 +0000337
338 /**
339 * Set version of the index
340 */
341 @JsonIgnore
342 public String getVersion () {
343 if (this.version == null)
344 return null;
345 StringBuilder sb = new StringBuilder("lucene-backend-");
346 return sb.append(this.version).toString();
347 };
348
349
350 /**
351 * Set version number.
352 *
353 * @param version The version number of the index as
354 * a string representation.
355 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000356 @JsonIgnore
Nils Diewaldcdd465b2014-02-24 18:47:38 +0000357 public void setVersion (String version) {
358 this.version = version;
359 };
360
Nils Diewaldcdd465b2014-02-24 18:47:38 +0000361
Nils Diewald498d5982014-03-03 20:09:22 +0000362 /**
363 * Get the positional start offset of the match.
364 */
Nils Diewaldcdd465b2014-02-24 18:47:38 +0000365 @JsonIgnore
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000366 public int getStartPos() {
367 return this.startPos;
368 };
369
Nils Diewald498d5982014-03-03 20:09:22 +0000370
371 /**
372 * Set the positional start offset of the match.
373 *
374 * @param pos The positional offset.
375 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000376 @JsonIgnore
377 public void setStartPos(int pos) {
378 this.startPos = pos;
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000379 };
380
Nils Diewald498d5982014-03-03 20:09:22 +0000381
382 /**
383 * Get the positional end offset of the match.
384 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000385 @JsonIgnore
386 public int getEndPos() {
387 return this.endPos;
388 };
389
Nils Diewald498d5982014-03-03 20:09:22 +0000390
391 /**
392 * Set the positional end offset of the match.
393 *
394 * @param pos The positional offset.
395 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000396 @JsonIgnore
397 public void setEndPos(int pos) {
398 this.endPos = pos;
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000399 };
400
Nils Diewald498d5982014-03-03 20:09:22 +0000401
402 /**
403 * Get the local (i.e. Lucene given) ID of the document.
404 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000405 @JsonIgnore
406 public int getLocalDocID () {
407 return this.localDocID;
408 };
409
Nils Diewald498d5982014-03-03 20:09:22 +0000410
411 /**
412 * Set the local (i.e. Lucene given) ID of the document.
413 *
414 * @param id The id of the document.
415 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000416 @JsonIgnore
417 public void setLocalDocID (int id) {
418 this.localDocID = id;
419 };
420
Nils Diewald498d5982014-03-03 20:09:22 +0000421
422 /**
423 * Get the PositionsToOffset object.
424 *
425 * @see PositionsToOffset
426 */
Nils Diewaldbfe554b2014-01-09 19:35:05 +0000427 @JsonIgnore
428 public PositionsToOffset getPositionsToOffset () {
429 return this.positionsToOffset;
430 };
431
Nils Diewald498d5982014-03-03 20:09:22 +0000432
433 /**
434 * Set the PositionsToOffset object.
435 *
436 * @param pto The PositionsToOffset object
437 * @see PositionsToOffset
438 */
439 @JsonIgnore
440 public void setPositionsToOffset (PositionsToOffset pto) {
441 this.positionsToOffset = pto;
442 };
443
444
445 /**
446 * Get match ID (for later retrieval).
447 *
448 * @see MatchIdentifier
449 */
Nils Diewald010c10f2013-12-17 01:58:31 +0000450 @Override
451 @JsonProperty("ID")
452 public String getID () {
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000453
Nils Diewald498d5982014-03-03 20:09:22 +0000454 // Identifier already given
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000455 if (this.identifier != null)
456 return this.identifier;
457
Nils Diewald498d5982014-03-03 20:09:22 +0000458 // No, nada, nix
Nils Diewaldcde69082014-01-16 15:46:48 +0000459 if (this.localDocID == -1)
460 return null;
461
462 MatchIdentifier id = new MatchIdentifier();
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000463
464 // Get prefix string corpus/doc
Nils Diewaldcde69082014-01-16 15:46:48 +0000465 id.setCorpusID(this.getCorpusID());
466 id.setDocID(this.getDocID());
467 id.setStartPos(startPos);
468 id.setEndPos(endPos);
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000469
Nils Diewald498d5982014-03-03 20:09:22 +0000470 // There are highlights to integrate
Nils Diewalda115a332014-01-07 13:59:09 +0000471 if (this.highlight != null) {
Nils Diewaldcde69082014-01-16 15:46:48 +0000472 for (Highlight h : this.highlight) {
473 if (h.number >= 256)
474 continue;
475
Nils Diewald498d5982014-03-03 20:09:22 +0000476 // Add highlight to the snippet
Nils Diewaldcde69082014-01-16 15:46:48 +0000477 id.addPos(h.start, h.end, h.number);
Nils Diewalda115a332014-01-07 13:59:09 +0000478 };
479 };
480
Nils Diewaldcde69082014-01-16 15:46:48 +0000481 return (this.identifier = id.toString());
Nils Diewald010c10f2013-12-17 01:58:31 +0000482 };
483
Nils Diewald498d5982014-03-03 20:09:22 +0000484
485 /**
486 * Get identifier for a specific position.
487 *
488 * @param int Position to get identifier on.
489 */
Nils Diewald345bdc02014-01-21 21:48:57 +0000490 @JsonIgnore
491 public String getPosID (int pos) {
Nils Diewald498d5982014-03-03 20:09:22 +0000492
493 // Identifier already given
Nils Diewald345bdc02014-01-21 21:48:57 +0000494 if (this.identifier != null)
495 return this.identifier;
496
Nils Diewald498d5982014-03-03 20:09:22 +0000497 // Nothing here
Nils Diewald345bdc02014-01-21 21:48:57 +0000498 if (this.localDocID == -1)
499 return null;
500
501 PosIdentifier id = new PosIdentifier();
502
503 // Get prefix string corpus/doc
504 id.setCorpusID(this.getCorpusID());
505 id.setDocID(this.getDocID());
506 id.setPos(pos);
507
508 return id.toString();
509 };
510
Nils Diewald498d5982014-03-03 20:09:22 +0000511 /**
512 * Get possible error message.
513 */
514 // Identical to KorapResult
515 public String getError () {
516 return this.error;
517 };
518
519 /**
520 * Set error message.
521 *
522 * @param msg The error message.
523 */
524 public void setError (String msg) {
525 this.error = msg;
526 };
527
528
Nils Diewald1e5d5942014-05-20 13:29:53 +0000529 public KorapMatch setContext (SearchContext context) {
530 this.context = context;
531 return this;
532 };
533
534 @JsonIgnore
535 public SearchContext getContext () {
536 if (this.context == null)
537 this.context = new SearchContext();
538 return this.context;
539 };
540
541
542 // Expand the context to a span
543 public int[] expandContextToSpan (String element) {
544
545 // TODO: THE BITS HAVE TO BE SET!
546
547 if (this.positionsToOffset != null)
548 return this.expandContextToSpan(
549 this.positionsToOffset.getAtomicReader(),
550 (Bits) null,
551 "tokens",
552 element
553 );
554 return new int[]{0,0,0,0};
555 };
556
557 // Expand the context to a span
Nils Diewald84934372014-05-20 13:48:18 +0000558 // THIS IS NOT VERY CLEVER - MAKE IT MORE CLEVER!
Nils Diewald1e5d5942014-05-20 13:29:53 +0000559 public int[] expandContextToSpan (AtomicReaderContext atomic,
560 Bits bitset,
561 String field,
562 String element) {
563
564 try {
565 // Store character offsets in ByteBuffer
566 ByteBuffer bb = ByteBuffer.allocate(8);
567
568 SpanElementQuery cquery =
569 new SpanElementQuery(field, element);
570
571 Spans contextSpans = cquery.getSpans(
572 atomic,
573 bitset,
574 new HashMap<Term, TermContext>()
575 );
576
577 int newStart = -1,
578 newEnd = -1;
579 int newStartChar = -1,
580 newEndChar = -1;
581
582 if (DEBUG)
583 log.trace("Extend match to context boundary with {} in {}",
584 cquery.toString(),
585 this.localDocID);
586
587 while (true) {
588
589 // Game over
590 if (contextSpans.next() != true)
591 break;
592
593 if (contextSpans.doc() != this.localDocID) {
594 contextSpans.skipTo(this.localDocID);
595 if (contextSpans.doc() != this.localDocID)
596 break;
597 };
598
599 // There's a <context> found -- I'm curious,
600 // if it's closer to the match than everything before
601 if (contextSpans.start() <= this.getStartPos() &&
602 contextSpans.end() >= this.getStartPos()) {
603
604 // Set as newStart
605 newStart = contextSpans.start() > newStart ?
606 contextSpans.start() : newStart;
607
Nils Diewald84934372014-05-20 13:48:18 +0000608 if (DEBUG)
609 log.trace("NewStart is at {}", newStart);
610
Nils Diewald1e5d5942014-05-20 13:29:53 +0000611 // Get character offset (start)
612 if (contextSpans.isPayloadAvailable()) {
613 try {
614 bb.rewind();
615 for (byte[] b : contextSpans.getPayload()) {
616
617 // Not an element span
618 if (b.length != 8)
619 continue;
620
621 bb.put(b);
622 bb.rewind();
623 newStartChar = bb.getInt();
624 newEndChar = bb.getInt();
625 break;
626 };
627 }
628 catch (Exception e) {
629 log.warn(e.getMessage());
630 };
631 };
632 }
633 else {
634 // Has to be resettet to avoid multiple readings of the payload
635 newEndChar = 0;
636 };
637
638 // There's an s found, that ends after the match
639 if (contextSpans.end() >= this.getEndPos()) {
640 newEnd = contextSpans.end();
641
642 // Get character offset (end)
643 if (newEndChar == 0 && contextSpans.isPayloadAvailable()) {
644 try {
645 bb.rewind();
646 for (byte[] b : contextSpans.getPayload()) {
647
648 // Not an element span
649 if (b.length != 8)
650 continue;
651
652 bb.put(b);
653 bb.rewind();
654 newEndChar = bb.getInt(1);
655 break;
656 };
657 }
658 catch (Exception e) {
659 log.warn(e.getMessage());
660 };
661 };
662 break;
663 };
664 };
665
666 // We have a new match surrounding
667 if (DEBUG)
668 log.trace("New match spans from {}-{}/{}-{}", newStart, newEnd, newStartChar, newEndChar);
669
670 return new int[]{newStart, newEnd, newStartChar, newEndChar};
671 }
672 catch (IOException e) {
673 log.error(e.getMessage());
674 };
675
676 return new int[]{-1,-1,-1,-1};
677 };
678
Nils Diewald498d5982014-03-03 20:09:22 +0000679
680 // Reset all internal data
Nils Diewald833fe7e2013-12-14 16:06:33 +0000681 private void _reset () {
Nils Diewald498d5982014-03-03 20:09:22 +0000682 this.processed = false;
683 this.snippetHTML = null;
Nils Diewald833fe7e2013-12-14 16:06:33 +0000684 this.snippetBrackets = null;
Nils Diewald498d5982014-03-03 20:09:22 +0000685 this.identifier = null;
686
687 // Delete all spans
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000688 if (this.span != null)
689 this.span.clear();
Nils Diewaldf399a672013-11-18 17:55:22 +0000690 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000691
Nils Diewald498d5982014-03-03 20:09:22 +0000692
Nils Diewald833fe7e2013-12-14 16:06:33 +0000693 // Start building highlighted snippets
Nils Diewaldcde69082014-01-16 15:46:48 +0000694 private boolean _processHighlight () {
Nils Diewald3caa00d2013-12-13 02:24:04 +0000695 if (processed)
Nils Diewaldcde69082014-01-16 15:46:48 +0000696 return true;
697
Nils Diewald498d5982014-03-03 20:09:22 +0000698 // Relevant details are missing
Nils Diewaldcde69082014-01-16 15:46:48 +0000699 if (this.positionsToOffset == null || this.localDocID == -1) {
700 log.warn("You have to define " +
701 "positionsToOffset and localDocID first " +
702 "before");
703 return false;
704 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000705
Nils Diewald82a4b862014-02-20 21:17:41 +0000706 if (DEBUG)
Nils Diewald498d5982014-03-03 20:09:22 +0000707 log.trace("--- Start highlight processing ...");
Nils Diewaldcde69082014-01-16 15:46:48 +0000708
Nils Diewald498d5982014-03-03 20:09:22 +0000709 // Get pto object
Nils Diewaldcde69082014-01-16 15:46:48 +0000710 PositionsToOffset pto = this.positionsToOffset;
711 pto.add(this.localDocID, this.getStartPos());
712 pto.add(this.localDocID, this.getEndPos() - 1);
713
Nils Diewald82a4b862014-02-20 21:17:41 +0000714 if (DEBUG)
Nils Diewald498d5982014-03-03 20:09:22 +0000715 log.trace("PTO will retrieve {} & {} (Match boundary)",
716 this.getStartPos(),
717 this.getEndPos());
Nils Diewaldcde69082014-01-16 15:46:48 +0000718
Nils Diewald498d5982014-03-03 20:09:22 +0000719 // Add all highlights for character retrieval
Nils Diewaldcde69082014-01-16 15:46:48 +0000720 if (this.highlight != null) {
721 for (Highlight hl : this.highlight) {
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000722 if (hl.start >= this.getStartPos() && hl.end <= this.getEndPos()) {
723 pto.add(this.localDocID, hl.start);
724 pto.add(this.localDocID, hl.end);
Nils Diewald498d5982014-03-03 20:09:22 +0000725
Nils Diewaldd4401ec2014-06-16 17:04:02 +0000726 if (DEBUG)
727 log.trace("PTO will retrieve {} & {} (Highlight boundary)",
728 hl.start, hl.end);
729 };
Nils Diewaldcde69082014-01-16 15:46:48 +0000730 };
731 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000732
733 // Get the list of spans for matches and highlighting
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000734 if (this.span == null || this.span.size() == 0) {
Nils Diewald1e5d5942014-05-20 13:29:53 +0000735 if (!this._processHighlightSpans())
Nils Diewaldcde69082014-01-16 15:46:48 +0000736 return false;
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000737 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000738
Nils Diewald498d5982014-03-03 20:09:22 +0000739 // Create a stack for highlighted elements
740 // (opening and closing elements)
Nils Diewald2cd1c3d2014-01-08 22:53:08 +0000741 ArrayList<int[]> stack = this._processHighlightStack();
Nils Diewaldf399a672013-11-18 17:55:22 +0000742
Nils Diewald1e5d5942014-05-20 13:29:53 +0000743 if (DEBUG)
744 log.trace("The snippet is {}", this.tempSnippet);
745
746
Nils Diewald498d5982014-03-03 20:09:22 +0000747 // The temporary snippet is empty, nothing to do
Nils Diewald3caa00d2013-12-13 02:24:04 +0000748 if (this.tempSnippet == null) {
749 processed = true;
Nils Diewaldcde69082014-01-16 15:46:48 +0000750 return false;
Nils Diewald3caa00d2013-12-13 02:24:04 +0000751 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000752
Nils Diewald833fe7e2013-12-14 16:06:33 +0000753 // Merge the element stack with the primary textual data
Nils Diewaldf399a672013-11-18 17:55:22 +0000754 this._processHighlightSnippet(this.tempSnippet, stack);
755
Nils Diewald833fe7e2013-12-14 16:06:33 +0000756 // Match is processed - done
Nils Diewaldcde69082014-01-16 15:46:48 +0000757 return (processed = true);
Nils Diewaldf399a672013-11-18 17:55:22 +0000758 };
759
Nils Diewald498d5982014-03-03 20:09:22 +0000760
Nils Diewald833fe7e2013-12-14 16:06:33 +0000761 /*
762 Comparator class for opening tags
763 */
Nils Diewaldf399a672013-11-18 17:55:22 +0000764 private class OpeningTagComparator implements Comparator<int[]> {
765 @Override
766 public int compare (int[] arg0, int[] arg1) {
Nils Diewald833fe7e2013-12-14 16:06:33 +0000767 // Check start positions
Nils Diewaldf399a672013-11-18 17:55:22 +0000768 if (arg0[0] > arg1[0]) {
769 return 1;
770 }
771 else if (arg0[0] == arg1[0]) {
Nils Diewald833fe7e2013-12-14 16:06:33 +0000772 // Check endpositions
Nils Diewald439eae82014-07-15 18:35:34 +0000773 if (arg0[1] > arg1[1]) {
Nils Diewaldf399a672013-11-18 17:55:22 +0000774 return -1;
Nils Diewald439eae82014-07-15 18:35:34 +0000775 }
776 else if (arg0[1] == arg1[1]) {
777 return 0;
778 }
Nils Diewaldf399a672013-11-18 17:55:22 +0000779 return 1;
780 };
781 return -1;
782 };
783 };
784
Nils Diewald833fe7e2013-12-14 16:06:33 +0000785 /*
786 Comparator class for closing tags
787 */
Nils Diewaldf399a672013-11-18 17:55:22 +0000788 private class ClosingTagComparator implements Comparator<int[]> {
789 @Override
790 public int compare (int[] arg0, int[] arg1) {
Nils Diewald833fe7e2013-12-14 16:06:33 +0000791 // Check end positions
Nils Diewaldf399a672013-11-18 17:55:22 +0000792 if (arg0[1] > arg1[1]) {
793 return 1;
794 }
795 else if (arg0[1] == arg1[1]) {
Nils Diewald833fe7e2013-12-14 16:06:33 +0000796 // Check start positions
Nils Diewald439eae82014-07-15 18:35:34 +0000797 if (arg0[0] < arg1[0]) {
Nils Diewaldf399a672013-11-18 17:55:22 +0000798 return 1;
Nils Diewald439eae82014-07-15 18:35:34 +0000799 }
800 else if (arg0[0] == arg1[0]) {
801 return 0;
802 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000803 return -1;
804 };
805 return -1;
806 };
807 };
808
Nils Diewald833fe7e2013-12-14 16:06:33 +0000809 /*
810 Private class for elements with highlighting information
811 */
Nils Diewaldf399a672013-11-18 17:55:22 +0000812 private class HighlightCombinatorElement {
Nils Diewald833fe7e2013-12-14 16:06:33 +0000813
814 // Type 0: Textual data
815 // Type 1: Opening
816 // Type 2: Closing
817 private byte type;
818
819 private int number = 0;
Nils Diewald833fe7e2013-12-14 16:06:33 +0000820
Nils Diewaldf399a672013-11-18 17:55:22 +0000821 private String characters;
Nils Diewald8c221782013-12-13 19:52:58 +0000822 private boolean terminal = true;
Nils Diewaldf399a672013-11-18 17:55:22 +0000823
Nils Diewald833fe7e2013-12-14 16:06:33 +0000824 // Constructor for highlighting elements
825 public HighlightCombinatorElement (byte type, int number) {
Nils Diewaldf399a672013-11-18 17:55:22 +0000826 this.type = type;
827 this.number = number;
828 };
829
Nils Diewald833fe7e2013-12-14 16:06:33 +0000830 // Constructor for highlighting elements,
831 // that may not be terminal, i.e. they were closed and will
832 // be reopened for overlapping issues.
833 public HighlightCombinatorElement (byte type, int number, boolean terminal) {
834 this.type = type;
835 this.number = number;
Nils Diewald8c221782013-12-13 19:52:58 +0000836 this.terminal = terminal;
837 };
838
Nils Diewald833fe7e2013-12-14 16:06:33 +0000839 // Constructor for textual data
Nils Diewaldf399a672013-11-18 17:55:22 +0000840 public HighlightCombinatorElement (String characters) {
Nils Diewald833fe7e2013-12-14 16:06:33 +0000841 this.type = (byte) 0;
Nils Diewaldf399a672013-11-18 17:55:22 +0000842 this.characters = characters;
843 };
844
Nils Diewald833fe7e2013-12-14 16:06:33 +0000845 // Return html fragment for this combinator element
Nils Diewald345bdc02014-01-21 21:48:57 +0000846 public String toHTML (KorapMatch match, FixedBitSet level, byte[] levelCache) {
Nils Diewald8c221782013-12-13 19:52:58 +0000847 // Opening
Nils Diewaldf399a672013-11-18 17:55:22 +0000848 if (this.type == 1) {
849 StringBuilder sb = new StringBuilder();
Nils Diewaldf399a672013-11-18 17:55:22 +0000850 if (this.number == -1) {
Nils Diewald3caa00d2013-12-13 02:24:04 +0000851 sb.append("<span class=\"match\">");
Nils Diewaldf399a672013-11-18 17:55:22 +0000852 }
Nils Diewald345bdc02014-01-21 21:48:57 +0000853
854 else if (this.number < -1) {
855 sb.append("<span xml:id=\"")
856 .append(match.getPosID(
857 identifierNumber.get(this.number)))
Nils Diewaldcde69082014-01-16 15:46:48 +0000858 .append("\">");
859 }
Nils Diewald345bdc02014-01-21 21:48:57 +0000860
861 else if (this.number >= 256) {
862 sb.append("<span ");
863 if (this.number < 2048) {
864 sb.append("title=\"")
865 .append(annotationNumber.get(this.number))
866 .append('"');
867 }
868 else {
869 Relation rel = relationNumber.get(this.number);
870 sb.append("xlink:title=\"")
871 .append(rel.annotation)
872 .append('"');
873 sb.append(" xlink:type=\"simple\"");
874 sb.append(" xlink:href=\"#");
875 sb.append(match.getPosID(rel.ref));
876 sb.append('"');
877 };
878 sb.append('>');
879 }
Nils Diewaldf399a672013-11-18 17:55:22 +0000880 else {
Nils Diewald8c221782013-12-13 19:52:58 +0000881 // Get the first free level slot
882 byte pos;
883 if (levelCache[this.number] != '\0') {
884 pos = levelCache[this.number];
885 }
886 else {
887 pos = (byte) level.nextSetBit(0);
888 level.clear(pos);
889 levelCache[this.number] = pos;
Nils Diewald3caa00d2013-12-13 02:24:04 +0000890 };
891 sb.append("<em class=\"class-")
892 .append(this.number)
893 .append(" level-")
Nils Diewald8c221782013-12-13 19:52:58 +0000894 .append(pos)
Nils Diewald3caa00d2013-12-13 02:24:04 +0000895 .append("\">");
Nils Diewaldf399a672013-11-18 17:55:22 +0000896 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000897 return sb.toString();
898 }
Nils Diewald8c221782013-12-13 19:52:58 +0000899 // Closing
Nils Diewaldf399a672013-11-18 17:55:22 +0000900 else if (this.type == 2) {
Nils Diewald345bdc02014-01-21 21:48:57 +0000901 if (this.number <= -1 || this.number >= 256)
Nils Diewald3caa00d2013-12-13 02:24:04 +0000902 return "</span>";
Nils Diewald8c221782013-12-13 19:52:58 +0000903
904 if (this.terminal)
905 level.set((int) levelCache[this.number]);
Nils Diewald3caa00d2013-12-13 02:24:04 +0000906 return "</em>";
Nils Diewaldf399a672013-11-18 17:55:22 +0000907 };
Nils Diewald833fe7e2013-12-14 16:06:33 +0000908
909 // HTML encode primary data
Nils Diewaldf399a672013-11-18 17:55:22 +0000910 return encodeHTML(this.characters);
911 };
912
Nils Diewald833fe7e2013-12-14 16:06:33 +0000913 // Return bracket fragment for this combinator element
Nils Diewaldf399a672013-11-18 17:55:22 +0000914 public String toBrackets () {
915 if (this.type == 1) {
916 StringBuilder sb = new StringBuilder();
Nils Diewald345bdc02014-01-21 21:48:57 +0000917
918 // Match
Nils Diewaldf399a672013-11-18 17:55:22 +0000919 if (this.number == -1) {
920 sb.append("[");
921 }
Nils Diewald345bdc02014-01-21 21:48:57 +0000922
923 // Identifier
924 else if (this.number < -1) {
925 sb.append("{#");
926 sb.append(identifierNumber.get(this.number));
927 sb.append(':');
928 }
929
930 // Highlight, Relation, Span
Nils Diewaldf399a672013-11-18 17:55:22 +0000931 else {
932 sb.append("{");
Nils Diewald345bdc02014-01-21 21:48:57 +0000933 if (this.number >= 256) {
934 if (this.number < 2048)
935 sb.append(annotationNumber.get(this.number));
936 else {
937 Relation rel = relationNumber.get(this.number);
938 sb.append(rel.annotation);
939 sb.append('>').append(rel.ref);
940 };
941 sb.append(':');
942 }
Nils Diewaldcde69082014-01-16 15:46:48 +0000943 else if (this.number != 0)
Nils Diewaldf399a672013-11-18 17:55:22 +0000944 sb.append(this.number).append(':');
945 };
946 return sb.toString();
947 }
948 else if (this.type == 2) {
Nils Diewald3caa00d2013-12-13 02:24:04 +0000949 if (this.number == -1)
Nils Diewaldf399a672013-11-18 17:55:22 +0000950 return "]";
Nils Diewaldf399a672013-11-18 17:55:22 +0000951 return "}";
952 };
953 return this.characters;
954 };
Nils Diewaldf399a672013-11-18 17:55:22 +0000955 };
956
Nils Diewald833fe7e2013-12-14 16:06:33 +0000957 /*
958 Private class for combining highlighting elements
959 */
Nils Diewaldf399a672013-11-18 17:55:22 +0000960 private class HighlightCombinator {
961 private LinkedList<HighlightCombinatorElement> combine;
962 private LinkedList<Integer> balanceStack = new LinkedList<>();
963 private ArrayList<Integer> tempStack = new ArrayList<>(32);
964
Nils Diewald833fe7e2013-12-14 16:06:33 +0000965 // Empty constructor
Nils Diewaldf399a672013-11-18 17:55:22 +0000966 public HighlightCombinator () {
967 this.combine = new LinkedList<>();
968 };
969
Nils Diewald833fe7e2013-12-14 16:06:33 +0000970 // Return the combination stack
Nils Diewaldf399a672013-11-18 17:55:22 +0000971 public LinkedList<HighlightCombinatorElement> stack () {
972 return this.combine;
973 };
974
Nils Diewald833fe7e2013-12-14 16:06:33 +0000975 // get the first element (without removing)
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000976 public HighlightCombinatorElement getFirst () {
977 return this.combine.getFirst();
978 };
979
Nils Diewald833fe7e2013-12-14 16:06:33 +0000980 // get the last element (without removing)
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000981 public HighlightCombinatorElement getLast () {
982 return this.combine.getLast();
983 };
984
Nils Diewald833fe7e2013-12-14 16:06:33 +0000985 // get an element by index (without removing)
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000986 public HighlightCombinatorElement get (int index) {
987 return this.combine.get(index);
988 };
989
Nils Diewald833fe7e2013-12-14 16:06:33 +0000990 // Get the size of te combinator stack
Nils Diewaldf3b30ae2013-11-27 17:42:37 +0000991 public short size () {
992 return (short) this.combine.size();
993 };
994
Nils Diewald833fe7e2013-12-14 16:06:33 +0000995 // Add primary data to the stack
Nils Diewaldf399a672013-11-18 17:55:22 +0000996 public void addString (String characters) {
997 this.combine.add(new HighlightCombinatorElement(characters));
998 };
999
Nils Diewald833fe7e2013-12-14 16:06:33 +00001000 // Add opening highlight combinator to the stack
Nils Diewaldf399a672013-11-18 17:55:22 +00001001 public void addOpen (int number) {
Nils Diewald833fe7e2013-12-14 16:06:33 +00001002 this.combine.add(new HighlightCombinatorElement((byte) 1, number));
Nils Diewaldf399a672013-11-18 17:55:22 +00001003 this.balanceStack.add(number);
1004 };
1005
Nils Diewald833fe7e2013-12-14 16:06:33 +00001006 // Add closing highlight combinator to the stack
Nils Diewaldf399a672013-11-18 17:55:22 +00001007 public void addClose (int number) {
1008 HighlightCombinatorElement lastComb;
1009 this.tempStack.clear();
Nils Diewald4fca3ff2013-12-29 22:59:13 +00001010
Nils Diewald20607ab2014-03-20 23:28:36 +00001011 // Shouldn't happen
1012 if (this.balanceStack.size() == 0) {
1013 if (DEBUG)
1014 log.trace("The balance stack is empty");
1015 return;
Nils Diewald4fca3ff2013-12-29 22:59:13 +00001016 };
Nils Diewald20607ab2014-03-20 23:28:36 +00001017
1018 if (DEBUG) {
1019 StringBuilder sb = new StringBuilder(
1020 "Stack for checking with class "
1021 );
1022 sb.append(number).append(" is ");
1023 for (int s : this.balanceStack) {
1024 sb.append('[').append(s).append(']');
1025 };
Nils Diewald82a4b862014-02-20 21:17:41 +00001026 log.trace(sb.toString());
Nils Diewald20607ab2014-03-20 23:28:36 +00001027 };
Nils Diewald4fca3ff2013-12-29 22:59:13 +00001028
1029 // class number of the last element
Nils Diewaldf399a672013-11-18 17:55:22 +00001030 int eold = this.balanceStack.removeLast();
Nils Diewald8c221782013-12-13 19:52:58 +00001031
1032 // the closing element is not balanced
Nils Diewaldf399a672013-11-18 17:55:22 +00001033 while (eold != number) {
Nils Diewald8c221782013-12-13 19:52:58 +00001034
1035 // Retrieve last combinator on stack
Nils Diewaldf399a672013-11-18 17:55:22 +00001036 lastComb = this.combine.peekLast();
Nils Diewald8c221782013-12-13 19:52:58 +00001037
Nils Diewald82a4b862014-02-20 21:17:41 +00001038 if (DEBUG)
1039 log.trace("Closing element is unbalanced - {} " +
1040 "!= {} with lastComb {}|{}|{}",
1041 eold,
1042 number,
1043 lastComb.type,
1044 lastComb.number,
1045 lastComb.characters);
Nils Diewald010c10f2013-12-17 01:58:31 +00001046
Nils Diewald8c221782013-12-13 19:52:58 +00001047 // combinator is opening and the number is not equal to the last
1048 // element on the balanceStack
Nils Diewald4fca3ff2013-12-29 22:59:13 +00001049 if (lastComb.type == 1 && lastComb.number == eold) {
1050
Nils Diewald8c221782013-12-13 19:52:58 +00001051 // Remove the last element - it's empty and uninteresting!
Nils Diewaldf399a672013-11-18 17:55:22 +00001052 this.combine.removeLast();
1053 }
Nils Diewald8c221782013-12-13 19:52:58 +00001054
1055 // combinator is either closing (??) or another opener
Nils Diewaldf399a672013-11-18 17:55:22 +00001056 else {
Nils Diewald8c221782013-12-13 19:52:58 +00001057
Nils Diewald82a4b862014-02-20 21:17:41 +00001058 if (DEBUG)
1059 log.trace("close element a) {}", eold);
Nils Diewald4fca3ff2013-12-29 22:59:13 +00001060
Nils Diewald8c221782013-12-13 19:52:58 +00001061 // Add a closer for the old element (this has following elements)
Nils Diewald833fe7e2013-12-14 16:06:33 +00001062 this.combine.add(new HighlightCombinatorElement((byte) 2, eold, false));
Nils Diewaldf399a672013-11-18 17:55:22 +00001063 };
Nils Diewald8c221782013-12-13 19:52:58 +00001064
1065 // add this element number temporarily on the stack
Nils Diewaldf399a672013-11-18 17:55:22 +00001066 tempStack.add(eold);
Nils Diewald8c221782013-12-13 19:52:58 +00001067
1068 // Check next element
Nils Diewaldf399a672013-11-18 17:55:22 +00001069 eold = this.balanceStack.removeLast();
1070 };
Nils Diewald8c221782013-12-13 19:52:58 +00001071
1072 // Get last combinator on the stack
Nils Diewaldf399a672013-11-18 17:55:22 +00001073 lastComb = this.combine.peekLast();
Nils Diewald8c221782013-12-13 19:52:58 +00001074
Nils Diewald82a4b862014-02-20 21:17:41 +00001075 if (DEBUG) {
1076 log.trace("LastComb: " + lastComb.type + '|' + lastComb.number + '|' + lastComb.characters + " for " + number);
1077 log.trace("Stack for checking 2: {}|{}|{}|{}", lastComb.type, lastComb.number, lastComb.characters, number);
1078 };
Nils Diewald010c10f2013-12-17 01:58:31 +00001079
1080 if (lastComb.type == 1 && lastComb.number == number) {
1081 while (lastComb.type == 1 && lastComb.number == number) {
1082 // Remove the damn thing - It's empty and uninteresting!
1083 this.combine.removeLast();
1084 lastComb = this.combine.peekLast();
1085 };
1086 }
1087 else {
Nils Diewald82a4b862014-02-20 21:17:41 +00001088 if (DEBUG)
1089 log.trace("close element b) {}", number);
Nils Diewald4fca3ff2013-12-29 22:59:13 +00001090
Nils Diewald010c10f2013-12-17 01:58:31 +00001091 // Add a closer
1092 this.combine.add(new HighlightCombinatorElement((byte) 2, number));
1093 };
1094
1095
Nils Diewald8c221782013-12-13 19:52:58 +00001096 // Fetch everything from the tempstack and reopen it
Nils Diewaldf399a672013-11-18 17:55:22 +00001097 for (int e : tempStack) {
Nils Diewald82a4b862014-02-20 21:17:41 +00001098 if (DEBUG)
1099 log.trace("Reopen element {}", e);
Nils Diewald833fe7e2013-12-14 16:06:33 +00001100 combine.add(new HighlightCombinatorElement((byte) 1, e));
Nils Diewaldf399a672013-11-18 17:55:22 +00001101 balanceStack.add(e);
1102 };
1103 };
1104
Nils Diewald833fe7e2013-12-14 16:06:33 +00001105 // Get all combined elements as a string
Nils Diewaldf399a672013-11-18 17:55:22 +00001106 public String toString () {
1107 StringBuilder sb = new StringBuilder();
1108 for (HighlightCombinatorElement e : combine) {
1109 sb.append(e.toString()).append("\n");
1110 };
1111 return sb.toString();
1112 };
1113 };
1114
Nils Diewald498d5982014-03-03 20:09:22 +00001115 private void _processHighlightSnippet (String clean,
1116 ArrayList<int[]> stack) {
Nils Diewaldf399a672013-11-18 17:55:22 +00001117
Nils Diewald82a4b862014-02-20 21:17:41 +00001118 if (DEBUG)
Nils Diewald498d5982014-03-03 20:09:22 +00001119 log.trace("--- Process Highlight snippet");
1120
1121 int pos = 0,
1122 oldPos = 0;
Nils Diewaldf399a672013-11-18 17:55:22 +00001123
1124 this.snippetStack = new HighlightCombinator();
1125
1126 for (int[] element : stack) {
1127 pos = element[3] != 0 ? element[0] : element[1];
1128
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001129 if (pos > oldPos) {
Nils Diewaldda1722b2014-02-17 00:12:05 +00001130
Nils Diewald23bf4602014-02-18 15:47:20 +00001131 if (pos > clean.length()) {
Nils Diewaldda1722b2014-02-17 00:12:05 +00001132 pos = clean.length() - 1;
1133 };
1134
Nils Diewaldf399a672013-11-18 17:55:22 +00001135 snippetStack.addString(clean.substring(oldPos, pos));
1136
1137 oldPos = pos;
1138 };
1139
1140 if (element[3] != 0) {
1141 snippetStack.addOpen(element[2]);
1142 }
1143 else {
1144 snippetStack.addClose(element[2]);
1145 };
1146 };
1147
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001148 if (clean.length() > pos) {
1149 snippetStack.addString(clean.substring(pos));
1150 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001151 };
1152
1153 @Deprecated
1154 public String snippetHTML () {
1155 return this.getSnippetHTML();
1156 };
1157
1158 @JsonProperty("snippet")
1159 public String getSnippetHTML () {
Nils Diewaldcde69082014-01-16 15:46:48 +00001160
1161 if (!this._processHighlight())
1162 return null;
Nils Diewald3caa00d2013-12-13 02:24:04 +00001163
Nils Diewald833fe7e2013-12-14 16:06:33 +00001164 if (this.processed && this.snippetHTML != null)
Nils Diewaldf399a672013-11-18 17:55:22 +00001165 return this.snippetHTML;
1166
Nils Diewald82a4b862014-02-20 21:17:41 +00001167 if (DEBUG)
1168 log.trace("Create HTML Snippet");
Nils Diewald833fe7e2013-12-14 16:06:33 +00001169
Nils Diewaldf399a672013-11-18 17:55:22 +00001170 StringBuilder sb = new StringBuilder();
Nils Diewaldf399a672013-11-18 17:55:22 +00001171
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001172 short start = (short) 0;
Nils Diewald3caa00d2013-12-13 02:24:04 +00001173 short end = this.snippetStack.size();
Nils Diewald8c221782013-12-13 19:52:58 +00001174 FixedBitSet level = new FixedBitSet(16);
1175 level.set(0, 15);
1176 byte[] levelCache = new byte[16];
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001177
1178 HighlightCombinatorElement elem = this.snippetStack.getFirst();
1179
Nils Diewald3caa00d2013-12-13 02:24:04 +00001180 // Create context
1181 sb.append("<span class=\"context-left\">");
1182 if (startMore)
1183 sb.append("<span class=\"more\"></span>");
1184
1185 if (elem.type == 0) {
Nils Diewald345bdc02014-01-21 21:48:57 +00001186 sb.append(elem.toHTML(this, level, levelCache));
Nils Diewald3caa00d2013-12-13 02:24:04 +00001187 start++;
Nils Diewaldf399a672013-11-18 17:55:22 +00001188 };
Nils Diewald3caa00d2013-12-13 02:24:04 +00001189 sb.append("</span>");
Nils Diewaldf399a672013-11-18 17:55:22 +00001190
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001191 elem = this.snippetStack.getLast();
1192
1193 StringBuilder rightContext = new StringBuilder();
1194
1195 // Create context, if trhere is any
Nils Diewald3caa00d2013-12-13 02:24:04 +00001196 rightContext.append("<span class=\"context-right\">");
1197 if (elem != null && elem.type == 0) {
Nils Diewald345bdc02014-01-21 21:48:57 +00001198 rightContext.append(elem.toHTML(this, level, levelCache));
Nils Diewald3caa00d2013-12-13 02:24:04 +00001199 end--;
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001200 };
Nils Diewald3caa00d2013-12-13 02:24:04 +00001201 if (endMore)
1202 rightContext.append("<span class=\"more\"></span>");
1203 rightContext.append("</span>");
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001204
1205 for (short i = start; i < end; i++) {
Nils Diewald345bdc02014-01-21 21:48:57 +00001206 sb.append(this.snippetStack.get(i).toHTML(this, level,levelCache));
Nils Diewaldf3b30ae2013-11-27 17:42:37 +00001207 };
1208
Nils Diewald3caa00d2013-12-13 02:24:04 +00001209 sb.append(rightContext);
Nils Diewaldf399a672013-11-18 17:55:22 +00001210
1211 return (this.snippetHTML = sb.toString());
1212 };
1213
1214 @Deprecated
1215 public String snippetBrackets () {
1216 return this.getSnippetBrackets();
1217 };
1218
1219 @JsonIgnore
1220 public String getSnippetBrackets () {
Nils Diewald3caa00d2013-12-13 02:24:04 +00001221
Nils Diewaldcde69082014-01-16 15:46:48 +00001222 if (!this._processHighlight())
1223 return null;
Nils Diewald3caa00d2013-12-13 02:24:04 +00001224
Nils Diewald833fe7e2013-12-14 16:06:33 +00001225 if (this.processed && this.snippetBrackets != null)
Nils Diewaldf399a672013-11-18 17:55:22 +00001226 return this.snippetBrackets;
1227
1228 StringBuilder sb = new StringBuilder();
1229
1230 if (startMore)
1231 sb.append("... ");
1232
1233 for (HighlightCombinatorElement hce : this.snippetStack.stack()) {
1234 sb.append(hce.toBrackets());
1235 };
1236
1237 if (endMore)
1238 sb.append(" ...");
1239
1240 return (this.snippetBrackets = sb.toString());
1241 };
1242
1243
Nils Diewald3caa00d2013-12-13 02:24:04 +00001244 // This sorts all highlight and match spans to make them nesting correctly,
1245 // even in case they overlap
1246 // TODO: Not very fast - improve!
Nils Diewald2cd1c3d2014-01-08 22:53:08 +00001247 private ArrayList<int[]> _processHighlightStack () {
Nils Diewald82a4b862014-02-20 21:17:41 +00001248 if (DEBUG)
Nils Diewald498d5982014-03-03 20:09:22 +00001249 log.trace("--- Process Highlight stack");
Nils Diewaldf399a672013-11-18 17:55:22 +00001250
Nils Diewaldf399a672013-11-18 17:55:22 +00001251 LinkedList<int[]> openList = new LinkedList<int[]>();
1252 LinkedList<int[]> closeList = new LinkedList<int[]>();
1253
Nils Diewaldd216a032014-04-30 17:40:19 +00001254 // Filter multiple identifiers, that may be introduced and would
1255 // result in invalid xml
Nils Diewald50389b02014-04-11 16:27:52 +00001256 this._filterMultipleIdentifiers();
1257
Nils Diewald498d5982014-03-03 20:09:22 +00001258 // Add highlight spans to balance lists
1259 openList.addAll(this.span);
1260 closeList.addAll(this.span);
Nils Diewaldf399a672013-11-18 17:55:22 +00001261
Nils Diewald498d5982014-03-03 20:09:22 +00001262 // Sort balance lists
Nils Diewaldf399a672013-11-18 17:55:22 +00001263 Collections.sort(openList, new OpeningTagComparator());
1264 Collections.sort(closeList, new ClosingTagComparator());
1265
Nils Diewald498d5982014-03-03 20:09:22 +00001266 // New stack array
Nils Diewaldf399a672013-11-18 17:55:22 +00001267 ArrayList<int[]> stack = new ArrayList<>(openList.size() * 2);
1268
Nils Diewald3caa00d2013-12-13 02:24:04 +00001269 // Create stack unless both lists are empty
Nils Diewaldf399a672013-11-18 17:55:22 +00001270 while (!openList.isEmpty() || !closeList.isEmpty()) {
1271
1272 if (openList.isEmpty()) {
1273 stack.addAll(closeList);
1274 break;
Nils Diewald20607ab2014-03-20 23:28:36 +00001275 }
1276
1277 // Not sure about this, but it can happen
1278 else if (closeList.isEmpty()) {
1279 break;
Nils Diewaldf399a672013-11-18 17:55:22 +00001280 };
1281
1282 if (openList.peekFirst()[0] < closeList.peekFirst()[1]) {
1283 int[] e = openList.removeFirst().clone();
1284 e[3] = 1;
1285 stack.add(e);
1286 }
1287 else {
1288 stack.add(closeList.removeFirst());
1289 };
1290 };
1291 return stack;
1292 };
1293
Nils Diewald498d5982014-03-03 20:09:22 +00001294 /**
1295 * This will retrieve character offsets for all spans.
1296 */
Nils Diewald1e5d5942014-05-20 13:29:53 +00001297 private boolean _processHighlightSpans () {
Nils Diewald498d5982014-03-03 20:09:22 +00001298
1299 if (DEBUG)
1300 log.trace("--- Process Highlight spans");
1301
Nils Diewald498d5982014-03-03 20:09:22 +00001302 // Local document ID
Nils Diewaldf399a672013-11-18 17:55:22 +00001303 int ldid = this.localDocID;
1304
Nils Diewald1e5d5942014-05-20 13:29:53 +00001305 int startPosChar = -1, endPosChar = -1;
1306
Nils Diewald498d5982014-03-03 20:09:22 +00001307 // No positionsToOffset object found
Nils Diewaldcde69082014-01-16 15:46:48 +00001308 if (this.positionsToOffset == null)
1309 return false;
1310
Nils Diewaldf399a672013-11-18 17:55:22 +00001311 // Match position
Nils Diewald3caa00d2013-12-13 02:24:04 +00001312 startPosChar = this.positionsToOffset.start(ldid, this.startPos);
Nils Diewald498d5982014-03-03 20:09:22 +00001313
Nils Diewald20607ab2014-03-20 23:28:36 +00001314 if (DEBUG)
1315 log.trace("Unaltered startPosChar is {}", startPosChar);
1316
Nils Diewaldf399a672013-11-18 17:55:22 +00001317 // Check potential differing start characters
1318 // e.g. from element spans
Nils Diewald498d5982014-03-03 20:09:22 +00001319 if (potentialStartPosChar != -1 &&
Nils Diewald1e5d5942014-05-20 13:29:53 +00001320 (startPosChar > this.potentialStartPosChar))
1321 startPosChar = this.potentialStartPosChar;
Nils Diewaldf399a672013-11-18 17:55:22 +00001322
Nils Diewald3caa00d2013-12-13 02:24:04 +00001323 endPosChar = this.positionsToOffset.end(ldid, this.endPos - 1);
Nils Diewald20607ab2014-03-20 23:28:36 +00001324
Nils Diewald498d5982014-03-03 20:09:22 +00001325 if (DEBUG)
Nils Diewald20607ab2014-03-20 23:28:36 +00001326 log.trace("Unaltered endPosChar is {}", endPosChar);
1327
1328 // Potential end characters may come from spans with
1329 // defined character offsets like sentences including .", ... etc.
1330 if (endPosChar < potentialEndPosChar)
1331 endPosChar = potentialEndPosChar;
1332
1333 if (DEBUG)
1334 log.trace("Refined: Match offset is pos {}-{} (chars {}-{})",
Nils Diewald498d5982014-03-03 20:09:22 +00001335 this.startPos,
1336 this.endPos,
1337 startPosChar,
1338 endPosChar);
Nils Diewaldcde69082014-01-16 15:46:48 +00001339
Nils Diewald1e5d5942014-05-20 13:29:53 +00001340 this.identifier = null;
Nils Diewald498d5982014-03-03 20:09:22 +00001341
1342 // No spans yet
Nils Diewald2cd1c3d2014-01-08 22:53:08 +00001343 if (this.span == null)
1344 this.span = new LinkedList<int[]>();
1345
Nils Diewald1e5d5942014-05-20 13:29:53 +00001346 // Process offset char findings
1347 int[] intArray = this._processOffsetChars(ldid, startPosChar, endPosChar);
Nils Diewaldf399a672013-11-18 17:55:22 +00001348
Nils Diewald1e5d5942014-05-20 13:29:53 +00001349 // Recalculate startOffsetChar
1350 int startOffsetChar = startPosChar - intArray[0];
Nils Diewald20607ab2014-03-20 23:28:36 +00001351
Nils Diewald498d5982014-03-03 20:09:22 +00001352 // Add match span
Nils Diewald2cd1c3d2014-01-08 22:53:08 +00001353 this.span.add(intArray);
Nils Diewaldf399a672013-11-18 17:55:22 +00001354
1355 // highlights
Nils Diewald3caa00d2013-12-13 02:24:04 +00001356 // -- I'm not sure about this.
Nils Diewaldf399a672013-11-18 17:55:22 +00001357 if (this.highlight != null) {
Nils Diewald20607ab2014-03-20 23:28:36 +00001358 if (DEBUG)
1359 log.trace("There are highlights!");
Nils Diewald50389b02014-04-11 16:27:52 +00001360
Nils Diewaldcde69082014-01-16 15:46:48 +00001361 for (Highlight highlight : this.highlight) {
Nils Diewald498d5982014-03-03 20:09:22 +00001362 int start = this.positionsToOffset.start(
1363 ldid, highlight.start
1364 );
1365
1366 int end = this.positionsToOffset.end(
1367 ldid,
1368 highlight.end
1369 );
Nils Diewaldf399a672013-11-18 17:55:22 +00001370
Nils Diewald498d5982014-03-03 20:09:22 +00001371 if (DEBUG)
1372 log.trace("PTO has retrieved {}-{} for class {}",
1373 start,
1374 end,
1375 highlight.number);
1376
1377 start -= startOffsetChar;
1378 end -= startOffsetChar;
1379
Nils Diewald3ef9a472013-12-02 16:06:09 +00001380 if (start < 0 || end < 0)
Nils Diewaldf399a672013-11-18 17:55:22 +00001381 continue;
1382
Nils Diewald498d5982014-03-03 20:09:22 +00001383 // Create intArray for highlight
Nils Diewald3ef9a472013-12-02 16:06:09 +00001384 intArray = new int[]{
1385 start,
1386 end,
Nils Diewaldcde69082014-01-16 15:46:48 +00001387 highlight.number,
Nils Diewald3ef9a472013-12-02 16:06:09 +00001388 0 // Dummy value for later
1389 };
1390
Nils Diewald2cd1c3d2014-01-08 22:53:08 +00001391 this.span.add(intArray);
Nils Diewaldf399a672013-11-18 17:55:22 +00001392 };
1393 };
Nils Diewaldcde69082014-01-16 15:46:48 +00001394 return true;
1395 };
1396
Nils Diewaldbfe554b2014-01-09 19:35:05 +00001397
Nils Diewald1e5d5942014-05-20 13:29:53 +00001398 // Pass the local docid to retrieve character positions for the offset
1399 private int[] _processOffsetChars (int ldid, int startPosChar, int endPosChar) {
1400
1401 int startOffsetChar = -1, endOffsetChar = -1;
1402 int startOffset = -1, endOffset = -1;
1403
1404 // The offset is defined by a span
1405 if (this.getContext().isSpanDefined()) {
1406
1407 if (DEBUG)
1408 log.trace("Try to expand to <{}>",
1409 this.context.getSpanContext());
1410
1411 this.startMore = false;
1412 this.endMore = false;
1413
1414 int [] spanContext = this.expandContextToSpan(
1415 this.positionsToOffset.getAtomicReader(),
1416 (Bits) null,
1417 "tokens",
1418 this.context.getSpanContext()
1419 );
1420 startOffset = spanContext[0];
1421 endOffset = spanContext[1];
1422 startOffsetChar = spanContext[2];
1423 endOffsetChar = spanContext[3];
1424 if (DEBUG)
1425 log.trace("Got context is based from span {}-{}/{}-{}",
1426 startOffset, endOffset, startOffsetChar, endOffsetChar);
1427 };
1428
1429 // The offset is defined by tokens or characters
1430 if (endOffset == -1) {
1431
1432 PositionsToOffset pto = this.positionsToOffset;
1433
1434 // The left offset is defined by tokens
1435 if (this.context.left.isToken()) {
1436 startOffset = this.startPos - this.context.left.getLength();
1437 if (DEBUG)
1438 log.trace("PTO will retrieve {} (Left context)", startOffset);
1439 pto.add(ldid, startOffset);
1440 }
1441
1442 // The left offset is defined by characters
1443 else {
1444 startOffsetChar = startPosChar - this.context.left.getLength();
1445 };
1446
1447 // The right context is defined by tokens
1448 if (this.context.right.isToken()) {
1449 endOffset = this.endPos + this.context.right.getLength() -1;
1450 if (DEBUG)
1451 log.trace("PTO will retrieve {} (Right context)", endOffset);
1452 pto.add(ldid, endOffset);
1453
1454 }
1455
1456 // The right context is defined by characters
1457 else {
1458 endOffsetChar = (endPosChar == -1) ? -1 :
1459 endPosChar + this.context.right.getLength();
1460 };
1461
1462 if (startOffset != -1)
1463 startOffsetChar = pto.start(ldid, startOffset);
1464
1465 if (endOffset != -1)
1466 endOffsetChar = pto.end(ldid, endOffset);
1467 };
1468
1469 if (DEBUG)
1470 log.trace("Premature found offsets at {}-{}",
1471 startOffsetChar,
1472 endOffsetChar);
1473
1474
1475 // This can happen in case of non-token characters
1476 // in the match and null offsets
1477 if (startOffsetChar > startPosChar)
1478 startOffsetChar = startPosChar;
1479 else if (startOffsetChar < 0)
1480 startOffsetChar = 0;
1481
1482 // No "..." at the beginning
1483 if (startOffsetChar == 0)
1484 this.startMore = false;
1485
1486 if (endOffsetChar != -1 && endOffsetChar < endPosChar)
1487 endOffsetChar = endPosChar;
1488
1489 if (DEBUG)
1490 log.trace("The context spans from chars {}-{}",
1491 startOffsetChar, endOffsetChar);
1492
1493 // Get snippet information from the primary data
1494 if (endOffsetChar > -1 &&
1495 (endOffsetChar < this.getPrimaryDataLength())) {
1496 this.tempSnippet = this.getPrimaryData(
1497 startOffsetChar,
1498 endOffsetChar
1499 );
1500 }
1501 else {
1502 this.tempSnippet = this.getPrimaryData(startOffsetChar);
1503 this.endMore = false;
1504 };
1505
1506 if (DEBUG)
1507 log.trace("Snippet: '" + this.tempSnippet + "'");
1508
1509 if (DEBUG)
1510 log.trace("The match entry is {}-{} ({}-{}) with absolute offsetChars {}-{}",
1511 startPosChar - startOffsetChar,
1512 endPosChar - startOffsetChar,
1513 startPosChar,
1514 endPosChar,
1515 startOffsetChar,
1516 endOffsetChar);
1517
1518 // TODO: Simplify
1519 return new int[]{
1520 startPosChar - startOffsetChar,
1521 endPosChar - startOffsetChar,
1522 -1,
1523 0};
1524 };
1525
1526
Nils Diewaldbfe554b2014-01-09 19:35:05 +00001527 // Identical to KorapResult!
1528 public String toJSON () {
1529 ObjectNode json = (ObjectNode) mapper.valueToTree(this);
1530
Nils Diewaldcde69082014-01-16 15:46:48 +00001531 // Match was no match
1532 if (json.size() == 0)
1533 return "{}";
1534
Nils Diewald54187632014-06-11 14:39:29 +00001535 if (this.context != null)
1536 json.put("context", this.getContext().toJSON());
Nils Diewaldbfe554b2014-01-09 19:35:05 +00001537
Nils Diewaldcdd465b2014-02-24 18:47:38 +00001538 if (this.version != null)
1539 json.put("version", this.getVersion());
1540
Nils Diewaldbfe554b2014-01-09 19:35:05 +00001541 try {
1542 return mapper.writeValueAsString(json);
1543 }
1544 catch (Exception e) {
1545 log.warn(e.getLocalizedMessage());
1546 };
1547
1548 return "{}";
1549 };
Nils Diewald50389b02014-04-11 16:27:52 +00001550
1551
1552 // Remove duplicate identifiers
1553 // Yeah ... I mean ... why not?
1554 private void _filterMultipleIdentifiers () {
1555 ArrayList<Integer> removeDuplicate = new ArrayList<>(10);
1556 HashSet<Integer> identifiers = new HashSet<>(20);
1557 for (int i = 0; i < this.span.size(); i++) {
1558 // span is an int array: [Start, End, Number, Dummy]
1559 int highlightNumber = this.span.get(i)[2];
1560
Nils Diewaldd216a032014-04-30 17:40:19 +00001561 // Number is an identifier
Nils Diewald50389b02014-04-11 16:27:52 +00001562 if (highlightNumber < -1) {
Nils Diewaldd216a032014-04-30 17:40:19 +00001563
1564 // Get the real identifier
Nils Diewald50389b02014-04-11 16:27:52 +00001565 int idNumber = identifierNumber.get(highlightNumber);
1566 if (identifiers.contains(idNumber)) {
1567 removeDuplicate.add(i);
1568 }
1569 else {
1570 identifiers.add(idNumber);
1571 };
1572 };
1573 };
1574
Nils Diewaldd216a032014-04-30 17:40:19 +00001575 // Order the duplicates to filter from the tail
Nils Diewald50389b02014-04-11 16:27:52 +00001576 Collections.sort(removeDuplicate);
1577 Collections.reverse(removeDuplicate);
1578
1579 // Delete all duplicate identifiers
1580 for (int delete : removeDuplicate) {
1581 this.span.remove(delete);
1582 };
1583 };
Nils Diewaldf399a672013-11-18 17:55:22 +00001584};