| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 1 | package de.ids_mannheim.korap.response; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 2 | |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 3 | import java.io.IOException; |
| Nils Diewald | 2cd1c3d | 2014-01-08 22:53:08 +0000 | [diff] [blame] | 4 | import java.nio.ByteBuffer; |
| Akron | 6590c32 | 2015-07-02 16:08:13 +0200 | [diff] [blame] | 5 | import java.util.*; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 6 | |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 7 | import org.apache.lucene.index.LeafReaderContext; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 8 | import org.apache.lucene.index.Term; |
| 9 | import org.apache.lucene.index.TermContext; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 10 | import org.apache.lucene.search.spans.Spans; |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 11 | import org.apache.lucene.search.spans.SpanTermQuery; |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 12 | import org.apache.lucene.util.Bits; |
| 13 | import org.apache.lucene.util.FixedBitSet; |
| 14 | import org.slf4j.Logger; |
| 15 | import org.slf4j.LoggerFactory; |
| 16 | |
| 17 | import com.fasterxml.jackson.annotation.JsonIgnore; |
| 18 | import com.fasterxml.jackson.annotation.JsonInclude; |
| 19 | import com.fasterxml.jackson.annotation.JsonInclude.Include; |
| 20 | import com.fasterxml.jackson.annotation.JsonProperty; |
| 21 | import com.fasterxml.jackson.databind.ObjectMapper; |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 22 | import com.fasterxml.jackson.databind.JsonNode; |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 23 | import com.fasterxml.jackson.databind.node.ArrayNode; |
| 24 | import com.fasterxml.jackson.databind.node.ObjectNode; |
| 25 | |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 26 | import static de.ids_mannheim.korap.util.KrillByte.*; |
| margaretha | 50c7633 | 2015-03-19 10:10:39 +0100 | [diff] [blame] | 27 | import de.ids_mannheim.korap.index.AbstractDocument; |
| 28 | import de.ids_mannheim.korap.index.PositionsToOffset; |
| 29 | import de.ids_mannheim.korap.query.SpanElementQuery; |
| 30 | import de.ids_mannheim.korap.response.match.HighlightCombinator; |
| 31 | import de.ids_mannheim.korap.response.match.HighlightCombinatorElement; |
| 32 | import de.ids_mannheim.korap.response.match.MatchIdentifier; |
| 33 | import de.ids_mannheim.korap.response.match.PosIdentifier; |
| 34 | import de.ids_mannheim.korap.response.match.Relation; |
| Nils Diewald | 8c22178 | 2013-12-13 19:52:58 +0000 | [diff] [blame] | 35 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 36 | /* |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 37 | * The snippet building algorithm is quite complicated for now |
| 38 | * and should probably be refactored. |
| 39 | * It works like this: |
| 40 | * |
| 41 | * 1. For all spans and highlights, pagebreaks etc. all necessary |
| 42 | * positions are collected (processHighlight) |
| 43 | * 2. For all collected positions the character offsets are retrieved |
| 44 | * and based on that for all spans and highlights a list |
| 45 | * is created with arrays of the spans with the structure |
| 46 | * [startchar, endchar, highlightClass] (processHighlightSpans) |
| 47 | * 2.1 The primary data and optional context information is retrieved |
| 48 | * (processOffsetChars) |
| 49 | * 3. Based on the collected spans 2 lists are created for opening and |
| 50 | * closing tags (pretty much clones of the initial span list), |
| 51 | * sorted for opening resp. closing, and processed in parallel |
| 52 | * to form an open/close stack. The new structure on the stack is |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 53 | * [startchar, endchar, highlightclass, close=0/open=1/empty=2] |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 54 | * (processHighlightStack) |
| 55 | * 3.1. If the element is a relation with an identifier, this may |
| 56 | * be removed if duplicate (filterMultipleIdentifiers) |
| 57 | * 4. Based on the stack and the primary data the snippet is created. |
| 58 | * (processHighlightSnippet) |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 59 | * 4.1. To avoid unbalanced elements, all open/close/empty tags |
| 60 | * are balanced (i.e. closed and reopened if overlaps occur). |
| 61 | * (HighlightCombinator) |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 62 | */ |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 63 | |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 64 | /* |
| 65 | * Todo: The implemented classes and private names are horrible! |
| 66 | * Refactor, future-me! |
| 67 | * |
| 68 | * The number based Highlighttype is ugly - UGLY! |
| 69 | * |
| 70 | * substrings may be out of range - e.g. if snippets are not lifted! |
| 71 | */ |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 72 | |
| 73 | /** |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 74 | * Representation of Matches in a Result. |
| Akron | 75ee2b8 | 2016-06-20 21:20:34 +0200 | [diff] [blame] | 75 | * <strong>Warning:</strong> This is currently highly dependent |
| Akron | 3e0403f | 2015-06-24 20:59:13 +0200 | [diff] [blame] | 76 | * on DeReKo data and will change in the future. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 77 | * |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 78 | * @author Nils Diewald |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 79 | * @see Result |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 80 | */ |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 81 | @JsonInclude(Include.NON_NULL) |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 82 | public class Match extends AbstractDocument { |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 83 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 84 | // Logger |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 85 | private final static Logger log = LoggerFactory.getLogger(Match.class); |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 86 | |
| Akron | 70ce0c0 | 2018-05-25 23:44:26 +0200 | [diff] [blame] | 87 | private static final int MAX_MATCH_TOKENS = 50; |
| 88 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 89 | // end marker of highlights that are pagebreaks |
| 90 | private static final int PB_MARKER = -99999; |
| 91 | |
| Akron | 12cd258 | 2018-02-17 12:58:38 +0100 | [diff] [blame] | 92 | // Textual elements that are in context |
| 93 | private static final int CONTEXT = -99998; |
| 94 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 95 | // This advices the java compiler to ignore all loggings |
| Akron | 04f0095 | 2018-03-06 18:56:54 +0100 | [diff] [blame] | 96 | public static final boolean DEBUG = false; |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 97 | |
| 98 | // Mapper for JSON serialization |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 99 | ObjectMapper mapper = new ObjectMapper(); |
| 100 | |
| 101 | // Snippet information |
| 102 | @JsonIgnore |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 103 | public SearchContext context; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 104 | |
| Nils Diewald | 7534fdf | 2014-11-27 02:28:10 +0000 | [diff] [blame] | 105 | // Public, while used wildly in tests! |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 106 | @JsonIgnore |
| Nils Diewald | 66b8b7a | 2014-06-16 17:17:46 +0000 | [diff] [blame] | 107 | public int startPos, endPos = -1; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 108 | |
| 109 | @JsonIgnore |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 110 | private int innerMatchStartPos, innerMatchEndPos = -1; |
| 111 | |
| 112 | @JsonIgnore |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 113 | public int potentialStartPosChar = -1, potentialEndPosChar = -1; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 114 | |
| Akron | 70ce0c0 | 2018-05-25 23:44:26 +0200 | [diff] [blame] | 115 | @JsonIgnore |
| 116 | public boolean cutted = false; |
| 117 | |
| Nils Diewald | cdd465b | 2014-02-24 18:47:38 +0000 | [diff] [blame] | 118 | private String version; |
| Nils Diewald | 2cd1c3d | 2014-01-08 22:53:08 +0000 | [diff] [blame] | 119 | |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 120 | // TEMPORARILY |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 121 | @JsonIgnore |
| 122 | public int localDocID = -1; |
| 123 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 124 | private HashMap<Integer, String> annotationNumber = new HashMap<>(16); |
| 125 | private HashMap<Integer, Relation> relationNumber = new HashMap<>(16); |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 126 | private HashMap<Integer, String> identifierNumber = new HashMap<>(16); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 127 | |
| 128 | // -1 is match highlight |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 129 | int annotationNumberCounter = 256; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 130 | int relationNumberCounter = 2048; |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 131 | int identifierNumberCounter = -2; |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 132 | |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 133 | private int startPage = -1; |
| 134 | private int endPage = -1; |
| 135 | |
| Akron | 7e75097 | 2018-03-23 14:21:21 +0100 | [diff] [blame] | 136 | private String tempSnippet, |
| 137 | snippetHTML, |
| 138 | snippetBrackets, |
| 139 | identifier, |
| 140 | mirrorIdentifier; |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 141 | |
| Nils Diewald | 4679010 | 2014-09-18 16:05:42 +0000 | [diff] [blame] | 142 | private HighlightCombinator snippetArray; |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 143 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 144 | public boolean startMore = true, endMore = true; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 145 | |
| 146 | private Collection<byte[]> payload; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 147 | private ArrayList<Highlight> highlight; |
| Nils Diewald | 2cd1c3d | 2014-01-08 22:53:08 +0000 | [diff] [blame] | 148 | private LinkedList<int[]> span; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 149 | |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 150 | private PositionsToOffset positionsToOffset; |
| Nils Diewald | 3caa00d | 2013-12-13 02:24:04 +0000 | [diff] [blame] | 151 | private boolean processed = false; |
| 152 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 153 | |
/**
 * Creates a match for a token span of a document.
 * Todo: Maybe that's not necessary!
 *
 * @param pto
 *            The PositionsToOffset object, containing relevant
 *            positional information for highlighting.
 * @param localDocID
 *            Document ID based on the atomic reader.
 * @param startPos
 *            Start position of the match in the document.
 * @param endPos
 *            End position of the match in the document.
 *
 * @see PositionsToOffset
 */
public Match (PositionsToOffset pto, int localDocID, int startPos,
              int endPos) {
    this.localDocID = localDocID;
    this.positionsToOffset = pto;

    // Both positions go through their setters (start first, then end)
    // instead of being written to the fields directly
    setStartPos(startPos);
    setEndPos(endPos);
}
| 179 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 180 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 181 | /** |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 182 | * Constructs a new Match object. |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 183 | */ |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 184 | public Match () {}; |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 185 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 186 | |
/**
 * Creates a match based on a match identifier string.
 * If the identifier carries no start position, the match is left
 * uninitialised.
 *
 * @param idString
 *            Match identifier string as provided by Result.
 * @param includeHighlights
 *            If <tt>true</tt>, the highlight positions carried by
 *            the identifier are added to the match, as long as they
 *            lie inside of the match span.
 */
public Match (String idString, boolean includeHighlights) {
    MatchIdentifier id = new MatchIdentifier(idString);

    // Without a start position there is nothing to take over
    if (id.getStartPos() <= -1) {
        return;
    }

    this.mirrorIdentifier = id.toString();

    if (id.getTextSigle() != null) {
        setTextSigle(id.getTextSigle());
    }

    // <legacy>
    setCorpusID(id.getCorpusID());
    setDocID(id.getDocID());
    // </legacy>

    setStartPos(id.getStartPos());
    setEndPos(id.getEndPos());

    if (!includeHighlights) {
        return;
    }

    // Each entry is [start, end, class number]
    for (int[] pos : id.getPos()) {

        // Only accept highlights that do not exceed the match span
        if (pos[0] >= id.getStartPos() && pos[1] <= id.getEndPos()) {
            addHighlight(pos[0], pos[1], pos[2]);
        }
    }
}
| 221 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 222 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 223 | /** |
| 224 | * Private class of highlights. |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 225 | * TODO: This should probably be renamed, as it not only contains highlights |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 226 | * but also annotations, pagebreaks and relations |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 227 | */ |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 228 | private class Highlight { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 229 | public int start, end; |
| 230 | public int number = -1; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 231 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 232 | // Relational highlight |
| Akron | 652e436 | 2017-09-18 20:14:44 +0200 | [diff] [blame] | 233 | public Highlight (int start, int end, String annotation, int refStart, int refEnd) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 234 | this.start = start; |
| 235 | this.end = end; |
| 236 | // TODO: This can overflow! |
| 237 | this.number = relationNumberCounter++; |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 238 | |
| 239 | if (DEBUG) { |
| 240 | log.trace("Add relation (2) '{}': source={}-{} >> target={}-{}", |
| 241 | annotation, start, end, refStart, refEnd); |
| 242 | }; |
| 243 | |
| Akron | 652e436 | 2017-09-18 20:14:44 +0200 | [diff] [blame] | 244 | relationNumber.put(this.number, new Relation(annotation, refStart, refEnd)); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 245 | }; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 246 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 247 | |
| 248 | // Span highlight |
| 249 | public Highlight (int start, int end, String annotation) { |
| 250 | this.start = start; |
| 251 | this.end = end; |
| Akron | cb1093a | 2016-07-28 16:27:59 +0200 | [diff] [blame] | 252 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 253 | // TODO: This can overflow! |
| 254 | if (annotationNumberCounter < 2048) { |
| 255 | this.number = annotationNumberCounter++; |
| 256 | annotationNumber.put(this.number, annotation); |
| 257 | }; |
| 258 | }; |
| 259 | |
| 260 | |
| 261 | // Simple highlight |
| 262 | public Highlight (int start, int end, int number) { |
| 263 | this.start = start; |
| 264 | this.end = end; |
| 265 | this.number = number; |
| 266 | }; |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 267 | |
| 268 | // Pagebreak |
| 269 | public Highlight (int start, int pagenumber) { |
| 270 | this.start = start; |
| 271 | this.end = PB_MARKER; |
| 272 | this.number = pagenumber; |
| 273 | }; |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 274 | }; |
| 275 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 276 | |
| Nils Diewald | c7b6063 | 2014-09-05 19:59:01 +0000 | [diff] [blame] | 277 | // TODO: Here are offsets and highlight offsets! |
| 278 | // <> payloads have 12 bytes (iii) or 8!? |
| 279 | // highlightoffsets have 11 bytes (iis)! |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 280 | public void addPayload (List<byte[]> payload) { |
| 281 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 282 | if (DEBUG) |
| 283 | log.trace("Add payloads to match"); |
| Nils Diewald | a206b2e | 2014-11-05 17:24:47 +0000 | [diff] [blame] | 284 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 285 | // Reverse to make embedding of highlights correct |
| 286 | Collections.reverse(payload); |
| 287 | try { |
| Akron | 6cc7b7b | 2016-01-14 21:39:18 +0100 | [diff] [blame] | 288 | |
| 289 | ByteBuffer bb = ByteBuffer.allocate(24); |
| Nils Diewald | 67f5404 | 2014-09-27 14:53:38 +0000 | [diff] [blame] | 290 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 291 | // TODO: REVERSE ITERATOR! |
| 292 | for (byte[] b : payload) { |
| Nils Diewald | c7b6063 | 2014-09-05 19:59:01 +0000 | [diff] [blame] | 293 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 294 | if (DEBUG) |
| Akron | 6d2c469 | 2016-02-03 18:29:10 +0100 | [diff] [blame] | 295 | log.trace("Found a payload of pti {}", b[0]); |
| Nils Diewald | c7b6063 | 2014-09-05 19:59:01 +0000 | [diff] [blame] | 296 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 297 | // Todo element searches! |
| Nils Diewald | c7b6063 | 2014-09-05 19:59:01 +0000 | [diff] [blame] | 298 | |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 299 | // Highlights! This is a class PTI |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 300 | if (b[0] == 0) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 301 | bb.put(b); |
| Akron | 5f04403 | 2015-12-18 00:35:38 +0100 | [diff] [blame] | 302 | bb.position(1); // Ignore PTI |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 303 | int start = bb.getInt(); |
| 304 | int end = bb.getInt(); |
| 305 | byte number = bb.get(); |
| Nils Diewald | c7b6063 | 2014-09-05 19:59:01 +0000 | [diff] [blame] | 306 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 307 | if (DEBUG) |
| 308 | log.trace( |
| 309 | "Have a highlight of class {} in {}-{} inside of {}-{}", |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 310 | unsignedByte(number), start, end, |
| 311 | this.getStartPos(), this.getEndPos()); |
| Nils Diewald | c7b6063 | 2014-09-05 19:59:01 +0000 | [diff] [blame] | 312 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 313 | // Ignore classes out of match range and set by the system |
| Akron | 6d2c469 | 2016-02-03 18:29:10 +0100 | [diff] [blame] | 314 | // TODO: This may be decidable by PTI! |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 315 | if (unsignedByte(number) <= 128 |
| 316 | && start >= this.getStartPos() |
| Akron | 4299355 | 2016-02-04 13:24:24 +0100 | [diff] [blame] | 317 | && end <= this.getEndPos()) { |
| Akron | 63cd32f | 2016-04-21 17:56:06 +0200 | [diff] [blame] | 318 | |
| 319 | if (DEBUG) { |
| Akron | 430703a | 2017-11-16 18:32:54 +0100 | [diff] [blame] | 320 | log.trace("Add highlight with class/relationnr {}!", |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 321 | unsignedByte(number)); |
| Akron | 63cd32f | 2016-04-21 17:56:06 +0200 | [diff] [blame] | 322 | }; |
| 323 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 324 | this.addHighlight(start, end - 1, number); |
| Akron | 6d2c469 | 2016-02-03 18:29:10 +0100 | [diff] [blame] | 325 | } |
| 326 | else if (DEBUG) { |
| Akron | 6759b04 | 2016-04-28 01:25:00 +0200 | [diff] [blame] | 327 | log.trace("Don't add highlight of class {}!", |
| 328 | unsignedByte(number)); |
| Akron | 6d2c469 | 2016-02-03 18:29:10 +0100 | [diff] [blame] | 329 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 330 | } |
| Nils Diewald | c7b6063 | 2014-09-05 19:59:01 +0000 | [diff] [blame] | 331 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 332 | // Element payload for match! |
| 333 | // This MAY BE the correct match |
| Akron | 6cc7b7b | 2016-01-14 21:39:18 +0100 | [diff] [blame] | 334 | else if (b[0] == (byte) 64) { |
| 335 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 336 | bb.put(b); |
| Akron | 6d2c469 | 2016-02-03 18:29:10 +0100 | [diff] [blame] | 337 | bb.position(1); // Ignore pti |
| Akron | 6cc7b7b | 2016-01-14 21:39:18 +0100 | [diff] [blame] | 338 | |
| Akron | 6d2c469 | 2016-02-03 18:29:10 +0100 | [diff] [blame] | 339 | // Wasn't set before |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 340 | if (this.potentialStartPosChar == -1) { |
| Akron | 6cc7b7b | 2016-01-14 21:39:18 +0100 | [diff] [blame] | 341 | this.potentialStartPosChar = bb.getInt(1); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 342 | } |
| 343 | else { |
| 344 | if (bb.getInt(0) < this.potentialStartPosChar) |
| Akron | 6cc7b7b | 2016-01-14 21:39:18 +0100 | [diff] [blame] | 345 | this.potentialStartPosChar = bb.getInt(1); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 346 | }; |
| Nils Diewald | c7b6063 | 2014-09-05 19:59:01 +0000 | [diff] [blame] | 347 | |
| Akron | 70ce0c0 | 2018-05-25 23:44:26 +0200 | [diff] [blame] | 348 | if (bb.getInt(4) > this.potentialEndPosChar && !this.cutted) |
| Akron | 6cc7b7b | 2016-01-14 21:39:18 +0100 | [diff] [blame] | 349 | this.potentialEndPosChar = bb.getInt(5); |
| Nils Diewald | c7b6063 | 2014-09-05 19:59:01 +0000 | [diff] [blame] | 350 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 351 | if (DEBUG) |
| 352 | log.trace("Element payload from {} to {}", |
| 353 | this.potentialStartPosChar, |
| 354 | this.potentialEndPosChar); |
| 355 | }; |
| 356 | |
| 357 | // Clear bytebuffer |
| 358 | bb.clear(); |
| 359 | }; |
| 360 | } |
| 361 | |
| 362 | catch (Exception e) { |
| 363 | log.error(e.getMessage()); |
| 364 | } |
| Nils Diewald | c7b6063 | 2014-09-05 19:59:01 +0000 | [diff] [blame] | 365 | }; |
| 366 | |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 367 | |
| 368 | /** |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 369 | * Insert a highlight for the snippet view by means of positional |
| 370 | * offsets and an optional class number. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 371 | * |
| 372 | * @param start |
| 373 | * Integer value of a span's positional start offset. |
| 374 | * @param end |
| 375 | * Integer value of a span's positional end offset. |
| 376 | * @param number |
| 377 | * Optional class number of the highlight. |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 378 | */ |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 379 | public void addHighlight (int start, int end) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 380 | this.addHighlight(new Highlight(start, end, (int) 0)); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 381 | }; |
| 382 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 383 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 384 | public void addHighlight (int start, int end, byte number) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 385 | this.addHighlight(new Highlight(start, end, (int) number)); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 386 | }; |
| 387 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 388 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 389 | public void addHighlight (int start, int end, short number) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 390 | this.addHighlight(new Highlight(start, end, (int) number)); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 391 | }; |
| 392 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 393 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 394 | public void addHighlight (int start, int end, int number) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 395 | this.addHighlight(new Highlight(start, end, number)); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 396 | }; |
| 397 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 398 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 399 | /** |
| 400 | * Insert a highlight for the snippet view. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 401 | * |
| 402 | * @param hl |
| 403 | * A highlight object to add to the match. |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 404 | */ |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 405 | public void addHighlight (Highlight hl) { |
| 406 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 407 | if (this.highlight == null) |
| 408 | this.highlight = new ArrayList<Highlight>(16); |
| Nils Diewald | 82a4b86 | 2014-02-20 21:17:41 +0000 | [diff] [blame] | 409 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 410 | if (DEBUG) |
| 411 | log.trace("Add highlight from pos {}-{} of class {}", hl.start, |
| 412 | hl.end, hl.number); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 413 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 414 | // Reset the fetched match data |
| 415 | this._reset(); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 416 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 417 | this.highlight.add(hl); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 418 | }; |
| 419 | |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 420 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 421 | /** |
| 422 | * Insert a textual annotation for the snippet view by |
| 423 | * means of positional offsets and an annotation string. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 424 | * |
| 425 | * @param start |
| 426 | * Integer value of a span's positional start offset. |
| 427 | * @param end |
| 428 | * Integer value of a span's positional end offset. |
| 429 | * @param annotation |
| 430 | * Annotation string. |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 431 | */ |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 432 | public void addAnnotation (int start, int end, String annotation) { |
| Akron | 9ebdfab | 2018-02-19 16:38:17 +0100 | [diff] [blame] | 433 | |
| 434 | if (DEBUG && start > end) |
| 435 | log.warn("Annotation span is negative: {}, {} for {}", start, end, annotation); |
| 436 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 437 | this.addHighlight(new Highlight(start, end, annotation)); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 438 | }; |
| 439 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 440 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 441 | /** |
| 442 | * Insert an annotated relation for the snippet view by |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 443 | * means of relational participant positions and an annotation |
| 444 | * string. |
| 445 | * |
| 446 | * @param src |
| 447 | * Integer value of a span's positional source object. |
| 448 | * @param target |
| 449 | * Integer value of a span's positional target object. |
| 450 | * @param annotation |
| 451 | * Annotation string. |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 452 | */ |
| Akron | fae2c68 | 2017-09-18 18:47:49 +0200 | [diff] [blame] | 453 | public void addRelation (int srcStart, |
| 454 | int srcEnd, |
| 455 | int targetStart, |
| 456 | int targetEnd, |
| 457 | String annotation) { |
| Akron | 4792969 | 2017-09-12 14:41:26 +0200 | [diff] [blame] | 458 | |
| 459 | if (DEBUG) |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 460 | log.trace("Add relation (1) '{}': source={}-{} >> target={}-{}", |
| Akron | a82cee2 | 2017-09-18 14:52:12 +0200 | [diff] [blame] | 461 | annotation, srcStart, srcEnd, targetStart, targetEnd); |
| Akron | 4792969 | 2017-09-12 14:41:26 +0200 | [diff] [blame] | 462 | |
| Akron | 22d119d | 2017-11-15 16:53:02 +0100 | [diff] [blame] | 463 | // Add source token |
| Akron | 430703a | 2017-11-16 18:32:54 +0100 | [diff] [blame] | 464 | if (srcEnd == -1) { // || srcStart == srcEnd) { |
| 465 | this.addHighlight( |
| 466 | new Highlight(srcStart, srcStart, annotation, targetStart, targetEnd) |
| 467 | ); |
| Akron | fae2c68 | 2017-09-18 18:47:49 +0200 | [diff] [blame] | 468 | } |
| Akron | 22d119d | 2017-11-15 16:53:02 +0100 | [diff] [blame] | 469 | // Add source span |
| Akron | fae2c68 | 2017-09-18 18:47:49 +0200 | [diff] [blame] | 470 | else { |
| Akron | 430703a | 2017-11-16 18:32:54 +0100 | [diff] [blame] | 471 | this.addHighlight( |
| 472 | new Highlight(srcStart, srcEnd, annotation, targetStart, targetEnd) |
| 473 | ); |
| Akron | fae2c68 | 2017-09-18 18:47:49 +0200 | [diff] [blame] | 474 | }; |
| 475 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 476 | int id = identifierNumberCounter--; |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 477 | |
| 478 | // Here is probably the problem: the identifier-number |
| 479 | // needs to incorporate targetEnd as well |
| Akron | fae2c68 | 2017-09-18 18:47:49 +0200 | [diff] [blame] | 480 | |
| Akron | 22d119d | 2017-11-15 16:53:02 +0100 | [diff] [blame] | 481 | // Add target token |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 482 | // (The last part was previously commented |
| 483 | // out for unknown reason) |
| 484 | if (targetEnd == -1 || targetStart == targetEnd) { |
| Akron | fae2c68 | 2017-09-18 18:47:49 +0200 | [diff] [blame] | 485 | this.addHighlight(new Highlight(targetStart, targetStart, id)); |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 486 | |
| 487 | identifierNumber.put(id, String.valueOf(targetStart)); |
| Akron | fae2c68 | 2017-09-18 18:47:49 +0200 | [diff] [blame] | 488 | } |
| Akron | 22d119d | 2017-11-15 16:53:02 +0100 | [diff] [blame] | 489 | |
| 490 | // Add target span |
| Akron | fae2c68 | 2017-09-18 18:47:49 +0200 | [diff] [blame] | 491 | else { |
| 492 | this.addHighlight(new Highlight(targetStart, targetEnd, id)); |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 493 | identifierNumber.put(id, targetStart + "-" + targetEnd); |
| 494 | |
| Akron | fae2c68 | 2017-09-18 18:47:49 +0200 | [diff] [blame] | 495 | }; |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 496 | }; |
| 497 | |
| Akron | 4055017 | 2015-08-04 03:06:12 +0200 | [diff] [blame] | 498 | |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 499 | public void addPagebreak (int start, int pagenumber) { |
| 500 | this.addHighlight(new Highlight(start, pagenumber)); |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 501 | }; |
| 502 | |
| Akron | 6590c32 | 2015-07-02 16:08:13 +0200 | [diff] [blame] | 503 | /** |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 504 | * Get document id. |
| 505 | */ |
| Nils Diewald | 010c10f | 2013-12-17 01:58:31 +0000 | [diff] [blame] | 506 | @JsonProperty("docID") |
| 507 | public String getDocID () { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 508 | return super.getID(); |
| Nils Diewald | 010c10f | 2013-12-17 01:58:31 +0000 | [diff] [blame] | 509 | }; |
| 510 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 511 | |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 512 | /** |
| 513 | * Get start page. |
| 514 | */ |
| 515 | @JsonIgnore |
| 516 | public int getStartPage () { |
| 517 | return this.startPage; |
| 518 | }; |
| 519 | |
| 520 | |
| 521 | /** |
| 522 | * Get end page. |
| 523 | */ |
| 524 | @JsonIgnore |
| 525 | public int getEndPage () { |
| 526 | return this.endPage; |
| 527 | }; |
| 528 | |
| 529 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 530 | /** |
| 531 | * Set document id. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 532 | * |
| 533 | * @param id |
| 534 | * String representation of document ID. |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 535 | */ |
| Nils Diewald | 364eb64 | 2013-12-22 15:03:01 +0000 | [diff] [blame] | 536 | public void setDocID (String id) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 537 | super.setID(id); |
| Nils Diewald | 364eb64 | 2013-12-22 15:03:01 +0000 | [diff] [blame] | 538 | }; |
| 539 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 540 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 541 | /** |
| 542 | * Get the positional start offset of the match. |
| 543 | */ |
| Nils Diewald | cdd465b | 2014-02-24 18:47:38 +0000 | [diff] [blame] | 544 | @JsonIgnore |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 545 | public int getStartPos () { |
| 546 | return this.startPos; |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 547 | }; |
| 548 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 549 | |
| 550 | /** |
| Nils Diewald | 99d7f8a | 2014-09-17 14:49:42 +0000 | [diff] [blame] | 551 | * Get the positional start offset of the class. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 552 | * |
| 553 | * @param number |
| 554 | * Class number of the highlight. |
| Nils Diewald | 99d7f8a | 2014-09-17 14:49:42 +0000 | [diff] [blame] | 555 | */ |
| 556 | @JsonIgnore |
| 557 | public int getStartPos (int number) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 558 | if (number > 256 || this.highlight == null) |
| 559 | return -1; |
| Nils Diewald | 99d7f8a | 2014-09-17 14:49:42 +0000 | [diff] [blame] | 560 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 561 | // Iterate over highlights to find matching class |
| 562 | for (Highlight h : this.highlight) { |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 563 | if (h.number == number && h.end != PB_MARKER) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 564 | return h.start; |
| 565 | }; |
| Nils Diewald | 99d7f8a | 2014-09-17 14:49:42 +0000 | [diff] [blame] | 566 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 567 | return -1; |
| Nils Diewald | 99d7f8a | 2014-09-17 14:49:42 +0000 | [diff] [blame] | 568 | }; |
| 569 | |
| 570 | |
| 571 | /** |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 572 | * Set the positional start offset of the match. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 573 | * |
| 574 | * @param pos |
| 575 | * The positional offset. |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 576 | */ |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 577 | @JsonIgnore |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 578 | public void setStartPos (int pos) { |
| 579 | this.startPos = pos; |
| Akron | 70ce0c0 | 2018-05-25 23:44:26 +0200 | [diff] [blame] | 580 | if (this.endPos != -1 && (this.endPos - pos) > MAX_MATCH_TOKENS) { |
| 581 | this.endPos = pos + MAX_MATCH_TOKENS; |
| 582 | this.cutted = true; |
| 583 | }; |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 584 | }; |
| 585 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 586 | |
| 587 | /** |
| 588 | * Get the positional end offset of the match. |
| 589 | */ |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 590 | @JsonIgnore |
| Nils Diewald | 99d7f8a | 2014-09-17 14:49:42 +0000 | [diff] [blame] | 591 | public int getEndPos () { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 592 | return this.endPos; |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 593 | }; |
| 594 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 595 | |
| 596 | /** |
| Nils Diewald | 99d7f8a | 2014-09-17 14:49:42 +0000 | [diff] [blame] | 597 | * Get the positional end offset of the class. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 598 | * |
| 599 | * @param number |
| 600 | * Class number of the highlight. |
| Nils Diewald | 99d7f8a | 2014-09-17 14:49:42 +0000 | [diff] [blame] | 601 | */ |
| 602 | @JsonIgnore |
| 603 | public int getEndPos (int number) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 604 | if (number > 256 || this.highlight == null) |
| 605 | return -1; |
| Nils Diewald | 99d7f8a | 2014-09-17 14:49:42 +0000 | [diff] [blame] | 606 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 607 | // Iterate over highlights to find matching class |
| 608 | for (Highlight h : this.highlight) { |
| Nils Diewald | 99d7f8a | 2014-09-17 14:49:42 +0000 | [diff] [blame] | 609 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 610 | // Get the number (incremented by 1) |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 611 | if (h.number == number && h.end != PB_MARKER) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 612 | return h.end + 1; |
| 613 | }; |
| Nils Diewald | 99d7f8a | 2014-09-17 14:49:42 +0000 | [diff] [blame] | 614 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 615 | return -1; |
| Nils Diewald | 99d7f8a | 2014-09-17 14:49:42 +0000 | [diff] [blame] | 616 | }; |
| 617 | |
| 618 | |
| 619 | /** |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 620 | * Set the positional end offset of the match. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 621 | * |
| 622 | * @param pos |
| 623 | * The positional offset. |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 624 | */ |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 625 | @JsonIgnore |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 626 | public void setEndPos (int pos) { |
| Akron | 70ce0c0 | 2018-05-25 23:44:26 +0200 | [diff] [blame] | 627 | if (this.startPos != -1 && (pos - this.startPos) > MAX_MATCH_TOKENS) { |
| 628 | pos = this.startPos + MAX_MATCH_TOKENS; |
| 629 | this.cutted = true; |
| 630 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 631 | this.endPos = pos; |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 632 | }; |
| 633 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 634 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 635 | /** |
| 636 | * Get the local (i.e. Lucene given) ID of the document. |
| 637 | */ |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 638 | @JsonIgnore |
| 639 | public int getLocalDocID () { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 640 | return this.localDocID; |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 641 | }; |
| 642 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 643 | |
| 644 | /** |
| 645 | * Set the local (i.e. Lucene given) ID of the document. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 646 | * |
| 647 | * @param id |
| 648 | * The id of the document. |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 649 | */ |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 650 | @JsonIgnore |
| 651 | public void setLocalDocID (int id) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 652 | this.localDocID = id; |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 653 | }; |
| 654 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 655 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 656 | /** |
| 657 | * Get the PositionsToOffset object. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 658 | * |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 659 | * @see PositionsToOffset |
| 660 | */ |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 661 | @JsonIgnore |
| 662 | public PositionsToOffset getPositionsToOffset () { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 663 | return this.positionsToOffset; |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 664 | }; |
| 665 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 666 | |
| 667 | /** |
| 668 | * Set the PositionsToOffset object. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 669 | * |
| 670 | * @param pto |
| 671 | * The PositionsToOffset object |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 672 | * @see PositionsToOffset |
| 673 | */ |
| 674 | @JsonIgnore |
| 675 | public void setPositionsToOffset (PositionsToOffset pto) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 676 | this.positionsToOffset = pto; |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 677 | }; |
| 678 | |
| 679 | |
| 680 | /** |
| 681 | * Get match ID (for later retrieval). |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 682 | * |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 683 | * @see MatchIdentifier |
| 684 | */ |
| Nils Diewald | 010c10f | 2013-12-17 01:58:31 +0000 | [diff] [blame] | 685 | @Override |
| Akron | 48937e9 | 2015-06-26 01:49:02 +0200 | [diff] [blame] | 686 | @JsonProperty("matchID") |
| Nils Diewald | 010c10f | 2013-12-17 01:58:31 +0000 | [diff] [blame] | 687 | public String getID () { |
| Akron | 7e75097 | 2018-03-23 14:21:21 +0100 | [diff] [blame] | 688 | |
| 689 | // Return identifier as given |
| 690 | if (this.mirrorIdentifier != null) { |
| 691 | return this.mirrorIdentifier; |
| 692 | }; |
| Nils Diewald | 2cd1c3d | 2014-01-08 22:53:08 +0000 | [diff] [blame] | 693 | |
| Akron | 7e75097 | 2018-03-23 14:21:21 +0100 | [diff] [blame] | 694 | // Identifier already created |
| 695 | if (this.identifier != null) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 696 | return this.identifier; |
| Akron | 7e75097 | 2018-03-23 14:21:21 +0100 | [diff] [blame] | 697 | }; |
| Nils Diewald | 2cd1c3d | 2014-01-08 22:53:08 +0000 | [diff] [blame] | 698 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 699 | // No, nada, nix |
| 700 | if (this.localDocID == -1) |
| 701 | return null; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 702 | |
| Akron | 8f6f7a3 | 2015-06-25 01:03:15 +0200 | [diff] [blame] | 703 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 704 | MatchIdentifier id = this.getMatchIdentifier(); |
| Nils Diewald | 2cd1c3d | 2014-01-08 22:53:08 +0000 | [diff] [blame] | 705 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 706 | // Get prefix string corpus/doc |
| Akron | 8f6f7a3 | 2015-06-25 01:03:15 +0200 | [diff] [blame] | 707 | if (this.getTextSigle() != null) { |
| Akron | 640458c | 2015-06-25 12:36:15 +0200 | [diff] [blame] | 708 | id.setTextSigle(this.getTextSigle()); |
| Akron | 8f6f7a3 | 2015-06-25 01:03:15 +0200 | [diff] [blame] | 709 | } |
| 710 | // LEGACY |
| 711 | else { |
| 712 | id.setCorpusID(this.getCorpusID()); |
| 713 | id.setDocID(this.getDocID()); |
| 714 | }; |
| Nils Diewald | 6aa929e | 2014-09-17 13:30:34 +0000 | [diff] [blame] | 715 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 716 | return (this.identifier = id.toString()); |
| Nils Diewald | 6aa929e | 2014-09-17 13:30:34 +0000 | [diff] [blame] | 717 | }; |
| 718 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 719 | |
| Nils Diewald | 6aa929e | 2014-09-17 13:30:34 +0000 | [diff] [blame] | 720 | @JsonIgnore |
| 721 | public MatchIdentifier getMatchIdentifier () { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 722 | MatchIdentifier id = new MatchIdentifier(); |
| Nils Diewald | 6aa929e | 2014-09-17 13:30:34 +0000 | [diff] [blame] | 723 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 724 | id.setStartPos(startPos); |
| 725 | id.setEndPos(endPos); |
| Nils Diewald | 2cd1c3d | 2014-01-08 22:53:08 +0000 | [diff] [blame] | 726 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 727 | // There are highlights to integrate |
| 728 | if (this.highlight != null) { |
| 729 | for (Highlight h : this.highlight) { |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 730 | if (h.number >= 256 || h.end == PB_MARKER) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 731 | continue; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 732 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 733 | // Add highlight to the snippet |
| 734 | id.addPos(h.start, h.end, h.number); |
| 735 | }; |
| 736 | }; |
| Nils Diewald | a115a33 | 2014-01-07 13:59:09 +0000 | [diff] [blame] | 737 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 738 | return id; |
| Nils Diewald | 010c10f | 2013-12-17 01:58:31 +0000 | [diff] [blame] | 739 | }; |
| 740 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 741 | /** |
| 742 | * Get identifier for a specific position. |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 743 | * |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 744 | * @param int |
| 745 | * Position to get identifier on. |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 746 | */ |
| Akron | 652e436 | 2017-09-18 20:14:44 +0200 | [diff] [blame] | 747 | @JsonIgnore |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 748 | public String getPosID (int pos) { |
| Akron | 652e436 | 2017-09-18 20:14:44 +0200 | [diff] [blame] | 749 | return this.getPosID(pos, -1); |
| 750 | }; |
| 751 | |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 752 | |
| 753 | /** |
| 754 | * Get identifier for a specific position. |
| 755 | * |
| 756 | * @param String |
| 757 | * Start and optional end position to get |
| 758 | * identifier on, separated by a dash. |
| 759 | */ |
| 760 | @JsonIgnore |
| 761 | public String getPosID (String pos) { |
| 762 | |
| 763 | String[] startEnd = pos.split("-"); |
| 764 | if (startEnd.length == 2) { |
| 765 | return this.getPosID( |
| 766 | Integer.parseInt(startEnd[0]), |
| 767 | Integer.parseInt(startEnd[1]) |
| 768 | ); |
| 769 | } |
| 770 | return this.getPosID(Integer.parseInt(startEnd[0]), -1); |
| 771 | }; |
| 772 | |
| 773 | |
| 774 | |
| Akron | 652e436 | 2017-09-18 20:14:44 +0200 | [diff] [blame] | 775 | /** |
| 776 | * Get identifier for a specific position. |
| 777 | * |
| 778 | * @param int |
| 779 | * Start position to get identifier on. |
| 780 | * @param int |
| 781 | * End position to get identifier on. |
| 782 | */ |
| 783 | @JsonIgnore |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 784 | public String getPosID (int start, int end) { |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 785 | |
| Akron | 4792969 | 2017-09-12 14:41:26 +0200 | [diff] [blame] | 786 | if (DEBUG) |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 787 | log.trace("Retrieve identifier for position {}-{}", start, end); |
| Akron | 4792969 | 2017-09-12 14:41:26 +0200 | [diff] [blame] | 788 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 789 | // Identifier already given |
| 790 | if (this.identifier != null) |
| 791 | return this.identifier; |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 792 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 793 | // Nothing here |
| 794 | if (this.localDocID == -1) |
| 795 | return null; |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 796 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 797 | PosIdentifier id = new PosIdentifier(); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 798 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 799 | // Get prefix string corpus/doc |
| Akron | 4792969 | 2017-09-12 14:41:26 +0200 | [diff] [blame] | 800 | // <legacy> |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 801 | id.setCorpusID(this.getCorpusID()); |
| 802 | id.setDocID(this.getDocID()); |
| Akron | 4792969 | 2017-09-12 14:41:26 +0200 | [diff] [blame] | 803 | // </legacy> |
| 804 | id.setTextSigle(this.getTextSigle()); |
| Akron | 652e436 | 2017-09-18 20:14:44 +0200 | [diff] [blame] | 805 | id.setStart(start); |
| 806 | id.setEnd(end); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 807 | |
| Akron | 4792969 | 2017-09-12 14:41:26 +0200 | [diff] [blame] | 808 | if (DEBUG) |
| 809 | log.trace( |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 810 | "Identifier is {} in {} ({}-{}) {}", |
| Akron | 4792969 | 2017-09-12 14:41:26 +0200 | [diff] [blame] | 811 | id.toString(), |
| 812 | this.getTextSigle(), |
| 813 | this.getCorpusID(), |
| 814 | this.getDocID(), |
| Akron | 652e436 | 2017-09-18 20:14:44 +0200 | [diff] [blame] | 815 | start |
| Akron | 4792969 | 2017-09-12 14:41:26 +0200 | [diff] [blame] | 816 | ); |
| 817 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 818 | return id.toString(); |
| Nils Diewald | 345bdc0 | 2014-01-21 21:48:57 +0000 | [diff] [blame] | 819 | }; |
| 820 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 821 | |
| Nils Diewald | 392bcf3 | 2015-02-26 20:01:17 +0000 | [diff] [blame] | 822 | public Match setContext (SearchContext context) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 823 | this.context = context; |
| 824 | return this; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 825 | }; |
| 826 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 827 | |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 828 | @JsonIgnore |
| 829 | public SearchContext getContext () { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 830 | if (this.context == null) |
| 831 | this.context = new SearchContext(); |
| 832 | return this.context; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 833 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 834 | |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 835 | |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 836 | |
| 837 | // Retrieve pagebreaks in a certain area |
| 838 | public List<int[]> retrievePagebreaks (String pb) { |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 839 | if (this.positionsToOffset != null) { |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 840 | return this.retrievePagebreaks( |
| 841 | this.positionsToOffset.getLeafReader(), |
| 842 | (Bits) null, |
| Akron | d8f8861 | 2017-02-15 19:26:54 +0100 | [diff] [blame] | 843 | "tokens", |
| 844 | pb |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 845 | ); |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 846 | }; |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 847 | |
| 848 | return null; |
| 849 | }; |
| 850 | |
| 851 | // Retrieve pagebreaks in a certain area |
| 852 | // THIS IS NOT VERY CLEVER - MAKE IT MORE CLEVER! |
| 853 | public List<int[]> retrievePagebreaks (LeafReaderContext atomic, |
| 854 | Bits bitset, |
| 855 | String field, |
| 856 | String pb) { |
| 857 | |
| 858 | // List of relevant pagebreaks |
| 859 | List<int[]> pagebreaks = new ArrayList<>(24); |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 860 | |
| Akron | d8f8861 | 2017-02-15 19:26:54 +0100 | [diff] [blame] | 861 | int charOffset = 0, pagenumber = 0, start = 0; |
| 862 | |
| 863 | if (DEBUG) |
| 864 | log.debug("Retrieve pagebreaks between {}-{}", |
| 865 | this.getStartPos(), |
| 866 | this.getEndPos()); |
| 867 | |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 868 | try { |
| 869 | |
| 870 | // Store character offsets in ByteBuffer |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 871 | ByteBuffer bb = ByteBuffer.allocate(16); |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 872 | |
| 873 | // Store last relevant pagebreak in byte array |
| 874 | byte[] b = null; |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 875 | |
| 876 | SpanTermQuery stq = new SpanTermQuery(new Term(field, pb)); |
| 877 | |
| 878 | if (DEBUG) |
| 879 | log.trace("Check pagebreaks with {}", stq.toString()); |
| 880 | |
| 881 | Spans pagebreakSpans = stq.getSpans( |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 882 | atomic, bitset, new HashMap<Term, TermContext>() |
| 883 | ); |
| 884 | |
| 885 | // Iterate over all pagebreaks |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 886 | while (pagebreakSpans.next() == true) { |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 887 | |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 888 | if (DEBUG) { |
| Akron | d8f8861 | 2017-02-15 19:26:54 +0100 | [diff] [blame] | 889 | log.debug("There is a pagebreak at {}/{}", |
| 890 | pagebreakSpans.doc(), |
| 891 | pagebreakSpans.start()); |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 892 | }; |
| 893 | |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 894 | // Current pagebreak is not in the correct document |
| 895 | if (pagebreakSpans.doc() != this.localDocID) { |
| 896 | pagebreakSpans.skipTo(this.localDocID); |
| 897 | |
| 898 | // No pagebreaks in this document |
| 899 | if (pagebreakSpans.doc() != this.localDocID) |
| 900 | break; |
| 901 | }; |
| 902 | |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 903 | if (DEBUG) |
| 904 | log.debug("The pagebreak occurs in the document"); |
| 905 | |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 906 | // There is a pagebreak found - check, |
| 907 | // if it is in the correct area |
| 908 | if (pagebreakSpans.start() <= this.getStartPos()) { |
| 909 | |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 910 | if (DEBUG) |
| Akron | d8f8861 | 2017-02-15 19:26:54 +0100 | [diff] [blame] | 911 | log.debug("PB start position is before match at {}", |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 912 | pagebreakSpans.start()); |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 913 | |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 914 | // Only the first payload is relevant |
| 915 | b = pagebreakSpans.getPayload().iterator().next(); |
| Akron | d8f8861 | 2017-02-15 19:26:54 +0100 | [diff] [blame] | 916 | start = pagebreakSpans.start(); |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 917 | } |
| 918 | |
| 919 | // This is the first pagebreak! |
| 920 | else { |
| 921 | |
| 922 | // b is already defined! |
| 923 | if (b != null) { |
| 924 | bb.rewind(); |
| 925 | bb.put(b); |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 926 | bb.rewind(); |
| 927 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 928 | pagenumber = bb.getInt(); |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 929 | charOffset = bb.getInt(); |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 930 | |
| 931 | if (DEBUG) |
| 932 | log.debug("Add pagebreak to list: {}-{}", charOffset, pagenumber); |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 933 | |
| 934 | // This is the first pagebreak! |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 935 | pagebreaks.add(new int[]{charOffset, pagenumber}); |
| Akron | d8f8861 | 2017-02-15 19:26:54 +0100 | [diff] [blame] | 936 | if (start >= this.getStartPos()) { |
| 937 | |
| 938 | if (DEBUG) |
| 939 | log.debug("Add pagebreak to rendering: {}-{}", |
| 940 | charOffset, |
| 941 | pagenumber); |
| 942 | this.addPagebreak(charOffset, pagenumber); |
| 943 | }; |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 944 | } |
| 945 | |
| 946 | // b wasn't used yet |
| Akron | d8f8861 | 2017-02-15 19:26:54 +0100 | [diff] [blame] | 947 | if (pagebreakSpans.start() <= this.getEndPos()) { |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 948 | |
| 949 | // Set new pagebreak |
| 950 | // Only the first payload is relevant |
| 951 | b = pagebreakSpans.getPayload().iterator().next(); |
| 952 | bb.rewind(); |
| 953 | bb.put(b); |
| 954 | bb.rewind(); |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 955 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 956 | pagenumber = bb.getInt(); |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 957 | charOffset = bb.getInt(); |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 958 | |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 959 | // This is the first pagebreak! |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 960 | pagebreaks.add(new int[]{charOffset, pagenumber}); |
| 961 | this.addPagebreak(charOffset,pagenumber); |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 962 | } |
| 963 | |
| 964 | // Pagebreak beyond the current position |
| 965 | else { |
| 966 | break; |
| 967 | }; |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 968 | |
| 969 | // Reset byte |
| 970 | b = null; |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 971 | }; |
| 972 | }; |
| 973 | } |
| 974 | catch (Exception e) { |
| 975 | log.warn("Some problems with ByteBuffer: {}", e.getMessage()); |
| 976 | }; |
| 977 | |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 978 | if (pagebreaks.size() > 0) { |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 979 | this.startPage = pagebreaks.get(0)[1]; |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 980 | if (pagebreaks.size() > 1 && pagebreaks.get(pagebreaks.size()-1) != null) |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 981 | this.endPage = pagebreaks.get(pagebreaks.size()-1)[1]; |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 982 | } |
| 983 | |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 984 | return pagebreaks; |
| 985 | }; |
| 986 | |
| 987 | |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 988 | // Expand the context to a span |
| 989 | public int[] expandContextToSpan (String element) { |
| 990 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 991 | // TODO: THE BITS HAVE TO BE SET! |
| 992 | |
| 993 | if (this.positionsToOffset != null) |
| 994 | return this.expandContextToSpan( |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 995 | this.positionsToOffset.getLeafReader(), (Bits) null, |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 996 | "tokens", element); |
| 997 | return new int[] { 0, 0, 0, 0 }; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 998 | }; |
| 999 | |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 1000 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1001 | |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1002 | // Expand the context to a span |
| Nils Diewald | 8493437 | 2014-05-20 13:48:18 +0000 | [diff] [blame] | 1003 | // THIS IS NOT VERY CLEVER - MAKE IT MORE CLEVER! |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 1004 | public int[] expandContextToSpan (LeafReaderContext atomic, Bits bitset, |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1005 | String field, String element) { |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1006 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1007 | try { |
| 1008 | // Store character offsets in ByteBuffer |
| Akron | 499c94c | 2016-02-04 13:13:43 +0100 | [diff] [blame] | 1009 | ByteBuffer bb = ByteBuffer.allocate(24); |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1010 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1011 | SpanElementQuery cquery = new SpanElementQuery(field, element); |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1012 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1013 | Spans contextSpans = cquery.getSpans(atomic, bitset, |
| 1014 | new HashMap<Term, TermContext>()); |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1015 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1016 | int newStart = -1, newEnd = -1; |
| 1017 | int newStartChar = -1, newEndChar = -1; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1018 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1019 | if (DEBUG) |
| Akron | 4299355 | 2016-02-04 13:24:24 +0100 | [diff] [blame] | 1020 | log.trace( |
| 1021 | "Extend match to context boundary with {} in docID {}", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1022 | cquery.toString(), this.localDocID); |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1023 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1024 | while (true) { |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1025 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1026 | // Game over |
| 1027 | if (contextSpans.next() != true) |
| 1028 | break; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1029 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1030 | if (contextSpans.doc() != this.localDocID) { |
| 1031 | contextSpans.skipTo(this.localDocID); |
| 1032 | if (contextSpans.doc() != this.localDocID) |
| 1033 | break; |
| 1034 | }; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1035 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1036 | // There's a <context> found -- I'm curious, |
| 1037 | // if it's closer to the match than everything before |
| 1038 | if (contextSpans.start() <= this.getStartPos() |
| 1039 | && contextSpans.end() >= this.getStartPos()) { |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1040 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1041 | // Set as newStart |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 1042 | newStart = contextSpans.start() > newStart |
| 1043 | ? contextSpans.start() : newStart; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1044 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1045 | if (DEBUG) |
| 1046 | log.trace("NewStart is at {}", newStart); |
| Nils Diewald | 8493437 | 2014-05-20 13:48:18 +0000 | [diff] [blame] | 1047 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1048 | // Get character offset (start) |
| 1049 | if (contextSpans.isPayloadAvailable()) { |
| 1050 | try { |
| 1051 | bb.rewind(); |
| 1052 | for (byte[] b : contextSpans.getPayload()) { |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1053 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1054 | // Not an element span |
| Akron | 499c94c | 2016-02-04 13:13:43 +0100 | [diff] [blame] | 1055 | if (b[0] != (byte) 64) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1056 | continue; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1057 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1058 | bb.rewind(); |
| Akron | 499c94c | 2016-02-04 13:13:43 +0100 | [diff] [blame] | 1059 | bb.put(b); |
| 1060 | bb.position(1); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1061 | newStartChar = bb.getInt(); |
| 1062 | newEndChar = bb.getInt(); |
| 1063 | break; |
| 1064 | }; |
| 1065 | } |
| 1066 | catch (Exception e) { |
| Akron | f815b59 | 2017-02-09 16:54:59 +0100 | [diff] [blame] | 1067 | log.warn("Some problems with ByteBuffer: {}", |
| 1068 | e.getMessage()); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1069 | }; |
| 1070 | }; |
| 1071 | } |
| 1072 | else { |
| 1073 | // Has to be resettet to avoid multiple readings of the payload |
| 1074 | newEndChar = 0; |
| 1075 | }; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1076 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1077 | // There's an s found, that ends after the match |
| 1078 | if (contextSpans.end() >= this.getEndPos()) { |
| 1079 | newEnd = contextSpans.end(); |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1080 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1081 | // Get character offset (end) |
| 1082 | if (newEndChar == 0 && contextSpans.isPayloadAvailable()) { |
| 1083 | try { |
| 1084 | bb.rewind(); |
| 1085 | for (byte[] b : contextSpans.getPayload()) { |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1086 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1087 | // Not an element span |
| Akron | 499c94c | 2016-02-04 13:13:43 +0100 | [diff] [blame] | 1088 | if (b[0] != (byte) 64) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1089 | continue; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1090 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1091 | bb.rewind(); |
| Akron | 499c94c | 2016-02-04 13:13:43 +0100 | [diff] [blame] | 1092 | bb.put(b); |
| 1093 | bb.position(1); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1094 | newEndChar = bb.getInt(1); |
| 1095 | break; |
| 1096 | }; |
| 1097 | } |
| 1098 | catch (Exception e) { |
| 1099 | log.warn(e.getMessage()); |
| 1100 | }; |
| 1101 | }; |
| 1102 | break; |
| 1103 | }; |
| 1104 | }; |
| 1105 | |
| 1106 | // We have a new match surrounding |
| 1107 | if (DEBUG) |
| 1108 | log.trace("New match spans from {}-{}/{}-{}", newStart, newEnd, |
| 1109 | newStartChar, newEndChar); |
| 1110 | |
| 1111 | return new int[] { newStart, newEnd, newStartChar, newEndChar }; |
| 1112 | } |
| 1113 | catch (IOException e) { |
| 1114 | log.error(e.getMessage()); |
| 1115 | }; |
| 1116 | |
| 1117 | return new int[] { -1, -1, -1, -1 }; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1118 | }; |
| 1119 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1120 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 1121 | // Reset all internal data |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 1122 | private void _reset () { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1123 | this.processed = false; |
| 1124 | this.snippetHTML = null; |
| 1125 | this.snippetBrackets = null; |
| Akron | 7e75097 | 2018-03-23 14:21:21 +0100 | [diff] [blame] | 1126 | this.identifier = null; |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 1127 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1128 | // Delete all spans |
| 1129 | if (this.span != null) |
| 1130 | this.span.clear(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1131 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1132 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1133 | |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 1134 | // Start building highlighted snippets |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 1135 | private boolean _processHighlight () { |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 1136 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1137 | if (processed) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1138 | return true; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 1139 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1140 | // Relevant details are missing |
| 1141 | if (this.positionsToOffset == null || this.localDocID == -1) { |
| 1142 | log.warn("You have to define " |
| Akron | 4299355 | 2016-02-04 13:24:24 +0100 | [diff] [blame] | 1143 | + "positionsToOffset and localDocID first before"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1144 | return false; |
| 1145 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1146 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1147 | if (DEBUG) |
| 1148 | log.trace("--- Start highlight processing ..."); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 1149 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1150 | // Get pto object |
| 1151 | PositionsToOffset pto = this.positionsToOffset; |
| 1152 | pto.add(this.localDocID, this.getStartPos()); |
| 1153 | pto.add(this.localDocID, this.getEndPos() - 1); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 1154 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1155 | if (DEBUG) |
| 1156 | log.trace("PTO will retrieve {} & {} (Match boundary)", |
| 1157 | this.getStartPos(), this.getEndPos()); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 1158 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1159 | // Set inner match |
| 1160 | if (this.innerMatchEndPos != 1) |
| 1161 | this.addHighlight(this.innerMatchStartPos, this.innerMatchEndPos, |
| 1162 | -1); |
| 1163 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1164 | // Add all highlights for character retrieval |
| 1165 | if (this.highlight != null) { |
| 1166 | for (Highlight hl : this.highlight) { |
| 1167 | if (hl.start >= this.getStartPos() |
| 1168 | && hl.end <= this.getEndPos()) { |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 1169 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1170 | // Highlight is no pagebreak |
| 1171 | if (hl.end != PB_MARKER) { |
| 1172 | pto.add(this.localDocID, hl.start); |
| 1173 | pto.add(this.localDocID, hl.end); |
| 1174 | |
| 1175 | if (DEBUG) |
| 1176 | log.trace( |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1177 | "PTO will retrieve {} & {} (Highlight boundary)", |
| 1178 | hl.start, hl.end); |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1179 | |
| 1180 | } |
| 1181 | |
| 1182 | else if (DEBUG) { |
| 1183 | log.trace("Highlight is a pagebreak - do not retrieve PTO"); |
| 1184 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1185 | }; |
| 1186 | }; |
| 1187 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1188 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1189 | // Get the list of spans for matches and highlighting |
| 1190 | if (this.span == null || this.span.size() == 0) { |
| 1191 | if (!this._processHighlightSpans()) |
| 1192 | return false; |
| 1193 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1194 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1195 | // Create a stack for highlighted elements |
| 1196 | // (opening and closing elements) |
| 1197 | ArrayList<int[]> stack = this._processHighlightStack(); |
| 1198 | |
| 1199 | if (DEBUG) |
| 1200 | log.trace("The snippet is {}", this.tempSnippet); |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1201 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1202 | // The temporary snippet is empty, nothing to do |
| 1203 | if (this.tempSnippet == null) { |
| 1204 | processed = true; |
| 1205 | return false; |
| 1206 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1207 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1208 | // Merge the element stack with the primary textual data |
| 1209 | this._processHighlightSnippet(this.tempSnippet, stack); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1210 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1211 | // Match is processed - done |
| 1212 | return (processed = true); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1213 | }; |
| 1214 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 1215 | |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 1216 | /* |
| 1217 | Comparator class for opening tags |
| 1218 | */ |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1219 | private class OpeningTagComparator implements Comparator<int[]> { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1220 | @Override |
| 1221 | public int compare (int[] arg0, int[] arg1) { |
| 1222 | // Check start positions |
| 1223 | if (arg0[0] > arg1[0]) { |
| 1224 | return 1; |
| 1225 | } |
| 1226 | else if (arg0[0] == arg1[0]) { |
| 1227 | // Check endpositions |
| 1228 | if (arg0[1] > arg1[1]) { |
| 1229 | return -1; |
| 1230 | } |
| 1231 | else if (arg0[1] == arg1[1]) { |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 1232 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1233 | // Compare class number |
| 1234 | if (arg0[2] > arg1[2]) |
| 1235 | return 1; |
| Akron | 417eaa9 | 2017-01-13 18:00:15 +0100 | [diff] [blame] | 1236 | else if (arg0[2] < arg1[2]) |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1237 | return -1; |
| 1238 | return 0; |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 1239 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1240 | } |
| 1241 | return 1; |
| 1242 | }; |
| 1243 | return -1; |
| 1244 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1245 | }; |
| 1246 | |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 1247 | /* |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 1248 | * Comparator class for closing tags |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 1249 | */ |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1250 | private class ClosingTagComparator implements Comparator<int[]> { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1251 | @Override |
| 1252 | public int compare (int[] arg0, int[] arg1) { |
| 1253 | // Check end positions |
| 1254 | if (arg0[1] > arg1[1]) { |
| 1255 | return 1; |
| 1256 | } |
| 1257 | else if (arg0[1] == arg1[1]) { |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 1258 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1259 | // Check start positions |
| 1260 | if (arg0[0] < arg1[0]) { |
| 1261 | return 1; |
| 1262 | } |
| 1263 | else if (arg0[0] == arg1[0]) { |
| 1264 | return 0; |
| 1265 | }; |
| 1266 | return -1; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1267 | }; |
| 1268 | return -1; |
| 1269 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1270 | }; |
| 1271 | |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 1272 | |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 1273 | /* |
| 1274 | * This takes a clean string and the tag stack |
| 1275 | * to decorate the string with annotations. |
| 1276 | */ |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 1277 | private void _processHighlightSnippet (String clean, |
| 1278 | ArrayList<int[]> stack) { |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1279 | |
| Akron | 22d119d | 2017-11-15 16:53:02 +0100 | [diff] [blame] | 1280 | if (DEBUG) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1281 | log.trace("--- Process Highlight snippet"); |
| Akron | 22d119d | 2017-11-15 16:53:02 +0100 | [diff] [blame] | 1282 | log.trace("--- Snippet: {}", clean); |
| 1283 | }; |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 1284 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1285 | int pos = 0, oldPos = 0; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1286 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1287 | this.snippetArray = new HighlightCombinator(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1288 | |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 1289 | // Iterate over all elements of the stack |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1290 | for (int[] element : stack) { |
| Akron | a7b936d | 2016-03-04 13:40:54 +0100 | [diff] [blame] | 1291 | |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 1292 | // The position is the start position for opening and |
| 1293 | // empty elements and the end position for closing elements |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1294 | pos = element[3] != 0 ? element[0] : element[1]; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1295 | |
| Akron | 22d119d | 2017-11-15 16:53:02 +0100 | [diff] [blame] | 1296 | if (DEBUG) |
| 1297 | log.trace("Add tag at position {} (was {})", |
| 1298 | pos, |
| 1299 | oldPos); |
| 1300 | |
| 1301 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1302 | // The new position is behind the old position |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1303 | if (pos > oldPos) { |
| Nils Diewald | da1722b | 2014-02-17 00:12:05 +0000 | [diff] [blame] | 1304 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1305 | // The position is behind the string length, |
| 1306 | // which may end when an element ends beyond |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1307 | if (pos > clean.length()) { |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1308 | |
| 1309 | // Reposition to the end |
| Akron | 22d119d | 2017-11-15 16:53:02 +0100 | [diff] [blame] | 1310 | pos = clean.length(); |
| 1311 | |
| 1312 | if (DEBUG) |
| 1313 | log.trace("Position exceeds string, now {}", |
| 1314 | pos); |
| 1315 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1316 | }; |
| Nils Diewald | da1722b | 2014-02-17 00:12:05 +0000 | [diff] [blame] | 1317 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1318 | // Add partial string |
| Akron | dd31e8d | 2017-11-15 16:22:45 +0100 | [diff] [blame] | 1319 | if (pos > 0 && pos > oldPos) { |
| Akron | 57d57aa | 2017-11-13 18:56:33 +0100 | [diff] [blame] | 1320 | snippetArray.addString(clean.substring(oldPos, pos)); |
| Akron | dd31e8d | 2017-11-15 16:22:45 +0100 | [diff] [blame] | 1321 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1322 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1323 | // Remember the new position |
| Akron | dd31e8d | 2017-11-15 16:22:45 +0100 | [diff] [blame] | 1324 | oldPos = pos; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1325 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1326 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1327 | // close tag |
| 1328 | if (element[3] == 0) { |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 1329 | |
| 1330 | // Add close |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1331 | snippetArray.addClose(element[2]); |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1332 | } |
| 1333 | |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 1334 | // empty tag |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 1335 | else if (element[3] == 2) { |
| 1336 | |
| 1337 | // Add Empty (pagebreak) |
| 1338 | snippetArray.addEmpty(element[2]); |
| 1339 | } |
| 1340 | |
| 1341 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1342 | // open tag |
| 1343 | else { |
| 1344 | snippetArray.addOpen(element[2]); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1345 | }; |
| 1346 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1347 | |
| Akron | 1dd062d | 2016-11-11 23:21:46 +0100 | [diff] [blame] | 1348 | if (clean.length() > pos && pos >= 0) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1349 | snippetArray.addString(clean.substring(pos)); |
| 1350 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1351 | }; |
| 1352 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1353 | |
| 1354 | @JsonProperty("snippet") |
| 1355 | public String getSnippetHTML () { |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 1356 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1357 | if (!this._processHighlight()) |
| 1358 | return null; |
| Nils Diewald | 3caa00d | 2013-12-13 02:24:04 +0000 | [diff] [blame] | 1359 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1360 | if (this.processed && this.snippetHTML != null) |
| 1361 | return this.snippetHTML; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1362 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1363 | if (DEBUG) |
| 1364 | log.trace("Create HTML Snippet"); |
| Nils Diewald | 833fe7e | 2013-12-14 16:06:33 +0000 | [diff] [blame] | 1365 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1366 | StringBuilder sb = new StringBuilder(); |
| Akron | 1dd062d | 2016-11-11 23:21:46 +0100 | [diff] [blame] | 1367 | StringBuilder rightContext = new StringBuilder(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1368 | |
| Akron | 1c126b4 | 2018-01-30 19:48:48 +0100 | [diff] [blame] | 1369 | // Remember ids already defined to |
| 1370 | // have joined elements |
| 1371 | HashSet<String> joins = new HashSet<>(100); |
| 1372 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1373 | // Snippet stack sizes |
| 1374 | short start = (short) 0; |
| 1375 | short end = this.snippetArray.size(); |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 1376 | |
| Akron | 1dd062d | 2016-11-11 23:21:46 +0100 | [diff] [blame] | 1377 | // Create context |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1378 | sb.append("<span class=\"context-left\">"); |
| 1379 | if (this.startMore) |
| 1380 | sb.append("<span class=\"more\"></span>"); |
| Nils Diewald | 3caa00d | 2013-12-13 02:24:04 +0000 | [diff] [blame] | 1381 | |
| Akron | 1dd062d | 2016-11-11 23:21:46 +0100 | [diff] [blame] | 1382 | // Set levels for highlights |
| 1383 | FixedBitSet level = new FixedBitSet(255); |
| 1384 | level.set(0, 255); |
| 1385 | byte[] levelCache = new byte[255]; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1386 | |
| Akron | 1dd062d | 2016-11-11 23:21:46 +0100 | [diff] [blame] | 1387 | HighlightCombinatorElement elem; |
| Nils Diewald | f3b30ae | 2013-11-27 17:42:37 +0000 | [diff] [blame] | 1388 | |
| Akron | 1dd062d | 2016-11-11 23:21:46 +0100 | [diff] [blame] | 1389 | end--; |
| 1390 | if (end > 0) { |
| Nils Diewald | f3b30ae | 2013-11-27 17:42:37 +0000 | [diff] [blame] | 1391 | |
| Akron | 1dd062d | 2016-11-11 23:21:46 +0100 | [diff] [blame] | 1392 | // First element of sorted array |
| 1393 | elem = this.snippetArray.getFirst(); |
| Nils Diewald | 4679010 | 2014-09-18 16:05:42 +0000 | [diff] [blame] | 1394 | |
| Akron | 1dd062d | 2016-11-11 23:21:46 +0100 | [diff] [blame] | 1395 | // First element is textual |
| 1396 | if (elem.type == 0) { |
| Akron | 1c126b4 | 2018-01-30 19:48:48 +0100 | [diff] [blame] | 1397 | sb.append(elem.toHTML(this, level, levelCache, joins)); |
| Akron | 1dd062d | 2016-11-11 23:21:46 +0100 | [diff] [blame] | 1398 | // Move start position |
| 1399 | start++; |
| 1400 | }; |
| 1401 | sb.append("</span>"); |
| Nils Diewald | 4679010 | 2014-09-18 16:05:42 +0000 | [diff] [blame] | 1402 | |
| Akron | 1dd062d | 2016-11-11 23:21:46 +0100 | [diff] [blame] | 1403 | // Last element of sorted array |
| 1404 | elem = this.snippetArray.getLast(); |
| 1405 | |
| 1406 | // Create right context, if there is any |
| 1407 | rightContext.append("<span class=\"context-right\">"); |
| 1408 | |
| 1409 | // Last element is textual |
| 1410 | if (elem != null && elem.type == 0) { |
| Akron | 1c126b4 | 2018-01-30 19:48:48 +0100 | [diff] [blame] | 1411 | rightContext.append( |
| 1412 | elem.toHTML(this, level, levelCache, joins) |
| 1413 | ); |
| Akron | 1dd062d | 2016-11-11 23:21:46 +0100 | [diff] [blame] | 1414 | |
| 1415 | // decrement end |
| 1416 | end--; |
| 1417 | }; |
| 1418 | }; |
| Akron | 8288ad0 | 2016-11-11 19:23:05 +0100 | [diff] [blame] | 1419 | |
| 1420 | if (this.endMore) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1421 | rightContext.append("<span class=\"more\"></span>"); |
| Akron | 8288ad0 | 2016-11-11 19:23:05 +0100 | [diff] [blame] | 1422 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1423 | rightContext.append("</span>"); |
| Nils Diewald | f3b30ae | 2013-11-27 17:42:37 +0000 | [diff] [blame] | 1424 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1425 | // Iterate through all remaining elements |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1426 | sb.append("<span class=\"match\">"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1427 | for (short i = start; i <= end; i++) { |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 1428 | |
| Akron | 8288ad0 | 2016-11-11 19:23:05 +0100 | [diff] [blame] | 1429 | elem = this.snippetArray.get(i); |
| 1430 | // UNTESTED |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 1431 | if (elem != null) { |
| Akron | 1c126b4 | 2018-01-30 19:48:48 +0100 | [diff] [blame] | 1432 | String elemString = elem.toHTML( |
| 1433 | this, level, levelCache, joins |
| 1434 | ); |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 1435 | if (DEBUG) { |
| 1436 | log.trace("Add node {}", elemString); |
| 1437 | }; |
| 1438 | sb.append(elemString); |
| 1439 | } |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1440 | }; |
| Akron | 70ce0c0 | 2018-05-25 23:44:26 +0200 | [diff] [blame] | 1441 | if (this.cutted) { |
| 1442 | sb.append("<span class=\"cutted\"></span>"); |
| 1443 | }; |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1444 | sb.append("</span>"); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1445 | sb.append(rightContext); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1446 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1447 | return (this.snippetHTML = sb.toString()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1448 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1449 | |
| 1450 | |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1451 | @JsonIgnore |
| 1452 | public String getSnippetBrackets () { |
| Nils Diewald | 3caa00d | 2013-12-13 02:24:04 +0000 | [diff] [blame] | 1453 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1454 | if (!this._processHighlight()) |
| 1455 | return null; |
| Nils Diewald | 3caa00d | 2013-12-13 02:24:04 +0000 | [diff] [blame] | 1456 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1457 | if (this.processed && this.snippetBrackets != null) |
| 1458 | return this.snippetBrackets; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1459 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 1460 | // Snippet stack sizes |
| 1461 | short start = (short) 0; |
| 1462 | short end = this.snippetArray.size(); |
| 1463 | end--; |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1464 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1465 | StringBuilder sb = new StringBuilder(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1466 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1467 | if (this.startMore) |
| 1468 | sb.append("... "); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1469 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1470 | // First element of sorted array |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 1471 | HighlightCombinatorElement elem = this.snippetArray.getFirst(); |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1472 | if (elem.type == 0) { |
| 1473 | sb.append(elem.toBrackets(this)); |
| 1474 | start++; |
| 1475 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1476 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1477 | sb.append("["); |
| 1478 | |
| 1479 | // Last element of sorted array |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 1480 | elem = this.snippetArray.getLast(); |
| 1481 | StringBuilder rightContext = new StringBuilder(); |
| 1482 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1483 | // Last element is textual |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 1484 | if (elem != null && elem.type == 0) { |
| 1485 | rightContext.append(elem.toBrackets(this)); |
| 1486 | // decrement end |
| 1487 | end--; |
| 1488 | }; |
| 1489 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1490 | for (short i = start; i <= end; i++) { |
| 1491 | sb.append(this.snippetArray.get(i).toBrackets(this)); |
| 1492 | }; |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 1493 | |
| Akron | 70ce0c0 | 2018-05-25 23:44:26 +0200 | [diff] [blame] | 1494 | if (this.cutted) { |
| 1495 | sb.append("<!>"); |
| 1496 | }; |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1497 | sb.append("]"); |
| 1498 | sb.append(rightContext); |
| 1499 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1500 | if (this.endMore) |
| 1501 | sb.append(" ..."); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1502 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1503 | return (this.snippetBrackets = sb.toString()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1504 | }; |
| 1505 | |
| 1506 | |
| Nils Diewald | 3caa00d | 2013-12-13 02:24:04 +0000 | [diff] [blame] | 1507 | // This sorts all highlight and match spans to make them nesting correctly, |
| 1508 | // even in case they overlap |
| 1509 | // TODO: Not very fast - improve! |
| Nils Diewald | 2cd1c3d | 2014-01-08 22:53:08 +0000 | [diff] [blame] | 1510 | private ArrayList<int[]> _processHighlightStack () { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1511 | if (DEBUG) |
| 1512 | log.trace("--- Process Highlight stack"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1513 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1514 | LinkedList<int[]> openList = new LinkedList<int[]>(); |
| 1515 | LinkedList<int[]> closeList = new LinkedList<int[]>(); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1516 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1517 | // Filter multiple identifiers, that may be introduced and would |
| 1518 | // result in invalid xml |
| 1519 | this._filterMultipleIdentifiers(); |
| Nils Diewald | 50389b0 | 2014-04-11 16:27:52 +0000 | [diff] [blame] | 1520 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1521 | // Add highlight spans to balance lists |
| 1522 | openList.addAll(this.span); |
| 1523 | closeList.addAll(this.span); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1524 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1525 | // Sort balance lists |
| 1526 | Collections.sort(openList, new OpeningTagComparator()); |
| 1527 | Collections.sort(closeList, new ClosingTagComparator()); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1528 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1529 | // New stack array |
| 1530 | ArrayList<int[]> stack = new ArrayList<>(openList.size() * 2); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1531 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1532 | // Create stack unless both lists are empty |
| 1533 | while (!openList.isEmpty() || !closeList.isEmpty()) { |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1534 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1535 | // Nothing more to open -- close all |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1536 | if (openList.isEmpty()) { |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 1537 | |
| 1538 | if (DEBUG) |
| 1539 | log.debug("No more open tags -- close all non pagebreaks"); |
| 1540 | |
| 1541 | if (closeList.peekFirst()[1] != PB_MARKER) { |
| 1542 | stack.add(closeList.removeFirst()); |
| 1543 | } |
| 1544 | else if (DEBUG) { |
| 1545 | if (DEBUG) |
| 1546 | log.debug("Close is pagebreak -- ignore (1)"); |
| 1547 | }; |
| 1548 | |
| 1549 | continue; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1550 | } |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 1551 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1552 | // Not sure about this, but it can happen |
| 1553 | else if (closeList.isEmpty()) { |
| 1554 | break; |
| 1555 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1556 | |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 1557 | // Closener is pagebreak |
| 1558 | if (closeList.peekFirst()[1] == PB_MARKER) { |
| 1559 | |
| 1560 | if (DEBUG) |
| 1561 | log.debug("Close is pagebreak -- ignore (2)"); |
| 1562 | |
| 1563 | // Remove closing pagebreak |
| 1564 | closeList.removeFirst(); |
| 1565 | } |
| 1566 | |
| 1567 | // Opener is pagebreak |
| 1568 | else if (openList.peekFirst()[1] == PB_MARKER) { |
| 1569 | int[] e = openList.removeFirst().clone(); |
| 1570 | |
| 1571 | if (DEBUG) |
| 1572 | log.debug("Open is pagebreak"); |
| 1573 | |
| 1574 | // Mark as empty |
| 1575 | e[1] = e[0]; // Remove pagebreak marker |
| 1576 | e[3] = 2; |
| 1577 | |
| 1578 | // Add empty pagebreak |
| 1579 | stack.add(e); |
| 1580 | } |
| 1581 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1582 | // check if the opener is smaller than the closener |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 1583 | else if (openList.peekFirst()[0] < closeList.peekFirst()[1]) { |
| 1584 | |
| 1585 | if (DEBUG) |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 1586 | log.debug("Open tag starts before close tag ends"); |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 1587 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1588 | int[] e = openList.removeFirst().clone(); |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1589 | |
| 1590 | // Mark as opener |
| Akron | 12cd258 | 2018-02-17 12:58:38 +0100 | [diff] [blame] | 1591 | e[3] = 1; |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1592 | |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 1593 | if (DEBUG) { |
| 1594 | |
| 1595 | // -1: match |
| 1596 | // < -1: relation target |
| Akron | 12cd258 | 2018-02-17 12:58:38 +0100 | [diff] [blame] | 1597 | // -99998: context |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 1598 | // >= 2048: relation source |
| 1599 | // >= 256: annotation |
| 1600 | |
| 1601 | log.trace( |
| Akron | 12cd258 | 2018-02-17 12:58:38 +0100 | [diff] [blame] | 1602 | "Add open with number {} to stack at {}-{} as {}", |
| 1603 | e[2], e[0], e[1], e[3] |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 1604 | ); |
| 1605 | }; |
| 1606 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1607 | // Add opener to stack |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1608 | stack.add(e); |
| 1609 | } |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 1610 | |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 1611 | else { |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 1612 | int[] e = closeList.removeFirst(); |
| 1613 | |
| 1614 | if (DEBUG) { |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 1615 | log.debug("Close ends before open"); |
| 1616 | |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 1617 | log.trace( |
| 1618 | "Add close with number {} to stack at {}-{}", |
| 1619 | e[2], e[0], e[1] |
| 1620 | ); |
| 1621 | }; |
| 1622 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1623 | // Add closener to stack |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 1624 | stack.add(e); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1625 | }; |
| 1626 | }; |
| 1627 | return stack; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1628 | }; |
| 1629 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 1630 | |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1631 | /** |
| 1632 | * Sometimes the match start and end positions are inside the |
| 1633 | * matching region, e.g. when the match was expanded. |
| 1634 | * This will override the original matching positions |
| 1635 | * And matrk the real matching. |
| 1636 | */ |
| 1637 | public void overrideMatchPosition (int start, int end) { |
| 1638 | if (DEBUG) |
| 1639 | log.trace("--- Override match position"); |
| 1640 | |
| 1641 | this.innerMatchStartPos = start; |
| 1642 | this.innerMatchEndPos = end; |
| 1643 | }; |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 1644 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1645 | |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 1646 | /** |
| 1647 | * This will retrieve character offsets for all spans. |
| 1648 | */ |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1649 | private boolean _processHighlightSpans () { |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 1650 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1651 | if (DEBUG) |
| 1652 | log.trace("--- Process Highlight spans"); |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 1653 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1654 | // Local document ID |
| 1655 | int ldid = this.localDocID; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1656 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1657 | int startPosChar = -1, endPosChar = -1; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1658 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1659 | // No positionsToOffset object found |
| 1660 | if (this.positionsToOffset == null) |
| 1661 | return false; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 1662 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1663 | // Match position |
| 1664 | startPosChar = this.positionsToOffset.start(ldid, this.startPos); |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 1665 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1666 | if (DEBUG) |
| 1667 | log.trace("Unaltered startPosChar is {}", startPosChar); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 1668 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1669 | // Check potential differing start characters |
| 1670 | // e.g. from element spans |
| 1671 | if (potentialStartPosChar != -1 |
| 1672 | && (startPosChar > this.potentialStartPosChar)) |
| 1673 | startPosChar = this.potentialStartPosChar; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1674 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1675 | endPosChar = this.positionsToOffset.end(ldid, this.endPos - 1); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 1676 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1677 | if (DEBUG) |
| 1678 | log.trace("Unaltered endPosChar is {}", endPosChar); |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 1679 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1680 | // Potential end characters may come from spans with |
| 1681 | // defined character offsets like sentences including .", ... etc. |
| 1682 | if (endPosChar < potentialEndPosChar) |
| 1683 | endPosChar = potentialEndPosChar; |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 1684 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1685 | if (DEBUG) |
| 1686 | log.trace("Refined: Match offset is pos {}-{} (chars {}-{})", |
| 1687 | this.startPos, this.endPos, startPosChar, endPosChar); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 1688 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1689 | this.identifier = null; |
| Nils Diewald | 498d598 | 2014-03-03 20:09:22 +0000 | [diff] [blame] | 1690 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1691 | // No spans yet |
| 1692 | if (this.span == null) |
| 1693 | this.span = new LinkedList<int[]>(); |
| Nils Diewald | 2cd1c3d | 2014-01-08 22:53:08 +0000 | [diff] [blame] | 1694 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1695 | // Process offset char findings |
| 1696 | int[] intArray = this._processOffsetChars(ldid, startPosChar, |
| 1697 | endPosChar); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1698 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1699 | // Recalculate startOffsetChar |
| 1700 | int startOffsetChar = startPosChar - intArray[0]; |
| Nils Diewald | 20607ab | 2014-03-20 23:28:36 +0000 | [diff] [blame] | 1701 | |
| Akron | f05fde6 | 2016-08-03 23:46:17 +0200 | [diff] [blame] | 1702 | // Add match span, in case no inner match is defined |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 1703 | if (this.innerMatchEndPos == -1) { |
| 1704 | if (DEBUG) |
| Akron | 12cd258 | 2018-02-17 12:58:38 +0100 | [diff] [blame] | 1705 | log.debug("Added array to match span with {} (1)", intArray); |
| Akron | 08f4ceb | 2016-08-03 23:53:32 +0200 | [diff] [blame] | 1706 | this.span.add(intArray); |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 1707 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1708 | |
| Akron | 12cd258 | 2018-02-17 12:58:38 +0100 | [diff] [blame] | 1709 | // Add context highlight |
| 1710 | this.span.add(new int[]{intArray[0], intArray[1], CONTEXT, 0}); |
| 1711 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1712 | // highlights |
| 1713 | // -- I'm not sure about this. |
| 1714 | if (this.highlight != null) { |
| 1715 | if (DEBUG) |
| 1716 | log.trace("There are highlights!"); |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1717 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1718 | for (Highlight highlight : this.highlight) { |
| Akron | 9ebdfab | 2018-02-19 16:38:17 +0100 | [diff] [blame] | 1719 | if (DEBUG && highlight.start > highlight.end) { |
| 1720 | log.warn("Start position is before end position {}-{}!", |
| 1721 | highlight.start, |
| 1722 | highlight.end); |
| 1723 | }; |
| 1724 | |
| 1725 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1726 | int start = -1; |
| 1727 | int end = -1; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 1728 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1729 | // Highlight is a pagebreak |
| 1730 | if (highlight.end != PB_MARKER) { |
| 1731 | start = this.positionsToOffset.start(ldid, highlight.start); |
| 1732 | end = this.positionsToOffset.end(ldid, highlight.end); |
| 1733 | } |
| 1734 | else { |
| 1735 | |
| 1736 | if (DEBUG) |
| 1737 | log.trace("Highlight is pagebreak -- do not retrieve offset"); |
| 1738 | |
| 1739 | // In pagebreak highlights |
| 1740 | // there is already a character |
| 1741 | start = highlight.start; |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 1742 | end = highlight.end; |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1743 | }; |
| Nils Diewald | 3ef9a47 | 2013-12-02 16:06:09 +0000 | [diff] [blame] | 1744 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1745 | if (DEBUG) |
| 1746 | log.trace("PTO has retrieved {}-{} for class {}", start, |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1747 | end, highlight.number); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1748 | |
| 1749 | start -= startOffsetChar; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1750 | |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1751 | // Keep end equal -1 |
| 1752 | if (end != PB_MARKER) { |
| 1753 | end -= startOffsetChar; |
| Akron | d4b1933 | 2017-02-15 18:36:24 +0100 | [diff] [blame] | 1754 | } |
| 1755 | else if (DEBUG) { |
| 1756 | log.debug("Pagebreak keeps end position"); |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1757 | }; |
| 1758 | |
| 1759 | if (start < 0 || (end < 0 && end != PB_MARKER)) |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1760 | continue; |
| 1761 | |
| 1762 | // Create intArray for highlight |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1763 | intArray = new int[] { |
| 1764 | start, |
| 1765 | end, |
| 1766 | highlight.number, |
| 1767 | 0 // Dummy value for later use |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1768 | }; |
| 1769 | |
| Akron | 35c2d0d | 2017-02-15 11:16:22 +0100 | [diff] [blame] | 1770 | if (DEBUG) |
| 1771 | log.debug("Added array to span with {} (2)", intArray); |
| 1772 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1773 | this.span.add(intArray); |
| 1774 | }; |
| 1775 | }; |
| 1776 | return true; |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 1777 | }; |
| 1778 | |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 1779 | |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1780 | // Pass the local docid to retrieve character positions for the offset |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1781 | private int[] _processOffsetChars (int ldid, int startPosChar, |
| 1782 | int endPosChar) { |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1783 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1784 | int startOffsetChar = -1, endOffsetChar = -1; |
| 1785 | int startOffset = -1, endOffset = -1; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1786 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1787 | // The offset is defined by a span |
| 1788 | if (this.getContext().isSpanDefined()) { |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1789 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1790 | if (DEBUG) |
| 1791 | log.trace("Try to expand to <{}>", |
| 1792 | this.context.getSpanContext()); |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1793 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1794 | this.startMore = false; |
| 1795 | this.endMore = false; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1796 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1797 | int[] spanContext = this.expandContextToSpan( |
| Akron | 700c1eb | 2015-09-25 16:57:30 +0200 | [diff] [blame] | 1798 | this.positionsToOffset.getLeafReader(), (Bits) null, |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1799 | "tokens", this.context.getSpanContext()); |
| 1800 | startOffset = spanContext[0]; |
| 1801 | endOffset = spanContext[1]; |
| 1802 | startOffsetChar = spanContext[2]; |
| 1803 | endOffsetChar = spanContext[3]; |
| 1804 | if (DEBUG) |
| Akron | c27b811 | 2018-02-16 17:08:55 +0100 | [diff] [blame] | 1805 | log.trace("Got context based on span {}-{}/{}-{}", |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1806 | startOffset, endOffset, startOffsetChar, endOffsetChar); |
| 1807 | }; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1808 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1809 | // The offset is defined by tokens or characters |
| 1810 | if (endOffset == -1) { |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1811 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1812 | PositionsToOffset pto = this.positionsToOffset; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1813 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1814 | // The left offset is defined by tokens |
| 1815 | if (this.context.left.isToken()) { |
| 1816 | startOffset = this.startPos - this.context.left.getLength(); |
| 1817 | if (DEBUG) |
| 1818 | log.trace("PTO will retrieve {} (Left context)", |
| 1819 | startOffset); |
| 1820 | pto.add(ldid, startOffset); |
| 1821 | } |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1822 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1823 | // The left offset is defined by characters |
| 1824 | else { |
| 1825 | startOffsetChar = startPosChar - this.context.left.getLength(); |
| 1826 | }; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1827 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1828 | // The right context is defined by tokens |
| 1829 | if (this.context.right.isToken()) { |
| 1830 | endOffset = this.endPos + this.context.right.getLength() - 1; |
| 1831 | if (DEBUG) |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 1832 | log.trace("PTO will retrieve {} (Right context)", |
| 1833 | endOffset); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1834 | pto.add(ldid, endOffset); |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1835 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1836 | } |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1837 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1838 | // The right context is defined by characters |
| 1839 | else { |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 1840 | endOffsetChar = (endPosChar == -1) ? -1 |
| 1841 | : endPosChar + this.context.right.getLength(); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1842 | }; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1843 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1844 | if (startOffset != -1) |
| 1845 | startOffsetChar = pto.start(ldid, startOffset); |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1846 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1847 | if (endOffset != -1) |
| 1848 | endOffsetChar = pto.end(ldid, endOffset); |
| 1849 | }; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1850 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1851 | if (DEBUG) |
| 1852 | log.trace("Premature found offsets at {}-{}", startOffsetChar, |
| 1853 | endOffsetChar); |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1854 | |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1855 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1856 | // This can happen in case of non-token characters |
| 1857 | // in the match and null offsets |
| 1858 | if (startOffsetChar > startPosChar) |
| 1859 | startOffsetChar = startPosChar; |
| 1860 | else if (startOffsetChar < 0) |
| 1861 | startOffsetChar = 0; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1862 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1863 | // No "..." at the beginning |
| 1864 | if (startOffsetChar == 0) |
| 1865 | this.startMore = false; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1866 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1867 | if (endOffsetChar != -1 && endOffsetChar < endPosChar) |
| 1868 | endOffsetChar = endPosChar; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1869 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1870 | if (DEBUG) |
| 1871 | log.trace("The context spans from chars {}-{}", startOffsetChar, |
| 1872 | endOffsetChar); |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1873 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1874 | // Get snippet information from the primary data |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 1875 | if (endOffsetChar > -1 |
| 1876 | && (endOffsetChar < this.getPrimaryDataLength())) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1877 | this.tempSnippet = this.getPrimaryData(startOffsetChar, |
| 1878 | endOffsetChar); |
| 1879 | } |
| 1880 | else { |
| 1881 | this.tempSnippet = this.getPrimaryData(startOffsetChar); |
| 1882 | this.endMore = false; |
| 1883 | }; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1884 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1885 | if (DEBUG) |
| 1886 | log.trace("Snippet: '" + this.tempSnippet + "'"); |
| 1887 | |
| 1888 | if (DEBUG) |
| 1889 | log.trace( |
| 1890 | "The match entry is {}-{} ({}-{}) with absolute offsetChars {}-{}", |
| Eliza Margaretha | 6f98920 | 2016-10-14 21:48:29 +0200 | [diff] [blame] | 1891 | startPosChar - startOffsetChar, |
| 1892 | endPosChar - startOffsetChar, startPosChar, endPosChar, |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1893 | startOffsetChar, endOffsetChar); |
| 1894 | |
| 1895 | // TODO: Simplify |
| 1896 | return new int[] { startPosChar - startOffsetChar, |
| 1897 | endPosChar - startOffsetChar, -1, 0 }; |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1898 | }; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1899 | |
| Nils Diewald | 1e5d594 | 2014-05-20 13:29:53 +0000 | [diff] [blame] | 1900 | |
| Nils Diewald | 884dbcf | 2015-02-27 17:02:28 +0000 | [diff] [blame] | 1901 | // Identical to Result! |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 1902 | public JsonNode toJsonNode () { |
| 1903 | // ObjectNode json = (ObjectNode) mapper.valueToTree(this); |
| 1904 | ObjectNode json = (ObjectNode) super.toJsonNode(); |
| Nils Diewald | cde6908 | 2014-01-16 15:46:48 +0000 | [diff] [blame] | 1905 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1906 | if (this.context != null) |
| 1907 | json.put("context", this.getContext().toJsonNode()); |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 1908 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1909 | if (this.version != null) |
| 1910 | json.put("version", this.getVersion()); |
| Nils Diewald | cdd465b | 2014-02-24 18:47:38 +0000 | [diff] [blame] | 1911 | |
| Akron | 79d51d4 | 2017-02-13 21:28:27 +0100 | [diff] [blame] | 1912 | if (this.startPage != -1) { |
| 1913 | ArrayNode pages = mapper.createArrayNode(); |
| 1914 | pages.add(this.startPage); |
| 1915 | if (this.endPage != -1 && this.endPage != this.startPage) |
| 1916 | pages.add(this.endPage); |
| 1917 | |
| 1918 | json.put("pages", pages); |
| 1919 | }; |
| 1920 | |
| Akron | 7d45e6b | 2015-06-26 17:23:42 +0200 | [diff] [blame] | 1921 | return json; |
| 1922 | }; |
| 1923 | |
| 1924 | |
| 1925 | public String toJsonString () { |
| 1926 | JsonNode json = (JsonNode) this.toJsonNode(); |
| 1927 | |
| 1928 | // Match was no match |
| 1929 | if (json.size() == 0) |
| 1930 | return "{}"; |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1931 | try { |
| 1932 | return mapper.writeValueAsString(json); |
| 1933 | } |
| 1934 | catch (Exception e) { |
| 1935 | log.warn(e.getLocalizedMessage()); |
| 1936 | }; |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 1937 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1938 | return "{}"; |
| Nils Diewald | bfe554b | 2014-01-09 19:35:05 +0000 | [diff] [blame] | 1939 | }; |
| Nils Diewald | 50389b0 | 2014-04-11 16:27:52 +0000 | [diff] [blame] | 1940 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1941 | |
| Nils Diewald | 277e9ce | 2014-11-06 03:42:11 +0000 | [diff] [blame] | 1942 | // Return match as token list |
| Akron | 48937e9 | 2015-06-26 01:49:02 +0200 | [diff] [blame] | 1943 | // TODO: This will be retrieved in case "tokenList" is |
| 1944 | // requested in "fields" |
| Nils Diewald | 277e9ce | 2014-11-06 03:42:11 +0000 | [diff] [blame] | 1945 | public ObjectNode toTokenList () { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1946 | ObjectNode json = mapper.createObjectNode(); |
| Nils Diewald | 277e9ce | 2014-11-06 03:42:11 +0000 | [diff] [blame] | 1947 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1948 | if (this.getDocID() != null) |
| 1949 | json.put("textSigle", this.getDocID()); |
| 1950 | else if (this.getTextSigle() != null) |
| 1951 | json.put("textSigle", this.getTextSigle()); |
| Nils Diewald | 277e9ce | 2014-11-06 03:42:11 +0000 | [diff] [blame] | 1952 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1953 | ArrayNode tokens = json.putArray("tokens"); |
| Nils Diewald | 277e9ce | 2014-11-06 03:42:11 +0000 | [diff] [blame] | 1954 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1955 | // Get pto object |
| 1956 | PositionsToOffset pto = this.positionsToOffset; |
| Nils Diewald | 277e9ce | 2014-11-06 03:42:11 +0000 | [diff] [blame] | 1957 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1958 | // Add for position retrieval |
| 1959 | for (int i = this.getStartPos(); i < this.getEndPos(); i++) { |
| 1960 | pto.add(this.localDocID, i); |
| 1961 | }; |
| Nils Diewald | 277e9ce | 2014-11-06 03:42:11 +0000 | [diff] [blame] | 1962 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1963 | // Retrieve positions |
| 1964 | for (int i = this.getStartPos(); i < this.getEndPos(); i++) { |
| 1965 | ArrayNode token = tokens.addArray(); |
| 1966 | for (int offset : pto.span(this.localDocID, i)) { |
| 1967 | token.add(offset); |
| 1968 | }; |
| 1969 | }; |
| Nils Diewald | 277e9ce | 2014-11-06 03:42:11 +0000 | [diff] [blame] | 1970 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1971 | return json; |
| Nils Diewald | 277e9ce | 2014-11-06 03:42:11 +0000 | [diff] [blame] | 1972 | }; |
| 1973 | |
| Nils Diewald | 50389b0 | 2014-04-11 16:27:52 +0000 | [diff] [blame] | 1974 | |
| 1975 | // Remove duplicate identifiers |
| 1976 | // Yeah ... I mean ... why not? |
| 1977 | private void _filterMultipleIdentifiers () { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1978 | ArrayList<Integer> removeDuplicate = new ArrayList<>(10); |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 1979 | HashSet<String> identifiers = new HashSet<>(20); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1980 | for (int i = 0; i < this.span.size(); i++) { |
| Akron | b98c266 | 2017-02-14 19:38:05 +0100 | [diff] [blame] | 1981 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1982 | // span is an int array: [Start, End, Number, Dummy] |
| 1983 | int highlightNumber = this.span.get(i)[2]; |
| Nils Diewald | 50389b0 | 2014-04-11 16:27:52 +0000 | [diff] [blame] | 1984 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1985 | // Number is an identifier |
| 1986 | if (highlightNumber < -1) { |
| Nils Diewald | d216a03 | 2014-04-30 17:40:19 +0000 | [diff] [blame] | 1987 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1988 | // Get the real identifier |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 1989 | String idNumber = |
| 1990 | identifierNumber.get(highlightNumber); |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 1991 | if (identifiers.contains(idNumber)) { |
| 1992 | removeDuplicate.add(i); |
| 1993 | } |
| 1994 | else { |
| 1995 | identifiers.add(idNumber); |
| 1996 | }; |
| 1997 | }; |
| 1998 | }; |
| Nils Diewald | 50389b0 | 2014-04-11 16:27:52 +0000 | [diff] [blame] | 1999 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 2000 | // Order the duplicates to filter from the tail |
| 2001 | Collections.sort(removeDuplicate); |
| 2002 | Collections.reverse(removeDuplicate); |
| Nils Diewald | 50389b0 | 2014-04-11 16:27:52 +0000 | [diff] [blame] | 2003 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 2004 | // Delete all duplicate identifiers |
| 2005 | for (int delete : removeDuplicate) { |
| 2006 | this.span.remove(delete); |
| 2007 | }; |
| Nils Diewald | 50389b0 | 2014-04-11 16:27:52 +0000 | [diff] [blame] | 2008 | }; |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 2009 | |
| 2010 | |
| 2011 | /* |
| 2012 | * Get identifier based on class number |
| 2013 | */ |
| Akron | d504f21 | 2015-06-20 00:27:54 +0200 | [diff] [blame] | 2014 | @JsonIgnore |
| Akron | 99220ea | 2018-01-30 19:09:20 +0100 | [diff] [blame] | 2015 | public String getClassID (int nr) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 2016 | return this.identifierNumber.get(nr); |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 2017 | }; |
| 2018 | |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 2019 | |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 2020 | /* |
| 2021 | * Get annotation based on id |
| 2022 | */ |
| Akron | d504f21 | 2015-06-20 00:27:54 +0200 | [diff] [blame] | 2023 | @JsonIgnore |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 2024 | public String getAnnotationID (int nr) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 2025 | return this.annotationNumber.get(nr); |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 2026 | }; |
| 2027 | |
| 2028 | |
| 2029 | /* |
| 2030 | * Get relation based on id |
| 2031 | */ |
| Akron | d504f21 | 2015-06-20 00:27:54 +0200 | [diff] [blame] | 2032 | @JsonIgnore |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 2033 | public Relation getRelationID (int nr) { |
| Nils Diewald | bb33da2 | 2015-03-04 16:24:25 +0000 | [diff] [blame] | 2034 | return this.relationNumber.get(nr); |
| Nils Diewald | 79f6c4d | 2014-09-17 17:34:01 +0000 | [diff] [blame] | 2035 | }; |
| Nils Diewald | f399a67 | 2013-11-18 17:55:22 +0000 | [diff] [blame] | 2036 | }; |