| package de.ids_mannheim.korap.response; |
| |
| import static de.ids_mannheim.korap.util.KrillByte.unsignedByte; |
| import static de.ids_mannheim.korap.util.KrillString.codePointSubstring; |
| |
| import java.io.IOException; |
| import java.nio.ByteBuffer; |
| import java.nio.charset.StandardCharsets; |
| import java.util.*; |
| |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.TermContext; |
| import org.apache.lucene.search.spans.SpanTermQuery; |
| import org.apache.lucene.search.spans.Spans; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.FixedBitSet; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import com.fasterxml.jackson.annotation.JsonIgnore; |
| import com.fasterxml.jackson.annotation.JsonInclude; |
| import com.fasterxml.jackson.annotation.JsonInclude.Include; |
| import com.fasterxml.jackson.annotation.JsonProperty; |
| import com.fasterxml.jackson.databind.JsonNode; |
| import com.fasterxml.jackson.databind.ObjectMapper; |
| import com.fasterxml.jackson.databind.node.ArrayNode; |
| import com.fasterxml.jackson.databind.node.ObjectNode; |
| import com.fasterxml.jackson.databind.node.TextNode; |
| |
| import de.ids_mannheim.korap.index.AbstractDocument; |
| import de.ids_mannheim.korap.index.PositionsToOffset; |
| import de.ids_mannheim.korap.query.SpanElementQuery; |
| import de.ids_mannheim.korap.response.match.HighlightCombinator; |
| import de.ids_mannheim.korap.response.match.HighlightCombinatorElement; |
| import de.ids_mannheim.korap.response.match.MatchIdentifier; |
| import de.ids_mannheim.korap.response.match.PosIdentifier; |
| import de.ids_mannheim.korap.response.match.Relation; |
| import de.ids_mannheim.korap.util.KrillProperties; |
| |
| /* |
| * The snippet building algorithm is quite complicated for now |
| * and should probably be refactored. |
| * It works like this: |
| * |
 * 1. For all spans, highlights, pagebreaks etc., all necessary
 *    positions are collected (processHighlight)
| * 2. For all collected positions the character offsets are retrieved |
| * and based on that for all spans and highlights a list |
| * is created with arrays of the spans with the structure |
| * [startchar, endchar, highlightClass] (processHighlightSpans) |
| * 2.1 The primary data and optional context information is retrieved |
| * (processOffsetChars) |
| * 3. Based on the collected spans 2 lists are created for opening and |
| * closing tags (pretty much clones of the initial span list), |
| * sorted for opening resp. closing, and processed in parallel |
| * to form an open/close stack. The new structure on the stack is |
| * [startchar, endchar, highlightclass, close=0/open=1/empty=2] |
| * (processHighlightStack) |
| * 3.1. If the element is a relation with an identifier, this may |
| * be removed if duplicate (filterMultipleIdentifiers) |
| * 4. Based on the stack and the primary data the snippet is created. |
| * (processHighlightSnippet) |
 * 4.1. To avoid unbalanced elements, all open/close/empty tags
 *      are balanced (i.e. closed and reopened if overlaps occur).
 *      (HighlightCombinator)
| */ |
| |
| /* |
| * Todo: The implemented classes and private names are horrible! |
| * Refactor, future-me! |
| * |
| * The number based Highlighttype is ugly - UGLY! |
| * |
| * substrings may be out of range - e.g. if snippets are not lifted! |
| */ |
| |
| /** |
| * Representation of Matches in a Result. |
| * <strong>Warning:</strong> This is currently highly dependent |
| * on DeReKo data and will change in the future. |
| * |
| * @author Nils Diewald |
| * @see Result |
| */ |
| @JsonInclude(Include.NON_NULL) |
| public class Match extends AbstractDocument { |
| |
// Logger
private final static Logger log = LoggerFactory.getLogger(Match.class);

// Sentinel stored in Highlight.end to tag an entry as a pagebreak
private static final int PB_MARKER = -99999;
// Sentinel stored in Highlight.end to tag an entry as a generic marker
private static final int ALL_MARKER = -99998;

// Textual elements that are in context
private static final int CONTEXT = -99997;

// Compile-time switch: while this is false, every "if (DEBUG)"
// logging branch in this class is skipped
public static final boolean DEBUG = false;

// Mapper for JSON serialization
ObjectMapper mapper = new ObjectMapper();

// Snippet information
@JsonIgnore
public SearchContext context;

// Public, while used wildly in tests!
// NOTE: only endPos is initialised to -1 by this declaration;
// startPos keeps the int default of 0.
@JsonIgnore
public int startPos, endPos = -1;

// NOTE: as above, only innerMatchEndPos is initialised to -1.
@JsonIgnore
private int innerMatchStartPos, innerMatchEndPos = -1;

// Character offsets of the (possibly expanded) match, -1 while unknown
@JsonIgnore
public int potentialStartPosChar = -1, potentialEndPosChar = -1;

// Flags raised when the start or end of the match had to be cut
@JsonIgnore
public boolean startCutted = false, endCutted = false;

private String version;

// TEMPORARILY
@JsonIgnore
public int localDocID = -1;

// Lookup tables from a highlight number to the annotation string,
// the relation, or the target identifier string registered for it
private HashMap<Integer, String> annotationNumber = new HashMap<>(16);
private HashMap<Integer, Relation> relationNumber = new HashMap<>(16);
private HashMap<Integer, String> identifierNumber = new HashMap<>(16);

// -1 is match highlight
// Annotation numbers start at 256 and are only handed out while
// below 2048, relation numbers start at 2048 and identifier
// numbers count downwards from -2.
int annotationNumberCounter = 256;
int relationNumberCounter = 2048;
int identifierNumberCounter = -2;

// Page numbers of the match, -1 until set by retrieveMarkers()
private int startPage = -1;
private int endPage = -1;

private String tempSnippet,
    snippetHTML,
    snippetBrackets,
    identifier,
    mirrorIdentifier;

private ObjectNode snippetTokens;

private HighlightCombinator snippetArray;

public boolean hasSnippet = false;
public boolean hasTokens = false;


// Both flags are switched off by expandContextToSpan(String)
@JsonIgnore
public boolean startMore = true, endMore = true;

// private Collection<byte[]> payload;
// Collected highlights, created lazily by addHighlight(Highlight)
private ArrayList<Highlight> highlight;
private LinkedList<int[]> span;

private PositionsToOffset positionsToOffset;
// Set back to false by _reset()
private boolean processed = false;
| |
/**
 * Creates a match for a span inside a document of an atomic reader.
 * Todo: Maybe that's not necessary!
 *
 * @param maxTokenMatchSize
 *        The maximum number of tokens a match may have;
 *        forwarded to {@link #setStartPos(int, int)} and
 *        {@link #setEndPos(int, int)}.
 * @param pto
 *        The PositionsToOffset object, containing relevant
 *        positional information for highlighting
 * @param localDocID
 *        Document ID based on the atomic reader.
 * @param startPos
 *        Start position of the match in the document.
 * @param endPos
 *        End position of the match in the document.
 *
 * @see #snippetHTML()
 * @see #snippetBrackets()
 * @see PositionsToOffset
 */
public Match (int maxTokenMatchSize, PositionsToOffset pto,
        int localDocID, int startPos, int endPos) {
    this.localDocID = localDocID;
    this.positionsToOffset = pto;

    // The start has to be set first, as the end is limited relative to it
    this.setStartPos(maxTokenMatchSize, startPos);
    this.setEndPos(maxTokenMatchSize, endPos);
}
| |
| |
| /** |
| * Constructs a new Match object. |
| */ |
| public Match () {}; |
| |
| |
| /** |
| * Constructs a new Match object. |
| * |
| * @param idString |
| * Match identifier string as provided by Result. |
| * @param includeHighlights |
| * Boolean value indicating if possible provided |
| * highlight information should be ignored or not. |
| */ |
| public Match (int maxTokenMatchSize, String idString, boolean includeHighlights) { |
| MatchIdentifier id = new MatchIdentifier(idString); |
| |
| if (id.getStartPos() > -1) { |
| this.mirrorIdentifier = id.toString(); |
| |
| if (id.getTextSigle() != null) |
| this.addString("textSigle", id.getTextSigle()); |
| |
| // <legacy> |
| this.addString("corpusID", id.getCorpusID()); |
| this.addString("ID", id.getDocID()); |
| // </legacy> |
| |
| this.setStartPos(maxTokenMatchSize, id.getStartPos()); |
| this.setEndPos(maxTokenMatchSize, id.getEndPos()); |
| |
| if (includeHighlights) { |
| for (int[] pos : id.getPos()) { |
| if (pos[0] < id.getStartPos() || pos[1] > id.getEndPos()) |
| continue; |
| this.addHighlight(pos[0], pos[1], pos[2]); |
| }; |
| }; |
| }; |
| }; |
| |
| /** |
| * Private class of highlights. |
| * TODO: This should probably be renamed, as it not only contains highlights |
| * but also annotations, markers, pagebreaks and relations |
| */ |
| private class Highlight { |
| public int start, end; |
| public int number = -1; |
| |
| // Relational highlight |
| public Highlight (int start, int end, String annotation, int refStart, int refEnd) { |
| this.start = start; |
| this.end = end; |
| // TODO: This can overflow! |
| this.number = relationNumberCounter++; |
| |
| if (DEBUG) { |
| log.trace("Add relation (2) '{}': source={}-{} >> target={}-{}", |
| annotation, start, end, refStart, refEnd); |
| }; |
| |
| relationNumber.put(this.number, new Relation(annotation, refStart, refEnd)); |
| }; |
| |
| |
| // Span highlight |
| public Highlight (int start, int end, String annotation) { |
| this.start = start; |
| this.end = end; |
| |
| // TODO: This can overflow! |
| if (annotationNumberCounter < 2048) { |
| this.number = annotationNumberCounter++; |
| annotationNumber.put(this.number, annotation); |
| }; |
| }; |
| |
| |
| // Simple highlight |
| public Highlight (int start, int end, int number) { |
| this.start = start; |
| this.end = end; |
| this.number = number; |
| }; |
| |
| // Pagebreak |
| public Highlight (int start, int pagenumber) { |
| this.start = start; |
| this.end = PB_MARKER; |
| this.number = pagenumber; |
| }; |
| |
| // Marker |
| public Highlight (int start, String marker) { |
| this.start = start; |
| this.end = ALL_MARKER; |
| |
| // TODO: This can overflow! |
| if (annotationNumberCounter < 2048) { |
| this.number = annotationNumberCounter++; |
| annotationNumber.put(this.number, marker); |
| }; |
| }; |
| |
| }; |
| |
| |
| // TODO: Here are offsets and highlight offsets! |
| // <> payloads have 12 bytes (iii) or 8!? |
| // highlightoffsets have 11 bytes (iis)! |
| public void addPayload (List<byte[]> payload) { |
| |
| if (DEBUG) |
| log.trace("Add payloads to match"); |
| |
| // Reverse to make embedding of highlights correct |
| Collections.reverse(payload); |
| try { |
| |
| ByteBuffer bb = ByteBuffer.allocate(24); |
| |
| // TODO: REVERSE ITERATOR! |
| for (byte[] b : payload) { |
| |
| if (DEBUG) |
| log.trace("Found a payload of pti {}", b[0]); |
| |
| // Todo element searches! |
| |
| // Highlights! This is a class PTI |
| if (b[0] == 0) { |
| bb.put(b); |
| bb.position(1); // Ignore PTI |
| int start = bb.getInt(); |
| int end = bb.getInt(); |
| byte number = bb.get(); |
| |
| if (DEBUG) |
| log.trace( |
| "Have a highlight of class {} in {}-{} inside of {}-{}", |
| unsignedByte(number), start, end, |
| this.getStartPos(), this.getEndPos()); |
| |
| // Ignore classes out of match range and set by the system |
| // TODO: This may be decidable by PTI! |
| if (unsignedByte(number) <= 128 |
| && start >= this.getStartPos() |
| && end <= this.getEndPos()) { |
| |
| if (DEBUG) { |
| log.trace("Add highlight with class/relationnr {}!", |
| unsignedByte(number)); |
| }; |
| |
| this.addHighlight(start, end - 1, number); |
| } |
| else if (DEBUG) { |
| log.trace("Don't add highlight of class {}!", |
| unsignedByte(number)); |
| }; |
| } |
| |
| // Element payload for match! |
| // This MAY BE the correct match |
| else if (b[0] == (byte) 64) { |
| |
| bb.put(b); |
| bb.position(1); // Ignore pti |
| |
| // Wasn't set before |
| if (this.potentialStartPosChar == -1) { |
| this.potentialStartPosChar = bb.getInt(1); |
| } |
| else { |
| if (bb.getInt(0) < this.potentialStartPosChar) |
| this.potentialStartPosChar = bb.getInt(1); |
| }; |
| |
| if (bb.getInt(4) > this.potentialEndPosChar && !this.endCutted) |
| this.potentialEndPosChar = bb.getInt(5); |
| |
| if (DEBUG) |
| log.trace("Element payload from {} to {}", |
| this.potentialStartPosChar, |
| this.potentialEndPosChar); |
| }; |
| |
| // Clear bytebuffer |
| bb.clear(); |
| }; |
| } |
| |
| catch (Exception e) { |
| log.error(e.getMessage()); |
| } |
| }; |
| |
| |
| /** |
| * Insert a highlight for the snippet view by means of positional |
| * offsets and an optional class number. |
| * |
| * @param start |
| * Integer value of a span's positional start offset. |
| * @param end |
| * Integer value of a span's positional end offset. |
| * @param number |
| * Optional class number of the highlight. |
| */ |
| public void addHighlight (int start, int end) { |
| this.addHighlight(new Highlight(start, end, (int) 0)); |
| }; |
| |
| |
| public void addHighlight (int start, int end, byte number) { |
| this.addHighlight(new Highlight(start, end, (int) number)); |
| }; |
| |
| |
| public void addHighlight (int start, int end, short number) { |
| this.addHighlight(new Highlight(start, end, (int) number)); |
| }; |
| |
| |
| public void addHighlight (int start, int end, int number) { |
| this.addHighlight(new Highlight(start, end, number)); |
| }; |
| |
| |
| /** |
| * Insert a highlight for the snippet view. |
| * |
| * @param hl |
| * A highlight object to add to the match. |
| */ |
| public void addHighlight (Highlight hl) { |
| |
| if (this.highlight == null) |
| this.highlight = new ArrayList<Highlight>(16); |
| |
| if (DEBUG) |
| log.trace("Add highlight from pos {}-{} of class {}", hl.start, |
| hl.end, hl.number); |
| |
| // Reset the fetched match data |
| this._reset(); |
| |
| this.highlight.add(hl); |
| }; |
| |
| |
| /** |
| * Insert a textual annotation for the snippet view by |
| * means of positional offsets and an annotation string. |
| * |
| * @param start |
| * Integer value of a span's positional start offset. |
| * @param end |
| * Integer value of a span's positional end offset. |
| * @param annotation |
| * Annotation string. |
| */ |
| public void addAnnotation (int start, int end, String annotation) { |
| |
| if (DEBUG && start > end) |
| log.warn("Annotation span is negative: {}, {} for {}", start, end, annotation); |
| |
| this.addHighlight(new Highlight(start, end, annotation)); |
| }; |
| |
| |
| /** |
| * Insert an annotated relation for the snippet view by |
| * means of relational participant positions and an annotation |
| * string. |
| * |
| * @param src |
| * Integer value of a span's positional source object. |
| * @param target |
| * Integer value of a span's positional target object. |
| * @param annotation |
| * Annotation string. |
| */ |
| public void addRelation (int srcStart, |
| int srcEnd, |
| int targetStart, |
| int targetEnd, |
| String annotation) { |
| |
| if (DEBUG) |
| log.trace("Add relation (1) '{}': source={}-{} >> target={}-{}", |
| annotation, srcStart, srcEnd, targetStart, targetEnd); |
| |
| // Add source token |
| if (srcEnd == -1) { // || srcStart == srcEnd) { |
| this.addHighlight( |
| new Highlight(srcStart, srcStart, annotation, targetStart, targetEnd) |
| ); |
| } |
| // Add source span |
| else { |
| this.addHighlight( |
| new Highlight(srcStart, srcEnd, annotation, targetStart, targetEnd) |
| ); |
| }; |
| |
| int id = identifierNumberCounter--; |
| |
| // Here is probably the problem: the identifier-number |
| // needs to incorporate targetEnd as well |
| |
| // Add target token |
| // (The last part was previously commented |
| // out for unknown reason) |
| if (targetEnd == -1 || targetStart == targetEnd) { |
| this.addHighlight(new Highlight(targetStart, targetStart, id)); |
| |
| identifierNumber.put(id, String.valueOf(targetStart)); |
| } |
| |
| // Add target span |
| else { |
| this.addHighlight(new Highlight(targetStart, targetEnd, id)); |
| identifierNumber.put(id, targetStart + "-" + targetEnd); |
| |
| }; |
| }; |
| |
| public void addPagebreak (int start, int pagenumber) { |
| this.addHighlight(new Highlight(start, pagenumber)); |
| }; |
| |
| public void addMarker (int start, String data) { |
| this.addHighlight(new Highlight(start, data)); |
| }; |
| |
| |
| /** |
| * Get document id. |
| */ |
| @JsonProperty("docID") |
| public String getDocID () { |
| return super.getID(); |
| }; |
| |
| |
| /** |
| * Get start page. |
| */ |
| @JsonIgnore |
| public int getStartPage () { |
| return this.startPage; |
| }; |
| |
| |
| /** |
| * Get end page. |
| */ |
| @JsonIgnore |
| public int getEndPage () { |
| return this.endPage; |
| }; |
| |
| |
| /** |
| * Get the positional start offset of the match. |
| */ |
| @JsonIgnore |
| public int getStartPos () { |
| return this.startPos; |
| }; |
| |
| |
| /** |
| * Get the positional start offset of the class. |
| * |
| * @param number |
| * Class number of the highlight. |
| */ |
| @JsonIgnore |
| public int getStartPos (int number) { |
| if (number > 256 || this.highlight == null) |
| return -1; |
| |
| // Iterate over highlights to find matching class |
| for (Highlight h : this.highlight) { |
| if (h.number == number && h.end != PB_MARKER && h.end != ALL_MARKER) |
| return h.start; |
| }; |
| |
| return -1; |
| }; |
| |
| |
| /** |
| * Set the positional start offset of the match. |
| * |
| * @param pos |
| * The positional offset. |
| */ |
| @JsonIgnore |
| public void setStartPos (int maxTokenMatchSize, int pos) { |
| this.startPos = pos; |
| if (this.endPos != -1 && (this.endPos - pos) > maxTokenMatchSize) { |
| this.endPos = pos + maxTokenMatchSize; |
| this.endCutted = true; |
| }; |
| }; |
| |
| |
| /** |
| * Get the positional end offset of the match. |
| */ |
| @JsonIgnore |
| public int getEndPos () { |
| return this.endPos; |
| }; |
| |
| |
| /** |
| * Get the positional end offset of the class. |
| * |
| * @param number |
| * Class number of the highlight. |
| */ |
| @JsonIgnore |
| public int getEndPos (int number) { |
| if (number > 256 || this.highlight == null) |
| return -1; |
| |
| // Iterate over highlights to find matching class |
| for (Highlight h : this.highlight) { |
| |
| // Get the number (incremented by 1) |
| if (h.number == number && h.end != PB_MARKER) |
| return h.end + 1; |
| }; |
| |
| return -1; |
| }; |
| |
| |
| /** |
| * Set the positional end offset of the match. |
| * |
| * @param pos |
| * The positional offset. |
| */ |
| @JsonIgnore |
| public void setEndPos (int maxTokenMatchSize, int pos) { |
| if (this.startPos != -1 && (pos - this.startPos) > maxTokenMatchSize) { |
| pos = this.startPos + maxTokenMatchSize; |
| this.endCutted = true; |
| }; |
| this.endPos = pos; |
| }; |
| |
| |
| /** |
| * Get the local (i.e. Lucene given) ID of the document. |
| */ |
| @JsonIgnore |
| public int getLocalDocID () { |
| return this.localDocID; |
| }; |
| |
| |
| /** |
| * Set the local (i.e. Lucene given) ID of the document. |
| * |
| * @param id |
| * The id of the document. |
| */ |
| @JsonIgnore |
| public void setLocalDocID (int id) { |
| this.localDocID = id; |
| }; |
| |
| |
| /** |
| * Get the PositionsToOffset object. |
| * |
| * @see PositionsToOffset |
| */ |
| @JsonIgnore |
| public PositionsToOffset getPositionsToOffset () { |
| return this.positionsToOffset; |
| }; |
| |
| |
| /** |
| * Set the PositionsToOffset object. |
| * |
| * @param pto |
| * The PositionsToOffset object |
| * @see PositionsToOffset |
| */ |
| @JsonIgnore |
| public void setPositionsToOffset (PositionsToOffset pto) { |
| this.positionsToOffset = pto; |
| }; |
| |
| |
| /** |
| * Get match ID (for later retrieval). |
| * |
| * @see MatchIdentifier |
| */ |
| @Override |
| @JsonProperty("matchID") |
| public String getID () { |
| |
| // Return identifier as given |
| if (this.mirrorIdentifier != null) { |
| return this.mirrorIdentifier; |
| }; |
| |
| // Identifier already created |
| if (this.identifier != null) { |
| return this.identifier; |
| }; |
| |
| // No, nada, nix |
| if (this.localDocID == -1) |
| return null; |
| |
| |
| MatchIdentifier id = this.getMatchIdentifier(); |
| |
| // Get prefix string corpus/doc |
| if (this.getTextSigle() != null) { |
| id.setTextSigle(this.getTextSigle()); |
| } |
| // LEGACY |
| else { |
| id.setCorpusID(this.getCorpusID()); |
| id.setDocID(this.getDocID()); |
| }; |
| |
| return (this.identifier = id.toString()); |
| }; |
| |
| |
| @JsonIgnore |
| public MatchIdentifier getMatchIdentifier () { |
| MatchIdentifier id = new MatchIdentifier(); |
| |
| id.setStartPos(startPos); |
| id.setEndPos(endPos); |
| |
| // There are highlights to integrate |
| if (this.highlight != null) { |
| for (Highlight h : this.highlight) { |
| if (h.number >= 256 || h.end == PB_MARKER || h.end == ALL_MARKER) |
| continue; |
| |
| // Add highlight to the snippet |
| id.addPos(h.start, h.end, h.number); |
| }; |
| }; |
| |
| return id; |
| }; |
| |
| /** |
| * Get identifier for a specific position. |
| * |
| * @param int |
| * Position to get identifier on. |
| */ |
| @JsonIgnore |
| public String getPosID (int pos) { |
| return this.getPosID(pos, -1); |
| }; |
| |
| |
| /** |
| * Get identifier for a specific position. |
| * |
| * @param String |
| * Start and optional end position to get |
| * identifier on, separated by a dash. |
| */ |
| @JsonIgnore |
| public String getPosID (String pos) { |
| |
| if (pos == null) { |
| return ""; |
| }; |
| |
| String[] startEnd = pos.split("-"); |
| if (startEnd.length == 2) { |
| return this.getPosID( |
| Integer.parseInt(startEnd[0]), |
| Integer.parseInt(startEnd[1]) |
| ); |
| } |
| return this.getPosID(Integer.parseInt(startEnd[0]), -1); |
| }; |
| |
| |
| |
| /** |
| * Get identifier for a specific position. |
| * |
| * @param int |
| * Start position to get identifier on. |
| * @param int |
| * End position to get identifier on. |
| */ |
| @JsonIgnore |
| public String getPosID (int start, int end) { |
| |
| if (DEBUG) |
| log.trace("Retrieve identifier for position {}-{}", start, end); |
| |
| // Identifier already given |
| if (this.identifier != null) |
| return this.identifier; |
| |
| // Nothing here |
| if (this.localDocID == -1) |
| return null; |
| |
| PosIdentifier id = new PosIdentifier(); |
| |
| // Get prefix string corpus/doc |
| // <legacy> |
| id.setCorpusID(this.getCorpusID()); |
| id.setDocID(this.getDocID()); |
| // </legacy> |
| id.setTextSigle(this.getTextSigle()); |
| id.setStart(start); |
| id.setEnd(end); |
| |
| if (DEBUG) |
| log.trace( |
| "Identifier is {} in {} ({}-{}) {}", |
| id.toString(), |
| this.getTextSigle(), |
| this.getCorpusID(), |
| this.getDocID(), |
| start |
| ); |
| |
| return id.toString(); |
| }; |
| |
| |
| public Match setContext (SearchContext context) { |
| this.context = context; |
| return this; |
| }; |
| |
| |
| @JsonIgnore |
| public SearchContext getContext () { |
| if (this.context == null) |
| this.context = new SearchContext(); |
| return this.context; |
| }; |
| |
| @JsonIgnore |
| public int getLength () { |
| return this.getEndPos() - this.getStartPos(); |
| }; |
| |
| |
| // Retrieve markers in a certain area |
| public List<int[]> retrieveMarkers (String marker) { |
| if (this.positionsToOffset != null) { |
| return this.retrieveMarkers( |
| this.positionsToOffset.getLeafReader(), |
| (Bits) null, |
| "tokens", |
| marker |
| ); |
| }; |
| |
| return null; |
| }; |
| |
| // Retrieve markers in a certain area |
| // THIS IS NOT VERY CLEVER - MAKE IT MORE CLEVER! |
| public List<int[]> retrieveMarkers (LeafReaderContext atomic, |
| Bits bitset, |
| String field, |
| String marker) { |
| |
| // List of relevant pagebreaks - only used for pagebreak markers! |
| List<int[]> pagebreaks = new ArrayList<>(24); |
| |
| int charOffset = 0, pagenumber = 0, start = 0; |
| |
| int minStartPos = this.getStartPos() - KrillProperties.maxTokenContextSize; |
| int maxEndPos = this.getEndPos() + KrillProperties.maxTokenContextSize; |
| |
| if (DEBUG) { |
| log.debug("================================="); |
| log.debug("Retrieve markers between {}-{}", |
| this.getStartPos(), |
| this.getEndPos()); |
| }; |
| |
| try { |
| |
| // Store character offsets in ByteBuffer |
| ByteBuffer bb = ByteBuffer.allocate(256); |
| |
| // Store last relevant marker in byte array |
| byte[] b = null; |
| |
| SpanTermQuery stq = new SpanTermQuery(new Term(field, marker)); |
| |
| if (DEBUG) |
| log.trace("Check markers with {}", stq.toString()); |
| |
| Spans markerSpans = stq.getSpans( |
| atomic, bitset, new HashMap<Term, TermContext>() |
| ); |
| |
| // Iterate over all markers |
| while (markerSpans.next() == true) { |
| |
| if (DEBUG) { |
| log.debug("There is a marker at {}/{} and we are at {}", |
| markerSpans.doc(), |
| markerSpans.start(), |
| this.localDocID); |
| }; |
| |
| // Current marker is not in the correct document |
| if (markerSpans.doc() != this.localDocID) { |
| if (markerSpans.doc() < this.localDocID) { |
| markerSpans.skipTo(this.localDocID); |
| |
| // No pagebreaks in this document |
| if (markerSpans.doc() != this.localDocID) |
| break; |
| } |
| else { |
| break; |
| }; |
| continue; |
| }; |
| |
| if (DEBUG) |
| log.debug("The marker occurs in the document"); |
| |
| // There is a marker found - check, |
| // if it is in the correct area |
| if (markerSpans.start() < minStartPos) { |
| |
| // Only the first payload is relevant |
| b = markerSpans.getPayload().iterator().next(); |
| start = markerSpans.start(); |
| |
| if (DEBUG) |
| log.debug("Marker start position is before match at {}:{}", |
| markerSpans.start(), |
| b); |
| |
| } |
| |
| // This captures all markers starting in the potential (i.e. maximum) context of the match |
| else { |
| |
| // b is already defined! |
| // This may be due to the last next |
| if (b != null) { |
| bb.rewind(); |
| bb.put(b); |
| bb.rewind(); |
| |
| pagenumber = bb.getInt(); |
| charOffset = bb.getInt(); |
| |
| // This marker is a pagebreak |
| if (pagenumber != 0) { |
| if (DEBUG) |
| log.debug("Add pagebreak to list: {}-{}", charOffset, pagenumber); |
| |
| // Add all pagebreaks for later counting |
| pagebreaks.add(new int[]{charOffset, pagenumber}); |
| |
| if (start >= minStartPos) { |
| if (DEBUG) |
| log.debug("Add marker to rendering: {}-{}", |
| charOffset, |
| pagenumber); |
| this.addPagebreak(charOffset, pagenumber); |
| }; |
| } |
| |
| // This marker is no pagebreak |
| else { |
| int bytelength = bb.getInt(); |
| byte[] anno = new byte[bytelength]; |
| bb.get(anno, 0, bytelength); |
| String annoStr = new String(anno, StandardCharsets.UTF_8); |
| this.addMarker(charOffset, annoStr); |
| } |
| |
| b = null; |
| }; |
| |
| // b wasn't used yet |
| if (markerSpans.start() <= maxEndPos) { |
| |
| // Set new marker |
| // Only the first payload is relevant |
| b = markerSpans.getPayload().iterator().next(); |
| bb.rewind(); |
| bb.put(b); |
| bb.rewind(); |
| |
| pagenumber = bb.getInt(); |
| charOffset = bb.getInt(); |
| |
| // This marker is a pagebreak |
| if (pagenumber != 0) { |
| if (DEBUG) |
| log.debug("Add pagebreak to list: {}-{}", charOffset, pagenumber); |
| |
| // This is the first pagebreak! |
| pagebreaks.add(new int[]{charOffset, pagenumber}); |
| |
| if (start >= minStartPos) { |
| |
| |
| if (DEBUG) |
| log.debug("Add pagebreak to rendering: {}-{}", |
| charOffset, |
| pagenumber); |
| this.addPagebreak(charOffset, pagenumber); |
| }; |
| } |
| |
| // This marker is no pagebreak |
| else { |
| int bytelength = bb.getInt(); |
| |
| byte[] anno = new byte[bytelength]; |
| bb.get(anno); |
| String annoStr = new String(anno, StandardCharsets.UTF_8); |
| this.addMarker(charOffset, annoStr); |
| } |
| |
| b = null; |
| } |
| |
| // Pagebreak beyond the current position |
| else { |
| break; |
| }; |
| }; |
| }; |
| |
| // That's identical to the above approach and should only occur once |
| if (b != null) { |
| bb.rewind(); |
| bb.put(b); |
| bb.rewind(); |
| |
| pagenumber = bb.getInt(); |
| charOffset = bb.getInt(); |
| |
| // This marker is a pagebreak |
| if (pagenumber != 0) { |
| |
| if (DEBUG) |
| log.debug("Add pagebreak to list: {}-{}", charOffset, pagenumber); |
| |
| // This is a remembered pagebreak! |
| pagebreaks.add(new int[]{charOffset, pagenumber}); |
| |
| if (start >= minStartPos) { |
| |
| if (DEBUG) |
| log.debug("Add pagebreak to rendering: {}-{}", |
| charOffset, |
| pagenumber); |
| this.addPagebreak(charOffset, pagenumber); |
| }; |
| } |
| // This marker is no pagebreak |
| else { |
| int bytelength = bb.getInt(); |
| |
| byte[] anno = new byte[bytelength]; |
| bb.get(anno); |
| String annoStr = new String(anno, StandardCharsets.UTF_8); |
| this.addMarker(charOffset, annoStr); |
| } |
| |
| b = null; |
| }; |
| } |
| catch (Exception e) { |
| log.warn("Some problems with ByteBuffer: {}", e.getMessage()); |
| }; |
| |
| // For references calculate the page for the match |
| if (pagebreaks.size() > 0) { |
| int i = 0; |
| for (; i < pagebreaks.size(); i++) { |
| if (pagebreaks.get(i)[0] <= this.getStartPos()) { |
| this.startPage = pagebreaks.get(i)[1]; |
| } else { |
| // i++; |
| break; |
| }; |
| }; |
| for (; i < pagebreaks.size(); i++) { |
| if (pagebreaks.get(i)[0] < this.getEndPos()) { |
| this.endPage = pagebreaks.get(i)[1]; |
| } else { |
| break; |
| }; |
| }; |
| }; |
| |
| return pagebreaks; |
| }; |
| |
| // Expand the context to a span |
| public void expandContextToSpan (String element) { |
| |
| // TODO: THE BITS HAVE TO BE SET! |
| |
| int[] spanContext = new int[] { 0, 0, 0, 0 }; |
| |
| if (this.positionsToOffset != null) { |
| spanContext = this.expandContextToSpan( |
| this.positionsToOffset.getLeafReader(), (Bits) null, |
| "tokens", element); |
| } |
| |
| if (spanContext[0] >= 0 |
| && spanContext[0] < spanContext[1]) { |
| |
| int maxExpansionSize = KrillProperties.maxTokenMatchSize; |
| if (KrillProperties.matchExpansionIncludeContextSize) { |
| maxExpansionSize += KrillProperties.maxTokenContextSize; |
| } |
| |
| // Match needs to be cutted! |
| boolean cutExpansion = false; |
| if ((spanContext[1] - spanContext[0]) > maxExpansionSize) { |
| cutExpansion=true; |
| int contextLength = maxExpansionSize - this.getLength(); |
| int halfContext = contextLength / 2; |
| |
| // This is the extended context calculated |
| int realLeftLength = this.getStartPos() - spanContext[0]; |
| |
| // The length is too large - cut! |
| if (realLeftLength > halfContext) { |
| this.startCutted = true; |
| spanContext[0] = this.getStartPos() - halfContext; |
| } |
| |
| int realRightLength = spanContext[1] - this.getEndPos(); |
| |
| // The length is too large - cut! |
| if (realRightLength > halfContext) { |
| this.endCutted = true; |
| spanContext[1] = this.getEndPos() + halfContext; |
| } |
| } |
| |
| this.setStartPos(maxExpansionSize,spanContext[0]); |
| this.setEndPos(maxExpansionSize,spanContext[1]); |
| // EM: update char offsets |
| |
| if (cutExpansion) { |
| this.positionsToOffset.add(localDocID, startPos); |
| this.positionsToOffset.add(localDocID, endPos); |
| |
| int start = this.positionsToOffset.start(localDocID, startPos); |
| int end = this.positionsToOffset.start(localDocID, endPos)-1; |
| spanContext[2] = start; //spanContext[2]; |
| spanContext[3] = end; // spanContext[3]; |
| } |
| |
| this.potentialStartPosChar = spanContext[2]; |
| this.potentialEndPosChar = spanContext[3]; |
| this.startMore = false; |
| this.endMore = false; |
| |
| this.positionsToOffset.clear(); |
| } |
| else { |
| this.addWarning(651, "Unable to extend context"); |
| }; |
| }; |
| |
| |
| |
| // Expand the context to a span |
| // THIS IS NOT VERY CLEVER - MAKE IT MORE CLEVER! |
| public int[] expandContextToSpan (LeafReaderContext atomic, Bits bitset, |
| String field, String element) { |
| |
| try { |
| // Store character offsets in ByteBuffer |
| ByteBuffer bb = ByteBuffer.allocate(24); |
| |
| SpanElementQuery cquery = new SpanElementQuery(field, element); |
| |
| Spans contextSpans = cquery.getSpans(atomic, bitset, |
| new HashMap<Term, TermContext>()); |
| |
| int newStart = -1, newEnd = -1; |
| int newStartChar = -1, newEndChar = -1; |
| |
| if (DEBUG) |
| log.trace( |
| "Extend match to context boundary with {} in docID {}", |
| cquery.toString(), this.localDocID); |
| |
| while (true) { |
| |
| // Game over |
| if (contextSpans.next() != true) |
| break; |
| |
| if (contextSpans.doc() != this.localDocID) { |
| contextSpans.skipTo(this.localDocID); |
| if (contextSpans.doc() != this.localDocID) |
| break; |
| }; |
| |
| // There's a <context> found -- I'm curious, |
| // if it's closer to the match than everything before |
| if (contextSpans.start() <= this.getStartPos() |
| && contextSpans.end() >= this.getStartPos()) { |
| |
| // Set as newStart |
| newStart = contextSpans.start() > newStart |
| ? contextSpans.start() : newStart; |
| |
| if (DEBUG) |
| log.trace("NewStart is at {}", newStart); |
| |
| // Get character offset (start) |
| if (contextSpans.isPayloadAvailable()) { |
| try { |
| bb.rewind(); |
| for (byte[] b : contextSpans.getPayload()) { |
| |
| // Not an element span |
| if (b[0] != (byte) 64) |
| continue; |
| |
| bb.rewind(); |
| bb.put(b); |
| bb.position(1); |
| newStartChar = bb.getInt(); |
| newEndChar = bb.getInt(); |
| break; |
| }; |
| } |
| catch (Exception e) { |
| log.warn("Some problems with ByteBuffer: {}", |
| e.getMessage()); |
| }; |
| }; |
| } |
| else { |
| // Has to be resettet to avoid multiple readings of the payload |
| newEndChar = 0; |
| }; |
| |
| // There's an s found, that ends after the match |
| if (contextSpans.end() >= this.getEndPos()) { |
| newEnd = contextSpans.end(); |
| |
| // Get character offset (end) |
| if (newEndChar == 0 && contextSpans.isPayloadAvailable()) { |
| try { |
| bb.rewind(); |
| for (byte[] b : contextSpans.getPayload()) { |
| |
| // Not an element span |
| if (b[0] != (byte) 64) |
| continue; |
| |
| bb.rewind(); |
| bb.put(b); |
| bb.position(1); |
| newEndChar = bb.getInt(1); |
| break; |
| }; |
| } |
| catch (Exception e) { |
| log.warn(e.getMessage()); |
| }; |
| }; |
| break; |
| }; |
| }; |
| |
| // We have a new match surrounding |
| if (DEBUG) |
| log.trace("New match spans from {}-{}/{}-{}", newStart, newEnd, |
| newStartChar, newEndChar); |
| |
| return new int[] { newStart, newEnd, newStartChar, newEndChar }; |
| } |
| catch (IOException e) { |
| log.error(e.getMessage()); |
| }; |
| |
| return new int[] { -1, -1, -1, -1 }; |
| }; |
| |
| |
| // Reset all internal data |
| private void _reset () { |
| this.processed = false; |
| this.snippetHTML = null; |
| this.snippetBrackets = null; |
| this.snippetTokens = null; |
| this.identifier = null; |
| |
| // Delete all spans |
| if (this.span != null) |
| this.span.clear(); |
| }; |
| |
| |
| // Start building highlighted snippets |
| private boolean _processHighlight () { |
| |
| if (processed) |
| return true; |
| |
| // Relevant details are missing |
| if (this.positionsToOffset == null || this.localDocID == -1) { |
| if (DEBUG) { |
| log.warn("You have to define " |
| + "positionsToOffset and localDocID first before"); |
| } |
| return false; |
| }; |
| |
| if (DEBUG) |
| log.trace("--- Start highlight processing ..."); |
| |
| // Get pto object |
| PositionsToOffset pto = this.positionsToOffset; |
| pto.add(this.localDocID, this.getStartPos()); |
| pto.add(this.localDocID, this.getEndPos() - 1); |
| |
| if (DEBUG) |
| log.trace("PTO will retrieve {} & {} (Match boundary)", |
| this.getStartPos(), this.getEndPos()); |
| |
| // Set inner match |
| if (this.innerMatchEndPos != 1) |
| this.addHighlight(this.innerMatchStartPos, this.innerMatchEndPos, |
| -1); |
| |
| // Add all highlights for character retrieval |
| if (this.highlight != null) { |
| for (Highlight hl : this.highlight) { |
| if (hl.start >= this.getStartPos() |
| && hl.end <= this.getEndPos()) { |
| |
| // Highlight is no pagebreak |
| if (hl.end != PB_MARKER && hl.end != ALL_MARKER) { |
| pto.add(this.localDocID, hl.start); |
| pto.add(this.localDocID, hl.end); |
| |
| if (DEBUG) |
| log.trace( |
| "PTO will retrieve offsets from token {} & {} (Highlight boundary)", |
| hl.start, hl.end); |
| |
| } |
| |
| else if (DEBUG) { |
| log.trace("Highlight is a pagebreak or marker - do not retrieve PTO"); |
| }; |
| }; |
| }; |
| }; |
| |
| // Get the list of spans for matches and highlighting |
| if (this.span == null || this.span.size() == 0) { |
| if (!this._processHighlightSpans()) |
| return false; |
| }; |
| |
| // Create a stack for highlighted elements |
| // (opening and closing elements) |
| ArrayList<int[]> stack = this._processHighlightStack(); |
| |
| if (DEBUG) |
| log.trace("The snippet is {}", this.tempSnippet); |
| |
| // The temporary snippet is empty, nothing to do |
| if (this.tempSnippet == null) { |
| processed = true; |
| return false; |
| }; |
| |
| // Merge the element stack with the primary textual data |
| this._processHighlightSnippet(this.tempSnippet, stack); |
| |
| // Match is processed - done |
| return (processed = true); |
| }; |
| |
| |
| /* |
| Comparator class for opening tags |
| */ |
| private class OpeningTagComparator implements Comparator<int[]> { |
| @Override |
| public int compare (int[] arg0, int[] arg1) { |
| // Check start positions |
| if (arg0[0] > arg1[0]) { |
| return 1; |
| } |
| |
| else if (arg0[0] == arg1[0]) { |
| |
| int end0 = arg0[1]; |
| int end1 = arg1[1]; |
| |
| if (arg0[1] == PB_MARKER || arg0[1] == ALL_MARKER) { |
| end0 = arg0[0]; |
| }; |
| if (arg1[1] == PB_MARKER || arg1[1] == ALL_MARKER) { |
| end1 = arg1[0]; |
| }; |
| |
| // Check endpositions |
| if (end0 > end1) { |
| return -1; |
| } |
| else if (end0 == end1) { |
| |
| // Compare class number |
| if (arg0[2] > arg1[2]) |
| return 1; |
| else if (arg0[2] < arg1[2]) |
| return -1; |
| return 0; |
| |
| } |
| return 1; |
| }; |
| return -1; |
| }; |
| }; |
| |
| /* |
| * Comparator class for closing tags |
| */ |
| private class ClosingTagComparator implements Comparator<int[]> { |
| @Override |
| public int compare (int[] arg0, int[] arg1) { |
| |
| int end0 = arg0[1]; |
| int end1 = arg1[1]; |
| |
| if (arg0[1] == PB_MARKER || arg0[1] == ALL_MARKER) { |
| end0 = arg0[0]; |
| }; |
| |
| if (arg1[1] == PB_MARKER || arg1[1] == ALL_MARKER) { |
| end1 = arg1[0]; |
| }; |
| |
| // Check end positions |
| if (end0 > end1) { |
| return 1; |
| } |
| else if (end0 == end1) { |
| |
| // Check start positions |
| if (arg0[0] < arg1[0]) { |
| return 1; |
| } |
| else if (arg0[0] == arg1[0]) { |
| return 0; |
| }; |
| return -1; |
| }; |
| return -1; |
| }; |
| }; |
| |
| |
| /* |
| * This takes a clean string and the tag stack |
| * to decorate the string with annotations. |
| */ |
| private void _processHighlightSnippet (String clean, |
| ArrayList<int[]> stack) { |
| |
| if (DEBUG) { |
| log.trace("--- Process Highlight snippet"); |
| log.trace("--- Snippet: {}", clean); |
| }; |
| |
| int pos = 0, oldPos = 0; |
| boolean exceeded = false; |
| |
| this.snippetArray = new HighlightCombinator(); |
| |
| // The snippetArray can have preceeding and following pagebreaks |
| // and markers that need to be removed |
| |
| |
| // Iterate over all elements of the stack |
| for (int[] element : stack) { |
| |
| // The position is the start position for opening and |
| // empty/marker elements and the end position for closing elements |
| pos = element[3] != 0 ? element[0] : element[1]; |
| |
| if (DEBUG) { |
| log.trace("Check tag at position {} (was {}) [{},{},{},{}]", |
| pos, |
| oldPos, |
| element[0], |
| element[1], |
| element[2], |
| element[3]); |
| }; |
| |
| // The new position is behind the old position |
| if (pos > oldPos) { |
| |
| // The position is behind the string length, |
| // which may end when an element ends beyond |
| if (pos > clean.length()) { |
| |
| // Reposition to the end |
| pos = clean.length(); |
| |
| if (DEBUG) |
| log.trace("Position exceeds string, now {}", pos); |
| |
| exceeded = true; |
| }; |
| |
| // Add partial string |
| if (pos > 0 && pos > oldPos) { |
| if (DEBUG) |
| log.trace("Add string {}", codePointSubstring(clean, oldPos, pos)); |
| snippetArray.addString(codePointSubstring(clean, oldPos, pos)); |
| }; |
| |
| // Remember the new position |
| oldPos = pos; |
| }; |
| |
| // close tag |
| if (element[3] == 0) { |
| |
| if (DEBUG) |
| log.trace("Add closer: {}", element[2]); |
| |
| // Add close |
| snippetArray.addClose(element[2]); |
| } |
| |
| // empty tag (pagebreak) |
| else if (!exceeded && element[3] == 2) { |
| |
| // Add Empty (pagebreak) |
| snippetArray.addEmpty(element[2]); |
| } |
| |
| // empty tag (marker) |
| else if (!exceeded && element[3] == 3) { |
| |
| // Add Empty (pagebreak) |
| snippetArray.addMarker(element[2]); |
| } |
| |
| // opening element exceeds primary data |
| else if (exceeded) { |
| break; |
| } |
| |
| // open tag |
| else { |
| snippetArray.addOpen(element[2]); |
| }; |
| }; |
| |
| if (clean.length() > pos && pos >= 0) { |
| snippetArray.addString(codePointSubstring(clean, pos)); |
| if (DEBUG) |
| log.trace("Add rest string {}", codePointSubstring(clean, pos)); |
| }; |
| }; |
| |
| /* |
| * Return the snippet as a list of tokens |
| */ |
| @JsonIgnore |
| public ObjectNode getSnippetTokens () { |
| ObjectNode json = mapper.createObjectNode(); |
| |
| if (!this._processHighlight()) |
| return null; |
| |
| if (this.processed && this.snippetTokens != null) |
| return this.snippetTokens; |
| |
| if (DEBUG) |
| log.trace("--- Process tokens"); |
| |
| if (this.positionsToOffset == null || this.localDocID == -1) |
| return null; |
| |
| PositionsToOffset pto = this.positionsToOffset; |
| int ldid = this.localDocID; |
| |
| int startContext = -1; |
| int endContext = -1; |
| int startContextChar = -1; |
| int endContextChar = -1; |
| |
| int pdl = this.getPrimaryDataLength(); |
| |
| // Get context based on a span definition |
| if (this.getContext().isSpanDefined()) { |
| |
| if (DEBUG) |
| log.debug("Context defined by span"); |
| |
| int[] spanContext = this.expandContextToSpan( |
| this.positionsToOffset.getLeafReader(), (Bits) null, |
| "tokens", this.context.getSpanContext()); |
| startContext = spanContext[0]; |
| endContext = spanContext[1]; |
| startContextChar = spanContext[2]; |
| endContextChar = spanContext[3]; |
| } |
| |
| // The offset is not yet defined - and defined by tokens |
| if (endContext == -1) { |
| |
| if (DEBUG) |
| log.debug("No context defined by span"); |
| |
| if (this.context.left.isToken() && this.context.left.getLength() > 0) { |
| startContext = this.startPos - this.context.left.getLength(); |
| if (startContext < 0) |
| startContext = 0; |
| }; |
| |
| if (this.context.right.isToken() && this.context.right.getLength() > 0) { |
| endContext = this.endPos + this.context.right.getLength() - 1; |
| }; |
| }; |
| |
| if (startContext == -1) { |
| startContext = this.startPos; |
| if (DEBUG) |
| log.debug("Set startContext {}", endContext); |
| }; |
| |
| if (endContext == -1) { |
| endContext = this.endPos - 1; |
| if (DEBUG) |
| log.debug("Set endContext {}", endContext); |
| }; |
| |
| // Retrieve the character offsets for all tokens |
| for (int i = startContext; i < endContext; i++) { |
| pto.add(ldid, i); |
| }; |
| |
| if (startContextChar == -1) |
| startContextChar = pto.start(ldid, startContext); |
| |
| if (endContextChar == -1) |
| endContextChar = pto.end(ldid, endContext); |
| |
| if (DEBUG) |
| log.debug("Match is {}/{} - {}/{}",startContext,startContextChar,endContext,endContextChar); |
| |
| if (endContextChar == -1 || endContextChar == 0 || endContextChar > pdl) { |
| this.tempSnippet = this.getPrimaryData(startContextChar); |
| this.endMore = false; |
| } else { |
| this.tempSnippet = this.getPrimaryData(startContextChar,endContextChar); |
| } |
| |
| if (startContext == 0) { |
| this.startMore = false; |
| } |
| |
| Integer[] offsets; |
| ArrayNode tokens; |
| int i; |
| |
| // Create left context token list |
| if (startContext < this.startPos) { |
| tokens = json.putArray("left"); |
| for (i = startContext; i < this.startPos; i++) { |
| offsets = pto.span(ldid,i); |
| tokens.add( |
| codePointSubstring(this.tempSnippet, |
| offsets[0]- startContextChar, offsets[1] - startContextChar) |
| ); |
| }; |
| }; |
| |
| tokens = json.putArray("match"); |
| for (i = this.startPos; i < this.endPos; i++) { |
| offsets = pto.span(ldid,i); |
| if (offsets == null) { |
| continue; |
| } |
| tokens.add( |
| codePointSubstring(this.tempSnippet, |
| offsets[0]- startContextChar, offsets[1] - startContextChar) |
| ); |
| }; |
| |
| // Create right context token list |
| if (endContext > this.endPos) { |
| tokens = null; |
| for (i = this.endPos; i < endContext; i++) { |
| offsets = pto.span(ldid,i); |
| if (offsets == null) { |
| break; |
| }; |
| |
| if (tokens == null) |
| tokens = json.putArray("right"); |
| |
| tokens.add( |
| codePointSubstring(this.tempSnippet, |
| offsets[0]- startContextChar, offsets[1] - startContextChar) |
| ); |
| }; |
| }; |
| |
| // Add class arrays to JSON |
| if (this.highlight != null) { |
| |
| ArrayNode classes = null; |
| for (Highlight highlight : this.highlight) { |
| |
| if (highlight.number < 0 || highlight.number > 255) |
| continue; |
| |
| // Highlight is a pagebreak |
| if (highlight.end == PB_MARKER || highlight.end == ALL_MARKER) |
| continue; |
| |
| if (classes == null) |
| classes = json.putArray("classes"); |
| |
| ArrayNode cls = mapper.createArrayNode(); |
| cls.add(highlight.number); |
| cls.add(highlight.start - this.startPos); |
| cls.add(highlight.end - this.startPos); |
| classes.add(cls); |
| }; |
| }; |
| |
| return (this.snippetTokens = json); |
| }; |
| |
| |
| @JsonIgnore |
| public String getSnippetHTML () { |
| |
| if (!this._processHighlight()) |
| return null; |
| |
| if (this.processed && this.snippetHTML != null) |
| return this.snippetHTML; |
| |
| if (DEBUG) |
| log.trace("Create HTML Snippet"); |
| |
| StringBuilder sb = new StringBuilder(); |
| StringBuilder rightContext = new StringBuilder(); |
| |
| // Remember ids already defined to |
| // have joined elements |
| HashSet<String> joins = new HashSet<>(100); |
| |
| // Snippet stack sizes |
| short start = (short) 0; |
| short end = this.snippetArray.size(); |
| end--; |
| |
| // Set levels for highlights |
| FixedBitSet level = new FixedBitSet(255); |
| level.set(0, 255); |
| byte[] levelCache = new byte[255]; |
| |
| HighlightCombinatorElement elem; |
| |
| // Create context |
| sb.append("<span class=\"context-left\">"); |
| if (this.startMore) |
| sb.append("<span class=\"more\"></span>"); |
| |
| // Iterate over the snippet array |
| // Start with left context |
| while (end > 0) { |
| |
| // Get element of sorted array |
| elem = this.snippetArray.get(start); |
| |
| // Element is in context - but only markers are allowed! |
| // The problem with other elements is, that they may span the whole range |
| // around the match, so we have overlaps. |
| if (elem.type == 1 || elem.type == 2) |
| break; |
| |
| // Text or marker |
| |
| String elemString = elem.toHTML(this, level, levelCache, joins); |
| sb.append(elemString); |
| |
| if (DEBUG) |
| log.trace("Add node {}", elemString); |
| |
| // Move start position |
| start++; |
| }; |
| |
| // end of context |
| sb.append("</span>"); |
| |
| // Iterate through all the match |
| sb.append("<span class=\"match\">"); |
| |
| if (this.startCutted) { |
| sb.append("<span class=\"cutted\"></span>"); |
| }; |
| |
| for (; start <= end; start++) { |
| elem = this.snippetArray.get(start); |
| |
| if (elem == null) |
| continue; |
| |
| String elemString = elem.toHTML( |
| this, level, levelCache, joins |
| ); |
| if (DEBUG) { |
| log.trace("Add node {}", elemString); |
| }; |
| sb.append(elemString); |
| |
| // The match closes |
| if (elem.type == 2 && elem.number == CONTEXT) { |
| start++; |
| break; |
| }; |
| }; |
| |
| // Warning! TODO: |
| // Check that all elements are closed that are opened at this point |
| // and only inline markers |
| // can follow in the context! |
| |
| if (this.endCutted) { |
| sb.append("<span class=\"cutted\"></span>"); |
| }; |
| |
| |
| sb.append("</span>"); |
| |
| |
| // There is the right context |
| // if (start <= end) { |
| sb.append("<span class=\"context-right\">"); |
| |
| for (; start <= end; start++) { |
| elem = this.snippetArray.get(start); |
| |
| if (elem == null) |
| continue; |
| |
| String elemString = elem.toHTML( |
| this, level, levelCache, joins |
| ); |
| if (DEBUG) { |
| log.trace("Add node {}", elemString); |
| }; |
| sb.append(elemString); |
| }; |
| |
| if (this.endMore) |
| sb.append("<span class=\"more\"></span>"); |
| |
| // End of context |
| sb.append("</span>"); |
| |
| return (this.snippetHTML = sb.toString()); |
| }; |
| |
| |
| @JsonIgnore |
| public String getSnippetBrackets () { |
| |
| if (!this._processHighlight()) |
| return null; |
| |
| if (this.processed && this.snippetBrackets != null) |
| return this.snippetBrackets; |
| |
| // Snippet stack sizes |
| short start = (short) 0; |
| short end = this.snippetArray.size(); |
| end--; |
| |
| StringBuilder sb = new StringBuilder(); |
| |
| if (this.startMore) |
| sb.append("... "); |
| |
| // First element of sorted array |
| HighlightCombinatorElement elem = this.snippetArray.getFirst(); |
| |
| while (end > 0) { |
| |
| // Get element of sorted array |
| elem = this.snippetArray.get(start); |
| |
| if (elem.type == 1 || elem.type == 2) { |
| break; |
| } |
| else { |
| sb.append(elem.toBrackets(this)); |
| start++; |
| }; |
| }; |
| |
| sb.append("["); |
| |
| if (this.startCutted) { |
| sb.append("<!>"); |
| }; |
| |
| |
| for (; start <= end; start++) { |
| elem = this.snippetArray.get(start); |
| |
| if (elem == null) |
| continue; |
| |
| sb.append(elem.toBrackets(this)); |
| |
| // The match closes |
| if (elem.type == 2 && elem.number == CONTEXT) { |
| start++; |
| break; |
| }; |
| }; |
| |
| if (this.endCutted) { |
| sb.append("<!>"); |
| }; |
| sb.append("]"); |
| |
| for (; start <= end; start++) { |
| elem = this.snippetArray.get(start); |
| |
| if (elem != null) |
| sb.append(elem.toBrackets(this)); |
| }; |
| |
| if (this.endMore) |
| sb.append(" ..."); |
| |
| return (this.snippetBrackets = sb.toString()); |
| }; |
| |
| |
| // This sorts all highlight and match spans to make them nesting correctly, |
| // even in case they overlap |
| // TODO: Not very fast - improve! |
| private ArrayList<int[]> _processHighlightStack () { |
| if (DEBUG) |
| log.trace("--- Process Highlight stack"); |
| |
| LinkedList<int[]> openList = new LinkedList<int[]>(); |
| LinkedList<int[]> closeList = new LinkedList<int[]>(); |
| |
| // Filter multiple identifiers, that may be introduced and would |
| // result in invalid xml |
| this._filterMultipleIdentifiers(); |
| |
| // the start and end of the snippet is currently stored in span[0] |
| // this should be trimmed here! |
| |
| // Add highlight spans to balance lists |
| openList.addAll(this.span); |
| closeList.addAll(this.span); |
| |
| // Sort balance lists |
| Collections.sort(openList, new OpeningTagComparator()); |
| Collections.sort(closeList, new ClosingTagComparator()); |
| |
| if (DEBUG) { |
| log.trace("OpenList: {}", openList); |
| log.trace("CloseList: {}", closeList); |
| }; |
| |
| // New stack array |
| ArrayList<int[]> stack = new ArrayList<>(openList.size() * 2); |
| |
| // Create stack unless both lists are empty |
| while (!openList.isEmpty() || !closeList.isEmpty()) { |
| |
| // Nothing more to open -- close all |
| if (openList.isEmpty()) { |
| |
| if (DEBUG) |
| log.debug("No more open tags -- close all non pagebreaks"); |
| |
| int pf = closeList.peekFirst()[1]; |
| |
| if (pf != PB_MARKER && pf != ALL_MARKER) { |
| //closeList.removeFirst(); |
| |
| int[] e = closeList.removeFirst().clone(); |
| |
| if (DEBUG) { |
| log.trace( |
| "Add close with number {} to stack at {}-{} as {}", |
| e[2], e[0], e[1], e[3] |
| ); |
| } |
| stack.add(e); |
| } |
| else { |
| closeList.removeFirst(); |
| |
| if (DEBUG) |
| log.debug("Close is pagebreak -- ignore (1)"); |
| }; |
| |
| continue; |
| } |
| |
| // Not sure about this, but it can happen |
| else if (closeList.isEmpty()) { |
| |
| if (DEBUG) |
| log.debug("Closelist is empty"); |
| |
| int[] e = openList.removeFirst().clone(); |
| |
| if (e[1] == PB_MARKER || e[1] == ALL_MARKER) { |
| |
| if (e[1] == PB_MARKER) { |
| e[3] = 2; |
| } else { |
| e[3] = 3; |
| }; |
| |
| // Mark as empty |
| e[1] = e[0]; // Remove pagebreak marker |
| |
| if (DEBUG) |
| log.trace( |
| "Add pagebreak or marker with {} to stack at {}-{} as {}", |
| e[2], e[0], e[1], e[3] |
| ); |
| |
| // Add empty pagebreak |
| stack.add(e); |
| }; |
| |
| continue; |
| }; |
| |
| int clpf = closeList.peekFirst()[1]; |
| int olpf = openList.peekFirst()[1]; |
| |
| |
| // Closener is pagebreak or marker |
| if (clpf == PB_MARKER || clpf == ALL_MARKER) { |
| |
| if (DEBUG) |
| log.debug("Close is pagebreak or a marker -- remove (2)"); |
| |
| // Remove closing pagebreak |
| closeList.removeFirst(); |
| } |
| |
| // Opener is pagebreak or marker |
| else if ((olpf == PB_MARKER || olpf == ALL_MARKER) && closeList.peekFirst()[1] >= openList.peekFirst()[0]) { |
| |
| int[] e = openList.removeFirst().clone(); |
| |
| // Mark as empty |
| e[1] = e[0]; // Remove pagebreak marker |
| |
| if (olpf == PB_MARKER) { |
| e[3] = 2; |
| } else { |
| e[3] = 3; |
| }; |
| |
| if (DEBUG) |
| log.trace( |
| "Add pagebreak or marker with {} to stack at {}-{} as {}", |
| e[2], e[0], e[1], e[3] |
| ); |
| |
| |
| // Add empty pagebreak |
| stack.add(e); |
| } |
| |
| // check if the opener is smaller than the closener |
| else if (openList.peekFirst()[0] < closeList.peekFirst()[1]) { |
| |
| if (DEBUG) |
| log.debug("Open tag starts before close tag ends"); |
| |
| int[] e = openList.removeFirst().clone(); |
| |
| // Mark as opener |
| e[3] = 1; |
| |
| if (DEBUG) { |
| |
| // -1: match |
| // < -1: relation target |
| // -99998: context |
| // >= 2048: relation source |
| // >= 256: annotation |
| |
| log.trace( |
| "Add open with number {} to stack at {}-{} as {}", |
| e[2], e[0], e[1], e[3] |
| ); |
| }; |
| |
| // Add opener to stack |
| stack.add(e); |
| } |
| |
| else { |
| int[] e = closeList.removeFirst(); |
| |
| if (DEBUG) { |
| log.debug("Close ends before next opens or at the same position"); |
| |
| log.trace( |
| "Add close with number {} to stack at {}-{}", |
| e[2], e[0], e[1] |
| ); |
| }; |
| |
| // Add closener to stack |
| stack.add(e); |
| }; |
| }; |
| return stack; |
| }; |
| |
| |
| /** |
| * Sometimes the match start and end positions are inside the |
| * matching region, e.g. when the match was expanded. |
| * This will override the original matching positions |
| * And mark the real matching. |
| */ |
| public void overrideMatchPosition (int start, int end) { |
| if (DEBUG) |
| log.trace("--- Override match position"); |
| |
| this.innerMatchStartPos = start; |
| this.innerMatchEndPos = end; |
| }; |
| |
| |
| /** |
| * This will retrieve character offsets for all spans. |
| * This includes pagebreaks and markers. |
| */ |
| private boolean _processHighlightSpans () { |
| |
| if (DEBUG) |
| log.trace("--- Process Highlight spans"); |
| |
| // Local document ID |
| int ldid = this.localDocID; |
| |
| int startPosChar = -1, endPosChar = -1; |
| |
| // No positionsToOffset object found |
| if (this.positionsToOffset == null) |
| return false; |
| |
| // Match position |
| startPosChar = this.positionsToOffset.start(ldid, this.startPos); |
| |
| if (DEBUG) |
| log.trace("Unaltered startPosChar is {}", startPosChar); |
| |
| // Check potential differing start characters |
| // e.g. from element spans |
| if (potentialStartPosChar != -1 |
| && (startPosChar > this.potentialStartPosChar)) |
| startPosChar = this.potentialStartPosChar; |
| |
| endPosChar = this.positionsToOffset.end(ldid, this.endPos - 1); |
| |
| if (DEBUG) |
| log.trace("Unaltered endPosChar is {}", endPosChar); |
| |
| // Potential end characters may come from spans with |
| // defined character offsets like sentences including .", ... etc. |
| if (endPosChar < potentialEndPosChar) |
| endPosChar = potentialEndPosChar; |
| |
| if (DEBUG) |
| log.trace("Refined: Match offset is pos {}-{} (chars {}-{})", |
| this.startPos, this.endPos, startPosChar, endPosChar); |
| |
| this.identifier = null; |
| |
| // No spans yet |
| if (this.span == null) |
| this.span = new LinkedList<int[]>(); |
| |
| // Process offset char findings |
| int[] intArray = this._processOffsetChars(ldid, startPosChar, |
| endPosChar); |
| |
| // Recalculate startOffsetChar |
| int startOffsetChar = startPosChar - intArray[0]; |
| int endRelOffsetChar = intArray[1]; |
| |
| // Add match span, in case no inner match is defined |
| if (this.innerMatchEndPos == -1) { |
| if (DEBUG) |
| log.debug("Added array to match span with {} (1)", intArray); |
| this.span.add(intArray); |
| }; |
| |
| // Add context highlight |
| intArray = new int[]{intArray[0], intArray[1], CONTEXT, 0}; |
| |
| this.span.add(intArray); |
| |
| if (DEBUG) |
| log.debug("Added array to context span with {} (1)", intArray); |
| |
| |
| // All spans starting before startOffsetChar and end before |
| // endOffsetChar can be dismissed, as they are not part of tempSnippet |
| // This can actually be seen based on the first element of this.span |
| // at the moment. |
| |
| // highlights |
| // -- I'm not sure about this. |
| if (this.highlight != null) { |
| if (DEBUG) |
| log.trace("There are highlights!"); |
| |
| for (Highlight highlight : this.highlight) { |
| if (DEBUG && (highlight.start > highlight.end)) { |
| log.warn("Start position is before end position {} - {}!", |
| highlight.start, |
| highlight.end); |
| }; |
| |
| int start = -1; |
| int end = -1; |
| |
| // Highlight is a pagebreak |
| if (highlight.end != PB_MARKER && highlight.end != ALL_MARKER) { |
| start = this.positionsToOffset.start(ldid, highlight.start); |
| end = this.positionsToOffset.end(ldid, highlight.end); |
| } |
| else { |
| |
| if (DEBUG) |
| log.trace("Highlight is pagebreak -- do not retrieve offset"); |
| |
| // In pagebreak highlights |
| // there is already a character |
| start = highlight.start; |
| end = highlight.end; |
| }; |
| |
| start -= startOffsetChar; |
| |
| // Keep end equal -1 |
| if (end != PB_MARKER && end != ALL_MARKER) { |
| if (DEBUG) |
| log.trace("PTO whas retrieved {}-{} for class {}", start, |
| end, highlight.number); |
| end -= startOffsetChar; |
| |
| // Cut longer spans (e.g. from relation references) |
| if (end > endRelOffsetChar) { |
| end = endRelOffsetChar; |
| }; |
| } |
| else if (DEBUG) { |
| log.debug("Pagebreak keeps end position"); |
| }; |
| |
| if (start < 0 || |
| ((end < 0 | start > endRelOffsetChar) && end != PB_MARKER && end != ALL_MARKER)) { |
| continue; |
| }; |
| |
| if (DEBUG && (start > endRelOffsetChar)) |
| log.debug("Ignore marker {}/{}/{}/{}", start, end, highlight.number, endRelOffsetChar); |
| |
| |
| // Create intArray for highlight |
| intArray = new int[] { |
| start, |
| end, |
| highlight.number, |
| 0 // Dummy value for later use |
| }; |
| |
| if (DEBUG) |
| log.debug("Added array to span with {} (2)", intArray); |
| |
| this.span.add(intArray); |
| }; |
| }; |
| return true; |
| }; |
| |
| |
| // Pass the local docid to retrieve character positions for the offset |
| private int[] _processOffsetChars (int ldid, int startPosChar, |
| int endPosChar) { |
| |
| int startOffsetChar = -1, endOffsetChar = -1; |
| int startOffset = -1, endOffset = -1; |
| |
| // The offset is defined by a span |
| if (this.getContext().isSpanDefined()) { |
| |
| if (DEBUG) |
| log.trace("Try to expand to <{}>", |
| this.context.getSpanContext()); |
| |
| this.startMore = false; |
| this.endMore = false; |
| |
| int[] spanContext = this.expandContextToSpan( |
| this.positionsToOffset.getLeafReader(), (Bits) null, |
| "tokens", this.context.getSpanContext()); |
| startOffset = spanContext[0]; |
| endOffset = spanContext[1]; |
| startOffsetChar = spanContext[2]; |
| endOffsetChar = spanContext[3]; |
| if (DEBUG) |
| log.trace("Got context based on span {}-{}/{}-{}", |
| startOffset, endOffset, startOffsetChar, endOffsetChar); |
| }; |
| |
| // The offset is defined by tokens or characters |
| if (endOffset == -1) { |
| |
| PositionsToOffset pto = this.positionsToOffset; |
| |
| // The left offset is defined by tokens |
| if (this.context.left.isToken()) { |
| startOffset = this.startPos - this.context.left.getLength(); |
| if (DEBUG) |
| log.trace("PTO will retrieve {} (Left context)", |
| startOffset); |
| pto.add(ldid, startOffset); |
| } |
| |
| // The left offset is defined by characters |
| else { |
| startOffsetChar = startPosChar - this.context.left.getLength(); |
| }; |
| |
| // The right context is defined by tokens |
| if (this.context.right.isToken()) { |
| endOffset = this.endPos + this.context.right.getLength() - 1; |
| if (DEBUG) |
| log.trace("PTO will retrieve {} (Right context)", |
| endOffset); |
| pto.add(ldid, endOffset); |
| } |
| |
| // The right context is defined by characters |
| else { |
| endOffsetChar = (endPosChar == -1) ? -1 |
| : endPosChar + this.context.right.getLength(); |
| }; |
| |
| if (startOffset != -1) |
| startOffsetChar = pto.start(ldid, startOffset); |
| |
| if (endOffset != -1) |
| endOffsetChar = pto.end(ldid, endOffset); |
| }; |
| |
| if (DEBUG) |
| log.trace("Premature found offsets at {}-{}", startOffsetChar, |
| endOffsetChar); |
| |
| |
| // This can happen in case of non-token characters |
| // in the match and null offsets |
| if (startOffsetChar > startPosChar) |
| startOffsetChar = startPosChar; |
| else if (startOffsetChar < 0) |
| startOffsetChar = 0; |
| |
| // No "..." at the beginning |
| if (startOffsetChar == 0) |
| this.startMore = false; |
| |
| if (endOffsetChar != -1 && endOffsetChar < endPosChar) |
| endOffsetChar = endPosChar; |
| |
| if (DEBUG) |
| log.trace("The context spans from chars {}-{}", startOffsetChar, |
| endOffsetChar); |
| |
| // Get snippet information from the primary data |
| if (endOffsetChar > -1 |
| && (endOffsetChar < this.getPrimaryDataLength())) { |
| this.tempSnippet = this.getPrimaryData(startOffsetChar, |
| endOffsetChar); |
| } |
| else { |
| this.tempSnippet = this.getPrimaryData(startOffsetChar); |
| this.endMore = false; |
| }; |
| |
| if (DEBUG) |
| log.trace("Snippet: '{}'", this.tempSnippet); |
| |
| if (DEBUG) |
| log.trace( |
| "The match entry is {}-{} ({}-{}) with absolute offsetChars {}-{}", |
| startPosChar - startOffsetChar, |
| endPosChar - startOffsetChar, startPosChar, endPosChar, |
| startOffsetChar, endOffsetChar); |
| |
| // TODO: Simplify |
| return new int[] { startPosChar - startOffsetChar, |
| endPosChar - startOffsetChar, -1, 0 }; |
| }; |
| |
| |
| // Identical to Result! |
| public JsonNode toJsonNode () { |
| ObjectNode json = (ObjectNode) super.toJsonNode(); |
| |
| if (this.context != null) |
| json.set("context", this.getContext().toJsonNode()); |
| |
| if (this.version != null) |
| json.put("version", this.getVersion()); |
| |
| if (this.startPage != -1) { |
| ArrayNode pages = mapper.createArrayNode(); |
| pages.add(this.startPage); |
| if (this.endPage != -1 && this.endPage != this.startPage) |
| pages.add(this.endPage); |
| |
| json.set("pages", pages); |
| }; |
| |
| if (this.hasSnippet) |
| json.put("snippet", this.getSnippetHTML()); |
| |
| if (this.hasTokens) |
| json.set("tokens", this.getSnippetTokens()); |
| |
| ArrayNode fields = json.putArray("fields"); |
| |
| // Iterate over all fields |
| Iterator<MetaField> fIter = mFields.iterator(); |
| while (fIter.hasNext()) { |
| MetaField mf = fIter.next(); |
| fields.add(mf.toJsonNode()); |
| |
| // Legacy flat field support |
| String mfs = mf.key; |
| String value = this.getFieldValue(mfs); |
| if (value != null && !json.has(mfs)) |
| json.set(mfs, new TextNode(value)); |
| }; |
| |
| this.addMessage(0, "Support for flat field values is deprecated"); |
| |
| return json; |
| }; |
| |
| |
| public String toJsonString () { |
| JsonNode json = (JsonNode) this.toJsonNode(); |
| |
| // Match was no match |
| if (json.size() == 0) |
| return "{}"; |
| try { |
| return mapper.writeValueAsString(json); |
| } |
| catch (Exception e) { |
| log.warn(e.getLocalizedMessage()); |
| }; |
| |
| return "{}"; |
| }; |
| |
| |
| // Return match as token list |
| // TODO: This will be retrieved in case "tokenList" is |
| // requested in "fields" |
| public ObjectNode toTokenList () { |
| ObjectNode json = mapper.createObjectNode(); |
| |
| if (this.getDocID() != null) |
| json.put("textSigle", this.getDocID()); |
| else if (this.getTextSigle() != null) |
| json.put("textSigle", this.getTextSigle()); |
| |
| ArrayNode tokens = json.putArray("tokens"); |
| |
| // Get pto object |
| PositionsToOffset pto = this.positionsToOffset; |
| |
| // Add for position retrieval |
| for (int i = this.getStartPos(); i < this.getEndPos(); i++) { |
| pto.add(this.localDocID, i); |
| }; |
| |
| // Retrieve positions |
| for (int i = this.getStartPos(); i < this.getEndPos(); i++) { |
| ArrayNode token = tokens.addArray(); |
| for (int offset : pto.span(this.localDocID, i)) { |
| token.add(offset); |
| }; |
| }; |
| |
| return json; |
| }; |
| |
| |
| // Remove duplicate identifiers |
| // Yeah ... I mean ... why not? |
| private void _filterMultipleIdentifiers () { |
| ArrayList<Integer> removeDuplicate = new ArrayList<>(10); |
| HashSet<String> identifiers = new HashSet<>(20); |
| for (int i = 0; i < this.span.size(); i++) { |
| |
| // span is an int array: [Start, End, Number, Dummy] |
| int highlightNumber = this.span.get(i)[2]; |
| |
| // Number is an identifier |
| if (highlightNumber < -1) { |
| |
| // Get the real identifier |
| String idNumber = |
| identifierNumber.get(highlightNumber); |
| if (identifiers.contains(idNumber)) { |
| removeDuplicate.add(i); |
| } |
| else { |
| identifiers.add(idNumber); |
| }; |
| }; |
| }; |
| |
| // Order the duplicates to filter from the tail |
| Collections.sort(removeDuplicate); |
| Collections.reverse(removeDuplicate); |
| |
| // Delete all duplicate identifiers |
| for (int delete : removeDuplicate) { |
| this.span.remove(delete); |
| }; |
| }; |
| |
| |
| /* |
| * Get identifier based on class number |
| */ |
| @JsonIgnore |
| public String getClassID (int nr) { |
| return this.identifierNumber.get(nr); |
| }; |
| |
| |
| /* |
| * Get annotation based on id |
| */ |
| @JsonIgnore |
| public String getAnnotationID (int nr) { |
| return this.annotationNumber.get(nr); |
| }; |
| |
| |
| /* |
| * Get relation based on id |
| */ |
| @JsonIgnore |
| public Relation getRelationID (int nr) { |
| return this.relationNumber.get(nr); |
| }; |
| }; |