Refactor snippet generation and fix inline markers in contexts
Change-Id: Iff81bde2b7126e5efb9d664dcb28f65415ee122e
diff --git a/Changes b/Changes
index e0b9400..c742450 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,10 @@
+0.63.0 2024-06-21
+ - [bugfix] Show all inline markers and pagebreaks at match borders (diewald).
+ - [feature] Show inline markers and pagebreaks in contexts (diewald).
+ - [bugfix] Prevent matches in contexts (diewald).
+ - [bugfix] Prevent showing empty elements and opening tags past primary data
+ (diewald).
+
0.62.6 2024-06-13
- [feature] Make match expansion configurable (close #150, margaretha)
diff --git a/pom.xml b/pom.xml
index 503a086..579c1d4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
<groupId>de.ids-mannheim.korap.krill</groupId>
<artifactId>Krill</artifactId>
- <version>0.62.6</version>
+ <version>0.63.0</version>
<packaging>jar</packaging>
<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index fa18740..76be86e 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -227,12 +227,13 @@
this.setStartPos(maxTokenMatchSize, id.getStartPos());
this.setEndPos(maxTokenMatchSize, id.getEndPos());
- if (includeHighlights)
+ if (includeHighlights) {
for (int[] pos : id.getPos()) {
if (pos[0] < id.getStartPos() || pos[1] > id.getEndPos())
continue;
this.addHighlight(pos[0], pos[1], pos[2]);
};
+ };
};
};
@@ -890,6 +891,9 @@
int charOffset = 0, pagenumber = 0, start = 0;
+ int minStartPos = this.getStartPos() - KrillProperties.maxTokenContextSize;
+ int maxEndPos = this.getEndPos() + KrillProperties.maxTokenContextSize;
+
if (DEBUG) {
log.debug("=================================");
log.debug("Retrieve markers between {}-{}",
@@ -914,17 +918,17 @@
atomic, bitset, new HashMap<Term, TermContext>()
);
- // Iterate over all pagebreaks
+ // Iterate over all markers
while (markerSpans.next() == true) {
if (DEBUG) {
- log.debug("There is a pagebreak at {}/{} and we are at {}",
+ log.debug("There is a marker at {}/{} and we are at {}",
markerSpans.doc(),
markerSpans.start(),
this.localDocID);
};
- // Current pagebreak is not in the correct document
+ // Current marker is not in the correct document
if (markerSpans.doc() != this.localDocID) {
if (markerSpans.doc() < this.localDocID) {
markerSpans.skipTo(this.localDocID);
@@ -944,8 +948,8 @@
// There is a marker found - check,
// if it is in the correct area
- if (markerSpans.start() < this.getStartPos()) {
-
+ if (markerSpans.start() < minStartPos) {
+
// Only the first payload is relevant
b = markerSpans.getPayload().iterator().next();
start = markerSpans.start();
@@ -957,7 +961,7 @@
}
- // This is the first pagebreak inside the match!
+ // This captures all markers starting in the potential (i.e. maximum) context of the match
else {
// b is already defined!
@@ -969,25 +973,26 @@
pagenumber = bb.getInt();
charOffset = bb.getInt();
-
+
// This marker is a pagebreak
if (pagenumber != 0) {
if (DEBUG)
log.debug("Add pagebreak to list: {}-{}", charOffset, pagenumber);
- // This is the first pagebreak!
+ // Add all pagebreaks for later counting
pagebreaks.add(new int[]{charOffset, pagenumber});
- if (start >= this.getStartPos()) {
+ if (start >= minStartPos) {
if (DEBUG)
log.debug("Add marker to rendering: {}-{}",
charOffset,
pagenumber);
this.addPagebreak(charOffset, pagenumber);
};
-
- // This marker is no pagebreak
- } else {
+ }
+
+ // This marker is no pagebreak
+ else {
int bytelength = bb.getInt();
byte[] anno = new byte[bytelength];
bb.get(anno, 0, bytelength);
@@ -996,12 +1001,12 @@
}
b = null;
- }
+ };
// b wasn't used yet
- if (markerSpans.start() <= this.getEndPos()) {
+ if (markerSpans.start() <= maxEndPos) {
- // Set new pagebreak
+ // Set new marker
// Only the first payload is relevant
b = markerSpans.getPayload().iterator().next();
bb.rewind();
@@ -1010,7 +1015,7 @@
pagenumber = bb.getInt();
charOffset = bb.getInt();
-
+
// This marker is a pagebreak
if (pagenumber != 0) {
if (DEBUG)
@@ -1019,17 +1024,18 @@
// This is the first pagebreak!
pagebreaks.add(new int[]{charOffset, pagenumber});
- if (start >= this.getStartPos()) {
+ if (start >= minStartPos) {
+
+
if (DEBUG)
log.debug("Add pagebreak to rendering: {}-{}",
charOffset,
pagenumber);
this.addPagebreak(charOffset, pagenumber);
};
-
-
}
- // This marker is no pagebreak
+
+ // This marker is no pagebreak
else {
int bytelength = bb.getInt();
@@ -1067,7 +1073,7 @@
// This is a remembered pagebreak!
pagebreaks.add(new int[]{charOffset, pagenumber});
- if (start >= this.getStartPos()) {
+ if (start >= minStartPos) {
if (DEBUG)
log.debug("Add pagebreak to rendering: {}-{}",
@@ -1093,11 +1099,25 @@
log.warn("Some problems with ByteBuffer: {}", e.getMessage());
};
+ // For references calculate the page for the match
if (pagebreaks.size() > 0) {
- this.startPage = pagebreaks.get(0)[1];
- if (pagebreaks.size() > 1 && pagebreaks.get(pagebreaks.size()-1) != null)
- this.endPage = pagebreaks.get(pagebreaks.size()-1)[1];
- }
+ int i = 0;
+ for (; i < pagebreaks.size(); i++) {
+ if (pagebreaks.get(i)[0] <= this.getStartPos()) {
+ this.startPage = pagebreaks.get(i)[1];
+ } else {
+ i++;
+ break;
+ };
+ };
+ for (; i < pagebreaks.size(); i++) {
+ if (pagebreaks.get(i)[0] < this.getEndPos()) {
+ this.endPage = pagebreaks.get(i)[1];
+ } else {
+ break;
+ };
+ };
+ };
return pagebreaks;
};
@@ -1345,7 +1365,7 @@
if (this.highlight != null) {
for (Highlight hl : this.highlight) {
if (hl.start >= this.getStartPos()
- && hl.end <= this.getEndPos()) {
+ && hl.end <= this.getEndPos()) {
// Highlight is no pagebreak
if (hl.end != PB_MARKER && hl.end != ALL_MARKER) {
@@ -1354,13 +1374,13 @@
if (DEBUG)
log.trace(
- "PTO will retrieve {} & {} (Highlight boundary)",
+ "PTO will retrieve offsets from token {} & {} (Highlight boundary)",
hl.start, hl.end);
}
else if (DEBUG) {
- log.trace("Highlight is a pagebreak - do not retrieve PTO");
+ log.trace("Highlight is a pagebreak or marker - do not retrieve PTO");
};
};
};
@@ -1403,12 +1423,24 @@
if (arg0[0] > arg1[0]) {
return 1;
}
+
else if (arg0[0] == arg1[0]) {
+
+ int end0 = arg0[1];
+ int end1 = arg1[1];
+
+ if (arg0[1] == PB_MARKER || arg0[1] == ALL_MARKER) {
+ end0 = arg0[0];
+ };
+ if (arg1[1] == PB_MARKER || arg1[1] == ALL_MARKER) {
+ end1 = arg1[0];
+ };
+
// Check endpositions
- if (arg0[1] > arg1[1]) {
+ if (end0 > end1) {
return -1;
}
- else if (arg0[1] == arg1[1]) {
+ else if (end0 == end1) {
// Compare class number
if (arg0[2] > arg1[2])
@@ -1430,11 +1462,23 @@
private class ClosingTagComparator implements Comparator<int[]> {
@Override
public int compare (int[] arg0, int[] arg1) {
+
+ int end0 = arg0[1];
+ int end1 = arg1[1];
+
+ if (arg0[1] == PB_MARKER || arg0[1] == ALL_MARKER) {
+ end0 = arg0[0];
+ };
+
+ if (arg1[1] == PB_MARKER || arg1[1] == ALL_MARKER) {
+ end1 = arg1[0];
+ };
+
// Check end positions
- if (arg0[1] > arg1[1]) {
+ if (end0 > end1) {
return 1;
}
- else if (arg0[1] == arg1[1]) {
+ else if (end0 == end1) {
// Check start positions
if (arg0[0] < arg1[0]) {
@@ -1463,25 +1507,34 @@
};
int pos = 0, oldPos = 0;
+ boolean exceeded = false;
this.snippetArray = new HighlightCombinator();
+ // The snippetArray can have preceding and following pagebreaks
+ // and markers that need to be removed
+
+
// Iterate over all elements of the stack
for (int[] element : stack) {
// The position is the start position for opening and
- // empty elements and the end position for closing elements
+ // empty/marker elements and the end position for closing elements
pos = element[3] != 0 ? element[0] : element[1];
- if (DEBUG)
- log.trace("Add tag at position {} (was {})",
+ if (DEBUG) {
+ log.trace("Check tag at position {} (was {}) [{},{},{},{}]",
pos,
- oldPos);
-
+ oldPos,
+ element[0],
+ element[1],
+ element[2],
+ element[3]);
+ };
// The new position is behind the old position
if (pos > oldPos) {
-
+
// The position is behind the string length,
// which may end when an element ends beyond
if (pos > clean.length()) {
@@ -1490,13 +1543,15 @@
pos = clean.length();
if (DEBUG)
- log.trace("Position exceeds string, now {}",
- pos);
+ log.trace("Position exceeds string, now {}", pos);
+ exceeded = true;
};
// Add partial string
if (pos > 0 && pos > oldPos) {
+ if (DEBUG)
+ log.trace("Add string {}", codePointSubstring(clean, oldPos, pos));
snippetArray.addString(codePointSubstring(clean, oldPos, pos));
};
@@ -1507,25 +1562,33 @@
// close tag
if (element[3] == 0) {
+ if (DEBUG)
+ log.trace("Add closer: {}", element[2]);
+
// Add close
snippetArray.addClose(element[2]);
}
// empty tag (pagebreak)
- else if (element[3] == 2) {
+ else if (!exceeded && element[3] == 2) {
// Add Empty (pagebreak)
snippetArray.addEmpty(element[2]);
}
// empty tag (marker)
- else if (element[3] == 3) {
+ else if (!exceeded && element[3] == 3) {
// Add Empty (pagebreak)
snippetArray.addMarker(element[2]);
}
- // open tag
+ // opening element exceeds primary data
+ else if (exceeded) {
+ break;
+ }
+
+ // open tag
else {
snippetArray.addOpen(element[2]);
};
@@ -1533,6 +1596,8 @@
if (clean.length() > pos && pos >= 0) {
snippetArray.addString(codePointSubstring(clean, pos));
+ if (DEBUG)
+ log.trace("Add rest string {}", codePointSubstring(clean, pos));
};
};
@@ -1731,11 +1796,7 @@
// Snippet stack sizes
short start = (short) 0;
short end = this.snippetArray.size();
-
- // Create context
- sb.append("<span class=\"context-left\">");
- if (this.startMore)
- sb.append("<span class=\"more\"></span>");
+ end--;
// Set levels for highlights
FixedBitSet level = new FixedBitSet(255);
@@ -1743,69 +1804,105 @@
byte[] levelCache = new byte[255];
HighlightCombinatorElement elem;
+
+ // Create context
+ sb.append("<span class=\"context-left\">");
+ if (this.startMore)
+ sb.append("<span class=\"more\"></span>");
- end--;
- if (end > 0) {
+ // Iterate over the snippet array
+ // Start with left context
+ while (end > 0) {
- // First element of sorted array
- elem = this.snippetArray.getFirst();
+ // Get element of sorted array
+ elem = this.snippetArray.get(start);
- // First element is textual
- if (elem.type == 0) {
- sb.append(elem.toHTML(this, level, levelCache, joins));
- // Move start position
- start++;
- };
- sb.append("</span>");
+ // Element is in context - but only markers are allowed!
+ // The problem with other elements is that they may span the whole range
+ // around the match, so we have overlaps.
+ if (elem.type == 1 || elem.type == 2)
+ break;
- // Last element of sorted array
- elem = this.snippetArray.getLast();
+ // Text or marker
+
+ String elemString = elem.toHTML(this, level, levelCache, joins);
+ sb.append(elemString);
- // Create right context, if there is any
- rightContext.append("<span class=\"context-right\">");
+ if (DEBUG)
+ log.trace("Add node {}", elemString);
- // Last element is textual
- if (elem != null && elem.type == 0) {
- rightContext.append(
- elem.toHTML(this, level, levelCache, joins)
- );
-
- // decrement end
- end--;
- };
+ // Move start position
+ start++;
};
- if (this.endMore)
- rightContext.append("<span class=\"more\"></span>");
+ // end of context
+ sb.append("</span>");
- rightContext.append("</span>");
-
- // Iterate through all remaining elements
+ // Iterate through all the match
sb.append("<span class=\"match\">");
if (this.startCutted) {
sb.append("<span class=\"cutted\"></span>");
};
- for (short i = start; i <= end; i++) {
+ for (; start <= end; start++) {
+ elem = this.snippetArray.get(start);
- elem = this.snippetArray.get(i);
- // UNTESTED
- if (elem != null) {
- String elemString = elem.toHTML(
- this, level, levelCache, joins
- );
- if (DEBUG) {
- log.trace("Add node {}", elemString);
- };
- sb.append(elemString);
- }
+ if (elem == null)
+ continue;
+
+ String elemString = elem.toHTML(
+ this, level, levelCache, joins
+ );
+ if (DEBUG) {
+ log.trace("Add node {}", elemString);
+ };
+ sb.append(elemString);
+
+ // The match closes
+ if (elem.type == 2 && elem.number == CONTEXT) {
+ start++;
+ break;
+ };
};
+
+ // Warning! TODO:
+ // Check that all elements are closed that are opened at this point
+ // and only inline markers
+ // can follow in the context!
+
if (this.endCutted) {
sb.append("<span class=\"cutted\"></span>");
};
+
+
sb.append("</span>");
- sb.append(rightContext);
+
+
+ // There is the right context
+ // if (start <= end) {
+ sb.append("<span class=\"context-right\">");
+
+ for (; start <= end; start++) {
+ elem = this.snippetArray.get(start);
+
+ if (elem == null)
+ continue;
+
+ String elemString = elem.toHTML(
+ this, level, levelCache, joins
+ );
+ if (DEBUG) {
+ log.trace("Add node {}", elemString);
+ };
+ sb.append(elemString);
+ };
+
+ if (this.endMore)
+ sb.append("<span class=\"more\"></span>");
+
+ // End of context
+ sb.append("</span>");
return (this.snippetHTML = sb.toString());
};
@@ -1832,9 +1929,19 @@
// First element of sorted array
HighlightCombinatorElement elem = this.snippetArray.getFirst();
- if (elem.type == 0) {
- sb.append(elem.toBrackets(this));
- start++;
+
+ while (end > 0) {
+
+ // Get element of sorted array
+ elem = this.snippetArray.get(start);
+
+ if (elem.type == 1 || elem.type == 2) {
+ break;
+ }
+ else {
+ sb.append(elem.toBrackets(this));
+ start++;
+ };
};
sb.append("[");
@@ -1843,27 +1950,34 @@
sb.append("<!>");
};
- // Last element of sorted array
- elem = this.snippetArray.getLast();
- StringBuilder rightContext = new StringBuilder();
+
+ for (; start <= end; start++) {
+ elem = this.snippetArray.get(start);
+
+ if (elem == null)
+ continue;
+
+ sb.append(elem.toBrackets(this));
- // Last element is textual
- if (elem != null && elem.type == 0) {
- rightContext.append(elem.toBrackets(this));
- // decrement end
- end--;
+ // The match closes
+ if (elem.type == 2 && elem.number == CONTEXT) {
+ start++;
+ break;
+ };
};
-
- for (short i = start; i <= end; i++) {
- sb.append(this.snippetArray.get(i).toBrackets(this));
- };
-
+
if (this.endCutted) {
sb.append("<!>");
};
sb.append("]");
- sb.append(rightContext);
+ for (; start <= end; start++) {
+ elem = this.snippetArray.get(start);
+
+ if (elem != null)
+ sb.append(elem.toBrackets(this));
+ };
+
if (this.endMore)
sb.append(" ...");
@@ -1885,6 +1999,9 @@
// result in invalid xml
this._filterMultipleIdentifiers();
+ // the start and end of the snippet are currently stored in span[0]
+ // this should be trimmed here!
+
// Add highlight spans to balance lists
openList.addAll(this.span);
closeList.addAll(this.span);
@@ -1893,6 +2010,11 @@
Collections.sort(openList, new OpeningTagComparator());
Collections.sort(closeList, new ClosingTagComparator());
+ if (DEBUG) {
+ log.trace("OpenList: {}", openList);
+ log.trace("CloseList: {}", closeList);
+ };
+
// New stack array
ArrayList<int[]> stack = new ArrayList<>(openList.size() * 2);
@@ -1908,40 +2030,78 @@
int pf = closeList.peekFirst()[1];
if (pf != PB_MARKER && pf != ALL_MARKER) {
- stack.add(closeList.removeFirst());
- }
- else if (DEBUG) {
- if (DEBUG)
- log.debug("Close is pagebreak -- ignore (1)");
- };
+ //closeList.removeFirst();
+
+ int[] e = closeList.removeFirst().clone();
+ if (DEBUG) {
+ log.trace(
+ "Add close with number {} to stack at {}-{} as {}",
+ e[2], e[0], e[1], e[3]
+ );
+ }
+ stack.add(e);
+ }
+ else {
+ closeList.removeFirst();
+
+ if (DEBUG)
+ log.debug("Close is pagebreak -- ignore (1)");
+ };
+
continue;
}
// Not sure about this, but it can happen
else if (closeList.isEmpty()) {
- break;
+
+ if (DEBUG)
+ log.debug("Closelist is empty");
+
+ int[] e = openList.removeFirst().clone();
+
+ if (e[1] == PB_MARKER || e[1] == ALL_MARKER) {
+
+ if (e[1] == PB_MARKER) {
+ e[3] = 2;
+ } else {
+ e[3] = 3;
+ };
+
+ // Mark as empty
+ e[1] = e[0]; // Remove pagebreak marker
+
+ if (DEBUG)
+ log.trace(
+ "Add pagebreak or marker with {} to stack at {}-{} as {}",
+ e[2], e[0], e[1], e[3]
+ );
+
+ // Add empty pagebreak
+ stack.add(e);
+ };
+
+ continue;
};
int clpf = closeList.peekFirst()[1];
int olpf = openList.peekFirst()[1];
- // Closener is pagebreak or marker
- if (clpf == PB_MARKER || clpf == ALL_MARKER) {
+ // Closener is pagebreak or marker
+ if (clpf == PB_MARKER || clpf == ALL_MARKER) {
+
if (DEBUG)
- log.debug("Close is pagebreak or a marker -- ignore (2)");
+ log.debug("Close is pagebreak or a marker -- remove (2)");
// Remove closing pagebreak
closeList.removeFirst();
}
// Opener is pagebreak or marker
- else if (olpf == PB_MARKER || olpf == ALL_MARKER) {
- int[] e = openList.removeFirst().clone();
-
- if (DEBUG)
- log.debug("Open is pagebreak or a marker");
+ else if ((olpf == PB_MARKER || olpf == ALL_MARKER) && closeList.peekFirst()[1] >= openList.peekFirst()[0]) {
+
+ int[] e = openList.removeFirst().clone();
// Mark as empty
e[1] = e[0]; // Remove pagebreak marker
@@ -1951,14 +2111,21 @@
} else {
e[3] = 3;
};
+
+ if (DEBUG)
+ log.trace(
+ "Add pagebreak or marker with {} to stack at {}-{} as {}",
+ e[2], e[0], e[1], e[3]
+ );
+
// Add empty pagebreak
stack.add(e);
}
-
+
// check if the opener is smaller than the closener
else if (openList.peekFirst()[0] < closeList.peekFirst()[1]) {
-
+
if (DEBUG)
log.debug("Open tag starts before close tag ends");
@@ -1987,9 +2154,9 @@
else {
int[] e = closeList.removeFirst();
-
+
if (DEBUG) {
- log.debug("Close ends before open");
+ log.debug("Close ends before next opens or at the same position");
log.trace(
"Add close with number {} to stack at {}-{}",
@@ -2022,6 +2189,7 @@
/**
* This will retrieve character offsets for all spans.
+ * This includes pagebreaks and markers.
*/
private boolean _processHighlightSpans () {
@@ -2075,6 +2243,7 @@
// Recalculate startOffsetChar
int startOffsetChar = startPosChar - intArray[0];
+ int endRelOffsetChar = intArray[1];
// Add match span, in case no inner match is defined
if (this.innerMatchEndPos == -1) {
@@ -2084,7 +2253,18 @@
};
// Add context highlight
- this.span.add(new int[]{intArray[0], intArray[1], CONTEXT, 0});
+ intArray = new int[]{intArray[0], intArray[1], CONTEXT, 0};
+
+ this.span.add(intArray);
+
+ if (DEBUG)
+ log.debug("Added array to context span with {} (1)", intArray);
+
+
+ // All spans starting before startOffsetChar and ending before
+ // endOffsetChar can be dismissed, as they are not part of tempSnippet
+ // This can actually be seen based on the first element of this.span
+ // at the moment.
// highlights
// -- I'm not sure about this.
@@ -2092,13 +2272,12 @@
if (DEBUG)
log.trace("There are highlights!");
- for (Highlight highlight : this.highlight) {
- if (DEBUG && highlight.start > highlight.end) {
- log.warn("Start position is before end position {}-{}!",
+ for (Highlight highlight : this.highlight) {
+ if (DEBUG && (highlight.start > highlight.end)) {
+ log.warn("Start position is before end position {} - {}!",
highlight.start,
highlight.end);
};
-
int start = -1;
int end = -1;
@@ -2117,25 +2296,35 @@
// there is already a character
start = highlight.start;
end = highlight.end;
- };
-
- if (DEBUG)
- log.trace("PTO has retrieved {}-{} for class {}", start,
- end, highlight.number);
+ };
start -= startOffsetChar;
// Keep end equal -1
if (end != PB_MARKER && end != ALL_MARKER) {
- end -= startOffsetChar;
+ if (DEBUG)
+ log.trace("PTO whas retrieved {}-{} for class {}", start,
+ end, highlight.number);
+ end -= startOffsetChar;
+
+ // Cut longer spans (e.g. from relation references)
+ if (end > endRelOffsetChar) {
+ end = endRelOffsetChar;
+ };
}
else if (DEBUG) {
log.debug("Pagebreak keeps end position");
};
- if (start < 0 || (end < 0 && end != PB_MARKER && end != ALL_MARKER))
+ if (start < 0 ||
+ ((end < 0 | start > endRelOffsetChar) && end != PB_MARKER && end != ALL_MARKER)) {
continue;
+ };
+ if (DEBUG && (start > endRelOffsetChar))
+ log.debug("Ignore marker {}/{}/{}/{}", start, end, highlight.number, endRelOffsetChar);
+
+
// Create intArray for highlight
intArray = new int[] {
start,
@@ -2209,7 +2398,6 @@
log.trace("PTO will retrieve {} (Right context)",
endOffset);
pto.add(ldid, endOffset);
-
}
// The right context is defined by characters
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
index 0a72156..396301d 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
@@ -88,11 +88,10 @@
this.combine.add(new HighlightCombinatorElement((byte) 4, annonumber));
};
-
// Add closing highlight combinator to the stack
public void addClose (int number) {
HighlightCombinatorElement lastComb;
-
+
// Clean up temporary stack
this.tempStack.clear();
@@ -119,8 +118,8 @@
int eold = this.balanceStack.pop();
// the closing element is not balanced, i.e. the last element differs
- while (eold != number) {
-
+ while (eold != number) {
+
// Retrieve last combinator on stack
lastComb = this.combine.peekLast();
@@ -178,7 +177,7 @@
log.trace("Stack for checking 2: {}|{}|{}|{}", lastComb.type,
lastComb.number, lastComb.characters, number);
};
-
+
if (lastComb.type == 1 && lastComb.number == number) {
while (lastComb.type == 1 && lastComb.number == number) {
// Remove the damn thing - It's empty and uninteresting!
@@ -186,9 +185,7 @@
lastComb = this.combine.peekLast();
};
}
- else if (lastComb.type == 3) {
- System.err.println("öööööööööööööööööööööööö");
- }
+
else {
if (DEBUG)
log.trace("close element b) {}", number);
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
index d72d46b..3fa04ce 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
@@ -224,6 +224,26 @@
return sb.toString();
}
+
+ else if (this.type == 3) {
+ StringBuilder sb = new StringBuilder();
+ sb.append("{%");
+ sb.append(this.number);
+ sb.append("}");
+ return sb.toString();
+ }
+
+ else if (this.type == 4) {
+ String[] parts = match.getAnnotationID(this.number).split(":", 2);
+ StringBuilder sb = new StringBuilder();
+ sb.append("{*");
+ sb.append(escapeBrackets(parts[0]));
+ sb.append("=");
+ sb.append(escapeBrackets(parts[1]));
+ sb.append("}");
+ return sb.toString();
+ }
+
else if (this.type == 2) {
// This is context
@@ -234,6 +254,10 @@
return "]";
return "}";
};
+
+ if (this.characters == null) {
+ return "";
+ };
return escapeBrackets(this.characters);
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java b/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
index 27f03c9..257e32c 100644
--- a/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
+++ b/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
@@ -529,7 +529,7 @@
km = kr.getMatch(3);
assertEquals(
- "<span class=\"context-left\"><span class=\"match\"></span></span>",
+ "<span class=\"context-left\"></span><span class=\"match\"></span><span class=\"context-right\"></span>",
km.getSnippetHTML());
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 97aa429..39385eb 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -458,7 +458,7 @@
snippet
);
- // Expansion - no context
+ // Expansion - no context but inner match
km = ki.getMatchInfo("match-WPD17/H81/63495-p88-91", "tokens",
"xyz", "s", true, true, true);
snippet = km.getSnippetHTML();
@@ -1138,40 +1138,39 @@
"match-Corpus/Doc/0002-p0-6",
"tokens", "malt", null, true, false);
- assertEquals(
- "SnippetHTML (1)",
- "<span class=\"context-left\">"+
- "</span>"+
- "<span class=\"match\">"+
- "<span xml:id=\"token-Corpus/Doc/0002-p0-6\">"+
- "<mark>"+
- "<span xml:id=\"token-Corpus/Doc/0002-p0\">"+
- "<span xlink:title=\"malt/d:ROOT\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p0-6\">Maximen</span>"+
- "</span>"+
- " "+
- "<span xml:id=\"token-Corpus/Doc/0002-p1\">"+
- "<span xlink:title=\"malt/d:KON\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p0\">und</span>"+
- "</span>"+
- " "+
- "<span xlink:title=\"malt/d:CJ\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p1\">Reflexionen</span>"+
- " "+
- "<span xml:id=\"token-Corpus/Doc/0002-p3\">"+
- "<span xlink:title=\"malt/d:KON\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p0\">Religion</span>"+
- "</span>"+
- " "+
- "<span xml:id=\"token-Corpus/Doc/0002-p4\">"+
- "<span xlink:title=\"malt/d:KON\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p3\">und</span>"+
- "</span>"+
- " "+
- "<span xlink:title=\"malt/d:CJ\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p4\">Christentum</span>"+
- "</mark>"+
- "</span>"+
- "</span>"+
- "<span class=\"context-right\">"+
- "<span class=\"more\"></span>"+
- "</span>",
- km.getSnippetHTML()
- );
+ assertEquals("SnippetHTML (1)",
+ "<span class=\"context-left\"></span>"+
+ "<span class=\"match\">"+
+ "<span xml:id=\"token-Corpus/Doc/0002-p0-6\">"+
+ "<mark>"+
+ "<span xml:id=\"token-Corpus/Doc/0002-p0\">"+
+ "<span xlink:title=\"malt/d:ROOT\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p0-6\">Maximen</span>"+
+ "</span>"+
+ " "+
+ "<span xml:id=\"token-Corpus/Doc/0002-p1\">"+
+ "<span xlink:title=\"malt/d:KON\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p0\">und</span>"+
+ "</span>"+
+ " "+
+ "<span xlink:title=\"malt/d:CJ\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p1\">Reflexionen</span>"+
+ " "+
+ "<span xml:id=\"token-Corpus/Doc/0002-p3\">"+
+ "<span xlink:title=\"malt/d:KON\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p0\">Religion</span>"+
+ "</span>"+
+ " "+
+ "<span xml:id=\"token-Corpus/Doc/0002-p4\">"+
+ "<span xlink:title=\"malt/d:KON\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p3\">und</span>"+
+ "</span>"+
+ " "+
+ "<span xlink:title=\"malt/d:CJ\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p4\">Christentum</span>"+
+ "</mark>"+
+ "</span>"+
+ "</span>"+
+ "<span class=\"context-right\">"+
+ "<span class=\"more\"></span>"+
+ "</span>",
+ km.getSnippetHTML()
+ );
+
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java
index 4492313..fc612ac 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java
@@ -77,10 +77,12 @@
assertEquals(5, kr.getMatch(2).getStartPos());
assertEquals(6, kr.getMatch(2).getEndPos());
- assertEquals(528, kr.getMatch(2).getStartPage());
- assertEquals("<span class=\"context-left\">abcab</span><span class=\"match\"><mark>c</mark></span><span class=\"context-right\">abac</span>",
+ assertEquals(529, kr.getMatch(2).getStartPage());
+ assertEquals("<span class=\"context-left\"><span class=\"pb\" data-after=\"528\"></span>abcab</span><span class=\"match\"><mark><span class=\"pb\" data-after=\"529\"></span>c</mark></span><span class=\"context-right\">ab<span class=\"pb\" data-after=\"530\"></span>ac</span>",
kr.getMatch(2).getSnippetHTML());
- assertEquals(529, kr.getMatch(2).getEndPage()); // Debatable
+ assertEquals("{%528}abcab[[{%529}c]]ab{%530}ac",
+ kr.getMatch(2).getSnippetBrackets());
+ assertEquals(-1, kr.getMatch(2).getEndPage()); // Debatable
assertEquals(9, kr.getMatch(3).getStartPos());
assertEquals(10, kr.getMatch(3).getEndPos());
@@ -106,10 +108,11 @@
assertEquals(3, kr.getMatch(0).getEndPos());
assertEquals(528, kr.getMatch(0).getStartPage());
assertEquals(-1, kr.getMatch(0).getEndPage());
+
assertEquals(
"snippetHTML",
"<span class=\"context-left\">"+
- // "<span class=\"pb\" data-after=\"528\"></span>"+
+ "<span class=\"pb\" data-after=\"528\"></span>"+
"ab"+
"</span>"+
"<span class=\"match\">"+
@@ -119,15 +122,17 @@
"</span>"+
"<span class=\"context-right\">"+
"ab"+
- // "<span class=\"pb\" data-after=\"528\"></span>"+
+ "<span class=\"pb\" data-after=\"529\"></span>"+
"cab"+
- // "<span class=\"pb\" data-after=\"528\"></span>"+
+ "<span class=\"pb\" data-after=\"530\"></span>"+
"a"+
"<span class=\"more\">"+
"</span>"+
"</span>",
kr.getMatch(0).getSnippetHTML());
+ assertEquals("snippetBrackets","{%528}ab[[c]]ab{%529}cab{%530}a ...",kr.getMatch(0).getSnippetBrackets());
+
QueryBuilder qb = new QueryBuilder("tokens");
sq = qb.seq().append(
qb.repeat(
@@ -139,11 +144,10 @@
assertEquals(sq.toString(), "spanNext(spanRepetition(spanNext(spanNext(tokens:s:a, tokens:s:b), tokens:s:c){2,2}), tokens:s:a)");
-
kr = ki.search(sq, (short) 10);
assertEquals(528, kr.getMatch(0).getStartPage());
- assertEquals(529, kr.getMatch(0).getEndPage());
+ assertEquals(-1, kr.getMatch(0).getEndPage());
assertEquals(
"snippetHTML",
@@ -157,8 +161,10 @@
"</mark>"+
"</span>"+
"<span class=\"context-right\">"+
- "bac"+
+ "b<span class=\"pb\" data-after=\"530\"></span>ac"+
"</span>",
kr.getMatch(0).getSnippetHTML());
+
+ assertEquals("snippetBrackets","[[{%528}abcab{%529}ca]]b{%530}ac",kr.getMatch(0).getSnippetBrackets());
};
};
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
index ec38fa4..0b34e56 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
@@ -759,7 +759,41 @@
assertEquals(0, kr.getStartIndex());
assertEquals(25, kr.getItemsPerPage());
Match m = kr.getMatch(0);
- assertEquals("<span class=\"context-left\"></span><span class=\"match\"><span class=\"inline-marker\" data-key=\"who\" data-value=\"Mai Thi Nguyen-Kim\"></span><span class=\"inline-marker\" data-key=\"start\" data-value=\"0:00\"></span><span class=\"inline-marker\" data-key=\"end\" data-value=\"01:20\"></span>(<mark>Räuspern</mark></span><span class=\"context-right\">) Wie viele Geschlechter gibt es? Wenn<span class=\"more\"></span></span>", m.getSnippetHTML());
+ assertEquals(
+ "<span class=\"context-left\"><span class=\"inline-marker\" data-key=\"who\" data-value=\"Mai Thi Nguyen-Kim\"></span><span class=\"inline-marker\" data-key=\"start\" data-value=\"0:00\"></span><span class=\"inline-marker\" data-key=\"end\" data-value=\"01:20\"></span>(</span><span class=\"match\"><mark>Räuspern</mark></span><span class=\"context-right\">) Wie viele Geschlechter gibt es? Wenn<span class=\"more\"></span></span>",
+ m.getSnippetHTML());
+
+ assertEquals(
+ "{*who=Mai Thi Nguyen-Kim}{*start=0:00}{*end=01:20}([[Räuspern]]) Wie viele Geschlechter gibt es? Wenn ...",
+ m.getSnippetBrackets());
+
+ ks = new Krill(new QueryBuilder("tokens").seg("s:Geschlechter"));
+ kr = ks.apply(ki);
+
+ assertEquals(5, kr.getTotalResults());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(25, kr.getItemsPerPage());
+ m = kr.getMatch(0);
+ assertEquals("<span class=\"context-left\"><span class=\"inline-marker\" data-key=\"who\" data-value=\"Mai Thi Nguyen-Kim\"></span><span class=\"inline-marker\" data-key=\"start\" data-value=\"0:00\"></span><span class=\"inline-marker\" data-key=\"end\" data-value=\"01:20\"></span>(Räuspern) Wie viele </span><span class=\"match\"><mark>Geschlechter</mark></span><span class=\"context-right\"> gibt es? Wenn man hierzu öffentliche<span class=\"more\"></span></span>", m.getSnippetHTML());
+
+ assertEquals(
+ "{*who=Mai Thi Nguyen-Kim}{*start=0:00}{*end=01:20}(Räuspern) Wie viele [[Geschlechter]] gibt es? Wenn man hierzu öffentliche ...",
+ m.getSnippetBrackets());
+
+ ks = new Krill(new QueryBuilder("tokens").seg("s:Zunächst"));
+ kr = ks.apply(ki);
+
+ assertEquals(1, kr.getTotalResults());
+ assertEquals(0, kr.getStartIndex());
+ assertEquals(25, kr.getItemsPerPage());
+ m = kr.getMatch(0);
+ assertEquals("<span class=\"context-left\"><span class=\"more\"></span>Perspektiven, die dazu einladen, aneinander vorbeizureden </span><span class=\"match\"><mark><span class=\"inline-marker\" data-key=\"who\" data-value=\"Mai Thi Nguyen-Kim\"></span><span class=\"inline-marker\" data-key=\"start\" data-value=\"0:00\"></span><span class=\"inline-marker\" data-key=\"end\" data-value=\"01:20\"></span>Zunächst</mark></span><span class=\"context-right\"> einmal bezeichnet Geschlecht eine Rolle bei<span class=\"more\"></span></span>", m.getSnippetHTML());
+
+ assertEquals(
+ "... Perspektiven, die dazu einladen, aneinander vorbeizureden [[{*who=Mai Thi Nguyen-Kim}{*start=0:00}{*end=01:20}Zunächst]] einmal bezeichnet Geschlecht eine Rolle bei ...",
+ m.getSnippetBrackets());
+
+
};