Added highlights with pagebreak
Change-Id: I250d6c7b1c94e90a6ae40e9a3f2eff156847d67a
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index c8a5ebd..5714bf3 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -56,8 +56,11 @@
// Logger
private final static Logger log = LoggerFactory.getLogger(Match.class);
+ // end marker of highlights that are pagebreaks
+ private static final int PB_MARKER = -99999;
+
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
+ public static final boolean DEBUG = true;
// Mapper for JSON serialization
ObjectMapper mapper = new ObjectMapper();
@@ -177,12 +180,13 @@
/**
* Private class of highlights.
+ * TODO: This should probably be renamed, as it not only contains highlights
+ * but also annotations and pagebreaks
*/
private class Highlight {
public int start, end;
public int number = -1;
-
// Relational highlight
public Highlight (int start, int end, String annotation, int ref) {
this.start = start;
@@ -212,6 +216,13 @@
this.end = end;
this.number = number;
};
+
+ // Pagebreak
+ public Highlight (int start, int pagenumber) {
+ this.start = start;
+ this.end = PB_MARKER;
+ this.number = pagenumber;
+ };
};
@@ -395,6 +406,10 @@
};
+ public void addPagebreak (int start, int number) {
+ this.addHighlight(new Highlight(start, number));
+ };
+
/**
* Get document id.
*/
@@ -455,7 +470,7 @@
// Iterate over highlights to find matching class
for (Highlight h : this.highlight) {
- if (h.number == number)
+ if (h.number == number && h.end != PB_MARKER)
return h.start;
};
@@ -499,7 +514,7 @@
for (Highlight h : this.highlight) {
// Get the number (incremented by 1)
- if (h.number == number)
+ if (h.number == number && h.end != PB_MARKER)
return h.end + 1;
};
@@ -608,7 +623,7 @@
// There are highlights to integrate
if (this.highlight != null) {
for (Highlight h : this.highlight) {
- if (h.number >= 256)
+ if (h.number >= 256 || h.end == PB_MARKER)
continue;
// Add highlight to the snippet
@@ -685,6 +700,8 @@
// List of relevant pagebreaks
List<int[]> pagebreaks = new ArrayList<>(24);
+
+ int charOffset = 0, pagenumber = 0;
try {
@@ -727,7 +744,8 @@
if (pagebreakSpans.start() <= this.getStartPos()) {
if (DEBUG)
- log.debug("PB start position is before at {}", pagebreakSpans.start());
+ log.debug("PB start position is before at {}",
+ pagebreakSpans.start());
// Only the first payload is relevant
b = pagebreakSpans.getPayload().iterator().next();
@@ -744,13 +762,13 @@
if (DEBUG)
log.debug("Add pagebreak to list");
+
+ charOffset = bb.getInt();
+ pagenumber = bb.getInt();
// This is the first pagebreak!
- pagebreaks.add(
- new int[]{
- bb.getInt(),
- bb.getInt()
- });
+ pagebreaks.add(new int[]{charOffset, pagenumber});
+ this.addPagebreak(charOffset, pagenumber);
}
// b wasn't used yet
@@ -763,13 +781,12 @@
bb.put(b);
bb.rewind();
+ charOffset = bb.getInt();
+ pagenumber = bb.getInt();
+
// This is the first pagebreak!
- pagebreaks.add(
- new int[]{
- bb.getInt(),
- bb.getInt()
- });
-
+ pagebreaks.add(new int[]{charOffset, pagenumber});
+ this.addPagebreak(charOffset,pagenumber);
}
// Pagebreak beyond the current position
@@ -977,13 +994,22 @@
for (Highlight hl : this.highlight) {
if (hl.start >= this.getStartPos()
&& hl.end <= this.getEndPos()) {
- pto.add(this.localDocID, hl.start);
- pto.add(this.localDocID, hl.end);
- if (DEBUG)
- log.trace(
+ // Highlight is no pagebreak
+ if (hl.end != PB_MARKER) {
+ pto.add(this.localDocID, hl.start);
+ pto.add(this.localDocID, hl.end);
+
+ if (DEBUG)
+ log.trace(
"PTO will retrieve {} & {} (Highlight boundary)",
hl.start, hl.end);
+
+ }
+
+ else if (DEBUG) {
+ log.trace("Highlight is a pagebreak - do not retrieve PTO");
+ };
};
};
};
@@ -1001,7 +1027,6 @@
if (DEBUG)
log.trace("The snippet is {}", this.tempSnippet);
-
// The temporary snippet is empty, nothing to do
if (this.tempSnippet == null) {
processed = true;
@@ -1093,22 +1118,32 @@
// The position
pos = element[3] != 0 ? element[0] : element[1];
+ // The new position is behind the old position
if (pos > oldPos) {
+ // The position is behind the string length,
+ // which may end when an element ends beyond
if (pos > clean.length()) {
+
+ // Reposition to the end
pos = clean.length() - 1;
};
+ // Add partial string
snippetArray.addString(clean.substring(oldPos, pos));
+ // Remember the new position
oldPos = pos;
};
- if (element[3] != 0) {
- snippetArray.addOpen(element[2]);
- }
- else {
+ // close tag
+ if (element[3] == 0) {
snippetArray.addClose(element[2]);
+ }
+
+ // open tag
+ else {
+ snippetArray.addOpen(element[2]);
};
};
@@ -1279,6 +1314,7 @@
// Create stack unless both lists are empty
while (!openList.isEmpty() || !closeList.isEmpty()) {
+ // Nothing more to open -- close all
if (openList.isEmpty()) {
stack.addAll(closeList);
break;
@@ -1289,12 +1325,19 @@
break;
};
+ // check if the opener is smaller than the closener
if (openList.peekFirst()[0] < closeList.peekFirst()[1]) {
int[] e = openList.removeFirst().clone();
+
+ // Mark as opener
e[3] = 1;
+
+ // Add opener to stack
stack.add(e);
}
else {
+
+ // Add closener to stack
stack.add(closeList.removeFirst());
};
};
@@ -1384,22 +1427,44 @@
log.trace("There are highlights!");
for (Highlight highlight : this.highlight) {
- int start = this.positionsToOffset.start(ldid, highlight.start);
+ int start = -1;
+ int end = -1;
- int end = this.positionsToOffset.end(ldid, highlight.end);
+ // Highlight is a pagebreak
+ if (highlight.end != PB_MARKER) {
+ start = this.positionsToOffset.start(ldid, highlight.start);
+ end = this.positionsToOffset.end(ldid, highlight.end);
+ }
+ else {
+
+ if (DEBUG)
+ log.trace("Highlight is pagebreak -- do not retrieve offset");
+
+ // In pagebreak highlights
+ // there is already a character
+ start = highlight.start;
+ };
if (DEBUG)
log.trace("PTO has retrieved {}-{} for class {}", start,
- end, highlight.number);
+ end, highlight.number);
start -= startOffsetChar;
- end -= startOffsetChar;
- if (start < 0 || end < 0)
+ // Keep end equal -1
+ if (end != PB_MARKER) {
+ end -= startOffsetChar;
+ };
+
+ if (start < 0 || (end < 0 && end != PB_MARKER))
continue;
// Create intArray for highlight
- intArray = new int[] { start, end, highlight.number, 0 // Dummy value for later
+ intArray = new int[] {
+ start,
+ end,
+ highlight.number,
+ 0 // Dummy value for later use
};
this.span.add(intArray);
@@ -1610,6 +1675,7 @@
ArrayList<Integer> removeDuplicate = new ArrayList<>(10);
HashSet<Integer> identifiers = new HashSet<>(20);
for (int i = 0; i < this.span.size(); i++) {
+
// span is an int array: [Start, End, Number, Dummy]
int highlightNumber = this.span.get(i)[2];
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
index 386afff..f73017f 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
@@ -72,7 +72,6 @@
this.balanceStack.push(number);
};
-
// Add closing highlight combinator to the stack
public void addClose (int number) {
HighlightCombinatorElement lastComb;
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 2805e78..ee1dce7 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,4 +1,4 @@
-log4j.rootLogger = ERROR, stdout
+# log4j.rootLogger = ERROR, stdout
# Queries:
# log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
@@ -29,7 +29,7 @@
# Responses:
# log4j.logger.de.ids_mannheim.korap.server.Node = TRACE, stdout
-# log4j.logger.de.ids_mannheim.korap.response.Match = TRACE, stdout
+log4j.logger.de.ids_mannheim.korap.response.Match = TRACE, stdout
# Index:
# log4j.logger.de.ids_mannheim.korap.KrillIndex = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java b/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
index 572f1eb..56cf1d3 100644
--- a/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
+++ b/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
@@ -190,6 +190,7 @@
Result kr = ki.search(q);
assertEquals((long) 14, kr.getTotalResults());
assertEquals("[[{1:a}]]bab", kr.getMatch(0).getSnippetBrackets());
+
assertEquals("a[[{2:b}]]ab", kr.getMatch(1).getSnippetBrackets());
assertEquals("ab[[{1:a}]]b", kr.getMatch(2).getSnippetBrackets());
assertEquals("aba[[{2:b}]]", kr.getMatch(3).getSnippetBrackets());