Fixed pagebreak retrieval
Change-Id: Ic7fa5478606649a2a10614f3c89c24f6abbe6200
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 29ac699..145ac6f 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -88,7 +88,7 @@
private static final int PB_MARKER = -99999;
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = true;
+ public static final boolean DEBUG = false;
// Mapper for JSON serialization
ObjectMapper mapper = new ObjectMapper();
@@ -712,7 +712,8 @@
return this.retrievePagebreaks(
this.positionsToOffset.getLeafReader(),
(Bits) null,
- "tokens", pb
+ "tokens",
+ pb
);
};
@@ -729,8 +730,13 @@
// List of relevant pagebreaks
List<int[]> pagebreaks = new ArrayList<>(24);
- int charOffset = 0, pagenumber = 0;
-
+ int charOffset = 0, pagenumber = 0, start = 0;
+
+ if (DEBUG)
+ log.debug("Retrieve pagebreaks between {}-{}",
+ this.getStartPos(),
+ this.getEndPos());
+
try {
// Store character offsets in ByteBuffer
@@ -752,7 +758,9 @@
while (pagebreakSpans.next() == true) {
if (DEBUG) {
- log.debug("There is a pagebreak at {}", pagebreakSpans.doc());
+ log.debug("There is a pagebreak at {}/{}",
+ pagebreakSpans.doc(),
+ pagebreakSpans.start());
};
// Current pagebreak is not in the correct document
@@ -772,11 +780,12 @@
if (pagebreakSpans.start() <= this.getStartPos()) {
if (DEBUG)
- log.debug("PB start position is before at {}",
+ log.debug("PB start position is before match at {}",
pagebreakSpans.start());
// Only the first payload is relevant
b = pagebreakSpans.getPayload().iterator().next();
+ start = pagebreakSpans.start();
}
// This is the first pagebreak!
@@ -796,11 +805,18 @@
// This is the first pagebreak!
pagebreaks.add(new int[]{charOffset, pagenumber});
- this.addPagebreak(charOffset, pagenumber);
+ if (start >= this.getStartPos()) {
+
+ if (DEBUG)
+ log.debug("Add pagebreak to rendering: {}-{}",
+ charOffset,
+ pagenumber);
+ this.addPagebreak(charOffset, pagenumber);
+ };
}
// b wasn't used yet
- else if (pagebreakSpans.start() <= this.getEndPos()) {
+ if (pagebreakSpans.start() <= this.getEndPos()) {
// Set new pagebreak
// Only the first payload is relevant
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
index 7286436..296fc33 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
@@ -17,7 +17,7 @@
private final static Logger log = LoggerFactory.getLogger(Match.class);
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = true;
+ public static final boolean DEBUG = false;
private LinkedList<HighlightCombinatorElement> combine;
private Stack<Integer> balanceStack = new Stack<>();
@@ -137,7 +137,9 @@
// Last element is empty
else if (lastComb.type == 3) {
- System.err.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
+ log.error(
+ "The last element was empty -- and this is not correctly handled yet"
+ );
}
// combinator is either closing (??) or another opener
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index ee1dce7..2805e78 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,4 +1,4 @@
-# log4j.rootLogger = ERROR, stdout
+log4j.rootLogger = ERROR, stdout
# Queries:
# log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
@@ -29,7 +29,7 @@
# Responses:
# log4j.logger.de.ids_mannheim.korap.server.Node = TRACE, stdout
-log4j.logger.de.ids_mannheim.korap.response.Match = TRACE, stdout
+# log4j.logger.de.ids_mannheim.korap.response.Match = TRACE, stdout
# Index:
# log4j.logger.de.ids_mannheim.korap.KrillIndex = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java
index 288e672..9c303da 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java
@@ -58,10 +58,12 @@
SpanQuery sq;
Result kr;
- /*
- sq = new SpanTermQuery(new Term("tokens", "s:c"));
+
+ sq = new SpanTermQuery(new Term("tokens", "s:c"));
kr = ki.search(sq, (short) 10);
-
+
+ assertEquals(2, kr.getMatch(0).getStartPos());
+ assertEquals(3, kr.getMatch(0).getEndPos());
assertEquals(528, kr.getMatch(0).getStartPage());
assertEquals(-1, kr.getMatch(0).getEndPage());
assertEquals(
@@ -85,7 +87,8 @@
"</span>"+
"</span>",
kr.getMatch(0).getSnippetHTML());
-*/
+
+ /*
QueryBuilder qb = new QueryBuilder("tokens");
sq = qb.seq().append(
@@ -102,7 +105,7 @@
kr = ki.search(sq, (short) 10);
assertEquals(528, kr.getMatch(0).getStartPage());
- assertEquals(-1, kr.getMatch(0).getEndPage());
+ assertEquals(529, kr.getMatch(0).getEndPage());
assertEquals(
"snippetHTML",
"<span class=\"context-left\"></span>"+
@@ -110,7 +113,7 @@
"<mark>"+
"<span class=\"pb\" data-after=\"528\"></span>"+
"abcab"+
- // "<span class=\"pb\" data-after=\"529\"></span>"+
+ "<span class=\"pb\" data-after=\"529\"></span>"+
"ca"+
"</mark>"+
"</span>"+
@@ -118,5 +121,6 @@
"bac"+
"</span>",
kr.getMatch(0).getSnippetHTML());
+ */
};
};