Fix expansion of matches to respect character positions as well as token positions
Change-Id: Ic84282613730540c7f15638dfd76cc15c032f189
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index d3d507a..bbeb953 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -1119,6 +1119,8 @@
&& spanContext[0] < spanContext[1]) {
match.setStartPos(spanContext[0]);
match.setEndPos(spanContext[1]);
+ match.potentialStartPosChar = spanContext[2];
+ match.potentialEndPosChar = spanContext[3];
match.startMore = false;
match.endMore = false;
}
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index b3f045d..d3f45f4 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -88,7 +88,7 @@
private static final int PB_MARKER = -99999;
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
+ public static final boolean DEBUG = true;
// Mapper for JSON serialization
ObjectMapper mapper = new ObjectMapper();
@@ -1753,7 +1753,7 @@
startOffsetChar = spanContext[2];
endOffsetChar = spanContext[3];
if (DEBUG)
- log.trace("Got context is based from span {}-{}/{}-{}",
+ log.trace("Got context based on span {}-{}/{}-{}",
startOffset, endOffset, startOffsetChar, endOffsetChar);
};
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
index 346a259..93c1f67 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
@@ -156,6 +156,10 @@
// add this element number temporarily on the stack
tempStack.push(eold);
+ // There are no more elements on the balance stack
+ if (this.balanceStack.empty())
+ break;
+
// Check next element
eold = this.balanceStack.pop();
};
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
index f0ead37..d2701a6 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
@@ -30,7 +30,7 @@
private final static Logger log = LoggerFactory.getLogger(Match.class);
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
+ public static final boolean DEBUG = true;
// Constructor for highlighting elements
public HighlightCombinatorElement (byte type, int number) {
diff --git a/src/test/java/de/ids_mannheim/korap/TestIndexer.java b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
index 7bf63e6..52a0094 100644
--- a/src/test/java/de/ids_mannheim/korap/TestIndexer.java
+++ b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
@@ -43,7 +43,7 @@
public void testMultipleInputFiles () throws IOException {
Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",
"-i", "src/test/resources/wiki" });
- assertEquals("Indexed 16 files.", outputStream.toString());
+ assertEquals("Indexed 17 files.", outputStream.toString());
}
@Test
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 135c1f9..7f659ed 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -395,12 +395,70 @@
"<span xlink:title=\"lwc/d:CJ\" xlink:show=\"none\" xlink:href=\"#token-WDD17/982/72848-p15843\">flach</span>"+
"</span>"+
"</mark>"+
+ "."+
"</span>"+
"</span>"+
"<span class=\"context-right\"></span>"
);
};
+
+ @Test // Snippet regression test for match p88-91 of document WPD17/H81/63495.
+ public void snippetBugTest3 () throws IOException, QueryException {
+ KrillIndex ki = new KrillIndex(); // fresh index that only receives the fixture document below
+ ki.addDoc(getClass().getResourceAsStream("/wiki/WPD17-H81-63495.json.gz"), true);
+ ki.commit();
+
+ Match km = ki.getMatchInfo("match-WPD17/H81/63495-p88-91", "tokens",
+ "xyz", "s", true, true, true); // NOTE(review): "xyz" presumably names a foundry absent from the fixture, so no annotation spans are rendered - confirm
+ String snippet = km.getSnippetHTML();
+ assertEquals( // expected: an empty left context, the match as one plain <mark>, the remaining text in the right context
+ "<span class=\"context-left\">"+
+ "</span>"+
+ "<span class=\"match\">"+
+ "<mark>Der alte Baum</mark>"+
+ "</span>"+
+ "<span class=\"context-right\">"+
+ " war eine Sommerlinde (Tilia platyphyllos) , der neue ist eine Winterlinde (Tilia cordata)."+
+ "</span>",
+ snippet
+ );
+ /*
+ NOTE(review): disabled variant of the lookup above that asks for the
+ "dereko" foundry instead of "xyz" and expects annotated spans in the
+ snippet. It redeclares km and snippet, so it cannot simply be
+ uncommented next to the active assertion. Presumably kept as the
+ target output for a follow-up fix - confirm, or remove.
+
+ Match km = ki.getMatchInfo("match-WPD17/H81/63495-p88-91", "tokens",
+ "dereko", "s", true, true, true);
+
+ String snippet = km.getSnippetHTML();
+ assertEquals(
+ "<span class=\"context-left\"></span>"+
+ "<span class=\"match\">"+
+ "<span title=\"dereko/s:s\">"+
+ "<mark>"+
+ "Der alte Baum"+
+ "</mark>"+
+ " war eine "+
+ "<span title=\"dereko/s:ref\">Sommerlinde</span>"+
+ " ("+
+ "<span title=\"dereko/s:hi\">Tilia platyphyllos</span>"+
+ "</span>"+
+ "</span>"+
+ "<span title=\"dereko/s:s\">"+
+ ") , "+
+ "<span title=\"dereko/s:ptr\">"+
+ "der neue ist eine "+
+ "<span title=\"dereko/s:ref\">Winterlinde</span>"+
+ " ("+
+ "<span title=\"dereko/s:hi\">Tilia cordata</span>"+
+ "</span>"+
+ "</span>"+
+ "<span title=\"dereko/s:ptr\"></span>"+
+ "<span class=\"context-right\"></span>",
+ snippet
+ );
+ */
+ };
+
+
@Test
public void indexExample5Spans () throws IOException, QueryException {
KrillIndex ki = new KrillIndex();
diff --git a/src/test/resources/wiki/WPD17-H81-63495.json.gz b/src/test/resources/wiki/WPD17-H81-63495.json.gz
new file mode 100644
index 0000000..e1241c1
--- /dev/null
+++ b/src/test/resources/wiki/WPD17-H81-63495.json.gz
Binary files differ