Fix expansion of matches to respect character positions as well as token positions

Change-Id: Ic84282613730540c7f15638dfd76cc15c032f189
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index d3d507a..bbeb953 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -1119,6 +1119,8 @@
                             && spanContext[0] < spanContext[1]) {
                         match.setStartPos(spanContext[0]);
                         match.setEndPos(spanContext[1]);
+						match.potentialStartPosChar = spanContext[2];
+						match.potentialEndPosChar = spanContext[3];
                         match.startMore = false;
                         match.endMore = false;
                     }
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index b3f045d..d3f45f4 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -88,7 +88,7 @@
 	private static final int PB_MARKER = -99999;
 
     // This advices the java compiler to ignore all loggings
-    public static final boolean DEBUG = false;
+    public static final boolean DEBUG = false;
 
     // Mapper for JSON serialization
     ObjectMapper mapper = new ObjectMapper();
@@ -1753,7 +1753,7 @@
             startOffsetChar = spanContext[2];
             endOffsetChar = spanContext[3];
             if (DEBUG)
-                log.trace("Got context is based from span {}-{}/{}-{}",
+                log.trace("Got context based on span {}-{}/{}-{}",
                         startOffset, endOffset, startOffsetChar, endOffsetChar);
         };
 
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
index 346a259..93c1f67 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
@@ -156,6 +156,10 @@
             // add this element number temporarily on the stack
             tempStack.push(eold);
 
+			// There are no more elements on the balance stack
+			if (this.balanceStack.empty())
+				break;
+
             // Check next element
             eold = this.balanceStack.pop();
         };
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
index f0ead37..d2701a6 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
@@ -30,7 +30,7 @@
 	private final static Logger log = LoggerFactory.getLogger(Match.class);
 
 	// This advices the java compiler to ignore all loggings
-    public static final boolean DEBUG = false;
+    public static final boolean DEBUG = false;
 
     // Constructor for highlighting elements
     public HighlightCombinatorElement (byte type, int number) {
diff --git a/src/test/java/de/ids_mannheim/korap/TestIndexer.java b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
index 7bf63e6..52a0094 100644
--- a/src/test/java/de/ids_mannheim/korap/TestIndexer.java
+++ b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
@@ -43,7 +43,7 @@
     public void testMultipleInputFiles () throws IOException {

         Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",

                 "-i", "src/test/resources/wiki" });

-        assertEquals("Indexed 16 files.", outputStream.toString());

+        assertEquals("Indexed 17 files.", outputStream.toString());

     }

 

     @Test

diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 135c1f9..7f659ed 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -395,12 +395,70 @@
 			"<span xlink:title=\"lwc/d:CJ\" xlink:show=\"none\" xlink:href=\"#token-WDD17/982/72848-p15843\">flach</span>"+
 			"</span>"+
 			"</mark>"+
+			"."+
 			"</span>"+
 			"</span>"+
 			"<span class=\"context-right\"></span>"
 			);
 	};
 
+	// Regression test: the HTML snippet for match p88-91 of WPD17/H81/63495 must mark exactly "Der alte Baum".
+	@Test
+    public void snippetBugTest3 () throws IOException, QueryException {
+        KrillIndex ki = new KrillIndex();
+        ki.addDoc(getClass().getResourceAsStream("/wiki/WPD17-H81-63495.json.gz"), true);
+        ki.commit();
+
+        Match km = ki.getMatchInfo("match-WPD17/H81/63495-p88-91", "tokens",
+								   "xyz", "s", true, true, true);
+		String snippet = km.getSnippetHTML();
+		assertEquals(
+			"<span class=\"context-left\">"+
+			"</span>"+
+			"<span class=\"match\">"+
+			"<mark>Der alte Baum</mark>"+
+			"</span>"+
+			"<span class=\"context-right\">"+
+			" war eine Sommerlinde (Tilia platyphyllos) , der neue ist eine Winterlinde (Tilia cordata)."+
+			"</span>",
+			snippet
+			);
+		/*
+		NOTE(review): disabled variant requesting the "dereko" foundry instead of "xyz" - enable or delete once settled.
+		Match km = ki.getMatchInfo("match-WPD17/H81/63495-p88-91", "tokens",
+								   "dereko", "s", true, true, true);
+
+		String snippet = km.getSnippetHTML();
+		assertEquals(
+			"<span class=\"context-left\"></span>"+
+		    "<span class=\"match\">"+
+		      "<span title=\"dereko/s:s\">"+
+			    "<mark>"+
+			      "Der alte Baum"+
+			    "</mark>"+
+			    " war eine "+
+			    "<span title=\"dereko/s:ref\">Sommerlinde</span>"+
+			    " ("+
+			    "<span title=\"dereko/s:hi\">Tilia platyphyllos</span>"+
+			  "</span>"+
+			"</span>"+
+			"<span title=\"dereko/s:s\">"+
+			  ") , "+
+			  "<span title=\"dereko/s:ptr\">"+
+			    "der neue ist eine "+
+			    "<span title=\"dereko/s:ref\">Winterlinde</span>"+
+			    " ("+
+			    "<span title=\"dereko/s:hi\">Tilia cordata</span>"+
+			  "</span>"+
+			"</span>"+
+			"<span title=\"dereko/s:ptr\"></span>"+
+			"<span class=\"context-right\"></span>",
+			snippet
+			);
+		*/
+	};
+
+	
     @Test
     public void indexExample5Spans () throws IOException, QueryException {
         KrillIndex ki = new KrillIndex();
diff --git a/src/test/resources/wiki/WPD17-H81-63495.json.gz b/src/test/resources/wiki/WPD17-H81-63495.json.gz
new file mode 100644
index 0000000..e1241c1
--- /dev/null
+++ b/src/test/resources/wiki/WPD17-H81-63495.json.gz
Binary files differ