Merge "Failing test for offset bug in snippets"
diff --git a/src/test/java/de/ids_mannheim/korap/TestIndexer.java b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
index 8b15301..f5aa676 100644
--- a/src/test/java/de/ids_mannheim/korap/TestIndexer.java
+++ b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
@@ -43,7 +43,7 @@
     public void testMultipleInputFiles () throws IOException {

         Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",

                 "-i", "src/test/resources/wiki" });

-        assertEquals("Indexed 14 files.", outputStream.toString());

+        assertEquals("Indexed 15 files.", outputStream.toString()); // 14 + the wpd15-u43-34816.json.gz added to src/test/resources/wiki by this change

     }

 

     @Test

diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index 44cc689..9156585 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -293,6 +293,48 @@
         assertEquals("Goethe, Johann Wolfgang von", res.at("/author").asText());
 	};
 
+    /**
+     * Looks up the same match ID twice with different annotation arguments
+     * ("tt"/"l", then "dereko"/null) and compares the resulting HTML snippets.
+     */
+    @Test
+    public void snippetBugTest () throws IOException, QueryException {
+        final String matchId = "match-WPD15/U43/34816-p420-422";
+        // Both expected snippets share the same left and right context markup.
+        final String leftContext = "<span class=\"context-left\">"
+            + "<span class=\"more\"></span></span>";
+        final String rightContext = "<span class=\"context-right\">"
+            + "<span class=\"more\"></span></span>";
+
+        KrillIndex index = new KrillIndex();
+        index.addDoc(
+            getClass().getResourceAsStream("/wiki/wpd15-u43-34816.json.gz"), true);
+        index.commit();
+
+        // First lookup: "tt" / "l", both boolean flags off.
+        Match withTokens = index.getMatchInfo(
+            matchId, "tokens", "tt", "l", false, false);
+        String expectedTokens = leftContext
+            + "<span class=\"match\"><mark>"
+            + "<span title=\"tt/l:online\">online</span> "
+            + "<span title=\"tt/l:verfügbar\">verfügbar</span>"
+            + "</mark></span>"
+            + rightContext;
+        assertEquals("SnippetBrackets (with Spans)",
+                     expectedTokens, withTokens.getSnippetHTML());
+
+        // Second lookup: "dereko" / null, first boolean flag on.
+        Match withSpans = index.getMatchInfo(
+            matchId, "tokens", "dereko", null, true, false);
+        String expectedSpans = leftContext
+            + "<span class=\"match\"><mark>"
+            + "<span title=\"dereko/s:ref\">online</span> verfügbar"
+            + "</mark></span>"
+            + rightContext;
+        assertEquals("SnippetBrackets (with Spans)",
+                     expectedSpans, withSpans.getSnippetHTML());
+    };
+
 
     @Test
     public void indexExample5Spans () throws IOException, QueryException {
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java
index 16ced0a..7407ed3 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java
@@ -346,13 +346,71 @@
         assertEquals(2, kr.getMatch(0).getEndPos());
         assertEquals(3, kr.getMatch(1).getStartPos());
         assertEquals(5, kr.getMatch(1).getEndPos());
-    }
+    };
+
+
+    /**
+     * Builds an index in two commits and runs two nested
+     * {@link SpanNextQuery} sequences against it, checking the hit counts.
+     */
+    @Test
+    public void sequenceSkipBug () throws IOException {
+        KrillIndex ki = new KrillIndex();
+
+        ki.addDoc(createFieldDoc1());
+        ki.addDoc(createFieldDoc3());
+        ki.addDoc(createFieldDoc4());
+        ki.addDoc(createFieldDoc5()); // match for query 2
+        ki.addDoc(createFieldDoc1());
+        ki.addDoc(createFieldDoc3());
+        ki.addDoc(createFieldDoc4());
+        ki.addDoc(createFieldDoc1());
+        ki.addDoc(createFieldDoc1());
+        ki.addDoc(createFieldDoc3());
+        ki.addDoc(createFieldDoc1());
+        ki.commit();
+
+        ki.addDoc(createFieldDoc5()); // match for query 2
+        ki.addDoc(createFieldDoc1());
+        ki.addDoc(createFieldDoc2()); // match for query 1 and query 2
+        ki.addDoc(createFieldDoc1());
+        ki.addDoc(createFieldDoc3());
+        ki.addDoc(createFieldDoc4());
+        ki.addDoc(createFieldDoc1());
+        ki.addDoc(createFieldDoc3());
+        ki.commit();
+
+        // Query 1: "cab" is only in createFieldDoc2.
+        SpanQuery sq = new SpanNextQuery(
+            new SpanNextQuery(
+                new SpanTermQuery(new Term("base", "s:c")),
+                new SpanTermQuery(new Term("base", "s:a"))),
+            new SpanTermQuery(new Term("base", "s:b")));
+
+        Result kr = ki.search(sq, (short) 10);
+
+        // Check the count before touching getMatch(0), which may not exist;
+        // expected value goes first so failure messages read correctly.
+        assertEquals("totalResults", 1, kr.getTotalResults());
+        assertEquals(0, kr.getMatch(0).getStartPos());
+        assertEquals(3, kr.getMatch(0).getEndPos());
+
+        // Query 2: "aba" is in createFieldDoc2 and createFieldDoc5.
+        sq = new SpanNextQuery(
+            new SpanNextQuery(
+                new SpanTermQuery(new Term("base", "s:a")),
+                new SpanTermQuery(new Term("base", "s:b"))),
+            new SpanTermQuery(new Term("base", "s:a")));
+
+        kr = ki.search(sq, (short) 10);
+        assertEquals("totalResults", 3, kr.getTotalResults());
+    };
 
 
     private FieldDocument createFieldDoc1 () {
         FieldDocument fd = new FieldDocument();
         fd.addString("ID", "doc-0");
-        fd.addTV("base", "bcbadb",
+        fd.addTV("base", "bbadb", // bba[dc]b
                 "[(0-1)s:b|i:b|_0$<i>0<i>1]" + "[(1-2)s:c|i:c|s:b|_1$<i>1<i>2]"
                         + "[(2-3)s:b|i:b|_2$<i>2<i>3]"
                         + "[(3-4)s:a|i:a|_3$<i>3<i>4|<>:e$<b>64<i>3<i>6<i>6<b>0]"
@@ -365,7 +423,7 @@
     private FieldDocument createFieldDoc2 () {
         FieldDocument fd = new FieldDocument();
         fd.addString("ID", "doc-1");
-        fd.addTV("base", "caba",
+        fd.addTV("base", "caba", // c[ac][ba]a
                 "[(0-1)s:c|i:c|_0$<i>0<i>1]"
                         + "[(1-2)s:a|i:a|s:c|_1$<i>1<i>2|<>:e$<b>64<i>1<i>3<i>3<b>0]"
                         + "[(2-3)s:b|i:b|s:a|_2$<i>2<i>3]"
@@ -377,7 +435,7 @@
     private FieldDocument createFieldDoc3 () {
         FieldDocument fd = new FieldDocument();
         fd.addString("ID", "doc-2");
-        fd.addTV("base", "cdbd",
+        fd.addTV("base", "cdbd", //  c[ba]d
                 "[(0-1)s:c|i:c|_0$<i>0<i>1]" + "[(1-2)s:d|i:d|_1$<i>1<i>2]"
                         + "[(2-3)s:b|i:b|s:a|_2$<i>2<i>3]"
                         + "[(3-4)s:d|i:d|_3$<i>3<i>4]");
@@ -389,7 +447,7 @@
     private FieldDocument createFieldDoc4 () {
         FieldDocument fd = new FieldDocument();
         fd.addString("ID", "doc-3");
-        fd.addTV("base", "bcbadb",
+        fd.addTV("base", "bcbadb", // b[cb]ba[dc]b
                 "[(0-1)s:b|i:b|_0$<i>0<i>1]"
                         + "[(1-2)s:c|i:c|s:b|<>:s$<b>64<i>1<i>3<i>3<b>0|_1$<i>1<i>2<b>0]"
                         + "[(2-3)s:b|i:b|_2$<i>2<i>3]"
@@ -399,5 +457,17 @@
         return fd;
     }
 
+    // Fixture "doc-4": surface text "dabaca", six single-character tokens.
+    private FieldDocument createFieldDoc5 () {
+        final String tokens = "[(0-1)s:d|i:d|_0$<i>0<i>1]"
+            + "[(1-2)s:a|i:a|_1$<i>1<i>2|<>:e$<b>64<i>1<i>3<i>3<b>0]"
+            + "[(2-3)s:b|i:b|_2$<i>2<i>3]" + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
+            + "[(4-5)s:c|i:c|_4$<i>4<i>5]"
+            + "[(5-6)s:a|i:a|_5$<i>5<i>6]";
+        FieldDocument doc = new FieldDocument();
+        doc.addString("ID", "doc-4");
+        doc.addTV("base", "dabaca", tokens);
+        return doc;
+    }
 
 };
diff --git a/src/test/resources/wiki/wpd15-u43-34816.json.gz b/src/test/resources/wiki/wpd15-u43-34816.json.gz
new file mode 100644
index 0000000..1b8cc9e
--- /dev/null
+++ b/src/test/resources/wiki/wpd15-u43-34816.json.gz
Binary files differ