Described snippet building algorithm and fixed repetition constructor

Change-Id: Id7a38c611645e685b92274ddba77f3462aad9a39
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java
index 808b064..ec88b04 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRepetitionQueryWrapper.java
@@ -27,10 +27,12 @@
 
     // This is for exact enumbered repetition, like in a{3}
     public SpanRepetitionQueryWrapper (SpanQueryWrapper subquery, int exact) {
+
         if (!subquery.isEmpty()) {
             this.subquery = subquery;
             if (subquery.maybeUnsorted())
                 this.maybeUnsorted = true;
+			this.isEmpty = false;
         }
         else
             this.isEmpty = true;
@@ -43,6 +45,7 @@
             return;
         };
 
+		this.isNull = false;
         this.min = exact;
         this.max = exact;
     };
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
index a1edeef..b457d77 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
@@ -675,7 +675,6 @@
 
         // No real sequence - only one element
         if (size == 1) {
-
             // But the element may be expanded
             if (this.segments.get(0).isExtended()
                     && (this.hasConstraints() || !this.isInOrder())) {
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 5714bf3..890b2a8 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -34,13 +34,38 @@
 import de.ids_mannheim.korap.response.match.Relation;
 
 /*
-  Todo: The implemented classes and private names are horrible!
-  Refactor, future-me!
+ * The snippet building algorithm is quite complicated for now
+ * and should probably be refactored.
+ * It works like this:
+ *
+ * 1. For all spans, highlights, pagebreaks etc., all necessary
+ *    positions are collected (processHighlight)
+ * 2. For all collected positions the character offsets are retrieved.
+ *    Based on these offsets, a list of arrays is created for all
+ *    spans and highlights, where each array has the structure
+ *    [startchar, endchar, highlightClass] (processHighlightSpans)
+ *    2.1 The primary data and optional context information is retrieved
+ *        (processOffsetChars)
+ * 3. Based on the collected spans, two lists are created for opening and
+ *    closing tags (pretty much clones of the initial span list),
+ *    sorted for opening and closing respectively, and processed in
+ *    parallel to form an open/close stack. The new structure on the stack is
+ *    [startchar, endchar, highlightClass, open=1/close=0]
+ *    (processHighlightStack)
+ *    3.1. If the element is a relation with an identifier, this may
+ *         be removed if duplicate (filterMultipleIdentifiers)
+ * 4. Based on the stack and the primary data the snippet is created.
+ *    (processHighlightSnippet)
+ */
 
-  The number based Highlighttype is ugly - UGLY!
-
-  substrings may be out of range - e.g. if snippets are not lifted!
-*/
+/*
+ * Todo: The implemented classes and private names are horrible!
+ * Refactor, future-me!
+ *
+ * The number based Highlighttype is ugly - UGLY!
+ *
+ * substrings may be out of range - e.g. if snippets are not lifted!
+ */
 
 /**
  * Representation of Matches in a Result.
@@ -406,8 +431,8 @@
     };
 
 
-	public void addPagebreak (int start, int number) {
-		this.addHighlight(new Highlight(start, number));
+	public void addPagebreak (int start, int pagenumber) {
+		this.addHighlight(new Highlight(start, pagenumber));
 	};
 
     /**
@@ -763,8 +788,8 @@
 						if (DEBUG)
 							log.debug("Add pagebreak to list");
 
-						charOffset = bb.getInt();
 						pagenumber = bb.getInt();
+						charOffset = bb.getInt();
 						
 						// This is the first pagebreak!
 						pagebreaks.add(new int[]{charOffset, pagenumber});
@@ -781,8 +806,8 @@
 						bb.put(b);
 						bb.rewind();
 							
-						charOffset = bb.getInt();
 						pagenumber = bb.getInt();
+						charOffset = bb.getInt();
 						
 						// This is the first pagebreak!
 						pagebreaks.add(new int[]{charOffset, pagenumber});
@@ -804,9 +829,9 @@
 		};
 
 		if (pagebreaks.size() > 0) {
-			this.startPage = pagebreaks.get(0)[0];
+			this.startPage = pagebreaks.get(0)[1];
 			if (pagebreaks.size() > 1 && pagebreaks.get(pagebreaks.size()-1) != null)
-				this.endPage = pagebreaks.get(pagebreaks.size()-1)[0];
+				this.endPage = pagebreaks.get(pagebreaks.size()-1)[1];
 		}
 		
 		return pagebreaks;
@@ -1327,6 +1352,7 @@
 
 			// check if the opener is smaller than the closener
             if (openList.peekFirst()[0] < closeList.peekFirst()[1]) {
+				
                 int[] e = openList.removeFirst().clone();
 
 				// Mark as opener
@@ -1335,8 +1361,7 @@
 				// Add opener to stack
                 stack.add(e);
             }
-            else {
-
+			else {
 				// Add closener to stack
                 stack.add(closeList.removeFirst());
             };
@@ -1417,8 +1442,11 @@
         int startOffsetChar = startPosChar - intArray[0];
 
         // Add match span, in case no inner match is defined
-        if (this.innerMatchEndPos == -1)
+        if (this.innerMatchEndPos == -1) {
+			if (DEBUG)
+				log.debug("Added array to span with {} (1)", intArray);
             this.span.add(intArray);
+		};
 
         // highlights
         // -- I'm not sure about this.
@@ -1467,6 +1495,9 @@
 					0 // Dummy value for later use
                 };
 
+				if (DEBUG)
+					log.debug("Added array to span with {} (2)", intArray);
+
                 this.span.add(intArray);
             };
         };
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java
index ca4905b..dae3aa9 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java
@@ -23,6 +23,8 @@
 import de.ids_mannheim.korap.query.SpanFocusQuery;
 import de.ids_mannheim.korap.query.SpanNextQuery;
 import de.ids_mannheim.korap.query.SpanWithinQuery;
+import de.ids_mannheim.korap.query.QueryBuilder;
+import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
 import de.ids_mannheim.korap.response.Match;
 import de.ids_mannheim.korap.response.Result;
 import de.ids_mannheim.korap.response.SearchContext;
@@ -35,7 +37,7 @@
 public class TestPagebreakIndex {
 
     @Test
-    public void indexExample1 () throws IOException {
+    public void indexExample1 () throws Exception {
 		KrillIndex ki = new KrillIndex();
 
 		// abcabcabac
@@ -55,15 +57,14 @@
         ki.commit();
 
 		SpanQuery sq = new SpanTermQuery(new Term("tokens", "s:c"));
-
         Result kr = ki.search(sq, (short) 10);
-
+		
 		assertEquals(528, kr.getMatch(0).getStartPage());
 		assertEquals(-1, kr.getMatch(0).getEndPage());
 		assertEquals(
 			"snippetHTML",
 			"<span class=\"context-left\">"+
-			"<span class=\"pb\" data-after=\"528\"></span>"+
+			// "<span class=\"pb\" data-after=\"528\"></span>"+
 			"ab"+
 			"</span>"+
 			"<span class=\"match\">"+
@@ -73,13 +74,25 @@
 			"</span>"+
 			"<span class=\"context-right\">"+
 			"ab"+
-			"<span class=\"pb\" data-after=\"528\"></span>"+
+			// "<span class=\"pb\" data-after=\"528\"></span>"+
 			"cab"+
-			"<span class=\"pb\" data-after=\"528\"></span>"+
+			// "<span class=\"pb\" data-after=\"528\"></span>"+
 			"a"+
 			"<span class=\"more\">"+
 			"</span>"+
 			"</span>",
 			kr.getMatch(0).getSnippetHTML());
+
+
+		QueryBuilder qb = new QueryBuilder("tokens");
+		sq = qb.seq().append(
+			qb.repeat(
+				qb.seq().append(qb.seg("s:a")).append(qb.seg("s:b")).append(qb.seg("s:c")),
+				2
+				)
+			).append(qb.seg("s:a"))
+			.toQuery();
+
+		assertEquals(sq.toString(), "spanNext(spanRepetition(spanNext(spanNext(tokens:s:a, tokens:s:b), tokens:s:c){2,2}), tokens:s:a)");
 	};
 };