Improved the fix in NextSpans so that the candidate list is recreated in case the match list is empty

Change-Id: I69778b018458f235945dc64b4bf0601269a45f06
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
index 9f71154..8a22191 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
@@ -85,12 +85,22 @@
                 || !candidateList.isEmpty()) {
 
             // Check, if the matchlist is fine
-            // It may be enough to clear it though
-            while (!matchList.isEmpty() && matchList.get(0).getDoc() != firstSpans.doc()) {
-                matchList.remove(0);
+            if (!matchList.isEmpty() &&
+                candidateListDocNum != firstSpans.doc()) {
+
                 if (DEBUG) {
-                    log.debug("Remove first entry from matchlist because it's not in the same doc");
+                    log.debug(
+                        "Remove entries from matchlist because " +
+                        "it's not in the same doc {}!={}",
+                        firstSpans.doc(),
+                        candidateListDocNum);
                 };
+
+                // Clear matchList
+                matchList.clear();
+
+                // Set new matchlist
+                setMatchList();
             };
 
             if (!matchList.isEmpty()) {                
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
index 22314e4..7e57a80 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
@@ -86,13 +86,17 @@
 
         while (hasMoreSpans || !matchList.isEmpty()) {
             if (!matchList.isEmpty()) {
+
+                // Take the first element of the matchlist
                 setMatchProperties(matchList.get(0));
                 matchList.remove(0);
+               
                 return true;
             }
             matchCost = 0;
 
             List<CandidateSpan> adjacentSpans = collectAdjacentSpans();
+
             setMatchList(adjacentSpans);
         }
         return false;
@@ -135,23 +139,30 @@
             i++;
         }
         while ((hasMoreSpans = firstSpans.next())
-                && startSpan.getDoc() == firstSpans.doc()) {
+               && startSpan.getDoc() == firstSpans.doc()) {
 
             if (DEBUG) {
-                log.debug("Check adjacency at {}-{}|{}-{} in {}",
-                        prevSpan.getStart(), prevSpan.getEnd(),
-                        firstSpans.start(), firstSpans.end(),
-                        startSpan.getDoc());
+                log.debug("Check adjacency of rep-spans at {}-{}|{}-{} in {}={}={}",
+                          prevSpan.getStart(), prevSpan.getEnd(),
+                          firstSpans.start(), firstSpans.end(),
+                          startSpan.getDoc(), firstSpans.doc(), prevSpan.getDoc());
             };
 
             if (firstSpans.start() > prevSpan.getEnd()) {
                 candidates.add(new CandidateSpan(firstSpans));
                 break;
             }
+
+            /*
+             * ND: This seems to be suboptimal, in cases of searching
+             *     for "ab{2,3}c" and a match like "abbbbbbbbbbbbbbbbbbbbbbbbbbc".
+             */
             else if (firstSpans.start() == prevSpan.getEnd()) {
                 prevSpan = new CandidateSpan(firstSpans);
                 adjacentSpans.add(prevSpan);
             }
+
+            // Remaining case: firstSpans.start() < prevSpan.getEnd()
             else {
                 candidates.add(new CandidateSpan(firstSpans));
             }
@@ -169,6 +180,7 @@
      */
     private void setMatchList (List<CandidateSpan> adjacentSpans) {
         CandidateSpan startSpan, endSpan, matchSpan;
+        
         for (int i = min; i < max + 1; i++) {
             int j = 0;
             int endIndex;
@@ -180,6 +192,14 @@
                         matchSpan = startSpan.clone();
                         matchSpan.setPayloads(computeMatchPayload(adjacentSpans,
                                 0, endIndex - 1));
+
+                        if (DEBUG) {
+                            log.debug("1. Add span to matchlist: {}-{} at {}",
+                                      matchSpan.getStart(),
+                                      matchSpan.getEnd(),
+                                      matchSpan.getDoc());
+                        };
+
                         matchList.add(matchSpan);
                     }
                     catch (CloneNotSupportedException e) {
@@ -188,21 +208,36 @@
                 }
                 else {
                     endSpan = adjacentSpans.get(endIndex);
-                    matchSpan = new CandidateSpan(startSpan.getStart(),
-                            endSpan.getEnd(), startSpan.getDoc(),
-                            computeMatchCost(adjacentSpans, 0, endIndex),
-                            computeMatchPayload(adjacentSpans, 0, endIndex));
+                    matchSpan = new CandidateSpan(
+                        startSpan.getStart(),
+                        endSpan.getEnd(),
+                        startSpan.getDoc(),
+                        computeMatchCost(adjacentSpans, 0, endIndex),
+                        computeMatchPayload(adjacentSpans, 0, endIndex)
+                        );
                     //System.out.println("c:"+matchSpan.getCost() +" p:"+ matchSpan.getPayloads().size());
                     //System.out.println(startSpan.getStart() +","+endSpan.getEnd());
+
+                    if (DEBUG) {
+                        log.debug("2. Add span to matchlist: {}-{} at {}={}",
+                                  matchSpan.getStart(),
+                                  matchSpan.getEnd(),
+                                  matchSpan.getDoc(),
+                                  endSpan.getDoc());
+                    };
+                    
                     matchList.add(matchSpan);
                 }
                 j++;
             }
 
+            /*
             if (j + i == adjacentSpans.size()) {
 
             }
+            */
         }
+
         Collections.sort(matchList);
     }
 
@@ -269,6 +304,14 @@
      */
     private void setMatchProperties (CandidateSpan candidateSpan)
             throws IOException {
+
+        if (DEBUG) {
+            log.debug("Set match properties to {}-{} at {}",
+                      candidateSpan.getStart(),
+                      candidateSpan.getEnd(),
+                      candidateSpan.getDoc()                              
+                );
+        };
         matchDocNumber = candidateSpan.getDoc();
         matchStartPosition = candidateSpan.getStart();
         matchEndPosition = candidateSpan.getEnd();
@@ -280,6 +323,12 @@
 
     @Override
     public boolean skipTo (int target) throws IOException {
+        if (DEBUG) {
+            log.debug("Skip repetitionSpans to {}", target);
+        };
+        matchDocNumber = -1;
+        matchStartPosition = -1;
+        matchEndPosition = -1;
         if (!candidates.isEmpty()) {
             Iterator<CandidateSpan> i = candidates.iterator();
             while (i.hasNext()) {
@@ -298,6 +347,13 @@
                 hasMoreSpans = false;
                 return false;
             }
+            if (DEBUG) {
+                log.debug("Skip firstSpans to {}={} succeed with positions {}-{}",
+                          target,
+                          firstSpans.doc(),
+                          firstSpans.start(),
+                          firstSpans.end());
+            };
         }
         matchList.clear();
         return advance();