Added sorting to Subspans.

Change-Id: If93a1e97eee01fa30e9aeb7586704888733ead22
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
index ace6183..5f18ff1 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
@@ -37,7 +37,7 @@
     private boolean isSorted = true;
     private boolean matchTemporaryClass = false;
     private boolean removeTemporaryClasses = false;
-
+    private int windowSize = 10;
 
     /**
      * Construct a new SpanFocusQuery.
@@ -60,7 +60,6 @@
         this.classNumbers = classNumbers;
     };
 
-
     /**
      * Construct a new SpanFocusQuery. The class to focus on defaults
      * to
@@ -209,4 +208,14 @@
         this.removeTemporaryClasses = rem;
     }
 
+
+    public int getWindowSize() {
+        return windowSize;
+    }
+
+
+    public void setWindowSize(int windowSize) {
+        this.windowSize = windowSize;
+    }
+
 };
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
index 1a1ab0a..c547edc 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
@@ -10,7 +10,6 @@
 import org.apache.lucene.search.spans.Spans;
 import org.apache.lucene.util.Bits;
 
-import de.ids_mannheim.korap.query.spans.ElementSpans;
 import de.ids_mannheim.korap.query.spans.SubSpans;
 
 /**
@@ -41,6 +40,7 @@
 public class SpanSubspanQuery extends SimpleSpanQuery {
 
     private int startOffset, length;
+    private int windowSize = 10;
 
 
     /**
@@ -138,4 +138,13 @@
     public void setLength (int length) {
         this.length = length;
     }
+
+
+    public int getWindowSize() {
+        return windowSize;
+    }
+
+    public void setWindowSize(int windowSize) {
+        this.windowSize = windowSize;
+    }
 }
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpanComparator.java b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpanComparator.java
new file mode 100644
index 0000000..0ba3c29
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpanComparator.java
@@ -0,0 +1,27 @@
+package de.ids_mannheim.korap.query.spans;
+
+import java.util.Comparator;
+
+public class CandidateSpanComparator implements Comparator<CandidateSpan> {
+
+    @Override
+    public int compare(CandidateSpan o1, CandidateSpan o2) {
+        if (o1.doc == o2.doc) {
+            if (o1.getStart() == o2.getStart()) {
+                if (o1.getEnd() == o2.getEnd()) return 0;
+                if (o1.getEnd() > o2.getEnd())
+                    return 1;
+                else
+                    return -1;
+            }
+            else if (o1.getStart() < o2.getStart())
+                return -1;
+            else
+                return 1;
+        }
+        else if (o1.doc < o2.doc)
+            return -1;
+        else
+            return 1;
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
index a23240d..202d3d4 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
@@ -54,12 +54,12 @@
 
     private boolean isSorted, matchTemporaryClass, removeTemporaryClasses;
     // private List<CandidateSpan> candidateSpans;
-    private int windowSize = 10;
+    private int windowSize;
     private int currentDoc;
     private int prevStart;
     private int prevDoc;
     private PriorityQueue<CandidateSpan> candidates;
-    private FocusSpanComparator comparator;
+    private CandidateSpanComparator comparator;
 
     /**
      * Construct a FocusSpan for the given {@link SpanQuery}.
@@ -86,10 +86,11 @@
                     "At least one class number must be specified.");
         }
         classNumbers = query.getClassNumbers();
+        windowSize = query.getWindowSize();
         isSorted = query.isSorted();
         matchTemporaryClass = query.matchTemporaryClass();
         removeTemporaryClasses = query.removeTemporaryClasses();
-        // candidateSpans = new ArrayList<CandidateSpan>();
+
         candidates = new PriorityQueue<>(windowSize, comparator);
         hasMoreSpans = firstSpans.next();
         currentDoc = firstSpans.doc();
@@ -284,28 +285,4 @@
     public long cost () {
         return firstSpans.cost();
     };
-    
-    class FocusSpanComparator implements Comparator<CandidateSpan> {
-
-        @Override
-        public int compare(CandidateSpan o1, CandidateSpan o2) {
-            if (o1.doc == o2.doc) {
-                if (o1.getStart() == o2.getStart()) {
-                    if (o1.getEnd() == o2.getEnd()) return 0;
-                    if (o1.getEnd() > o2.getEnd())
-                        return 1;
-                    else
-                        return -1;
-                }
-                else if (o1.getStart() < o2.getStart())
-                    return -1;
-                else
-                    return 1;
-            }
-            else if (o1.doc < o2.doc)
-                return -1;
-            else
-                return 1;
-        }
-    }
 };
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java
index 3b85685..5570b8f 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java
@@ -3,6 +3,7 @@
 import java.io.IOException;
 import java.util.Map;
 import java.util.ArrayList;
+import java.util.PriorityQueue;
 
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.Term;
@@ -33,6 +34,12 @@
     public static final boolean DEBUG = false;
 
     private int startOffset, length;
+    private int windowSize;
+    private int currentDoc;
+    private int prevStart;
+    private int prevDoc;
+    private PriorityQueue<CandidateSpan> candidates;
+    private CandidateSpanComparator comparator;
 
     /**
      * Constructs SubSpans for the given {@link SpanSubspanQuery}
@@ -52,6 +59,8 @@
         this.startOffset = subspanQuery.getStartOffset();
         this.length = subspanQuery.getLength();
         this.matchPayload = new ArrayList<byte[]>(6);
+        this.windowSize = subspanQuery.getWindowSize();
+        candidates = new PriorityQueue<>(windowSize, comparator);
 
         if (DEBUG) {
             log.trace("Init SubSpan at {} with length {}", this.startOffset, this.length);
@@ -75,82 +84,128 @@
      * @throws IOException
      */
     private boolean advance () throws IOException {
-        while (hasMoreSpans) {
-            if (findMatch()) {
+        while (hasMoreSpans || candidates.size() > 0) {
+            CandidateSpan cs = new CandidateSpan(firstSpans);
+            if (startOffset > 0) {
+                if (findMatch(cs)) {
+                    setMatch(cs);
+                    hasMoreSpans = firstSpans.next();
+                    return true;
+                }
                 hasMoreSpans = firstSpans.next();
+            }
+            else if (candidates.isEmpty()) {
+                currentDoc = firstSpans.doc();
+                collectCandidates();
+            }
+            else {
+                setMatch(candidates.poll());
+                collectCandidates();
                 return true;
             }
-            hasMoreSpans = firstSpans.next();
         }
         return false;
     }
 
+    private void collectCandidates() throws IOException {
+
+        while (hasMoreSpans && candidates.size() < windowSize
+                && firstSpans.doc() == currentDoc) {
+            CandidateSpan cs;
+            if (findMatch(cs = new CandidateSpan(firstSpans))) {
+                if (cs.getDoc() == prevDoc && cs.getStart() < prevStart) {
+                    log.warn("Span (" + cs.getStart() + ", " + cs.getEnd()
+                            + ") is out of order and skipped.");
+                }
+                else {
+                    candidates.add(cs);
+                }
+            }
+            hasMoreSpans = firstSpans.next();
+        }
+    }
 
     /**
      * Sets the properties of the current match/subspan.
      * 
      * @throws IOException
      */
-    public boolean findMatch () throws IOException {
+    public boolean findMatch(CandidateSpan cs) throws IOException {
 
         // Check at span ending
         if (this.startOffset < 0) {
-            matchStartPosition = firstSpans.end() + startOffset;
-            if (matchStartPosition < firstSpans.start()) {
-                matchStartPosition = firstSpans.start();
+            cs.setStart(firstSpans.end() + startOffset);
+            if (cs.getStart() < firstSpans.start()) {
+                cs.setStart(firstSpans.start());
             };
         }
         // Check at span beginning
         else {
-            matchStartPosition = firstSpans.start() + startOffset;
-            if (matchStartPosition >= firstSpans.end()) {
+            cs.setStart(firstSpans.start() + startOffset);
+            if (cs.getStart() >= firstSpans.end()) {
                 return false;
             }
         }
 
         // Find end position of span
         if (this.length > 0) {
-            matchEndPosition = matchStartPosition + this.length;
-            if (matchEndPosition > firstSpans.end()) {
-                matchEndPosition = firstSpans.end();
+            cs.setEnd(cs.getStart() + this.length);
+            if (cs.getEnd() > firstSpans.end()) {
+                cs.setEnd(firstSpans.end());
             }
         }
         else {
-            matchEndPosition = firstSpans.end();
+            cs.setEnd(firstSpans.end());
         }
 
-        matchPayload.clear();
+        // matchPayload.clear();
 
         // Remove element payloads
         for (byte[] payload : firstSpans.getPayload()) {
             if ((payload[0] & ((byte) 64)) != 0) {
                 continue;
             };
-            matchPayload.add(payload.clone());
+            cs.getPayloads().add(payload.clone());
         };
 
-        matchDocNumber = firstSpans.doc();
+        cs.setDoc(firstSpans.doc());
 
         if (DEBUG) {
             log.trace("Start at absolute position {} " +
                       "and end at absolute position {}",
-                      matchStartPosition,
-                      matchEndPosition);
+                    cs.getStart(),
+                    cs.getEnd());
         };
 
         return true;
     }
 
+    private void setMatch(CandidateSpan cs) {
+        matchStartPosition = cs.getStart();
+        prevStart = matchStartPosition;
+        matchEndPosition = cs.getEnd();
+        matchDocNumber = cs.getDoc();
+        prevDoc = matchDocNumber;
+        matchPayload.clear();
+        matchPayload.addAll(cs.getPayloads());
+    }
 
     @Override
     public boolean skipTo (int target) throws IOException {
-        if (hasMoreSpans && (firstSpans.doc() < target)) {
-            if (!firstSpans.skipTo(target)) {
-                hasMoreSpans = false;
-                return false;
+        if (candidates.size() > 0) {
+            CandidateSpan cs;
+            while ((cs = candidates.poll()) != null) {
+                if (cs.getDoc() == target) {
+                    return next();
+                }
             }
         }
-        matchPayload.clear();
+        if (firstSpans.doc() == target) {
+            return next();
+        }
+        if (firstSpans.doc() < target && firstSpans.skipTo(target)) {
+            return next();
+        }
         return advance();
     }
 
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
index f09301f..eb719b1 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
@@ -9,10 +9,13 @@
 import org.junit.Test;
 
 import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.response.Match;
 import de.ids_mannheim.korap.response.Result;
 import de.ids_mannheim.korap.query.DistanceConstraint;
+import de.ids_mannheim.korap.query.SpanClassQuery;
 import de.ids_mannheim.korap.query.SpanDistanceQuery;
 import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.query.SpanFocusQuery;
 import de.ids_mannheim.korap.query.SpanSubspanQuery;
 
 /*
@@ -139,6 +142,10 @@
         SpanSubspanQuery ssq = new SpanSubspanQuery(new SpanElementQuery("base", "x"), -1, 1, true);
         kr = ki.search(ssq, (short) 10);
         
+        for (Match km : kr.getMatches()) {
+            System.out.println(km.getStartPos() + "," + km.getEndPos()
+                    + km.getSnippetBrackets());
+        }
         assertEquals(2, kr.getTotalResults());
         assertEquals("a [b ]c ", kr.getMatch(0).getSnippetBrackets());
         assertEquals("a b [c ]", kr.getMatch(1).getSnippetBrackets());