Presort Multiterms in MultiTermTokenStream
diff --git a/Changes b/Changes
index c6d33fa..0b28d58 100644
--- a/Changes
+++ b/Changes
@@ -3,6 +3,9 @@
    	- [performance] Updated Lucene dependency from 4.5.1 to 4.10.3,
 	  Updated Jackson dependency from 2.4.0 to 2.4.4,
    	  Updated Jersey dependency from 2.4.1 to 2.15 (diewald)
+        - [feature] Presorting of element terms in the index for coherent
+	  SpanQuery sorting (diewald)
+	  Warning: This is a breaking change!
 
 0.49.3 2015-02-03
         - [documentation] Improved documentation for API classes (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/analysis/MultiTerm.java b/src/main/java/de/ids_mannheim/korap/analysis/MultiTerm.java
index 1f72e8d..754268d 100644
--- a/src/main/java/de/ids_mannheim/korap/analysis/MultiTerm.java
+++ b/src/main/java/de/ids_mannheim/korap/analysis/MultiTerm.java
@@ -35,11 +35,10 @@
  *
  * @author diewald
  */
-public class MultiTerm {
+public class MultiTerm implements Comparable<MultiTerm> {
     public int start, end = 0;
     public String term = null;
-    public Integer posIncr = 1;
-    public boolean storeOffsets = false;
+    private boolean storeOffsets = false;
     public BytesRef payload = null;
 
     private static ByteBuffer bb = ByteBuffer.allocate(8);
@@ -306,6 +305,21 @@
         return sb.toString();
     };
 
+    @Override
+    public int compareTo (MultiTerm o) { // Ascending by end, ties broken by start.
+        if (this.payload == null || o.payload == null) // NOTE(review): non-transitive, confirm intended
+            return 0; // A term without payload ties with every other term.
+        if (this.end < o.end)
+            return -1;
+        else if (this.end > o.end)
+            return 1;
+        else if (this.start < o.start) // Equal end: the smaller start sorts first.
+            return -1;
+        else if (this.start > o.start)
+            return 1;
+        return 0;
+    };
+
 
     /**
      * Represent the MultiTerm as a string.
diff --git a/src/main/java/de/ids_mannheim/korap/analysis/MultiTermToken.java b/src/main/java/de/ids_mannheim/korap/analysis/MultiTermToken.java
index 5332090..b3f0375 100644
--- a/src/main/java/de/ids_mannheim/korap/analysis/MultiTermToken.java
+++ b/src/main/java/de/ids_mannheim/korap/analysis/MultiTermToken.java
@@ -1,6 +1,5 @@
 package de.ids_mannheim.korap.analysis;
 
-import de.ids_mannheim.korap.analysis.MultiTerm;
 import java.util.*;
 
 
@@ -20,10 +19,9 @@
  * @author diewald
  */
 public class MultiTermToken {
-    public int start, end = 0;
     public List<MultiTerm> terms;
     private static short i = 0;
-
+    private boolean sorted = false;
 
     /**
      * Construct a new MultiTermToken by passing a stream of
@@ -34,18 +32,10 @@
     public MultiTermToken (MultiTerm terms, MultiTerm ... moreTerms) {
         this.terms = new ArrayList<MultiTerm>(16);
         
-        // Start position is not equal to end position
-        if (terms.start != terms.end) {
-            this.start = terms.start;
-            this.end   = terms.end;
-        };
-
-        terms.posIncr = 1;
         this.terms.add( terms );
 
         // Further elements on same position
         for (i = 0; i < moreTerms.length; i++) {
-            moreTerms[i].posIncr = 0;
             this.terms.add(moreTerms[i]);
         };
     };
@@ -65,10 +55,7 @@
         // Create a new MultiTerm
         MultiTerm term = new MultiTerm(prefix, surface);
 
-        this.setOffset(term.start, term.end);
-        
         // First word element
-        term.posIncr = 1;
         terms.add( term );
     };
     
@@ -83,17 +70,13 @@
         this.terms = new ArrayList<MultiTerm>(16);
 
         MultiTerm term = new MultiTerm(terms);
-        this.setOffset(term.start, term.end);
 
         // First word element
-        term.posIncr = 1;
         this.terms.add( term );
 
         // Further elements on same position
         for (i = 0; i < moreTerms.length; i++) {
             term = new MultiTerm( moreTerms[i] );
-            this.setOffset(term.start, term.end);
-            term.posIncr = 0;
             this.terms.add(term);
         };
     };
@@ -106,9 +89,8 @@
      * @return The {@link MultiTermToken} object for chaining.
      */
     public MultiTermToken add (MultiTerm term) {
-        term.posIncr = 0;
-        this.setOffset(term.start, term.end);
         terms.add(term);
+        this.sorted = false;
         return this;
     };
 
@@ -122,11 +104,7 @@
     public MultiTermToken add (String term) {
         if (term.length() == 0)
             return this;
-        MultiTerm mt = new MultiTerm(term);
-        this.setOffset(mt.start, mt.end);
-        mt.posIncr = 0;
-        terms.add(mt);
-        return this;
+        return this.add(new MultiTerm(term));
     };
 
 
@@ -140,34 +118,19 @@
     public MultiTermToken add (char prefix, String term) {
         if (term.length() == 0)
             return this;
-        MultiTerm mt = new MultiTerm(prefix, term);
-        this.setOffset(mt.start, mt.end);
-        mt.posIncr = 0;
-        terms.add(mt);
-        return this;
+        return this.add(new MultiTerm(prefix, term));
     };
 
 
     /**
-     * Set the start and end character offset information
-     * of the MultiTermToken.
+     * Get a {@link MultiTerm} by index, after sorting the terms.
      *
-     * @param start The character position of the token start.
-     * @param end The character position of the token end.
-     * @return The {@link MultiTermToken} object for chaining.
+     * @param index The index position of a {@link MultiTerm}
+     *        in the {@link MultiTermToken}.
+     * @return A {@link MultiTerm}.
      */
-    public MultiTermToken setOffset (int start, int end) {
-
-        // No value to set - offsets indicating a null string
-        if (start != end) {
-            this.start =
-                (this.start == 0 || start < this.start) ?
-                start : this.start;
-
-            this.end = end > this.end ? end : this.end;
-        };
-
-        return this;
+    public MultiTerm get (int index) {
+        return this.sort().terms.get(index);
     };
 
 
@@ -183,6 +146,20 @@
     };
 
 
+    /**
+     * Sort the {@link MultiTerm MultiTerms} via {@link MultiTerm#compareTo}.
+     *
+     * @return The {@link MultiTermToken} object for chaining.
+     */
+    public MultiTermToken sort () {
+        if (this.sorted) // Nothing was added via add() since the last sort.
+            return this;
+
+        Collections.sort(this.terms);
+        this.sorted = true;
+        return this;
+    };
+
 
     /**
      * Serialize the MultiTermToken to a string.
@@ -191,16 +168,9 @@
      *         with leading offset information.
      */
     public String toString () {
+        this.sort();
         StringBuffer sb = new StringBuffer();
         sb.append('[');
-        if (this.start != this.end) {
-            sb.append('(')
-                .append(this.start)
-                .append('-')
-                .append(this.end)
-                .append(')');
-        };
-
         for (i = 0; i < this.terms.size() - 1; i++) {
             sb.append(this.terms.get(i).toString()).append('|');
         };
diff --git a/src/main/java/de/ids_mannheim/korap/analysis/MultiTermTokenStream.java b/src/main/java/de/ids_mannheim/korap/analysis/MultiTermTokenStream.java
index 8a7f3a7..521f7b2 100644
--- a/src/main/java/de/ids_mannheim/korap/analysis/MultiTermTokenStream.java
+++ b/src/main/java/de/ids_mannheim/korap/analysis/MultiTermTokenStream.java
@@ -1,7 +1,5 @@
 package de.ids_mannheim.korap.analysis;
 
-import de.ids_mannheim.korap.analysis.MultiTerm;
-import de.ids_mannheim.korap.analysis.MultiTermToken;
 import static de.ids_mannheim.korap.util.KorapByte.*;
 import org.apache.lucene.util.BytesRef;
 
@@ -119,8 +117,7 @@
      */
     public MultiTermTokenStream addMultiTermToken
         (MultiTerm mts, MultiTerm ... moreTerms) {
-        this.addMultiTermToken(new MultiTermToken(mts, moreTerms));
-        return this;
+        return this.addMultiTermToken(new MultiTermToken(mts, moreTerms));
     };
 
 
@@ -134,8 +131,7 @@
      */
     public MultiTermTokenStream addMultiTermToken
         (char prefix, String surface) {
-        this.addMultiTermToken(new MultiTermToken(prefix, surface));
-        return this;
+        return this.addMultiTermToken(new MultiTermToken(prefix, surface));
     };
 
 
@@ -149,8 +145,7 @@
      */
     public MultiTermTokenStream addMultiTermToken
         (String surface, String ... moreTerms) {
-        this.addMultiTermToken(new MultiTermToken(surface, moreTerms));
-        return this;
+        return this.addMultiTermToken(new MultiTermToken(surface, moreTerms));
     };
 
 
@@ -310,6 +305,9 @@
         // Get current token
         MultiTermToken mtt = this.multiTermTokens.get( this.mttIndex );
 
+        // Sort the MultiTermToken
+        mtt.sort();
+
         // Last term reached
         if (mtt.terms.size() == this.mtIndex) {
             this.mtIndex = 0;
@@ -331,7 +329,7 @@
         MultiTerm mt = mtt.terms.get(this.mtIndex);
 
         // Set the relative position to the former term
-        posIncrAttr.setPositionIncrement( mt.posIncr );
+        posIncrAttr.setPositionIncrement( this.mtIndex == 0 ? 1 : 0 );
         charTermAttr.setEmpty();
         charTermAttr.append( mt.term );
 
@@ -368,7 +366,7 @@
             if (payload.length > 0)
                 sb.append('$').append(payload.toString());
             sb.append(']');
-            sb.append(" with increment ").append(mt.posIncr);
+            sb.append(" with increment ").append(this.mtIndex == 0 ? 1 : 0);
             
             log.trace(sb.toString());
         };
diff --git a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java
index 138c031..9d46025 100644
--- a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java
+++ b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermToken.java
@@ -25,12 +25,12 @@
         assertEquals("[t:test|a:abbruch|b:banane]", mtt.toString());
         mtt.add("c:chaos#21-26");
         assertEquals(
-            "[(21-26)t:test|a:abbruch|b:banane|c:chaos#21-26]",
+            "[t:test|a:abbruch|b:banane|c:chaos#21-26]",
             mtt.toString()
         );
         mtt.add("d:dadaismus#21-28$vergleich");
         assertEquals(
-            "[(21-28)t:test|a:abbruch|b:banane|c:chaos#21-26|" +
+            "[t:test|a:abbruch|b:banane|c:chaos#21-26|" +
             "d:dadaismus#21-28$vergleich]",
             mtt.toString()
         );
@@ -40,14 +40,12 @@
     @Test
     public void multiTermTokenOffsets () {
         MultiTermToken mtt = new MultiTermToken("t:test#23-27");
-        assertEquals("[(23-27)t:test#23-27]", mtt.toString());
+        assertEquals("[t:test#23-27]", mtt.toString());
         mtt.add("b:baum#34-45");
-        assertEquals("[(23-45)t:test#23-27|b:baum#34-45]", mtt.toString());
+        assertEquals("[t:test#23-27|b:baum#34-45]", mtt.toString());
         mtt.add("c:cannonball#34-45$tatsache");
-        assertEquals("[(23-45)t:test#23-27|b:baum#34-45|" +
+        assertEquals("[t:test#23-27|b:baum#34-45|" +
                      "c:cannonball#34-45$tatsache]", mtt.toString());
-        assertEquals(23, mtt.start);
-        assertEquals(45, mtt.end);
         assertEquals(3, mtt.getSize());
     };
 };
diff --git a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java
index 04f08b6..feade6f 100644
--- a/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java
+++ b/src/test/java/de/ids_mannheim/korap/analysis/TestMultiTermTokenStream.java
Binary files differ
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestIndex.java
index d1dd608..42d18ed 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestIndex.java
@@ -78,17 +78,11 @@
 	assertEquals(test.terms.get(0).term, "hunde");
 	assertEquals(test.terms.get(1).term, "pos:n");
 	assertEquals(test.terms.get(2).term, "m:gen:pl");
-	assertEquals(test.terms.get(0).posIncr, 1, 1);
-	assertEquals(test.terms.get(1).posIncr, 0, 1);
-	assertEquals(test.terms.get(2).posIncr, 0, 1);
 
 	test = new MultiTermToken("hunde", "pos:n", "m:gen:pl");
 	assertEquals(test.terms.get(0).term, "hunde");
 	assertEquals(test.terms.get(1).term, "pos:n");
 	assertEquals(test.terms.get(2).term, "m:gen:pl");
-	assertEquals(test.terms.get(0).posIncr, 1, 1);
-	assertEquals(test.terms.get(1).posIncr, 0, 1);
-	assertEquals(test.terms.get(2).posIncr, 0, 1);
     };
 
     private List initIndexer () throws IOException {