Merge "Added payload type identifier in element spans." into payload-id
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index 9616ba3..49ac41e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -20,23 +20,38 @@
 import de.ids_mannheim.korap.query.SpanElementQuery;
 
 /**
- * Enumeration of special spans which length is stored in their
- * payload,
- * representing elements such as phrases, sentences and paragraphs.
+ * Enumeration of spans representing elements such as phrases, sentences and
+ * paragraphs. Span length is stored as a payload.
+ * 
+ * Depth and certainty value payloads are not yet loaded or handled.
  * 
  * @author margaretha
  * @author diewald
  */
 public final class ElementSpans extends SimpleSpans {
     private final TermSpans termSpans;
-    private boolean lazyLoaded = false;
+	private boolean isPayloadLoaded;
 
     private final Logger log = LoggerFactory.getLogger(ElementSpans.class);
     // This advices the java compiler to ignore all loggings
     public static final boolean DEBUG = false;
 
     private byte[] b = new byte[8];
+    
+	public enum PayloadTypeIdentifier {
+		ELEMENT(64),
+		ELEMENT_WITH_TUI(65),
+		ELEMENT_WITH_CERTAINTY_VALUE(66),
+		ELEMENT_WITH_TUI_AND_CERTAINTY_VALUE(67),
+		MILESTONE(68);
+		// Value of the first payload byte that identifies this payload type
+		private final int value;
 
+		private PayloadTypeIdentifier(int value) {
+			this.value = value;
+		}
+    }
+    
 
     /**
      * Constructs ElementSpans for the given {@link SpanElementQuery}.
@@ -58,31 +73,28 @@
             throws IOException {
         super(spanElementQuery, context, acceptDocs, termContexts);
         termSpans = (TermSpans) this.firstSpans;
-        hasMoreSpans = true;
-        // hasSpanId = true;
+		hasMoreSpans = true;
+		// termSpans.next();
     };
 
 
     @Override
     public boolean next () throws IOException {
-        isStartEnumeration = false;
+		isStartEnumeration = false;
+		this.matchPayload = null;
+		matchEndPosition = -1;
+		return advance();
+	};
 
-        if (!hasMoreSpans || !(hasMoreSpans = termSpans.next()))
-            return false;
+	private boolean advance() throws IOException {
+		if (!hasMoreSpans || !(hasMoreSpans = termSpans.next()))
+			return false;
 
-        // Set current values
-        return this.setToCurrent();
-    };
-
-
-    // Set term values to current
-    private boolean setToCurrent () throws IOException {
-        // Get payload
-        this.matchStartPosition = termSpans.start();
-        this.matchDocNumber = termSpans.doc();
-        this.lazyLoaded = false;
-        return true;
-    };
+		this.matchStartPosition = termSpans.start();
+		this.matchDocNumber = termSpans.doc();		
+		isPayloadLoaded = false;
+		return true;
+	};
 
 
     /*
@@ -91,48 +103,51 @@
      * position queries, where spans can be rejected
      * solely based on their starting and doc position.
      */
-    private void processPayload () {
-        if (this.lazyLoaded)
+    private void loadPayload () {
+		if (this.isPayloadLoaded) {
             return;
-
-        // This will prevent failures for IOExceptions
-        this.lazyLoaded = true;
-
-        // No need to check if there is a pl - there has to be a payload!
+		} 
+		else{
+			this.isPayloadLoaded = true;
+		}
+		
+		List<byte[]> payload;
         try {
-            this.matchPayload = termSpans.getPayload();
+			payload = (List<byte[]>) termSpans.getPayload();
         }
         catch (IOException e) {
+			log.warn("Unable to load element payload; using empty element", e);
             this.matchEndPosition = this.matchStartPosition;
             this.setSpanId((short) -1);
-            this.matchPayload = null;
+			this.matchPayload = Collections.<byte[]>emptyList();
             return;
-        };
+		}
 
-        List<byte[]> payload = (List<byte[]>) this.matchPayload;
 
-        if (!payload.isEmpty()) {
 
+		if (!payload.isEmpty()) {
             // Get payload one by one
             final int length = payload.get(0).length;
             final ByteBuffer bb = ByteBuffer.allocate(length);
             bb.put(payload.get(0));
+			
+			this.payloadTypeIdentifier = bb.get(0);
+			this.matchEndPosition = bb.getInt(9);
 
-            // set element end position from payload
-            this.matchEndPosition = bb.getInt(8);
-
-            // Copy element id
-            if (length >= 14) {
-                this.setSpanId(bb.getShort(12));
-                this.hasSpanId = true;
-            }
+			if (payloadTypeIdentifier == PayloadTypeIdentifier.ELEMENT_WITH_TUI.value
+					|| payloadTypeIdentifier == PayloadTypeIdentifier.ELEMENT_WITH_TUI_AND_CERTAINTY_VALUE.value) {
+				this.setSpanId(bb.getShort(14));
+				this.hasSpanId = true;
+			}
             else {
+				// System.out.println(payloadTypeIdentifier);
                 this.setSpanId((short) -1);
             }
 
-            // Copy the start and end character offsets
-            b = Arrays.copyOfRange(bb.array(), 0, 8);
-            this.matchPayload = Collections.singletonList(b);
+			// FIX ME
+			// Copy the start and end character offsets
+			b = Arrays.copyOfRange(bb.array(), 1, 9);
+			this.matchPayload = Collections.singletonList(b);
             return;
         }
 
@@ -144,28 +159,28 @@
 
     @Override
     public int end () {
-        this.processPayload();
+        this.loadPayload();
         return this.matchEndPosition;
     };
 
 
     @Override
     public Collection<byte[]> getPayload () {
-        this.processPayload();
+        this.loadPayload();
         return this.matchPayload;
     };
 
 
     @Override
     public boolean isPayloadAvailable () {
-        this.processPayload();
+        this.loadPayload();
         return !this.matchPayload.isEmpty();
     };
 
 
     @Override
     public short getSpanId () {
-        this.processPayload();
+        this.loadPayload();
         return spanId;
     };
 
@@ -178,7 +193,13 @@
 
         if (hasMoreSpans && firstSpans.doc() < target
                 && firstSpans.skipTo(target)) {
-            return this.setToCurrent();
+			// skipTo() has already positioned the spans on the first match
+			// in or after the target document, so only read the current
+			// values; calling advance() would skip that match via next().
+			this.matchStartPosition = termSpans.start();
+			this.matchDocNumber = termSpans.doc();
+			this.isPayloadLoaded = false;
+			return true;
         };
 
         hasMoreSpans = false;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
index 76a1cec..4a383ee 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
@@ -36,6 +36,7 @@
     protected short spanId;
     protected boolean hasSpanId = false;
 
+	protected byte payloadTypeIdentifier;
 
     public SimpleSpans () {
         collectPayloads = true;
@@ -126,10 +127,10 @@
     }
 
 
-    @Override
+	@Override
     public int end () {
         return matchEndPosition;
-    }
+	}
 
 
     @Override
@@ -176,4 +177,22 @@
         this.spanId = spanId;
     }
 
+	/**
+	 * Gets the payload type identifier (PTI) of the current span
+	 * 
+	 * @return a payload type identifier
+	 */
+	public byte getPayloadTypeIdentifier() {
+		return payloadTypeIdentifier;
+	}
+
+	/**
+	 * Sets the payload type identifier (PTI) of the current span
+	 * 
+	 * @param payloadTypeIdentifier the payload type identifier to set
+	 */
+	public void setPayloadTypeIdentifier(byte payloadTypeIdentifier) {
+		this.payloadTypeIdentifier = payloadTypeIdentifier;
+	}
+
 }
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
index fb61883..ceee21d 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
@@ -36,13 +36,33 @@
         fd.addTV(
                 "base",
                 "bcbabd",
-                "[(0-1)s:a|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|<>:div#0-2$<i>2<s>2|@:class=header$<i>3<s>1|@:class=header$<i>2<s>2]"
-                        + "[(1-2)s:e|_2#1-2|<>:a#1-2$<i>2<s>1|@:class=header$<i>2<s>1]"
-                        + "[(2-3)s:e|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=time$<i>5<s>1]"
-                        + "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=header$<i>5<s>1]"
-                        + "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|<>:a#4-5$<i>5<s>2|@:class=header$<i>5<s>2]"
-                        + "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>1|<>:div#5-6$<i>6<s>-1|@:class=header$<i>6<s>1]"
-                        + "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<i>7<s>1|@:class=header$<i>7<s>2]");
+				"[(0-1)s:a|_1#0-1|"						
+				+ "<>:div$<b>65<i>0<i>2<i>2<b>0<s>2|"
+				+ "<>:div$<b>65<i>0<i>3<i>3<b>0<s>1|"
+				+ "<>:s$<b>65<i>0<i>5<i>5<b>0<s>3|"
+				+ "@:class=header$<i>3<s>1|@:class=header$<i>2<s>2]"
+
+				+ "[(1-2)s:e|_2#1-2|"
+				+ "<>:a$<b>65<i>1<i>2<i>2<b>0<s>1|@:class=header$<i>2<s>1]"
+  
+				+ "[(2-3)s:e|_3#2-3|"
+				+ "<>:div$<b>65<i>2<i>5<i>5<b>0<s>1|@:class=time$<i>5<s>1]"
+  
+				+ "[(3-4)s:a|_4#3-4|"
+				+ "<>:div$<b>65<i>3<i>5<i>5<b>0<s>1|@:class=header$<i>5<s>1]"
+  
+				+ "[(4-5)s:b|_5#4-5|"
+				+ "<>:div$<b>65<i>4<i>5<i>5<b>0<s>1|"
+				+ "<>:a$<b>65<i>4<i>5<i>5<b>0<s>2|@:class=header$<i>5<s>2]"
+		  
+				+ "[(5-6)s:d|_6#5-6|"
+				+ "<>:s$<b>65<i>5<i>6<i>6<b>0<s>1|"
+				+ "<>:div$<b>65<i>5<i>6<i>6<b>0<s>2|@:class=header$<i>6<s>1]"
+  
+				+ "[(6-7)s:d|_7#6-7|"
+				+ "<>:div$<b>65<i>6<i>7<i>7<b>0<s>1|"
+				+ "<>:s$<b>65<i>6<i>7<i>7<b>0<s>2|"
+				+ "@:class=header$<i>7<s>1|@:class=header$<i>7<s>2]");
 
         return fd;
     }
@@ -54,13 +74,24 @@
         fd.addTV(
                 "base",
                 "bcbabd",
-                "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|@:class=header$<i>3<s>1|@:class=title$<i>3<s>1|@:class=book$<i>3<s>1]"
-                        + "[(1-2)s:c|_2#1-2|<>:div#1-2$<i>2<s>1|@:class=header$<i>2<s>1|@:class=title$<i>2<s>1]"
-                        + "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=book$<i>5<s>1]"
-                        + "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<i>5<s>1]"
-                        + "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<i>5<s>1|@:class=book$<i>5<s>1|@:class=title$<i>5<s>1]"
-                        + "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<i>6<s>1]"
-                        + "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<i>7<s>1|@:class=title$<i>7<s>1]");
+				"[(0-1)s:b|_1#0-1|"
+						+ "<>:div$<b>65<i>0<i>3<i>3<b>0<s>1|<>:s$<b>65<i>0<i>5<i>5<b>0<s>2|"
+						+ "@:class=header$<i>3<s>1|@:class=title$<i>3<s>1|@:class=book$<i>3<s>1]"
+						+ "[(1-2)s:c|_2#1-2|"
+						+ "<>:div$<b>65<i>1<i>2<i>2<b>0<s>1|@:class=header$<i>2<s>1|@:class=title$<i>2<s>1]"
+						+ "[(2-3)s:b|_3#2-3|"
+						+ "<>:div$<b>65<i>2<i>3<i>5<b>0<s>1|@:class=book$<i>5<s>1]"
+						+ "[(3-4)s:a|_4#3-4|"
+						+ "<>:div$<b>65<i>3<i>5<i>5<b>0<s>1|@:class=title$<i>5<s>1]"
+						+ "[(4-5)s:b|_5#4-5|"
+						+ "<>:div$<b>65<i>4<i>5<i>5<b>0<s>1|@:class=header$<i>5<s>1|@:class=book$<i>5<s>1|@:class=title$<i>5<s>1]"
+						+ "[(5-6)s:d|_6#5-6|"						
+						+ "<>:div$<b>65<i>5<i>6<i>6<b>0<s>1|"
+						+ "<>:s$<b>65<i>5<i>6<i>6<b>0<s>2|@:class=header$<i>6<s>1]"
+						+ "[(6-7)s:d|_7#6-7|"
+						+ "<>:div$<b>65<i>6<i>7<i>7<b>0<s>1|"
+						+ "<>:s$<b>65<i>6<i>7<i>7<b>0<s>2|"
+						+ "@:class=header$<i>7<s>1|@:class=title$<i>7<s>1]");
 
         return fd;
     }
@@ -72,13 +103,26 @@
         fd.addTV(
                 "base",
                 "bcbabd",
-                "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>1|<>:div#0-3$<i>3<s>2|@:class=header$<i>3<s>2|@:class=book$<i>5<s>1|@:class=book$<i>3<s>2]"
-                        + "[(1-2)s:e|_2#1-2|<>:div#1-2$<i>2<s>1|<>:a#1-2$<i>2<s>2|@:class=book$<i>2<s>2|@:class=header$<i>2<s>1]"
-                        + "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|<>:a#1-2$<i>2<s>2|@:class=header$<i>2<s>2|@:class=book$<i>5<s>1]"
-                        + "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<i>5<s>1]"
-                        + "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<i>5<s>1|@:class=book$<i>5<s>1]"
-                        + "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<i>6<s>1]"
-                        + "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<i>7<s>1|@:class=book$<i>7<s>2]");
+				"[(0-1)s:b|_1#0-1|"
+						+ "<>:s$<b>65<i>0<i>5<i>5<b>0<s>1|"
+						+ "<>:div$<b>65<i>0<i>3<i>3<b>0<s>2|@:class=header$<i>3<s>2|@:class=book$<i>5<s>1|@:class=book$<i>3<s>2]"
+						+ "[(1-2)s:e|_2#1-2|"
+						+ "<>:div$<b>65<i>1<i>2<i>2<b>0<s>1|"
+						+ "<>:a$<b>65<i>1<i>2<i>2<b>0<s>2|@:class=book$<i>2<s>2|@:class=header$<i>2<s>1]"
+						+ "[(2-3)s:b|_3#2-3|"
+						+ "<>:div$<b>65<i>2<i>3<i>5<b>0<s>1|"
+						+ "<>:a$<b>65<i>1<i>2<i>2<b>0<s>2|@:class=header$<i>2<s>2|@:class=book$<i>5<s>1]"
+						+ "[(3-4)s:a|_4#3-4|"
+						+ "<>:div$<b>65<i>3<i>5<i>5<b>0<s>1|@:class=title$<i>5<s>1]"
+						+ "[(4-5)s:b|_5#4-5|"
+						+ "<>:div$<b>65<i>4<i>5<i>5<b>0<s>1|@:class=header$<i>5<s>1|@:class=book$<i>5<s>1]"
+						+ "[(5-6)s:d|_6#5-6|"
+						+ "<>:s$<b>65<i>5<i>6<i>6<b>0<s>2|"
+						+ "<>:div$<b>65<i>5<i>6<i>6<b>0<s>1|@:class=header$<i>6<s>1]"
+						+ "[(6-7)s:d|_7#6-7|"
+						+ "<>:div$<b>65<i>6<i>7<i>7<b>0<s>1|"
+						+ "<>:s$<b>65<i>6<i>7<i>7<b>0<s>2|"
+						+ "@:class=header$<i>7<s>1|@:class=book$<i>7<s>2]");
 
         return fd;
     }
@@ -97,21 +141,27 @@
         SpanAttributeQuery saq = new SpanAttributeQuery(new SpanTermQuery(
                 new Term("base", "@:class=header")), true);
 
-        // div with @class=header
-        SpanQuery sq = new SpanWithAttributeQuery(new SpanElementQuery("base",
-                "div"), saq, true);
+		SpanElementQuery seq = new SpanElementQuery("base", "div");
+
+		// div with @class=header
+		SpanQuery sq = new SpanWithAttributeQuery(seq, saq, true);
 
         kr = ki.search(sq, (short) 10);
 
-        assertEquals((long) 4, kr.getTotalResults());
-        assertEquals(0, kr.getMatch(0).getStartPos());
-        assertEquals(2, kr.getMatch(0).getEndPos());
-        assertEquals(0, kr.getMatch(1).getStartPos());
-        assertEquals(3, kr.getMatch(1).getEndPos());
-        assertEquals(3, kr.getMatch(2).getStartPos());
-        assertEquals(5, kr.getMatch(2).getEndPos());
-        assertEquals(6, kr.getMatch(3).getStartPos());
-        assertEquals(7, kr.getMatch(3).getEndPos());
+		// for (int i = 0; i < kr.getTotalResults(); i++) {
+		// System.out.println(kr.getMatch(i).getLocalDocID() + " "
+		// + kr.getMatch(i).startPos + " " + kr.getMatch(i).endPos);
+		// }
+		//
+		assertEquals((long) 4, kr.getTotalResults());
+		assertEquals(0, kr.getMatch(0).getStartPos());
+		assertEquals(2, kr.getMatch(0).getEndPos());
+		assertEquals(0, kr.getMatch(1).getStartPos());
+		assertEquals(3, kr.getMatch(1).getEndPos());
+		assertEquals(3, kr.getMatch(2).getStartPos());
+		assertEquals(5, kr.getMatch(2).getEndPos());
+		assertEquals(6, kr.getMatch(3).getStartPos());
+		assertEquals(7, kr.getMatch(3).getEndPos());
     }
 
 
@@ -217,10 +267,7 @@
         assertEquals(3, kr.getMatch(0).getStartPos());
         assertEquals(5, kr.getMatch(0).getEndPos());
 
-        //		for (int i = 0; i < kr.getTotalResults(); i++) {
-        //			System.out.println(kr.getMatch(i).getLocalDocID() + " "
-        //					+ kr.getMatch(i).startPos + " " + kr.getMatch(i).endPos);
-        //		}
+
     }
 
 
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java
index 6309d59..8f61549 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java
@@ -1,6 +1,6 @@
 package de.ids_mannheim.korap.index;
 
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
 
 import java.io.IOException;
 
@@ -10,11 +10,11 @@
 import org.junit.Test;
 
 import de.ids_mannheim.korap.KrillIndex;
-import de.ids_mannheim.korap.response.Result;
 import de.ids_mannheim.korap.query.DistanceConstraint;
 import de.ids_mannheim.korap.query.SpanDistanceQuery;
 import de.ids_mannheim.korap.query.SpanElementQuery;
 import de.ids_mannheim.korap.query.SpanNextQuery;
+import de.ids_mannheim.korap.response.Result;
 
 public class TestDistanceExclusionIndex {
 
@@ -122,6 +122,7 @@
                         false, true), true);
 
         kr = ki.search(sq, (short) 10);
+
         assertEquals((long) 1, kr.getTotalResults());
         assertEquals(9, kr.getMatch(0).getStartPos());
         assertEquals(10, kr.getMatch(0).getEndPos());
@@ -178,12 +179,14 @@
         FieldDocument fd = new FieldDocument();
         fd.addString("ID", "doc-0");
         fd.addTV("base", "text", "[(0-1)s:c|_1#0-1]" + "[(1-2)s:e|_2#1-2]"
-                + "[(2-3)s:c|_3#2-3|<>:y#2-4$<i>4]"
-                + "[(3-4)s:c|_4#3-4|<>:x#3-7$<i>7]"
-                + "[(4-5)s:d|_5#4-5|<>:y#4-6$<i>6]"
-                + "[(5-6)s:c|_6#5-6|<>:y#5-8$<i>8]" + "[(6-7)s:d|_7#6-7]"
-                + "[(7-8)s:e|_8#7-8|<>:x#7-9$<i>9]" + "[(8-9)s:e|_9#8-9]"
-                + "[(9-10)s:d|_10#9-10|<>:x#9-10$<i>10]");
+				+ "[(2-3)s:c|_3#2-3|<>:y$<b>64<i>2<i>4<i>4]"
+				+ "[(3-4)s:c|_4#3-4|<>:x$<b>64<i>3<i>7<i>7]"
+				+ "[(4-5)s:d|_5#4-5|<>:y$<b>64<i>4<i>6<i>6]"
+				+ "[(5-6)s:c|_6#5-6|<>:y$<b>64<i>5<i>8<i>8]"
+				+ "[(6-7)s:d|_7#6-7]"
+				+ "[(7-8)s:e|_8#7-8|<>:x$<b>64<i>7<i>9<i>9]"
+				+ "[(8-9)s:e|_9#8-9]"
+				+ "[(9-10)s:d|_10#9-10|<>:x$<b>64<i>9<i>10<i>10]");
         return fd;
     }