Support general inline markers (instead of just pagebreaks) (fixes #132)

Change-Id: If7318080d5195387cd414e1741d87d032c817d7a
diff --git a/Changes b/Changes
index 601eb41..6c66f00 100644
--- a/Changes
+++ b/Changes
@@ -1,7 +1,8 @@
-0.62.4 2024-05-22
+0.62.4 2024-05-27
     - [feature] Make match and context size configurable (address #128, 
       diewald & margaretha)
     - [enhancement] Separate max length for token and char context (margaretha)  
+    - [feature] Support for inline markers (fixes #132, diewald)
     
 0.62.3 2024-04-16
     - [cleanup] Added getDocBitsSupplier to VirtualCorpusFilter (margaretha)
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index d1e3cde..3f68608 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -1584,7 +1584,8 @@
                     // Add snippet if existing
                     if (snippets) {
                         match.setContext(kr.getContext());
-                        match.retrievePagebreaks("~:base/s:pb");
+                        match.retrieveMarkers("~:base/s:pb");
+                        match.retrieveMarkers("~:base/s:marker");
 
                         if (DEBUG)
                             log.trace("Retrieve pagebreaks from index");
diff --git a/src/main/java/de/ids_mannheim/korap/index/MultiTerm.java b/src/main/java/de/ids_mannheim/korap/index/MultiTerm.java
index ae49892..d06b00c 100644
--- a/src/main/java/de/ids_mannheim/korap/index/MultiTerm.java
+++ b/src/main/java/de/ids_mannheim/korap/index/MultiTerm.java
@@ -1,10 +1,9 @@
 package de.ids_mannheim.korap.index;
 
-import static de.ids_mannheim.korap.util.KrillArray.*;
 import de.ids_mannheim.korap.util.CorpusDataException;
 import org.apache.lucene.util.BytesRef;
 import java.nio.ByteBuffer;
-import java.util.*;
+import java.nio.charset.StandardCharsets;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -51,7 +50,7 @@
     private boolean storeOffsets = false;
     public BytesRef payload = null;
 
-    private static ByteBuffer bb = ByteBuffer.allocate(8);
+    private static ByteBuffer bb = ByteBuffer.allocate(64);
     private static String[] stringOffset;
 
     private static short i, l;
@@ -403,7 +402,6 @@
 
                 try {
                     for (i = 1; i < pls.length;) {
-
                         // Resize the bytebuffer
                         if ((bb.capacity() - l) < 8) {
                             bb = ByteBuffer.allocate(bb.capacity() + 8)
@@ -428,6 +426,20 @@
                                 bb.putLong(Long.parseLong(pls[i]));
                                 l += 8;
                                 break;
+                            case "<x>": // bytes
+
+                                byte[] data = pls[i].getBytes(StandardCharsets.UTF_8);
+                                
+                                if ((bb.capacity() - l) < (data.length + 4)) {
+                                    bb = ByteBuffer.allocate(bb.capacity() + data.length + 4)
+                                            .put(bb.array());
+                                    bb.position(l);
+                                };
+
+                                bb.putInt(data.length);
+                                bb.put(data);
+                                l += data.length + 4;
+                                break;
                         };
                         i += 2;
                     };
diff --git a/src/main/java/de/ids_mannheim/korap/index/MultiTermTokenStream.java b/src/main/java/de/ids_mannheim/korap/index/MultiTermTokenStream.java
index 50e39aa..f777184 100644
--- a/src/main/java/de/ids_mannheim/korap/index/MultiTermTokenStream.java
+++ b/src/main/java/de/ids_mannheim/korap/index/MultiTermTokenStream.java
@@ -56,7 +56,7 @@
     private int mttIndex = 0, mtIndex = 0;
     private short i = 0;
 
-    private ByteBuffer payload = ByteBuffer.allocate(36);
+    private ByteBuffer payload = ByteBuffer.allocate(512);
 
 
     /**
diff --git a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
index efb5535..f6eecdc 100644
--- a/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
+++ b/src/main/java/de/ids_mannheim/korap/index/TermInfo.java
@@ -97,6 +97,12 @@
                 tterm = tterm.substring(2);
                 break;
 
+            case '~':
+                this.type = "mark";
+                ttype = 5;
+                tterm = tterm.substring(2);
+                break;
+
             default:
                 // term
                 this.type = "term";
@@ -131,6 +137,18 @@
             };
         }
 
+        else if (ttype == 5) {
+            pti = this.payload.get(); // Ignore PTI - temporary!!!
+            this.value = tterm;
+
+            // This also means that the startchar may contain
+            // the position (aka page number) of the marker
+            this.startChar = this.payload.getInt();
+            this.endChar = this.payload.getInt();
+
+            // NOTE: the payload may additionally carry an <x> (bytes) value; only the two ints above are read here
+        }
+
         // for positions (aka offset tokens)
         else {
             this.value = tterm;
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 5bb6ad1..7d91745 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -13,6 +13,8 @@
 import java.util.LinkedList;
 import java.util.List;
 
+import java.nio.charset.StandardCharsets;
+
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
@@ -92,12 +94,13 @@
 
     // Logger
     private final static Logger log = LoggerFactory.getLogger(Match.class);
-	
+
 	// end marker of highlights that are pagebreaks
 	private static final int PB_MARKER = -99999;
+    private static final int ALL_MARKER = -99998;
 
 	// Textual elements that are in context
-	private static final int CONTEXT = -99998;
+	private static final int CONTEXT = -99997;
 
     // This advices the java compiler to ignore all loggings
     public static final boolean DEBUG = false;
@@ -237,7 +240,7 @@
     /**
      * Private class of highlights.
 	 * TODO: This should probably be renamed, as it not only contains highlights
-	 * but also annotations, pagebreaks and relations
+	 * but also annotations, markers, pagebreaks and relations
      */
     private class Highlight {
         public int start, end;
@@ -285,6 +288,19 @@
 			this.end = PB_MARKER;
 			this.number = pagenumber;
 		};
+
+		// Marker
+		public Highlight (int start, String marker) {
+			this.start = start;
+			this.end = ALL_MARKER;
+
+            // TODO: This can overflow!
+            if (annotationNumberCounter < 2048) {
+                this.number = annotationNumberCounter++;
+                annotationNumber.put(this.number, marker);
+            };
+		};
+
     };
 
 
@@ -513,6 +529,11 @@
 		this.addHighlight(new Highlight(start, pagenumber));
 	};
 
+	public void addMarker (int start, String data) {
+		this.addHighlight(new Highlight(start, data));
+	};
+
+
     /**
      * Get document id.
      */
@@ -562,7 +583,7 @@
 
         // Iterate over highlights to find matching class
         for (Highlight h : this.highlight) {
-            if (h.number == number && h.end != PB_MARKER)
+            if (h.number == number && h.end != PB_MARKER && h.end != ALL_MARKER)
                 return h.start;
         };
 
@@ -734,7 +755,7 @@
         // There are highlights to integrate
         if (this.highlight != null) {
             for (Highlight h : this.highlight) {
-                if (h.number >= 256 || h.end == PB_MARKER)
+                if (h.number >= 256 || h.end == PB_MARKER || h.end == ALL_MARKER)
                     continue;
 
                 // Add highlight to the snippet
@@ -767,7 +788,11 @@
 	@JsonIgnore
     public String getPosID (String pos) {
 
-		String[] startEnd = pos.split("-");
+        if (pos == null) {
+            return "";
+        };
+
+        String[] startEnd = pos.split("-");
 		if (startEnd.length == 2) {
 			return this.getPosID(
 				Integer.parseInt(startEnd[0]),
@@ -845,35 +870,35 @@
     };  
 
 	
-	// Retrieve pagebreaks in a certain area
-	public List<int[]> retrievePagebreaks (String pb) {
+	// Retrieve markers in a certain area
+	public List<int[]> retrieveMarkers (String marker) {
 		if (this.positionsToOffset != null) {
-			return this.retrievePagebreaks(
+			return this.retrieveMarkers(
 				this.positionsToOffset.getLeafReader(),
 				(Bits) null,
 				"tokens",
-				pb
+				marker
 				);
 		};
 
 		return null;
 	};
 
-	// Retrieve pagebreaks in a certain area
+	// Retrieve markers in a certain area
     // THIS IS NOT VERY CLEVER - MAKE IT MORE CLEVER!
-    public List<int[]> retrievePagebreaks (LeafReaderContext atomic,
+    public List<int[]> retrieveMarkers (LeafReaderContext atomic,
 										   Bits bitset,
 										   String field,
-										   String pb) {
+										   String marker) {
 
-		// List of relevant pagebreaks
+		// List of relevant pagebreaks - only used for pagebreak markers!
 		List<int[]> pagebreaks = new ArrayList<>(24);
 
 		int charOffset = 0, pagenumber = 0, start = 0;
 
 		if (DEBUG) {
             log.debug("=================================");
-			log.debug("Retrieve pagebreaks between {}-{}",
+			log.debug("Retrieve markers between {}-{}",
 					  this.getStartPos(),
 					  this.getEndPos());
         };
@@ -881,37 +906,37 @@
 		try {
 
             // Store character offsets in ByteBuffer
-            ByteBuffer bb = ByteBuffer.allocate(16);
+            ByteBuffer bb = ByteBuffer.allocate(256);
 
-			// Store last relevant pagebreak in byte array
+			// Store last relevant marker in byte array
 			byte[] b = null;
 
-			SpanTermQuery stq = new SpanTermQuery(new Term(field, pb));
+			SpanTermQuery stq = new SpanTermQuery(new Term(field, marker));
 
 			if (DEBUG)
-				log.trace("Check pagebreaks with {}", stq.toString());
+				log.trace("Check markers with {}", stq.toString());
 
-			Spans pagebreakSpans = stq.getSpans(
+			Spans markerSpans = stq.getSpans(
 				atomic, bitset, new HashMap<Term, TermContext>()
 				);
 
 			// Iterate over all pagebreaks
-			while (pagebreakSpans.next() == true) {
+			while (markerSpans.next() == true) {
 
 				if (DEBUG) {
 					log.debug("There is a pagebreak at {}/{} and we are at {}",
-							  pagebreakSpans.doc(),
-							  pagebreakSpans.start(),
+							  markerSpans.doc(),
+							  markerSpans.start(),
                               this.localDocID);
 				};
 				
 				// Current pagebreak is not in the correct document
-                if (pagebreakSpans.doc() != this.localDocID) {
-                    if (pagebreakSpans.doc() < this.localDocID) {
-                        pagebreakSpans.skipTo(this.localDocID);
+                if (markerSpans.doc() != this.localDocID) {
+                    if (markerSpans.doc() < this.localDocID) {
+                        markerSpans.skipTo(this.localDocID);
                         
                         // No pagebreaks in this document
-                        if (pagebreakSpans.doc() != this.localDocID)
+                        if (markerSpans.doc() != this.localDocID)
                             break;
                     }
                     else {
@@ -921,19 +946,19 @@
                 };
 
 				if (DEBUG)
-					log.debug("The pagebreak occurs in the document");
-				
-				// There is a pagebreak found - check,
+					log.debug("The marker occurs in the document");
+
+				// There is a marker found - check,
 				// if it is in the correct area
-				if (pagebreakSpans.start() <= this.getStartPos()) {
+				if (markerSpans.start() < this.getStartPos()) {
 
 					// Only the first payload is relevant
-					b = pagebreakSpans.getPayload().iterator().next();
-					start = pagebreakSpans.start();
+					b = markerSpans.getPayload().iterator().next();
+					start = markerSpans.start();
 
                     if (DEBUG)
-						log.debug("PB start position is before match at {}:{}",
-								  pagebreakSpans.start(),
+						log.debug("Marker start position is before match at {}:{}",
+								  markerSpans.start(),
                                   b);
 					
 				}
@@ -941,48 +966,85 @@
 				// This is the first pagebreak inside the match!
 				else {
 
-					// b is already defined!
+                    // b is already defined!
+                    // It may have been set by a marker seen in an earlier loop iteration
 					if (b != null) {
 						bb.rewind();
 						bb.put(b);
 						bb.rewind();
 
 						pagenumber = bb.getInt();
-						charOffset = bb.getInt();
+                        charOffset = bb.getInt();
 
-						if (DEBUG)
-							log.debug("Add pagebreak to list: {}-{}", charOffset, pagenumber);
+                        // This marker is a pagebreak
+                        if (pagenumber != 0) {
+                            if (DEBUG)
+						    	log.debug("Add pagebreak to list: {}-{}", charOffset, pagenumber);
 						
-						// This is the first pagebreak!
-						pagebreaks.add(new int[]{charOffset, pagenumber});
+						    // This is the first pagebreak!
+						    pagebreaks.add(new int[]{charOffset, pagenumber});
                         
-						if (start >= this.getStartPos()) {
+						    if (start >= this.getStartPos()) {
+    							if (DEBUG)
+	    							log.debug("Add marker to rendering: {}-{}",
+		    								  charOffset,
+			    							  pagenumber);
+				    			this.addPagebreak(charOffset, pagenumber);
+					    	};
 
-							if (DEBUG)
-								log.debug("Add pagebreak to rendering: {}-{}",
-										  charOffset,
-										  pagenumber);
-							this.addPagebreak(charOffset, pagenumber);
-						};
+                            // This marker is no pagebreak
+                        } else {
+                            int bytelength = bb.getInt();
+                            byte[] anno = new byte[bytelength];
+                            bb.get(anno, 0, bytelength);
+                            String annoStr = new String(anno, StandardCharsets.UTF_8);
+                            this.addMarker(charOffset, annoStr);
+                        }
+
                         b = null;
 					}
 
 					// b wasn't used yet
-					if (pagebreakSpans.start() <= this.getEndPos()) {
+					if (markerSpans.start() <= this.getEndPos()) {
 
 						// Set new pagebreak
 						// Only the first payload is relevant
-						b = pagebreakSpans.getPayload().iterator().next();
+						b = markerSpans.getPayload().iterator().next();
 						bb.rewind();
 						bb.put(b);
 						bb.rewind();
 							
 						pagenumber = bb.getInt();
 						charOffset = bb.getInt();
+
+                        // This marker is a pagebreak
+                        if (pagenumber != 0) {
+                            if (DEBUG)
+						    	log.debug("Add pagebreak to list: {}-{}", charOffset, pagenumber);
 						
-						// This is the first pagebreak!
-						pagebreaks.add(new int[]{charOffset, pagenumber});
-						this.addPagebreak(charOffset,pagenumber);
+						    // This is the first pagebreak!
+						    pagebreaks.add(new int[]{charOffset, pagenumber});
+                        
+						    if (start >= this.getStartPos()) {
+    							if (DEBUG)
+	    							log.debug("Add pagebreak to rendering: {}-{}",
+		    								  charOffset,
+			    							  pagenumber);
+				    			this.addPagebreak(charOffset, pagenumber);
+					    	};
+
+                        
+                        }
+                            // This marker is no pagebreak
+                        else {
+                            int bytelength = bb.getInt();
+
+                            byte[] anno = new byte[bytelength];
+                            bb.get(anno);
+                            String annoStr = new String(anno, StandardCharsets.UTF_8);
+                            this.addMarker(charOffset, annoStr);
+                        }
+
                         b = null;
 					}
 
@@ -993,6 +1055,7 @@
 				};
 			};
 
+            // That's identical to the above approach and should only occur once
             if (b != null) {
                 bb.rewind();
                 bb.put(b);
@@ -1001,21 +1064,34 @@
                 pagenumber = bb.getInt();
                 charOffset = bb.getInt();
 
-                if (DEBUG)
-                    log.debug("Add pagebreak to list: {}-{}", charOffset, pagenumber);
-						
-                // This is a remembered pagebreak!
-                pagebreaks.add(new int[]{charOffset, pagenumber});
+                // This marker is a pagebreak
+                if (pagenumber != 0) {
 
-                if (start >= this.getStartPos()) {
-                                            
                     if (DEBUG)
-                        log.debug("Add pagebreak to rendering: {}-{}",
-                                  charOffset,
-                                  pagenumber);
-                    this.addPagebreak(charOffset, pagenumber);
-                };
+                        log.debug("Add pagebreak to list: {}-{}", charOffset, pagenumber);
+						
+                    // This is a remembered pagebreak!
+                    pagebreaks.add(new int[]{charOffset, pagenumber});
 
+                    if (start >= this.getStartPos()) {
+                                            
+                        if (DEBUG)
+                            log.debug("Add pagebreak to rendering: {}-{}",
+                                      charOffset,
+                                      pagenumber);
+                        this.addPagebreak(charOffset, pagenumber);
+                    };
+                }
+                // This marker is no pagebreak
+                else {
+                    int bytelength = bb.getInt();
+                    
+                    byte[] anno = new byte[bytelength];
+                    bb.get(anno);
+                    String annoStr = new String(anno, StandardCharsets.UTF_8);
+                    this.addMarker(charOffset, annoStr);
+                }
+                
                 b = null;
             };
 		}
@@ -1218,7 +1294,7 @@
                         && hl.end <= this.getEndPos()) {
 
 					// Highlight is no pagebreak
-					if (hl.end != PB_MARKER) {
+					if (hl.end != PB_MARKER && hl.end != ALL_MARKER) {
 						pto.add(this.localDocID, hl.start);
 						pto.add(this.localDocID, hl.end);
 
@@ -1381,13 +1457,19 @@
                 snippetArray.addClose(element[2]);
             }
 
-			// empty tag
+			// empty tag (pagebreak)
 			else if (element[3] == 2) {
 
 				// Add Empty (pagebreak)
                 snippetArray.addEmpty(element[2]);
-			}
-			
+			}            
+
+            // empty tag (marker)
+            else if (element[3] == 3) {
+
+                // Add Empty (marker)
+                snippetArray.addMarker(element[2]);
+            } 
 
 			// open tag
             else {
@@ -1555,7 +1637,7 @@
                     continue;
 
                 // Highlight is a pagebreak
-                if (highlight.end == PB_MARKER)
+                if (highlight.end == PB_MARKER || highlight.end == ALL_MARKER)
                     continue;
 
                 if (classes == null)
@@ -1769,7 +1851,9 @@
 				if (DEBUG)
 					log.debug("No more open tags -- close all non pagebreaks");
 
-				if (closeList.peekFirst()[1] != PB_MARKER) {
+                int pf = closeList.peekFirst()[1];
+
+				if (pf != PB_MARKER && pf != ALL_MARKER) {
 					stack.add(closeList.removeFirst());
 				}
 				else if (DEBUG) {
@@ -1785,26 +1869,34 @@
                 break;
             };
 
-			// Closener is pagebreak
-			if (closeList.peekFirst()[1] == PB_MARKER) {
+            int clpf = closeList.peekFirst()[1];
+            int olpf = openList.peekFirst()[1];
+
+			// Closener is pagebreak or marker
+			if (clpf == PB_MARKER || clpf == ALL_MARKER) {
 
 				if (DEBUG)
-					log.debug("Close is pagebreak -- ignore (2)");
+					log.debug("Close is pagebreak or a marker -- ignore (2)");
 
 				// Remove closing pagebreak
 				closeList.removeFirst();
 			}
 
-			// Opener is pagebreak
-			else if (openList.peekFirst()[1] == PB_MARKER) {
+			// Opener is pagebreak or marker
+			else if (olpf == PB_MARKER || olpf == ALL_MARKER) {
 				int[] e = openList.removeFirst().clone();
 
 				if (DEBUG)
-					log.debug("Open is pagebreak");
+					log.debug("Open is pagebreak or a marker");
 
 				// Mark as empty
                 e[1] = e[0]; // Remove pagebreak marker
-                e[3] = 2;
+
+                if (olpf == PB_MARKER) {
+                    e[3] =  2;
+                } else {
+                    e[3] = 3;
+                };
 
 				// Add empty pagebreak
 				stack.add(e);
@@ -1958,7 +2050,7 @@
                 int end = -1;
 
 				// Highlight is a pagebreak
-				if (highlight.end != PB_MARKER) {
+				if (highlight.end != PB_MARKER && highlight.end != ALL_MARKER) {
 					start = this.positionsToOffset.start(ldid, highlight.start);
 					end = this.positionsToOffset.end(ldid, highlight.end);
 				}
@@ -1980,14 +2072,14 @@
                 start -= startOffsetChar;
 
 				// Keep end equal -1
-				if (end != PB_MARKER) {
+				if (end != PB_MARKER && end != ALL_MARKER) {
 					end -= startOffsetChar;
 				}
 				else if (DEBUG) {
 					log.debug("Pagebreak keeps end position");
 				};
 
-                if (start < 0 || (end < 0 && end != PB_MARKER))
+                if (start < 0 || (end < 0 && end != PB_MARKER && end != ALL_MARKER))
                     continue;
 
                 // Create intArray for highlight
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
index 36360c5..0a72156 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
@@ -83,6 +83,11 @@
         this.combine.add(new HighlightCombinatorElement((byte) 3, pagenumber));
     };
 
+    // Add marker highlight to the stack
+    public void addMarker (int annonumber) {
+        this.combine.add(new HighlightCombinatorElement((byte) 4, annonumber));
+    };
+
 
     // Add closing highlight combinator to the stack
     public void addClose (int number) {
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
index c39825a..d72d46b 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
@@ -2,10 +2,8 @@
 
 import org.apache.lucene.util.FixedBitSet;
 import de.ids_mannheim.korap.response.Match;
-import de.ids_mannheim.korap.response.match.Relation;
 import static de.ids_mannheim.korap.util.KrillString.*;
 import java.util.*;
-import java.io.*;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -16,13 +14,14 @@
 public class HighlightCombinatorElement {
 
 	// Number -1:     Match
-	// Number -99998: Context
-	private final static int CONTEXT = -99998;
+	// Number -99997: Context
+	private final static int CONTEXT = -99997;
 	
     // Type 0: Textual data
     // Type 1: Opening
     // Type 2: Closing
-	// Type 3: Empty
+	// Type 3: Empty (pagebreak)
+    // Type 4: Empty (marker)
     public byte type;
 
     public int number = 0;
@@ -168,6 +167,12 @@
 		// Empty element
 		else if (this.type == 3) {
 			return "<span class=\"pb\" data-after=\"" + number + "\"></span>";
+		}
+        
+        // Marker
+		else if (this.type == 4) {
+            String[] parts = match.getAnnotationID(this.number).split(":", 2);
+			return "<span class=\"inline-marker\" data-key=\"" + escapeHTML(parts[0]) + "\" data-value=\"" + escapeHTML(parts[1]) + "\"></span>";
 		};
 
         // HTML encode primary data
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java
index 93d7925..4492313 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestPagebreakIndex.java
@@ -63,11 +63,13 @@
         Result kr = ki.search(sq, (short) 10);
         assertEquals(4, kr.getMatches().size());
 
+        // Doc 0
         assertEquals(2, kr.getMatch(0).getStartPos());
 		assertEquals(3, kr.getMatch(0).getEndPos());
 		assertEquals(-1, kr.getMatch(0).getStartPage());
 		assertEquals(-1, kr.getMatch(0).getEndPage());
 
+        // Doc 1
         assertEquals(2, kr.getMatch(1).getStartPos());
 		assertEquals(3, kr.getMatch(1).getEndPos());
 		assertEquals(528, kr.getMatch(1).getStartPage());
@@ -75,8 +77,10 @@
 
         assertEquals(5, kr.getMatch(2).getStartPos());
 		assertEquals(6, kr.getMatch(2).getEndPos());
-		assertEquals(529, kr.getMatch(2).getStartPage());
-		assertEquals(-1, kr.getMatch(2).getEndPage());
+		assertEquals(528, kr.getMatch(2).getStartPage());
+        assertEquals("<span class=\"context-left\">abcab</span><span class=\"match\"><mark>c</mark></span><span class=\"context-right\">abac</span>",
+                     kr.getMatch(2).getSnippetHTML());
+		assertEquals(529, kr.getMatch(2).getEndPage()); // Debatable
 
         assertEquals(9, kr.getMatch(3).getStartPos());
 		assertEquals(10, kr.getMatch(3).getEndPos());
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
index 4db8b15..ec38fa4 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
@@ -740,6 +740,29 @@
 		assertEquals(529, res.at("/pages/0").asInt());
     };
 
+    @Test
+    public void searchJSONwithUtteranceAttributes () throws IOException {
+        // Construct index
+        KrillIndex ki = new KrillIndex();
+        // Indexing test files
+        FieldDocument fd = ki.addDoc(1,
+                getClass().getResourceAsStream("/others/kokokom-example.json.gz"), true);
+        ki.commit();
+
+        assertEquals(fd.getUID(), 1);
+        assertEquals(fd.getTextSigle(), "KTC/001/000001");
+
+        Krill ks = new Krill(new QueryBuilder("tokens").seg("s:Räuspern"));
+        Result kr = ks.apply(ki);
+
+        assertEquals(1, kr.getTotalResults());
+        assertEquals(0, kr.getStartIndex());
+        assertEquals(25, kr.getItemsPerPage());
+        Match m = kr.getMatch(0);
+        assertEquals("<span class=\"context-left\"></span><span class=\"match\"><span class=\"inline-marker\" data-key=\"who\" data-value=\"Mai Thi Nguyen-Kim\"></span><span class=\"inline-marker\" data-key=\"start\" data-value=\"0:00\"></span><span class=\"inline-marker\" data-key=\"end\" data-value=\"01:20\"></span>(<mark>Räuspern</mark></span><span class=\"context-right\">) Wie viele Geschlechter gibt es? Wenn<span class=\"more\"></span></span>", m.getSnippetHTML());
+    };
+
+    
 
     @Test
     public void searchJSONnewJSON2 () throws IOException {
@@ -1463,7 +1486,7 @@
         assertEquals(2, kr.getMatch(0).getStartPos());
         assertEquals(52, kr.getMatch(0).getEndPos());
         assertEquals(kr.getMatch(0).getSnippetBrackets(),
-                "Maximen und [[Reflexionen Religion und Christentum. wir sind naturforschend Pantheisten, dichtend Polytheisten, sittlich Monotheisten. Gott, wenn wir hoch stehen, ist alles; stehen wir niedrig, so ist er ein Supplement unsrer Armseligkeit. die Kreatur ist sehr schwach; denn sucht sie etwas, findet sie's nicht. stark aber ist Gott; denn sucht er die Kreatur]<!>], so hat er sie gleich in ...");
+                     "Maximen und [[Reflexionen Religion und Christentum. wir sind naturforschend Pantheisten, dichtend Polytheisten, sittlich Monotheisten. Gott, wenn wir hoch stehen, ist alles; stehen wir niedrig, so ist er ein Supplement unsrer Armseligkeit. die Kreatur ist sehr schwach; denn sucht sie etwas, findet sie's nicht. stark aber ist Gott; denn sucht er die Kreatur]<!>], so hat er sie gleich in ...");
         assertEquals(kr.getMatch(0).getSnippetHTML(),
                 "<span class=\"context-left\">Maximen und </span><span class=\"match\"><mark>Reflexionen Religion und Christentum. wir sind naturforschend Pantheisten, dichtend Polytheisten, sittlich Monotheisten. Gott, wenn wir hoch stehen, ist alles; stehen wir niedrig, so ist er ein Supplement unsrer Armseligkeit. die Kreatur ist sehr schwach; denn sucht sie etwas, findet sie's nicht. stark aber ist Gott; denn sucht er die Kreatur</mark><span class=\"cutted\"></span></span><span class=\"context-right\">, so hat er sie gleich in<span class=\"more\"></span></span>");
         assertEquals(kr.getMatch(0).getTextSigle(), "GOE_AGX.00002");
diff --git a/src/test/resources/others/kokokom-example.json.gz b/src/test/resources/others/kokokom-example.json.gz
new file mode 100644
index 0000000..d0ed313
--- /dev/null
+++ b/src/test/resources/others/kokokom-example.json.gz
Binary files differ