Fix ids for relation anchors to include end positions

Change-Id: Ia4563559cf8f6ec2ad1b27293229e716e47e53d6
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index ef4bf06..ba506dd 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -115,7 +115,7 @@
 
     private HashMap<Integer, String> annotationNumber = new HashMap<>(16);
     private HashMap<Integer, Relation> relationNumber = new HashMap<>(16);
-    private HashMap<Integer, Integer> identifierNumber = new HashMap<>(16);
+    private HashMap<Integer, String> identifierNumber = new HashMap<>(16);
 
     // -1 is match highlight
     int annotationNumberCounter = 256;
@@ -209,7 +209,7 @@
     /**
      * Private class of highlights.
 	 * TODO: This should probably be renamed, as it not only contains highlights
-	 * but also annotations and pagebreaks
+	 * but also annotations, pagebreaks and relations
      */
     private class Highlight {
         public int start, end;
@@ -221,6 +221,12 @@
             this.end = end;
             // TODO: This can overflow!
             this.number = relationNumberCounter++;
+
+			if (DEBUG) {
+				log.trace("Add relation (2) '{}': source={}-{} >> target={}-{}",
+						  annotation, start, end, refStart, refEnd);
+			};
+			
             relationNumber.put(this.number, new Relation(annotation, refStart, refEnd));
         };
 
@@ -433,7 +439,7 @@
 							 String annotation) {
 
 		if (DEBUG)
-			log.trace("Add relation '{}': source={}-{} >> target={}-{}",
+			log.trace("Add relation (1) '{}': source={}-{} >> target={}-{}",
 					  annotation, srcStart, srcEnd, targetStart, targetEnd);
 
 		// Add source token
@@ -450,16 +456,24 @@
 		};
 
         int id = identifierNumberCounter--;
-        identifierNumber.put(id, targetStart);
+
+		// The identifier needs to incorporate targetEnd as well, so
+		// it is registered below, once the target extent is known
 
 		// Add target token
-		if (targetEnd == -1) { // || targetStart == targetEnd) {
+		// (The last part was previously commented
+		// out for unknown reason)
+		if (targetEnd == -1 || targetStart == targetEnd) {
 			this.addHighlight(new Highlight(targetStart, targetStart, id));
+
+			identifierNumber.put(id, String.valueOf(targetStart));
 		}
 
 		// Add target span
 		else {
 			this.addHighlight(new Highlight(targetStart, targetEnd, id));
+			identifierNumber.put(id, targetStart + "-" + targetEnd);
+
 		};
     };
 
@@ -703,6 +717,29 @@
 		return this.getPosID(pos, -1);
 	};
 
+
+	/**
+	 * Get identifier for a position given in its string form.
+	 *
+	 * @param pos
+	 *            Start and optional end position to get the
+	 *            identifier on, separated by a dash.
+	 * @return the result of {@link #getPosID(int, int)} for the parsed positions.
+	 */
+	@JsonIgnore
+	public String getPosID (String pos) {
+
+		// Anything but exactly two parts is treated as "no end position" (-1)
+		String[] parts = pos.split("-");
+		int start = Integer.parseInt(parts[0]);
+		int end = (parts.length == 2)
+			? Integer.parseInt(parts[1])
+			: -1;
+		return this.getPosID(start, end);
+	};
+
+	
+
     /**
      * Get identifier for a specific position.
      * 
@@ -712,10 +749,10 @@
      *            End position to get identifier on.
      */
     @JsonIgnore
-		public String getPosID (int start, int end) {
+	public String getPosID (int start, int end) {
 
 		if (DEBUG)
-			log.trace("Retrieve the identifier for pos");
+			log.trace("Retrieve identifier for position {}-{}", start, end);
 
         // Identifier already given
         if (this.identifier != null)
@@ -738,7 +775,7 @@
 
 		if (DEBUG)
 			log.trace(
-				"The identifier is {} in {} ({}-{}) {}",
+				"Identifier is {} in {} ({}-{}) {}",
 				id.toString(),
 				this.getTextSigle(),
 				this.getCorpusID(),
@@ -1262,6 +1299,7 @@
                 snippetArray.addClose(element[2]);
             }
 
+			// empty tag
 			else if (element[3] == 2) {
 
 				// Add Empty (pagebreak)
@@ -1349,10 +1387,16 @@
         // Iterate through all remaining elements
         sb.append("<span class=\"match\">");
         for (short i = start; i <= end; i++) {
+
 			elem = this.snippetArray.get(i);
 			// UNTESTED
-			if (elem != null)
-				sb.append(elem.toHTML(this, level, levelCache));
+			if (elem != null) {
+				String elemString = elem.toHTML(this, level, levelCache);
+				if (DEBUG) {
+					log.trace("Add node {}", elemString);
+				};
+				sb.append(elemString);
+			}
         };
         sb.append("</span>");
         sb.append(rightContext);
@@ -1493,23 +1537,44 @@
 			else if (openList.peekFirst()[0] < closeList.peekFirst()[1]) {
 
 				if (DEBUG)
-					log.debug("Open starts before close ends");
+					log.debug("Open tag starts before close tag ends");
 
                 int[] e = openList.removeFirst().clone();
 
 				// Mark as opener
                 e[3] = 1;
 
+				if (DEBUG) {
+
+					//         -1: match
+					//       < -1: relation target
+					// 256 - 2047: annotation
+					//    >= 2048: relation source
+					
+					log.trace(
+						"Add open with number {} to stack at {}-{}",
+						e[2], e[0], e[1]
+						);
+				};
+
 				// Add opener to stack
                 stack.add(e);
             }
 
 			else {
-				if (DEBUG)
+				int[] e = closeList.removeFirst();
+				
+				if (DEBUG) {
 					log.debug("Close ends before open");
 
+					log.trace(
+						"Add close with number {} to stack at {}-{}",
+						e[2], e[0], e[1]
+						);
+				};
+
 				// Add closener to stack
-                stack.add(closeList.removeFirst());
+                stack.add(e);
             };
         };
         return stack;
@@ -1854,7 +1919,7 @@
     // Yeah ... I mean ... why not?
     private void _filterMultipleIdentifiers () {
         ArrayList<Integer> removeDuplicate = new ArrayList<>(10);
-        HashSet<Integer> identifiers = new HashSet<>(20);
+        HashSet<String> identifiers = new HashSet<>(20);
         for (int i = 0; i < this.span.size(); i++) {
 
             // span is an int array: [Start, End, Number, Dummy]
@@ -1864,7 +1929,8 @@
             if (highlightNumber < -1) {
 
                 // Get the real identifier
-                int idNumber = identifierNumber.get(highlightNumber);
+                String idNumber =
+					identifierNumber.get(highlightNumber);
                 if (identifiers.contains(idNumber)) {
                     removeDuplicate.add(i);
                 }
@@ -1889,7 +1955,7 @@
      * Get identifier based on class number
      */
     @JsonIgnore
-    public int getClassID (int nr) {
+    public String getClassID (int nr) {
         return this.identifierNumber.get(nr);
     };
 
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
index 296fc33..0d22c0c 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinator.java
@@ -17,7 +17,7 @@
     private final static Logger log = LoggerFactory.getLogger(Match.class);
 
     // This advices the java compiler to ignore all loggings
-    public static final boolean DEBUG = false;
+    public static final boolean DEBUG = false;
 
     private LinkedList<HighlightCombinatorElement> combine;
     private Stack<Integer> balanceStack = new Stack<>();
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
index 93e4ff2..a74ba24 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
@@ -69,6 +69,7 @@
                 sb.append("<mark>");
             }
 
+			// This is a relation target
             else if (this.number < -1) {
                 sb.append("<span xml:id=\"")
                         .append(escapeHTML(
@@ -76,6 +77,7 @@
                         .append("\">");
             }
 
+			// This is an annotation
             else if (this.number >= 256) {
                 sb.append("<span ");
                 if (this.number < 2048) {
@@ -84,6 +86,8 @@
                                     match.getAnnotationID(this.number)))
                             .append('"');
                 }
+
+				// This is a relation source
                 else {
                     Relation rel = match.getRelationID(this.number);
 
@@ -101,7 +105,8 @@
                 sb.append('>');
             }
 
-            // Highlight - < 256
+            // This is a highlight
+			// < 256
             else {
                 // Get the first free level slot
                 byte pos;
@@ -119,7 +124,7 @@
             return sb.toString();
         }
 
-        // Closing
+        // This is a Closing tag
         else if (this.type == 2) {
             if (this.number < -1 || this.number >= 256)
                 return "</span>";
diff --git a/src/main/resources/log4j2.xml b/src/main/resources/log4j2.xml
index 2528735..bca8731 100644
--- a/src/main/resources/log4j2.xml
+++ b/src/main/resources/log4j2.xml
@@ -15,6 +15,7 @@
       <AppenderRef ref="Console"/>
     </Logger>
     <Logger name="de.ids_mannheim.korap.KrillIndex"
+            additivity="false"
             level="trace">
       <AppenderRef ref="Console"/>
     </Logger>
diff --git a/src/test/java/de/ids_mannheim/korap/TestIndexer.java b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
index f5aa676..7bf63e6 100644
--- a/src/test/java/de/ids_mannheim/korap/TestIndexer.java
+++ b/src/test/java/de/ids_mannheim/korap/TestIndexer.java
@@ -43,7 +43,7 @@
     public void testMultipleInputFiles () throws IOException {

         Indexer.main(new String[] { "-c", "src/test/resources/krill.properties",

                 "-i", "src/test/resources/wiki" });

-        assertEquals("Indexed 15 files.", outputStream.toString());

+        assertEquals("Indexed 16 files.", outputStream.toString());

     }

 

     @Test

diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index b455116..11f3d4a 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -339,6 +339,70 @@
 	};
 
 
+	@Test
+    public void snippetBugTest2 () throws IOException, QueryException {
+        KrillIndex ki = new KrillIndex();
+        ki.addDoc(getClass().getResourceAsStream("/wiki/wdd17-982-72848.json.gz"), true);
+        ki.commit();
+
+        Match km = ki.getMatchInfo("match-WDD17/982/72848-p15844-15846", "tokens",
+								   "lwc", "d", true, true, true);
+
+		// TODO:
+		//   The expectation below pins known-broken output - the same
+		//   xml:id is introduced multiple times (e.g. p15839-15840)
+		String snippet = km.getSnippetHTML();
+		assertEquals(
+			"SnippetBrackets (with Spans)",
+			"<span class=\"context-left\"></span>"+
+			"<span class=\"match\">"+
+			"<span xml:id=\"token-WDD17/982/72848-p15836-15839\">"+
+			"<span xlink:title=\"lwc/d:NK\" xlink:type=\"simple\" xlink:href=\"#token-WDD17/982/72848-p15838\">Ein</span>"+
+			" "+
+			"<span xlink:title=\"lwc/d:NK\" xlink:type=\"simple\" xlink:href=\"#token-WDD17/982/72848-p15838\">letztes</span>"+
+			" "+
+			"<span xml:id=\"token-WDD17/982/72848-p15838\">"+
+			"<span xlink:title=\"lwc/d:--\" xlink:type=\"simple\" xlink:href=\"#token-WDD17/982/72848-p15836-15839\">mal</span>"+
+			"</span>"+
+			": "+
+			"<span xml:id=\"token-WDD17/982/72848-p15839-15840\">"+
+			"<span xlink:title=\"lwc/d:--\" xlink:type=\"simple\" xlink:href=\"#token-WDD17/982/72848-p15839-15840\">AL</span>"+
+			"</span>"+
+			"</span>"+
+			"<span xml:id=\"token-WDD17/982/72848-p15839-15840\">"+
+			":"+
+			"<span xml:id=\"token-WDD17/982/72848-p15840-15846\">"+
+			"<span xml:id=\"token-WDD17/982/72848-p15840\">"+
+			"<span xlink:title=\"lwc/d:--\" xlink:type=\"simple\" xlink:href=\"#token-WDD17/982/72848-p15840-15846\">halt</span>"+
+			"</span>"+
+			"</span>"+
+			"</span>"+
+			"<span xml:id=\"token-WDD17/982/72848-p15840-15846\">"+
+			" "+
+			"<span xlink:title=\"lwc/d:NK\" xlink:type=\"simple\" xlink:href=\"#token-WDD17/982/72848-p15842\">den</span>"+
+			" "+
+			"<span xml:id=\"token-WDD17/982/72848-p15842\">"+
+			"<span xlink:title=\"lwc/d:OA\" xlink:type=\"simple\" xlink:href=\"#token-WDD17/982/72848-p15843\">Ball</span>"+
+			"</span>"+
+			" "+
+			"<span xml:id=\"token-WDD17/982/72848-p15843\">"+
+			"<span xlink:title=\"lwc/d:PD\" xlink:type=\"simple\" xlink:href=\"#token-WDD17/982/72848-p15840\">flach</span>"+
+			"</span>"+
+			", "+
+			"<mark>"+
+			"<span xlink:title=\"lwc/d:MO\" xlink:type=\"simple\" xlink:href=\"#token-WDD17/982/72848-p15845\">ganz</span>"+
+			" "+
+			"<span xml:id=\"token-WDD17/982/72848-p15845\">"+
+			"<span xlink:title=\"lwc/d:CJ\" xlink:type=\"simple\" xlink:href=\"#token-WDD17/982/72848-p15843\">flach</span>"+
+			"</span>"+
+			"</mark>"+
+			"</span>"+
+			"</span>"+
+			"<span class=\"context-right\"></span>",
+			snippet
+			);
+	};
+
     @Test
     public void indexExample5Spans () throws IOException, QueryException {
         KrillIndex ki = new KrillIndex();
@@ -774,28 +838,35 @@
         					 "tokens", "malt", null, true, false);
 
 		        assertEquals("SnippetHTML (1)",
-					 "<span class=\"context-left\"></span>"+
+							 "<span class=\"context-left\">"+
+							 "</span>"+
 							 "<span class=\"match\">"+
+							 "<span xml:id=\"token-Corpus/Doc/0002-p0-6\">"+
 							 "<mark>"+
 							 "<span xml:id=\"token-Corpus/Doc/0002-p0\">"+
 							 "<span xlink:title=\"malt/d:ROOT\" xlink:type=\"simple\" xlink:href=\"#token-Corpus/Doc/0002-p0-6\">Maximen</span>"+
-							 "</span> "+
+							 "</span>"+
+							 " "+
 							 "<span xml:id=\"token-Corpus/Doc/0002-p1\">"+
 							 "<span xlink:title=\"malt/d:KON\" xlink:type=\"simple\" xlink:href=\"#token-Corpus/Doc/0002-p0\">und</span>"+
-							 "</span> "+
-							 "<span xlink:title=\"malt/d:CJ\" xlink:type=\"simple\" xlink:href=\"#token-Corpus/Doc/0002-p1\">Reflexionen</span> "+
-							 "<span xml:id=\"token-Corpus/Doc/0002-p3\">" +
+							 "</span>"+
+							 " "+
+							 "<span xlink:title=\"malt/d:CJ\" xlink:type=\"simple\" xlink:href=\"#token-Corpus/Doc/0002-p1\">Reflexionen</span>"+
+							 " "+
+							 "<span xml:id=\"token-Corpus/Doc/0002-p3\">"+
 							 "<span xlink:title=\"malt/d:KON\" xlink:type=\"simple\" xlink:href=\"#token-Corpus/Doc/0002-p0\">Religion</span>"+
-							 "</span> "+
+							 "</span>"+
+							 " "+
 							 "<span xml:id=\"token-Corpus/Doc/0002-p4\">"+
 							 "<span xlink:title=\"malt/d:KON\" xlink:type=\"simple\" xlink:href=\"#token-Corpus/Doc/0002-p3\">und</span>"+
-							 "</span> "+
+							 "</span>"+
+							 " "+
 							 "<span xlink:title=\"malt/d:CJ\" xlink:type=\"simple\" xlink:href=\"#token-Corpus/Doc/0002-p4\">Christentum</span>"+
 							 "</mark>"+
 							 "</span>"+
-							 "<span class=\"context-right\">"+
-							 "<span class=\"more\">"+
 							 "</span>"+
+							 "<span class=\"context-right\">"+
+							 "<span class=\"more\"></span>"+
 							 "</span>",
 							 km.getSnippetHTML());
 	};
diff --git a/src/test/resources/wiki/wdd17-982-72848.json.gz b/src/test/resources/wiki/wdd17-982-72848.json.gz
new file mode 100644
index 0000000..8f15a4b
--- /dev/null
+++ b/src/test/resources/wiki/wdd17-982-72848.json.gz
Binary files differ