Fixed escaping in snippets (HTML and brackets)

Change-Id: I51b4b44998e6bfb0750e716c82d57ea5a820c741
diff --git a/Changes b/Changes
index 6bbcacf..e643469 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.55.6 2016-06-29
+0.55.6 2016-07-27
         - [bugfix] distance with key "t" uses default foundry (diewald)
 	- [cleanup] Renamed fromJson() to fromKoral() (diewald)
 	- [cleanup] Removed deprecated methods in Krill:
@@ -13,6 +13,7 @@
 	- [feature] Added document method to Web-API (diewald)
 	- [feature] Added experimental KrillStats class (diewald)
 	- [bugfix] Escape quotes in JSON strings (diewald)
+	- [bugfix] Escape XML and Brackets in snippets correctly (diewald)
 
 0.55.5 2016-05-02
 	- [performance] Changed to a dynamic window for sorting in FocusSpans (margaretha)
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
index 55837b8..42f1c08 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/HighlightCombinatorElement.java
@@ -58,7 +58,7 @@
 
             else if (this.number < -1) {
                 sb.append("<span xml:id=\"")
-                        .append(match.getPosID(match.getClassID(this.number)))
+                    .append(escapeHTML(match.getPosID(match.getClassID(this.number))))
                         .append("\">");
             }
 
@@ -66,14 +66,14 @@
                 sb.append("<span ");
                 if (this.number < 2048) {
                     sb.append("title=\"")
-                            .append(match.getAnnotationID(this.number))
+                        .append(escapeHTML(match.getAnnotationID(this.number)))
                             .append('"');
                 }
                 else {
                     Relation rel = match.getRelationID(this.number);
-                    sb.append("xlink:title=\"").append(rel.annotation)
+                    sb.append("xlink:title=\"").append(escapeHTML(rel.annotation))
                             .append("\" xlink:type=\"simple\" xlink:href=\"#")
-                            .append(match.getPosID(rel.ref)).append('"');
+                        .append(escapeHTML(match.getPosID(rel.ref))).append('"');
                 };
                 sb.append('>');
             }
@@ -146,6 +146,7 @@
                 else if (this.number != 0)
                     sb.append(this.number).append(':');
             };
+
             return sb.toString();
         }
         else if (this.type == 2) {
@@ -153,6 +154,6 @@
                 return "]";
             return "}";
         };
-        return this.characters;
+        return escapeBrackets(this.characters);
     };
 };
diff --git a/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java b/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
index b41cd2e..10f0303 100644
--- a/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
+++ b/src/main/java/de/ids_mannheim/korap/response/match/MatchIdentifier.java
@@ -8,9 +8,10 @@
 
     private ArrayList<int[]> pos = new ArrayList<>(8);
 
-    // TODO: "contains" is necessary for a compatibility bug in Kustvakt
-    Pattern idRegex = Pattern.compile("^(?:match-|contains-)"
-            + "(?:([^!]+?)[!\\.])?" + "([^!]+)-p([0-9]+)-([0-9]+)"
+    // Remember: "contains" is necessary for a compatibility bug in Kustvakt
+    Pattern idRegex = Pattern.compile("^(?:match-|contains-)" +
+                                      "(?:([^!]+?)[!\\.])?" +
+                                      "([^!]+)[-/]p([0-9]+)-([0-9]+)"
             + "((?:\\(-?[0-9]+\\)-?[0-9]+--?[0-9]+)*)" + "(?:c.+?)?$");
     Pattern posRegex = Pattern.compile("\\(([0-9]+)\\)([0-9]+)-([0-9]+)");
 
@@ -18,6 +19,12 @@
     public MatchIdentifier () {};
 
 
+    /**
+     * Construct a new MatchIdentifier from its string representation.
+     * Because of many internal changes and for compatibility reasons,
+     * the structure of the identifier has changed repeatedly over time.
+     * The constructor accepts several legacy structures for test compatibility.
+     */
     public MatchIdentifier (String id) {
 
         // Replace for legacy reasons with incompatible versions of Kustvakt
@@ -26,17 +33,25 @@
         Matcher matcher = idRegex.matcher(id);
         if (matcher.matches()) {
 
+            // textSigle is provided directly
+            if (matcher.group(1) == null && id.contains("/")) {
+                // TODO: potentially use UID!
+                this.setTextSigle(matcher.group(2));
+            }
+
             // <legacy>
-            // and test compatibility
-            if (id.contains("!") || !id.contains("_")) {
+            else if (id.contains("!") || !id.contains("_")) {
                 this.setCorpusID(matcher.group(1));
                 this.setDocID(matcher.group(2));
             }
-            // </legacy>
+            // </legacy>     
+
+            // textSigle is provided indirectly
+            // <legacy>
             else {
-                // this.getCorpusID() + "." + this.getDocID()
                 this.setTextSigle(matcher.group(1) + '.' + matcher.group(2));
             };
+            // </legacy>
 
             this.setStartPos(Integer.parseInt(matcher.group(3)));
             this.setEndPos(Integer.parseInt(matcher.group(4)));
diff --git a/src/main/java/de/ids_mannheim/korap/util/KrillString.java b/src/main/java/de/ids_mannheim/korap/util/KrillString.java
index cc1f357..8a11d33 100644
--- a/src/main/java/de/ids_mannheim/korap/util/KrillString.java
+++ b/src/main/java/de/ids_mannheim/korap/util/KrillString.java
@@ -58,6 +58,17 @@
 
 
     /**
+     * Escape bracket-relevant characters: curly brackets, square brackets and the backslash are each prefixed with a backslash.
+     * 
+     * @param text
+     *            The string to escape.
+     * @return The escaped string.
+     */
+    public static String escapeBrackets (String text) {
+        return text.replaceAll("([\\{\\}\\[\\]\\\\])", "\\\\$1");
+    };
+
+    /**
      * Add surrounding double quotes.
      * 
      * @param text
diff --git a/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java b/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
index c2da28b..d363d7d 100644
--- a/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
+++ b/src/test/java/de/ids_mannheim/korap/highlight/TestHighlight.java
@@ -351,4 +351,54 @@
         assertEquals(kr.getError(0).getMessage(),
                 "Valid class numbers exceeded");
     };
+
+    @Test
+    public void highlightEscapes () throws IOException, QueryException {
+        KrillIndex ki = new KrillIndex();
+        FieldDocument fd = new FieldDocument();
+        fd.addString("ID", "doc-1");
+        fd.addString("UID", "1");
+        fd.addString("textSigle", "c1/d1/1");
+
+        // The text contains quotes, an ampersand, curly brackets and a backslash to exercise both escaping modes
+
+        fd.addTV("base", "Mit \"Mann\" & {Ma\\us}",
+                 "[(0-3)s:Mit|i:mit|_0#0-3|-:t$<i>4|<>:base/t:t$<b>64<i>0<i>20<i>4<b>0]" +
+                 "[(4-10)s:\"Mann\"|i:\"mann\"|base/l:\"Mann\"|_1#4-10]" +
+                 "[(11-12)s:&|i:&|base/l:&|_2#11-12]" +
+                 "[(13-20)s:{Ma\\us}|i:{ma\\us}|_3#13-20]");
+        ki.addDoc(fd);
+
+        // Commit the added document to the index
+        ki.commit();
+        QueryBuilder kq = new QueryBuilder("base");
+        SpanQuery q = (SpanQuery) kq.tag("base/t:t").toQuery();
+
+        Krill qs = new Krill(q);
+        qs.getMeta().getContext().left.setToken(true).setLength((short) 0);
+        qs.getMeta().getContext().right.setToken(true).setLength((short) 0);
+        // With both context lengths set to 0, the expected snippets below contain only the match itself
+        Result kr = ki.search(qs);
+        assertEquals((long) 1, kr.getTotalResults());
+        assertEquals("[Mit \"Mann\" & \\{Ma\\\\us\\}]", kr.getMatch(0).getSnippetBrackets());
+        assertEquals("<span class=\"context-left\"></span><mark>Mit &quot;Mann&quot; &amp; {Ma\\us}</mark><span class=\"context-right\"></span>", kr.getMatch(0).getSnippetHTML());
+        assertEquals("match-c1/d1/1-p0-4", kr.getMatch(0).getID());
+        // The match info snippet carries annotation values in title attributes; these are expected HTML-escaped, too
+        Match km = ki.getMatchInfo("match-c1/d1/1-p0-4", "base", true, (ArrayList) null, (ArrayList) null, true, true, false);
+        assertEquals(0, km.getStartPos());
+        assertEquals("<span class=\"context-left\"></span>"+
+                     "<mark><span title=\"base/t:t\">"+
+                     "Mit "+
+                     "<span title=\"base/l:&quot;Mann&quot;\">"+
+                     "&quot;Mann&quot;"+
+                     "</span>"+
+                     " "+
+                     "<span title=\"base/l:&amp;\">&amp;</span>"+
+                     " "+
+                     "{Ma\\us}"+
+                     "</span>"+
+                     "</mark>"+
+                     "<span class=\"context-right\"></span>",
+                     km.getSnippetHTML());
+    };
 };
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
index ae08cc4..03bafcc 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
@@ -90,6 +90,16 @@
         assertEquals(null, id.getDocID());
         assertEquals("GOE_AGF.02286", id.getTextSigle());
         assertEquals("match-GOE_AGF.02286-p2105-2106", id.toString());
+
+        id = new MatchIdentifier("match-corpus-1/doc-1/text-1/p2105-2106");
+        assertEquals("match-corpus-1/doc-1/text-1-p2105-2106", id.toString());
+        assertEquals("corpus-1/doc-1/text-1", id.getTextSigle());
+        /*
+        assertEquals(2105, id.getStartPos());
+        assertEquals(2106, id.getEndPos());
+        assertEquals(null, id.getCorpusID());
+        assertEquals(null, id.getDocID());
+        */
     };