Escape quotes in JSON strings and test unicode behaviour

Change-Id: Ia58ab50ba0c4f085d5c397bacdcc3b1a78892c30
diff --git a/Changes b/Changes
index 672c91e..6bbcacf 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.55.6 2016-06-25
+0.55.6 2016-06-29
         - [bugfix] distance with key "t" uses default foundry (diewald)
 	- [cleanup] Renamed fromJson() to fromKoral() (diewald)
 	- [cleanup] Removed deprecated methods in Krill:
@@ -12,6 +12,7 @@
         - [bugfix] Fixed UID handling (diewald)
 	- [feature] Added document method to Web-API (diewald)
 	- [feature] Added experimental KrillStats class (diewald)
+	- [bugfix] Escape quotes in JSON strings (diewald)
 
 0.55.5 2016-05-02
 	- [performance] Changed to a dynamic window for sorting in FocusSpans (margaretha)
diff --git a/misc/payloads.md b/misc/payloads.md
index a46a11c..f041466 100644
--- a/misc/payloads.md
+++ b/misc/payloads.md
@@ -103,12 +103,12 @@
 to match a relation span with a specific attribute.
 
 If at least one TUI is set (either the left-part TUI reference,
-he right-part TUI reference, or the relation TUI), all TUIs have to be set.
+the right-part TUI reference, or the relation TUI), all TUIs have to be set.
 If the TUIs do not refer to anything, they have to be set to ```0```.
 
 1) Term to term relation has
  
-* 1 byte for PTI, 
+* 1 byte for PTI (32), 
 * 1 integer for the right part token position, 
 * 1 short for the left-part TUI, 
 * 1 short for right-part TUI and 
@@ -123,7 +123,7 @@
 
 2) Term to element relation has
 
-* 1 byte for PTI, 
+* 1 byte for PTI (33), 
 * 1 integer for the start element offset of the right part, 
 * 1 integer for the end element offset of the right part, 
 * 1 integer for the start position of the right part, 
@@ -139,7 +139,7 @@
 
 3) Element to term relation has 
 
-* 1 byte for PTI, 
+* 1 byte for PTI (34), 
 * 1 integer for the start element offset of the left part, 
 * 1 integer for the end element offset of the left part, 
 * 1 integer for end position of the left part, 
@@ -155,7 +155,7 @@
 
 4) Element to element relation has 
 
-* 1 byte for PTI, 
+* 1 byte for PTI (35), 
 * 1 integer for the start element offset of the left part, 
 * 1 integer for the end element offset of the left part, 
 * 1 integer for the start element offset of the right part, 
diff --git a/src/main/java/de/ids_mannheim/korap/query/QueryBuilder.java b/src/main/java/de/ids_mannheim/korap/query/QueryBuilder.java
index 1e0e0f0..72cfb23 100644
--- a/src/main/java/de/ids_mannheim/korap/query/QueryBuilder.java
+++ b/src/main/java/de/ids_mannheim/korap/query/QueryBuilder.java
@@ -60,7 +60,7 @@
      * Create a query object based on a regular expression.
      * 
      * <blockquote><pre>
-     * KrillQuery kq = new KrillQuery("tokens");
+     * QueryBuilder kq = new QueryBuilder("tokens");
      * SpanRegexQueryWrapper re = kq.re(".+?");
      * </pre></blockquote>
      * 
@@ -95,7 +95,7 @@
      * </ul>
      * 
      * <blockquote><pre>
-     * KrillQuery kq = new KrillQuery("tokens");
+     * QueryBuilder kq = new QueryBuilder("tokens");
      * SpanRegexQueryWrapper re = kq.re("[Aa]lternatives?",
      * RegExp.NONE);
      * </pre></blockquote>
@@ -117,7 +117,7 @@
      * Supports flags (see above) and case insensitivity.
      * 
      * <blockquote><pre>
-     * KrillQuery kq = new KrillQuery("tokens");
+     * QueryBuilder kq = new QueryBuilder("tokens");
      * SpanRegexQueryWrapper re = kq.re("alternatives?", RegExp.NONE,
      * true);
      * </pre></blockquote>
@@ -142,7 +142,7 @@
      * Supports case insensitivity.
      * 
      * <blockquote><pre>
-     * KrillQuery kq = new KrillQuery("tokens");
+     * QueryBuilder kq = new QueryBuilder("tokens");
      * SpanRegexQueryWrapper re = kq.re("alternatives?", true);
      * </pre></blockquote>
      * 
@@ -184,7 +184,7 @@
      * Supports case insensitivity.
      * 
      * <blockquote><pre>
-     * KrillQuery kq = new KrillQuery("tokens");
+     * QueryBuilder kq = new QueryBuilder("tokens");
      * SpanWildcardQueryWrapper wc = kq.wc("wall*", true);
      * </pre></blockquote>
      * 
@@ -203,7 +203,7 @@
      * Create a segment query object.
      * 
      * <blockquote><pre>
-     * KrillQuery kq = new KrillQuery("tokens");
+     * QueryBuilder kq = new QueryBuilder("tokens");
      * SpanSegmentQueryWrapper seg = kq.seg();
      * </pre></blockquote>
      * 
@@ -220,7 +220,7 @@
      * and {@link SpanAlterQueryWrapper} objects.
      * 
      * <blockquote><pre>
-     * KrillQuery kq = new KrillQuery("tokens");
+     * QueryBuilder kq = new QueryBuilder("tokens");
      * SpanSegmentQueryWrapper seg = kq.seg(
      * kq.re("mate/p=.*?"),
      * kq.re("opennlp/p=.*?")
@@ -262,7 +262,7 @@
      * Create an empty query segment.
      * 
      * <blockquote><pre>
-     * KrillQuery kq = new KrillQuery("tokens");
+     * QueryBuilder kq = new QueryBuilder("tokens");
      * SpanRepetitionQueryWrapper seg = kq.empty();
      * </pre></blockquote>
      */
diff --git a/src/main/java/de/ids_mannheim/korap/response/Message.java b/src/main/java/de/ids_mannheim/korap/response/Message.java
index 27ece33..975f9f6 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Message.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Message.java
@@ -1,5 +1,7 @@
 package de.ids_mannheim.korap.response;
 
+import static de.ids_mannheim.korap.util.KrillString.quote;
+
 import java.util.LinkedList;
 
 import com.fasterxml.jackson.annotation.JsonInclude.Include;
@@ -8,6 +10,7 @@
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.node.*;
 
+
 /**
  * A message for Notifications.
  * 
@@ -182,8 +185,7 @@
             return mapper.writeValueAsString(this.toJsonNode());
         }
         catch (Exception e) {
-            // Bad in case the message contains quotes!
-            msg = ", \"" + e.getLocalizedMessage() + "\"";
+            msg = ", " + quote(e.getLocalizedMessage());
         };
         return "[620, " + "\"Unable to generate JSON\"" + msg + "]";
     };
diff --git a/src/main/java/de/ids_mannheim/korap/response/Messages.java b/src/main/java/de/ids_mannheim/korap/response/Messages.java
index 118e9d0..b20e28f 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Messages.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Messages.java
@@ -1,6 +1,7 @@
 package de.ids_mannheim.korap.response;
 
 import de.ids_mannheim.korap.util.QueryException;
+import static de.ids_mannheim.korap.util.KrillString.quote;
 import de.ids_mannheim.korap.response.Message;
 
 import com.fasterxml.jackson.annotation.JsonInclude.Include;
@@ -280,8 +281,7 @@
             return mapper.writeValueAsString(this.toJsonNode());
         }
         catch (Exception e) {
-            // Bad in case the message contains quotes!
-            msg = ", \"" + e.getLocalizedMessage() + "\"";
+            msg = ", " + quote(e.getLocalizedMessage());
         };
 
         return "[620, " + "\"Unable to generate JSON\"" + msg + "]";
diff --git a/src/main/java/de/ids_mannheim/korap/response/Notifications.java b/src/main/java/de/ids_mannheim/korap/response/Notifications.java
index 4a91b3a..753735b 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Notifications.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Notifications.java
@@ -1,5 +1,7 @@
 package de.ids_mannheim.korap.response;
 
+import static de.ids_mannheim.korap.util.KrillString.quote;
+
 import com.fasterxml.jackson.annotation.*;
 import com.fasterxml.jackson.annotation.JsonInclude.Include;
 import com.fasterxml.jackson.databind.ObjectMapper;
@@ -517,7 +519,7 @@
         }
         catch (Exception e) {
             // Bad in case the message contains quotes!
-            msg = ", \"" + e.getLocalizedMessage() + "\"";
+            msg = ", " + quote(e.getLocalizedMessage());
         };
 
         return "{\"errors\" : [" + "[620, " + "\"Unable to generate JSON\""
diff --git a/src/main/java/de/ids_mannheim/korap/response/Response.java b/src/main/java/de/ids_mannheim/korap/response/Response.java
index ccb3056..b37619a 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Response.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Response.java
@@ -14,6 +14,7 @@
 import de.ids_mannheim.korap.KrillQuery;
 import de.ids_mannheim.korap.KrillStats;
 import de.ids_mannheim.korap.response.Notifications;
+import static de.ids_mannheim.korap.util.KrillString.quote;
 
 /**
  * Base class for objects meant to be responded by the server.
@@ -595,7 +596,7 @@
         }
         catch (Exception e) {
             // Bad in case the message contains quotes!
-            msg = ", \"" + e.getLocalizedMessage() + "\"";
+            msg = ", " + quote(e.getLocalizedMessage());
         };
 
         return "{\"errors\":[" + "[620, " + "\"Unable to generate JSON\"" + msg
diff --git a/src/main/java/de/ids_mannheim/korap/util/KrillString.java b/src/main/java/de/ids_mannheim/korap/util/KrillString.java
index cfc92f3..cc1f357 100644
--- a/src/main/java/de/ids_mannheim/korap/util/KrillString.java
+++ b/src/main/java/de/ids_mannheim/korap/util/KrillString.java
@@ -55,4 +55,16 @@
         return text.replace("&", "&amp;").replace("<", "&lt;")
                 .replace(">", "&gt;").replace("\"", "&quot;");
     };
+
+
+    /**
+     * Escape double quotes and backslashes and add surrounding double quotes.
+     * 
+     * @param text
+     *            The string to escape (must not be null).
+     * @return The escaped string wrapped in double quotes.
+     */
+    public static String quote (String text) {
+        return '"' + text.replaceAll("([\"\\\\])", "\\\\$1") + '"';
+    };
 };
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
index 2a892b5..6db3030 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
@@ -15,8 +15,11 @@
 
 import de.ids_mannheim.korap.KrillIndex;
 import de.ids_mannheim.korap.KrillQuery;
+import de.ids_mannheim.korap.query.QueryBuilder;
 import de.ids_mannheim.korap.index.FieldDocument;
 import de.ids_mannheim.korap.index.MultiTermTokenStream;
+import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.util.QueryException;
 
 @RunWith(JUnit4.class)
 public class TestKrillIndex {
@@ -120,6 +123,37 @@
         // hasDeletions, hasPendingMerges
     };
 
+    /*
+     * This test demonstrates that canonically equivalent unicode strings are indexed and matched as distinct terms
+     */
+    @Test
+    public void indexUnicode () throws IOException, QueryException {
+        KrillIndex ki = new KrillIndex();
+
+        FieldDocument fd = new FieldDocument();
+        fd.addString("name", "Peter");
+
+        // These values are canonically equivalent
+        // But indexed as byte sequences
+        fd.addTV("base",
+                 new String("ju" + "\u006E" + "\u0303" + "o") +
+                 " " +
+                 new String("ju" + "\u00F1" + "o"),
+                 "[(0-5)s:ju" + "\u006E" + "\u0303" + "o|_0$<i>0<i>5|-:t$<i>2]"
+                 + "[(6-10)s:ju" + "\u00F1" + "o|_1$<i>6<i>10]");
+        ki.addDoc(fd);
+        ki.commit();
+
+        assertEquals(1, ki.numberOf("base", "documents"));
+
+        QueryBuilder kq = new QueryBuilder("base");
+        Result kr = ki.search(kq.seg("s:ju" + "\u00F1" + "o").toQuery());
+        assertEquals(1, kr.getTotalResults());
+
+        kr = ki.search(kq.seg("s:ju" + "\u006E" + "\u0303" + "o").toQuery());
+        assertEquals(1, kr.getTotalResults());
+    };
+
     @Test
     public void indexFieldInfo () throws IOException {
         KrillIndex ki = new KrillIndex();
diff --git a/src/test/java/de/ids_mannheim/korap/util/TestKrillString.java b/src/test/java/de/ids_mannheim/korap/util/TestKrillString.java
index 3a437c5..43d226c 100644
--- a/src/test/java/de/ids_mannheim/korap/util/TestKrillString.java
+++ b/src/test/java/de/ids_mannheim/korap/util/TestKrillString.java
@@ -19,4 +19,12 @@
         assertEquals("Er sagte: &quot;Das ist ja toll!&quot;",
                 escapeHTML("Er sagte: \"Das ist ja toll!\""));
     };
+
+    @Test
+    public void testQuote () {
+        assertEquals("\"hallo\"", quote("hallo"));
+        assertEquals("\"h'all'o\"", quote("h'all'o"));
+        assertEquals("\"er sagte: \\\"Hallo!\\\"\"", quote("er sagte: \"Hallo!\""));
+        assertEquals("\"a \\\\\\\" b\"", quote("a \\\" b"));
+    };
 };