Escape quotes in JSON strings and test unicode behaviour
Change-Id: Ia58ab50ba0c4f085d5c397bacdcc3b1a78892c30
diff --git a/Changes b/Changes
index 672c91e..6bbcacf 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.55.6 2016-06-25
+0.55.6 2016-06-29
- [bugfix] distance with key "t" uses default foundry (diewald)
- [cleanup] Renamed fromJson() to fromKoral() (diewald)
- [cleanup] Removed deprecated methods in Krill:
@@ -12,6 +12,7 @@
- [bugfix] Fixed UID handling (diewald)
- [feature] Added document method to Web-API (diewald)
- [feature] Added experimental KrillStats class (diewald)
+ - [bugfix] Escape quotes in JSON strings (diewald)
0.55.5 2016-05-02
- [performance] Changed to a dynamic window for sorting in FocusSpans (margaretha)
diff --git a/misc/payloads.md b/misc/payloads.md
index a46a11c..f041466 100644
--- a/misc/payloads.md
+++ b/misc/payloads.md
@@ -103,12 +103,12 @@
to match a relation span with a specific attribute.
If at least one TUI is set (either the left-part TUI reference,
-he right-part TUI reference, or the relation TUI), all TUIs have to be set.
+the right-part TUI reference, or the relation TUI), all TUIs have to be set.
If the TUIs do not refer to anything, they have to be set to ```0```.
1) Term to term relation has
-* 1 byte for PTI,
+* 1 byte for PTI (32),
* 1 integer for the right part token position,
* 1 short for the left-part TUI,
* 1 short for right-part TUI and
@@ -123,7 +123,7 @@
2) Term to element relation has
-* 1 byte for PTI,
+* 1 byte for PTI (33),
* 1 integer for the start element offset of the right part,
* 1 integer for the end element offset of the right part,
* 1 integer for the start position of the right part,
@@ -139,7 +139,7 @@
3) Element to term relation has
-* 1 byte for PTI,
+* 1 byte for PTI (34),
* 1 integer for the start element offset of the left part,
* 1 integer for the end element offset of the left part,
* 1 integer for end position of the left part,
@@ -155,7 +155,7 @@
4) Element to element relation has
-* 1 byte for PTI,
+* 1 byte for PTI (35),
* 1 integer for the start element offset of the left part,
* 1 integer for the end element offset of the left part,
* 1 integer for the start element offset of the right part,
diff --git a/src/main/java/de/ids_mannheim/korap/query/QueryBuilder.java b/src/main/java/de/ids_mannheim/korap/query/QueryBuilder.java
index 1e0e0f0..72cfb23 100644
--- a/src/main/java/de/ids_mannheim/korap/query/QueryBuilder.java
+++ b/src/main/java/de/ids_mannheim/korap/query/QueryBuilder.java
@@ -60,7 +60,7 @@
* Create a query object based on a regular expression.
*
* <blockquote><pre>
- * KrillQuery kq = new KrillQuery("tokens");
+ * QueryBuilder kq = new QueryBuilder("tokens");
* SpanRegexQueryWrapper re = kq.re(".+?");
* </pre></blockquote>
*
@@ -95,7 +95,7 @@
* </ul>
*
* <blockquote><pre>
- * KrillQuery kq = new KrillQuery("tokens");
+ * QueryBuilder kq = new QueryBuilder("tokens");
* SpanRegexQueryWrapper re = kq.re("[Aa]lternatives?",
* RegExp.NONE);
* </pre></blockquote>
@@ -117,7 +117,7 @@
* Supports flags (see above) and case insensitivity.
*
* <blockquote><pre>
- * KrillQuery kq = new KrillQuery("tokens");
+ * QueryBuilder kq = new QueryBuilder("tokens");
* SpanRegexQueryWrapper re = kq.re("alternatives?", RegExp.NONE,
* true);
* </pre></blockquote>
@@ -142,7 +142,7 @@
* Supports case insensitivity.
*
* <blockquote><pre>
- * KrillQuery kq = new KrillQuery("tokens");
+ * QueryBuilder kq = new QueryBuilder("tokens");
* SpanRegexQueryWrapper re = kq.re("alternatives?", true);
* </pre></blockquote>
*
@@ -184,7 +184,7 @@
* Supports case insensitivity.
*
* <blockquote><pre>
- * KrillQuery kq = new KrillQuery("tokens");
+ * QueryBuilder kq = new QueryBuilder("tokens");
* SpanWildcardQueryWrapper wc = kq.wc("wall*", true);
* </pre></blockquote>
*
@@ -203,7 +203,7 @@
* Create a segment query object.
*
* <blockquote><pre>
- * KrillQuery kq = new KrillQuery("tokens");
+ * QueryBuilder kq = new QueryBuilder("tokens");
* SpanSegmentQueryWrapper seg = kq.seg();
* </pre></blockquote>
*
@@ -220,7 +220,7 @@
* and {@link SpanAlterQueryWrapper} objects.
*
* <blockquote><pre>
- * KrillQuery kq = new KrillQuery("tokens");
+ * QueryBuilder kq = new QueryBuilder("tokens");
* SpanSegmentQueryWrapper seg = kq.seg(
* kq.re("mate/p=.*?"),
* kq.re("opennlp/p=.*?")
@@ -262,7 +262,7 @@
* Create an empty query segment.
*
* <blockquote><pre>
- * KrillQuery kq = new KrillQuery("tokens");
+ * QueryBuilder kq = new QueryBuilder("tokens");
* SpanRepetitionQueryWrapper seg = kq.empty();
* </pre></blockquote>
*/
diff --git a/src/main/java/de/ids_mannheim/korap/response/Message.java b/src/main/java/de/ids_mannheim/korap/response/Message.java
index 27ece33..975f9f6 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Message.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Message.java
@@ -1,5 +1,7 @@
package de.ids_mannheim.korap.response;
+import static de.ids_mannheim.korap.util.KrillString.quote;
+
import java.util.LinkedList;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
@@ -8,6 +10,7 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.*;
+
/**
* A message for Notifications.
*
@@ -182,8 +185,7 @@
return mapper.writeValueAsString(this.toJsonNode());
}
catch (Exception e) {
- // Bad in case the message contains quotes!
- msg = ", \"" + e.getLocalizedMessage() + "\"";
+ msg = ", " + quote(e.getLocalizedMessage());
};
return "[620, " + "\"Unable to generate JSON\"" + msg + "]";
};
diff --git a/src/main/java/de/ids_mannheim/korap/response/Messages.java b/src/main/java/de/ids_mannheim/korap/response/Messages.java
index 118e9d0..b20e28f 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Messages.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Messages.java
@@ -1,6 +1,7 @@
package de.ids_mannheim.korap.response;
import de.ids_mannheim.korap.util.QueryException;
+import static de.ids_mannheim.korap.util.KrillString.quote;
import de.ids_mannheim.korap.response.Message;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
@@ -280,8 +281,7 @@
return mapper.writeValueAsString(this.toJsonNode());
}
catch (Exception e) {
- // Bad in case the message contains quotes!
- msg = ", \"" + e.getLocalizedMessage() + "\"";
+ msg = ", " + quote(e.getLocalizedMessage());
};
return "[620, " + "\"Unable to generate JSON\"" + msg + "]";
diff --git a/src/main/java/de/ids_mannheim/korap/response/Notifications.java b/src/main/java/de/ids_mannheim/korap/response/Notifications.java
index 4a91b3a..753735b 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Notifications.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Notifications.java
@@ -1,5 +1,7 @@
package de.ids_mannheim.korap.response;
+import static de.ids_mannheim.korap.util.KrillString.quote;
+
import com.fasterxml.jackson.annotation.*;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -517,7 +519,7 @@
}
catch (Exception e) {
// Bad in case the message contains quotes!
- msg = ", \"" + e.getLocalizedMessage() + "\"";
+ msg = ", " + quote(e.getLocalizedMessage());
};
return "{\"errors\" : [" + "[620, " + "\"Unable to generate JSON\""
diff --git a/src/main/java/de/ids_mannheim/korap/response/Response.java b/src/main/java/de/ids_mannheim/korap/response/Response.java
index ccb3056..b37619a 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Response.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Response.java
@@ -14,6 +14,7 @@
import de.ids_mannheim.korap.KrillQuery;
import de.ids_mannheim.korap.KrillStats;
import de.ids_mannheim.korap.response.Notifications;
+import static de.ids_mannheim.korap.util.KrillString.quote;
/**
* Base class for objects meant to be responded by the server.
@@ -595,7 +596,7 @@
}
catch (Exception e) {
// Bad in case the message contains quotes!
- msg = ", \"" + e.getLocalizedMessage() + "\"";
+ msg = ", " + quote(e.getLocalizedMessage());
};
return "{\"errors\":[" + "[620, " + "\"Unable to generate JSON\"" + msg
diff --git a/src/main/java/de/ids_mannheim/korap/util/KrillString.java b/src/main/java/de/ids_mannheim/korap/util/KrillString.java
index cfc92f3..cc1f357 100644
--- a/src/main/java/de/ids_mannheim/korap/util/KrillString.java
+++ b/src/main/java/de/ids_mannheim/korap/util/KrillString.java
@@ -55,4 +55,16 @@
return text.replace("&", "&amp;").replace("<", "&lt;")
.replace(">", "&gt;").replace("\"", "&quot;");
};
+
+
+    /**
+     * Serialize a string as a JSON string literal.
+     *
+     * The text is wrapped in double quotes. Embedded double quotes
+     * and backslashes are escaped with a backslash, and control
+     * characters below U+0020 are written as JSON escape sequences,
+     * so the result can be embedded in hand-built JSON.
+     * A {@code null} value is rendered as the quoted string
+     * {@code "null"} (as plain string concatenation would do)
+     * instead of raising an exception.
+     *
+     * @param text
+     *        The string to quote, may be {@code null}.
+     * @return The quoted and escaped string.
+     */
+    public static String quote (String text) {
+        // Callers pass exception messages, which may be null
+        final String value = String.valueOf(text);
+        final StringBuilder sb = new StringBuilder(value.length() + 2);
+        sb.append('"');
+        for (int i = 0; i < value.length(); i++) {
+            final char c = value.charAt(i);
+            switch (c) {
+                case '"':
+                case '\\':
+                    sb.append('\\').append(c);
+                    break;
+                case '\n':
+                    sb.append("\\n");
+                    break;
+                case '\r':
+                    sb.append("\\r");
+                    break;
+                case '\t':
+                    sb.append("\\t");
+                    break;
+                case '\b':
+                    sb.append("\\b");
+                    break;
+                case '\f':
+                    sb.append("\\f");
+                    break;
+                default:
+                    if (c < 0x20) {
+                        // Remaining control characters are not
+                        // allowed unescaped in JSON strings
+                        sb.append(String.format("\\u%04x", (int) c));
+                    }
+                    else {
+                        sb.append(c);
+                    };
+            };
+        };
+        sb.append('"');
+        return sb.toString();
+    };
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
index 2a892b5..6db3030 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestKrillIndex.java
@@ -15,8 +15,11 @@
import de.ids_mannheim.korap.KrillIndex;
import de.ids_mannheim.korap.KrillQuery;
+import de.ids_mannheim.korap.query.QueryBuilder;
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.index.MultiTermTokenStream;
+import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.util.QueryException;
@RunWith(JUnit4.class)
public class TestKrillIndex {
@@ -120,6 +123,37 @@
// hasDeletions, hasPendingMerges
};
+ /*
+ * This test demonstrates how canonically equivalent Unicode
+ * strings behave in the index: the decomposed form
+ * (n followed by a combining tilde) and the precomposed form
+ * (n with tilde) are indexed as given, and a query for either
+ * form is expected to yield exactly one result.
+ */
+ @Test
+ public void indexUnicode () throws IOException, QueryException {
+ KrillIndex ki = new KrillIndex();
+
+ FieldDocument fd = new FieldDocument();
+ fd.addString("name", "Peter");
+
+ // These values are canonically equivalent
+ // But indexed as byte sequences
+ // First token: decomposed form, second token: precomposed form
+ fd.addTV("base",
+ new String("ju" + "\u006E" + "\u0303" + "o") +
+ " " +
+ new String("ju" + "\u00F1" + "o"),
+ "[(0-5)s:ju" + "\u006E" + "\u0303" + "o|_0$<i>0<i>5|-:t$<i>2]"
+ + "[(6-10)s:ju" + "\u00F1" + "o|_1$<i>6<i>10]");
+ ki.addDoc(fd);
+ ki.commit();
+
+ assertEquals(1, ki.numberOf("base", "documents"));
+
+ // Query with the precomposed form
+ QueryBuilder kq = new QueryBuilder("base");
+ Result kr = ki.search(kq.seg("s:ju" + "\u00F1" + "o").toQuery());
+ assertEquals(1, kr.getTotalResults());
+
+ // Query with the decomposed form
+ kr = ki.search(kq.seg("s:ju" + "\u006E" + "\u0303" + "o").toQuery());
+ assertEquals(1, kr.getTotalResults());
+ };
+
@Test
public void indexFieldInfo () throws IOException {
KrillIndex ki = new KrillIndex();
diff --git a/src/test/java/de/ids_mannheim/korap/util/TestKrillString.java b/src/test/java/de/ids_mannheim/korap/util/TestKrillString.java
index 3a437c5..43d226c 100644
--- a/src/test/java/de/ids_mannheim/korap/util/TestKrillString.java
+++ b/src/test/java/de/ids_mannheim/korap/util/TestKrillString.java
@@ -19,4 +19,12 @@
assertEquals("Er sagte: &quot;Das ist ja toll!&quot;",
escapeHTML("Er sagte: \"Das ist ja toll!\""));
};
+
+ // Checks quote(): surrounding double quotes are added, and
+ // embedded double quotes and backslashes are backslash-escaped.
+ @Test
+ public void testQuote () {
+ // Plain text is only wrapped in double quotes
+ assertEquals("\"hallo\"", quote("hallo"));
+ // Single quotes are left untouched
+ assertEquals("\"h'all'o\"", quote("h'all'o"));
+ // Embedded double quotes are escaped
+ assertEquals("\"er sagte: \\\"Hallo!\\\"\"", quote("er sagte: \"Hallo!\""));
+ // A backslash followed by a double quote: both are escaped
+ assertEquals("\"a \\\\\\\" b\"", quote("a \\\" b"));
+ };
};