Fix support for verbatim string values in collection queries
Change-Id: Id4fa67bd04b82a2398fb3bfcc34b4bbabfdd2fcd
diff --git a/Changes b/Changes
index e1a8869..b96ad94 100644
--- a/Changes
+++ b/Changes
@@ -1,7 +1,9 @@
-0.29 2018-07-21
+0.29 2018-07-23
- Added check for errors on QuerySerializer object (diewald)
- Support verbatim string values in Poliqarp
(fixes #42; diewald)
+ - Fix support for verbatim string values in collection queries
+ (diewald)
0.28 2018-01-10
- Added some enums for koral:operation (margaretha)
diff --git a/src/main/antlr/collection/CollectionQueryLexer.g4 b/src/main/antlr/collection/CollectionQueryLexer.g4
index 190481a..546bcc4 100644
--- a/src/main/antlr/collection/CollectionQueryLexer.g4
+++ b/src/main/antlr/collection/CollectionQueryLexer.g4
@@ -22,6 +22,7 @@
FLAG_ix : '/' (('i'|'I') ('x'|'X')? );
QUOTE : '"';
+BACKSLASH : '\\';
LRB : '(';
RRB : ')';
LB : '[';
@@ -51,6 +52,8 @@
// EM: allow ':' in ALPHABET
fragment ALPHA : [a-zA-Z];
+ESC_QUOTE : BACKSLASH QUOTE;
+
DIGIT : [0-9];
DATE
diff --git a/src/main/antlr/collection/CollectionQueryParser.g4 b/src/main/antlr/collection/CollectionQueryParser.g4
index 949cbcb..074f5aa 100644
--- a/src/main/antlr/collection/CollectionQueryParser.g4
+++ b/src/main/antlr/collection/CollectionQueryParser.g4
@@ -98,7 +98,7 @@
multiword
//: '"' ~'"'* '"'
-: QUOTE ~QUOTE* QUOTE
+: QUOTE (~QUOTE | ESC_QUOTE)* QUOTE
;
relation
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryProcessor.java
index 835c2df..c64bf6f 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryProcessor.java
@@ -277,8 +277,10 @@
TokenStream stream = parser.getTokenStream();
String stm = stream.getText(valueNode.getChild(0).getSourceInterval());
- if (stm.startsWith("\"") && stm.endsWith("\""))
- stm = stm.substring(1, stm.length()-1);
+ // Strip the enclosing quotes of a verbatim value and unescape \\ and \"
+ if (stm.startsWith("\"") && stm.endsWith("\"")) {
+ stm = stm.substring(1, stm.length()-1).replaceAll("\\\\\\\\","\\\\").replaceAll("\\\\\"", "\"");
+ };
if ("regex".equals(node_cat)) {
diff --git a/src/test/java/de/ids_mannheim/korap/query/test/collection/CollectionQueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/query/test/collection/CollectionQueryProcessorTest.java
index 465e534..75a1a91 100644
--- a/src/test/java/de/ids_mannheim/korap/query/test/collection/CollectionQueryProcessorTest.java
+++ b/src/test/java/de/ids_mannheim/korap/query/test/collection/CollectionQueryProcessorTest.java
@@ -90,7 +90,10 @@
assertEquals("title", res.at("/collection/key").asText());
assertEquals("Mannheim", res.at("/collection/value").asText());
assertEquals("match:contains", res.at("/collection/match").asText());
+ }
+ @Test
+ public void testVerbatim () throws JsonProcessingException, IOException {
collection = "title~\"IDS Mannheim\"";
qs.setQuery(query, ql);
qs.setCollection(collection);
@@ -99,7 +102,7 @@
assertEquals("title", res.at("/collection/key").asText());
assertEquals("IDS Mannheim", res.at("/collection/value").asText());
assertEquals("match:contains", res.at("/collection/match").asText());
-
+
collection = "title~\"IDS:Mannheim\"";
qs.setQuery(query, ql);
qs.setCollection(collection);
@@ -108,7 +111,16 @@
assertEquals("title", res.at("/collection/key").asText());
assertEquals("IDS:Mannheim", res.at("/collection/value").asText());
assertEquals("match:contains", res.at("/collection/match").asText());
- }
+
+ // With escapes
+ collection = "title~\"IDS \\\"Mon\\\\nem\\\"\"";
+ qs.setCollection(collection);
+ res = mapper.readTree(qs.toJSON());
+ assertEquals("koral:doc", res.at("/collection/@type").asText());
+ assertEquals("title", res.at("/collection/key").asText());
+ assertEquals("IDS \"Mon\\nem\"", res.at("/collection/value").asText());
+ assertEquals("match:contains", res.at("/collection/match").asText());
+ }
@Test
public void testFlag () throws JsonProcessingException, IOException {