Fix #42 - Support verbatim string values in Poliqarp tokens

Change-Id: I4c0dd763f37f4f8f3eb454553e007858b34947e2
diff --git a/Changes b/Changes
index 4521dbb..e1a8869 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,7 @@
-0.29 2018-07-05
+0.29 2018-07-21
   - Added check for errors on QuerySerializer object (diewald)
+  - Support verbatim string values in Poliqarp
+    (fixes #42; diewald)
 
 0.28 2018-01-10
 	- Added some enums for koral:operation (margaretha)
diff --git a/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4 b/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4
index b3bedb1..d911511 100644
--- a/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4
+++ b/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4
@@ -51,7 +51,7 @@
 
 
 /** Simple strings and Simple queries */
-WS                  : [ \t]  -> skip ;
+WS                  : [ \t]  -> channel(HIDDEN);
 fragment FOCC       : '{' WS* ( [0-9]* WS* ',' WS* [0-9]+ | [0-9]+ WS* ','? ) WS* '}';
 fragment NO_RE      : ~[ \t\/];
 fragment ALPHABET   : ~('\t' | ' ' | '/' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
@@ -62,6 +62,7 @@
 
 WORD                : ALPHABET+;
 
+
 /* Complex queries */
 LPAREN      : '[';
 RPAREN      : ']';
@@ -84,6 +85,8 @@
 STAR		: '*';
 PLUS		: '+';
 EMPTYREL	: '@';
+BACKSLASH	: '\\';
+SQUOTE      : '\'';
 
 /* Regular expressions and Regex queries */
 fragment RE_symbol     : ~('*' | '?' | '+' | '{' | '}' | '[' | ']'
@@ -101,7 +104,9 @@
 fragment RE_occ      : (RE_char | RE_chgroup | ( '(' RE_expr ')')) FOCC;
 fragment RE_group    : '(' RE_expr ')';
 fragment RE_expr     : ('.' | RE_char | RE_alter | RE_chgroup | RE_opt | RE_quant | RE_group)+;
-fragment RE_dquote            : '"'  (RE_expr | '\'' | ':' )* '"';
-fragment RE_squote            : '\''  (RE_expr | '\"' | ':' )* '\'';
+fragment RE_dquote   : '"'  (RE_expr | '\'' | ':' )* '"';
+// Single-quoted strings are no longer regexes; the parser reads them as verbatim keys
  
-REGEX             : ( RE_dquote | RE_squote );
+REGEX             : RE_dquote;
+
+ESC_SQUOTE        : BACKSLASH SQUOTE;
diff --git a/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4 b/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4
index 9a597ca..1236956 100644
--- a/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4
+++ b/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4
@@ -44,9 +44,14 @@
 : REGEX
 ;
 
+/* Verbatim string: single-quoted; an escaped quote (\') does not end it */
+verbatim
+: SQUOTE (~SQUOTE | ESC_SQUOTE)* SQUOTE;
+
 key
 : (WORD
 | regex
+| verbatim
 | NUMBER)
 ;
 
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryProcessor.java
index 9f449c9..835c2df 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/CollectionQueryProcessor.java
@@ -276,9 +276,9 @@
 
         TokenStream stream = parser.getTokenStream();
         String stm = stream.getText(valueNode.getChild(0).getSourceInterval());
-        // todo: is this correct?
-        if (stm.startsWith("\"") && stm.endsWith("\""))
-            stm = stm.replaceAll("\"", "");
+        // Strip only the enclosing quotes; the length check guards a lone '"'
+        if (stm.length() >= 2 && stm.startsWith("\"") && stm.endsWith("\""))
+            stm = stm.substring(1, stm.length() - 1);
 
         if ("regex".equals(node_cat)) {
 			
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessor.java
index 3a2a594..7727011 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessor.java
@@ -799,8 +799,22 @@
             // process foundry
             if (foundryNode != null)
                 term.put("foundry", foundryNode.getText());
-            // process key: 'normal' or regex?
-            key = keyNode.getText();
+
+            // process key: 'normal', 'verbatim' or regex?
+            if (getNodeCat(keyNode.getChild(0)).equals("verbatim")) {
+                // Read the raw source text from the token stream: unlike
+                // getText() on the node, it keeps hidden-channel whitespace
+                TokenStream stream = parser.getTokenStream();
+                key = stream.getText(keyNode.getChild(0).getSourceInterval());
+
+                // Strip the enclosing quotes; the length check guards a lone '
+                if (key.length() >= 2 && key.startsWith("'") && key.endsWith("'"))
+                    key = key.substring(1, key.length() - 1);
+            }
+            else {
+                key = keyNode.getText();
+            }
+
             if (getNodeCat(keyNode.getChild(0)).equals("regex")) {
                 isRegex = true;
                 term.put("type", "type:regex");
@@ -830,8 +844,10 @@
                 }
             }
             // process value
-            if (valueNode != null)
+            if (valueNode != null) {
                 term.put("value", valueNode.getText());
+            }
+
             // process operator ("match" property)
             if (termOpNode != null) {
                 String termOp = termOpNode.getText();
diff --git a/src/test/java/de/ids_mannheim/korap/query/test/poliqarpplus/PoliqarpPlusQueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/query/test/poliqarpplus/PoliqarpPlusQueryProcessorTest.java
index f964f8b..9e8771e 100644
--- a/src/test/java/de/ids_mannheim/korap/query/test/poliqarpplus/PoliqarpPlusQueryProcessorTest.java
+++ b/src/test/java/de/ids_mannheim/korap/query/test/poliqarpplus/PoliqarpPlusQueryProcessorTest.java
@@ -97,12 +97,6 @@
         res = mapper.readTree(qs.toJSON());
         assertEquals(302, res.at("/errors/0/0").asInt());
         assertEquals(302, res.at("/errors/1/0").asInt());
-		/*
-        assertEquals("koral:token", res.at("/query/@type").asText());
-        assertEquals("Mann", res.at("/query/wrap/key").asText());
-        assertEquals("lemma", res.at("/query/wrap/layer").asText());
-        assertEquals("match:eq", res.at("/query/wrap/match").asText());
-		*/
 	}
 
     @Test
@@ -132,6 +126,26 @@
         assertEquals("match:eq", res.at("/query/wrap/match").asText());
     }
 
+	@Test
+    public void testVerbatimKeys () throws JsonProcessingException, IOException {
+        query = "[mate/b='Der + Mann']";
+        qs.setQuery(query, "poliqarpplus");
+		assertFalse(qs.hasErrors());
+		res = mapper.readTree(qs.toJSON());
+        assertEquals("koral:token", res.at("/query/@type").asText());
+        assertEquals("koral:term", res.at("/query/wrap/@type").asText());
+        assertEquals("Der + Mann", res.at("/query/wrap/key").asText());
+        assertEquals("b", res.at("/query/wrap/layer").asText());
+        assertEquals("mate", res.at("/query/wrap/foundry").asText());
+        assertEquals("match:eq", res.at("/query/wrap/match").asText());
+        // An escaped quote (\') does not end the value; the backslash stays in the key
+		query = "[mate/b='D\\'Ma nn']";
+        qs.setQuery(query, "poliqarpplus");
+		assertFalse(qs.hasErrors());
+		res = mapper.readTree(qs.toJSON());
+        assertEquals("D\\'Ma nn", res.at("/query/wrap/key").asText());
+	}
+
 
     // todo:
     @Test