Fixed handling of escape symbols in regex (issue #21)
Change-Id: Id78caf2071422183e29f8eb44686e6c5f4b55b0c
diff --git a/.gitignore b/.gitignore
index 8e98dc1..f993ee1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,8 @@
# /
+/sandbox
+/Sandbox
+/todo.org
/target
/.settings
/.project
@@ -8,6 +11,7 @@
.*
!.gitignore
+
/src/main/resources
# /src/main/antlr/cosmas/
diff --git a/Changes b/Changes
index 739f58c..f58784d 100644
--- a/Changes
+++ b/Changes
@@ -5,6 +5,9 @@
- Cleanup POM (diewald)
- Fix deserialization of unnecessary brackets
around terms and termGroups in Poliqarp (diewald)
+ - Support for FCS 2.0 (margaretha)
+ - Fixed handling of escapes in regex
+ (issue #21; diewald)
0.21 2015-10-27
- Improved meta query builder (hanl)
diff --git a/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4 b/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4
index 8648d87..b3bedb1 100644
--- a/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4
+++ b/src/main/antlr/poliqarpplus/PoliqarpPlusLexer.g4
@@ -86,9 +86,13 @@
EMPTYREL : '@';
/* Regular expressions and Regex queries */
-fragment RE_char : ~('*' | '?' | '+' | '{' | '}' | '[' | ']'
- | '(' | ')' | '|' | '"' | ':' | '\'' | '\\');
+fragment RE_symbol : ~('*' | '?' | '+' | '{' | '}' | '[' | ']'
+ | '(' | ')' | '|' | '\\' | '"' | ':' | '\'');
+fragment RE_esc : '\\' ('.' | '*' | '?' | '+' | '{' | '}' | '[' | ']'
+ | '(' | ')' | '|' | '\\' | '"' | ':' | '\'');
+fragment RE_char : (RE_symbol | RE_esc );
fragment RE_alter : ((RE_char | ('(' RE_expr ')') | RE_chgroup) '|' RE_expr )+;
+
fragment RE_chgroup : '[' RE_char+ ']';
fragment RE_quant : (RE_star | RE_plus | RE_occ) QMARK?;
fragment RE_opt : (RE_char | RE_chgroup | ( '(' RE_expr ')')) '?';
diff --git a/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4 b/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4
index 1907773..571cf12 100644
--- a/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4
+++ b/src/main/antlr/poliqarpplus/PoliqarpPlusParser.g4
@@ -43,9 +43,9 @@
;
key
-: WORD
+: (WORD
| regex
-| NUMBER
+| NUMBER)
;
foundry
@@ -149,7 +149,7 @@
;
alignment
-: segment? (CARET segment)* CARET?
+: segment? ( (CARET segment)+ | CARET)
;
disjunction
diff --git a/src/test/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessorTest.java b/src/test/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessorTest.java
index 2cb60e3..520d9a4 100644
--- a/src/test/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessorTest.java
+++ b/src/test/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusQueryProcessorTest.java
@@ -155,18 +155,42 @@
assertEquals("type:regex", res.at("/query/wrap/type").asText());
assertEquals("orth", res.at("/query/wrap/layer").asText());
assertEquals("match:eq", res.at("/query/wrap/match").asText());
+ }
+ @Test
+ public void testRegexEscape () throws JsonProcessingException, IOException {
// Escape regex symbols
+ query = "\"a.+?\"";
+ qs.setQuery(query, "poliqarpplus");
+ res = mapper.readTree(qs.toJSON());
+ assertEquals("koral:token", res.at("/query/@type").asText());
+ assertEquals("koral:term", res.at("/query/wrap/@type").asText());
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+ assertEquals("match:eq", res.at("/query/wrap/match").asText());
+ assertEquals("a.+?", res.at("/query/wrap/key").asText());
+
query = "\"a\\.\"";
qs.setQuery(query, "poliqarpplus");
res = mapper.readTree(qs.toJSON());
- System.out.println("QUERY IS " + res);
assertEquals("koral:token", res.at("/query/@type").asText());
assertEquals("koral:term", res.at("/query/wrap/@type").asText());
assertEquals("type:regex", res.at("/query/wrap/type").asText());
assertEquals("orth", res.at("/query/wrap/layer").asText());
assertEquals("match:eq", res.at("/query/wrap/match").asText());
assertEquals("a\\.", res.at("/query/wrap/key").asText());
+
+
+ query = "\"a\\.\\+\\?\\\\\"";
+ qs.setQuery(query, "poliqarpplus");
+ res = mapper.readTree(qs.toJSON());
+ assertEquals("koral:token", res.at("/query/@type").asText());
+ assertEquals("koral:term", res.at("/query/wrap/@type").asText());
+ assertEquals("type:regex", res.at("/query/wrap/type").asText());
+ assertEquals("orth", res.at("/query/wrap/layer").asText());
+ assertEquals("match:eq", res.at("/query/wrap/match").asText());
+ assertEquals("a\\.\\+\\?\\\\", res.at("/query/wrap/key").asText());
+
}