escaping implicit regex (denoted by flag /x)
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
index 0c82659..ade5135 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/PoliqarpPlusTree.java
@@ -159,7 +159,7 @@
if (flag.contains("x")) {
term.put("type", "type:regex");
if (!isRegex) {
- key = escapeRegexSpecialChars(key);
+ key = QueryUtils.escapeRegexSpecialChars(key);
}
term.put("key", ".*?"+key+".*?"); // overwrite key
}
@@ -338,10 +338,6 @@
openNodeCats.pop();
}
- private String escapeRegexSpecialChars(String key) {
- return Pattern.quote(key);
- }
-
/**
* Parses a repetition node
* @param node
@@ -451,7 +447,7 @@
else if (flag.contains("I")) term.put("caseInsensitive", false);
if (flag.contains("x")) {
if (!isRegex) {
- key = escapeRegexSpecialChars(key);
+ key = QueryUtils.escapeRegexSpecialChars(key);
}
term.put("key", ".*?"+key+".*?"); // flag 'x' allows submatches: overwrite key with appended .*?
term.put("type", "type:regex");
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
index e7a5a9b..61a97e2 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
@@ -7,6 +7,8 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
/**
* @author hanl
@@ -372,6 +374,20 @@
requestMap.put("@context", context);
}
+ /**
+ * Escapes every regex metacharacter in key so that it is matched literally.
+ * Each of \ . ^ $ | ? * + ( ) [ ] { } is prefixed with a single backslash.
+ * @param key the literal search string; must not be null
+ * @return key with all regex metacharacters backslash-escaped
+ */
+ public static String escapeRegexSpecialChars(String key) {
+ // The backslash is part of the class, so a literal '\' in key is escaped too.
+ Pattern special = Pattern.compile("[\\\\.^$|?*+()\\[\\]{}]");
+ Matcher m = special.matcher(key);
+ // "$0" re-inserts the character that matched; "\\\\" emits one literal backslash.
+ return m.replaceAll("\\\\$0");
+ }
+
}
diff --git a/src/test/java/PoliqarpPlusTreeTest.java b/src/test/java/PoliqarpPlusTreeTest.java
index 491e573..567395c 100644
--- a/src/test/java/PoliqarpPlusTreeTest.java
+++ b/src/test/java/PoliqarpPlusTreeTest.java
@@ -52,7 +52,7 @@
assertEquals(token4.replaceAll(" ", ""), map.replaceAll(" ", ""));
query = "[base=schland/x]";
- String token5 = "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=.*?\\Qschland\\E.*?, match=match:eq, type=type:regex}}";
+ String token5 = "{@type=korap:token, wrap={@type=korap:term, layer=lemma, key=.*?schland.*?, match=match:eq, type=type:regex}}";
ppt = new PoliqarpPlusTree(query);
map = ppt.getRequestMap().get("query").toString();
assertEquals(token5.replaceAll(" ", ""), map.replaceAll(" ", ""));
@@ -90,7 +90,7 @@
assertEquals(re3.replaceAll(" ", ""), map.replaceAll(" ", ""));
query = "schland/x";
- String re4 = "{@type=korap:token, wrap={@type=korap:term, layer=orth, key=.*?\\Qschland\\E.*?, match=match:eq, type=type:regex}}";
+ String re4 = "{@type=korap:token, wrap={@type=korap:term, layer=orth, key=.*?schland.*?, match=match:eq, type=type:regex}}";
ppt = new PoliqarpPlusTree(query);
map = ppt.getRequestMap().get("query").toString();
assertEquals(re4.replaceAll(" ", ""), map.replaceAll(" ", ""));
@@ -100,6 +100,12 @@
ppt = new PoliqarpPlusTree(query);
map = ppt.getRequestMap().get("query").toString();
assertEquals(re5.replaceAll(" ", ""), map.replaceAll(" ", ""));
+
+ query = "z.B./x";
+ String re6 = "{@type=korap:token, wrap={@type=korap:term, layer=orth, key=.*?z\\.B\\..*?, match=match:eq, type=type:regex}}";
+ ppt = new PoliqarpPlusTree(query);
+ map = ppt.getRequestMap().get("query").toString();
+ assertEquals(re6.replaceAll(" ", ""), map.replaceAll(" ", ""));
}
@Test