Merge "Use character classes instead of alternations in regexes"
diff --git a/Changes b/Changes
index a09dc54..8cfb3d1 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,8 @@
-0.36 2020-06-16
+0.36 2020-07-15
- [security] Upgraded version of Google Guava
(CVE-2018-10237; diewald)
+ - Optimize regular expressions to use character classes
+ instead of alternations (kupietz)
0.35 2019-11-12
- Added a method to add warnings in QuerySerializer (margaretha)
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
index f2ed6e1..69a6293 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/Cosmas2QueryProcessor.java
@@ -800,7 +800,7 @@
private void processOPLABEL (Tree node) {
// Step I: create element
- String key = node.getChild(0).toStringTree().replaceAll("<|>", "");
+ String key = node.getChild(0).toStringTree().replaceAll("[<>]", "");
Map<String, Object> elem = KoralObjectGenerator.makeSpan(key);
// Step II: decide where to put
putIntoSuperObject(elem);
@@ -1539,7 +1539,7 @@
* Normalises position operators to equivalents using #BED
*/
private String rewritePositionQuery (String q) {
- Pattern p = Pattern.compile("(\\w+):((\\+|-)?(sa|se|pa|pe|ta|te),?)+");
+ Pattern p = Pattern.compile("(\\w+):(([+\\-])?(sa|se|pa|pe|ta|te),?)+");
Matcher m = p.matcher(q);
String rewrittenQuery = q;
@@ -1547,7 +1547,7 @@
String match = m.group();
String conditionsString = match.split(":")[1];
Pattern conditionPattern =
- Pattern.compile("(\\+|-)?(sa|se|pa|pe|ta|te)");
+ Pattern.compile("([+\\-])?(sa|se|pa|pe|ta|te)");
Matcher conditionMatcher =
conditionPattern.matcher(conditionsString);
StringBuilder replacement = new StringBuilder("#BED(" + m.group(1) + " , ");
diff --git a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
index 5845f70..0bfdc73 100644
--- a/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
+++ b/src/main/java/de/ids_mannheim/korap/query/serialize/QueryUtils.java
@@ -210,7 +210,7 @@
public static String escapeRegexSpecialChars (String key) {
key.replace("\\", "\\\\");
Pattern p = Pattern
- .compile("\\.|\\^|\\$|\\||\\?|\\*|\\+|\\(|\\)|\\[|\\]|\\{|\\}");
+ .compile("[.^$|?*+()\\[\\]{}]");
Matcher m = p.matcher(key);
while (m.find()) {
String match = m.group();