Do not allow optional Lucene Regex extensions
Change-Id: Iceb8c15eb9767969c9e0401bc0d962d637e47bae
diff --git a/Changes b/Changes
index 8a924fa..a3e09db 100644
--- a/Changes
+++ b/Changes
@@ -1,8 +1,9 @@
-0.64.5 2025-11-21
+0.64.5 2025-12-03
- [maintenance] Update to Java 21 (diewald)
- [enhancement] Alter vcNamePattern to allow system VC names with less
than 3 characters (margaretha)
- [bugfix] Making totalResult counter long to prevent overflows (diewald)
+ - [bugfix] Do not allow optional Lucene Regex extensions (diewald)
0.64.4 2025-09-17
- [feature] Added --progress option to Krill-Indexer (kupietz)
diff --git a/src/main/java/de/ids_mannheim/korap/KrillQuery.java b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
index 78aab5a..82d3086 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
@@ -6,7 +6,6 @@
import java.util.List;
import java.util.Iterator;
-import org.apache.lucene.search.RegexpQuery;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
index bc3c7ff..4464302 100644
--- a/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
+++ b/src/main/java/de/ids_mannheim/korap/collection/CollectionBuilder.java
@@ -11,6 +11,7 @@
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.util.automaton.RegExp;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -215,7 +216,7 @@
if (this.regex)
return new QueryWrapperFilter(
new RegexpQuery(new org.apache.lucene.index.Term(
- this.field, this.term)));
+ this.field, this.term), RegExp.NONE));
// Simple term
return new TermsFilter(
diff --git a/src/main/java/de/ids_mannheim/korap/query/QueryBuilder.java b/src/main/java/de/ids_mannheim/korap/query/QueryBuilder.java
index ab69754..7eb75a6 100644
--- a/src/main/java/de/ids_mannheim/korap/query/QueryBuilder.java
+++ b/src/main/java/de/ids_mannheim/korap/query/QueryBuilder.java
@@ -34,7 +34,7 @@
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
-
+
// <legacy>
public static final byte OVERLAP = SpanWithinQuery.OVERLAP,
REAL_OVERLAP = SpanWithinQuery.REAL_OVERLAP,
@@ -69,7 +69,7 @@
* @return A {@link SpanRegexQueryWrapper} object.
*/
public SpanRegexQueryWrapper re (String re) {
- return new SpanRegexQueryWrapper(this.field, re, RegExp.ALL, false);
+ return new SpanRegexQueryWrapper(this.field, re);
};
@@ -154,7 +154,7 @@
* @return A {@link SpanRegexQueryWrapper} object.
*/
public SpanRegexQueryWrapper re (String re, boolean caseinsensitive) {
- return new SpanRegexQueryWrapper(this.field, re, RegExp.ALL,
+ return new SpanRegexQueryWrapper(this.field, re,
caseinsensitive);
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRegexQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRegexQueryWrapper.java
index 9f777c0..fad96d9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRegexQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanRegexQueryWrapper.java
@@ -14,8 +14,10 @@
private SpanQuery query;
public String error = null;
+ private static int regexFlag = RegExp.NONE;
+
public SpanRegexQueryWrapper (String field, String re) {
- this(field, re, RegExp.ALL, false);
+ this(field, re, regexFlag, false);
};
@@ -26,7 +28,7 @@
public SpanRegexQueryWrapper (String field, String re,
boolean caseinsensitive) {
- this(field, re, RegExp.ALL, caseinsensitive);
+ this(field, re, regexFlag, caseinsensitive);
};
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
index bc113ef..5877547 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestKrillCollectionIndex.java
@@ -172,6 +172,10 @@
kcn.fromBuilder(cb.re("author", ".*an.*"));
assertEquals(2, kcn.docCount());
+ // Optional regex operators (e.g. "@") are disabled, so "@an@" is matched literally
+ kcn.fromBuilder(cb.re("author", "@an@"));
+ assertEquals(0, kcn.docCount());
+
// Kultur & Reisen,
// Reisen & Finanzen,
// Nachricht & Kultur & Reisen
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestIndex.java
index 477be58..3a10f1b 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestIndex.java
@@ -318,6 +318,13 @@
srquery = new RegexpQuery(new Term("text", "s:.*ng.*"));
assertEquals(2, searcher.search(srquery, 10).totalHits);
+ // All docs containing "ng" ("@" is the optional any-string operator, enabled by default in RegexpQuery)
+ srquery = new RegexpQuery(new Term("text", "s:@ng@"));
+ assertEquals(2, searcher.search(srquery, 10).totalHits);
+
+ // All docs containing the literal "@ng@" (SpanRegexQueryWrapper disables optional regex operators)
+ ssrquery = new SpanRegexQueryWrapper("text", "s:@ng@");
+ assertEquals(0, searcher.search(ssrquery.toQuery(), 10).totalHits);
// Check http://comments.gmane.org/gmane.comp.jakarta.lucene.user/52283
// for Carstens question on wildcards