Fix regex with non-enclosed alternatives (fixes #243)
Change-Id: I3c26f8ae32c210cd1de9756ca5bfe2d90032bda5
diff --git a/Changes b/Changes
index 56eb739..a522f7a 100644
--- a/Changes
+++ b/Changes
@@ -8,6 +8,8 @@
- [bugfix] Fix StackOverflowError in ExpandedSpans by turning
a recursive call into a loop
(fixes #121; diewald; AI-assisted Claude Opus 4.6)
+ - [bugfix] Fix regex alternations not enclosed in a group
+ (fixes #243; diewald; AI-assisted Claude Opus 4.6)
0.64.6 2026-03-09
- [performance] Add leaf cache. (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/KrillQuery.java b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
index c71eb1a..ae21a82 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
@@ -1396,6 +1396,12 @@
return new SpanRepetitionQueryWrapper();
};
+ // Group everything after the prefix, so a top-level pipe does not split off the prefix (e.g. s:der|die)
+ if (offset > 0 && v.length() > offset
+ && v.substring(offset).contains("|")) {
+ v = v.substring(0, offset) + "(" + v.substring(offset) + ")";
+ }
+
SpanRegexQueryWrapper srqw = qb.re(v, isCaseInsensitive);
if (srqw.error != null) {
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
index a9758f0..4967eca 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
@@ -735,6 +735,19 @@
};
@Test
+ public void queryJSONregexAlternation () throws QueryException {
+ // /der|die/ - alternation without an enclosing group; expected to be grouped behind the "s:" prefix
+ String json = getJsonString(getClass()
+ .getResource("/queries/segment/regex-alternation.jsonld")
+ .getFile());
+ KrillQuery kq = new KrillQuery("tokens");
+
+ assertEquals(
+ "SpanMultiTermQueryWrapper(tokens:/s:(der|die)/)",
+ kq.fromKoral(json).toQuery().toString());
+ };
+
+ @Test
public void queryJSONregexFail () {
// "Leserin.{,3}"
String json = getJsonString(getClass()
diff --git a/src/test/resources/queries/segment/regex-alternation.jsonld b/src/test/resources/queries/segment/regex-alternation.jsonld
new file mode 100644
index 0000000..9fb207e
--- /dev/null
+++ b/src/test/resources/queries/segment/regex-alternation.jsonld
@@ -0,0 +1,13 @@
+{
+ "query":{
+ "@type":"koral:token",
+ "wrap":{
+ "@type":"koral:term",
+ "match":"match:eq",
+ "type":"type:regex",
+ "layer":"orth",
+ "key":"der|die"
+ }
+ },
+ "@context":"http://korap.ids-mannheim.de/ns/koral/0.3/context.jsonld"
+}