Support regular expressions in attribute queries (fixes #80)
Change-Id: I34127945e3b4235857f31b1ac975361fbe3a991c
diff --git a/Changes b/Changes
index 5a580b1..0a56705 100644
--- a/Changes
+++ b/Changes
@@ -3,6 +3,8 @@
(diewald; fixes #177; diewald; AI-assisted Claude Opus 4.6)
- [bugfix] Correctly handle foundry and layer in attribute groups
(diewald; AI-assisted Claude Opus 4.6)
+ - [bugfix] Support regular expressions in attribute queries
+ (fixes #80; diewald; AI-assisted Claude Opus 4.6)
0.64.6 2026-03-09
- [performance] Add leaf cache. (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
index 0ae6922..a4b2123 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
@@ -7,7 +7,6 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
@@ -50,19 +49,19 @@
/**
* Constructs a SpanAttributeQuery based on the specified
- * {@link SpanTermQuery} and set whether payloads are to be
+ * {@link SpanQuery} and sets whether payloads are to be
* collected or
* not.
*
* @param firstClause
- * a {@link SpanTermQuery}
+ * a {@link SpanQuery}
* @param collectPayloads
* a boolean flag representing the value
* <code>true</code> if payloads are to be collected,
* otherwise
* <code>false</code>.
*/
- public SpanAttributeQuery (SpanTermQuery firstClause,
+ public SpanAttributeQuery (SpanQuery firstClause,
boolean collectPayloads) {
super(firstClause, collectPayloads);
}
@@ -70,7 +69,7 @@
/**
* Constructs a SpanAttributeQuery based on the specified
- * {@link SpanTermQuery}, which is also marked for
+ * {@link SpanQuery}, which is also marked for
* negation/omission when
* matching to element/relation spans. Additionally set whether
* payloads are
@@ -90,7 +89,7 @@
* otherwise
* <code>false</code>.
*/
- public SpanAttributeQuery (SpanTermQuery firstClause, boolean negation,
+ public SpanAttributeQuery (SpanQuery firstClause, boolean negation,
boolean collectPayloads) {
super(firstClause, collectPayloads);
this.negation = negation;
@@ -101,7 +100,7 @@
@Override
public SimpleSpanQuery clone () {
SpanAttributeQuery sq = new SpanAttributeQuery(
- (SpanTermQuery) this.firstClause.clone(), this.negation,
+ (SpanQuery) this.firstClause.clone(), this.negation,
this.collectPayloads);
sq.setBoost(getBoost());
return sq;
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAttributeQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAttributeQueryWrapper.java
index 616fad5..fe4df54 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAttributeQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanAttributeQueryWrapper.java
@@ -1,7 +1,6 @@
package de.ids_mannheim.korap.query.wrap;
import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
import de.ids_mannheim.korap.query.SpanAttributeQuery;
import de.ids_mannheim.korap.util.QueryException;
@@ -47,12 +46,6 @@
return null;
}
- if (sq instanceof SpanTermQuery) {
- return new SpanAttributeQuery((SpanTermQuery) sq, isNegative, true);
- }
- else {
- throw new QueryException(
- "SpanAttributeQuery only supports SpanTermQuery.");
- }
+ return new SpanAttributeQuery(sq, isNegative, true);
}
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithAttributeQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithAttributeQueryWrapper.java
index 70e816a..b4bc359 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithAttributeQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanWithAttributeQueryWrapper.java
@@ -4,13 +4,11 @@
import java.util.List;
import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
import de.ids_mannheim.korap.query.SimpleSpanQuery;
import de.ids_mannheim.korap.query.SpanAttributeQuery;
import de.ids_mannheim.korap.query.SpanWithAttributeQuery;
import de.ids_mannheim.korap.util.QueryException;
-import de.ids_mannheim.korap.util.StatusCodes;
/**
* No optimization using expansion
@@ -178,14 +176,8 @@
if (sq != null) {
if (sq instanceof SpanAttributeQuery)
return (SpanAttributeQuery) sq;
- if (sq instanceof SpanTermQuery) {
- return new SpanAttributeQuery((SpanTermQuery) sq,
- attrQueryWrapper.isNegative, true);
- }
- else {
- throw new QueryException(StatusCodes.UNSUPPORTED_QUERY,
- "SpanAttributeQuery only supports SpanTermQuery.");
- }
+ return new SpanAttributeQuery(sq,
+ attrQueryWrapper.isNegative, true);
}
return null;
}
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestSpanWithAttributeJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestSpanWithAttributeJSON.java
index 877a37f..73b21a0 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestSpanWithAttributeJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestSpanWithAttributeJSON.java
@@ -9,7 +9,6 @@
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
import de.ids_mannheim.korap.util.QueryException;
-import de.ids_mannheim.korap.util.StatusCodes;
public class TestSpanWithAttributeJSON {
@Test
@@ -19,13 +18,10 @@
"/queries/attribute/element-regex-attribute.jsonld")
.getFile();
SpanQueryWrapper sqwi = getJsonQuery(filepath);
-
- QueryException exception = assertThrows(QueryException.class, () -> {
- sqwi.toQuery();
- });
- assertEquals("SpanAttributeQuery only supports SpanTermQuery.",
- exception.getMessage());
- assertEquals(StatusCodes.UNSUPPORTED_QUERY, exception.getErrorCode());
+ SpanQuery sq = sqwi.toQuery();
+ assertEquals(
+ "spanElementWithAttribute(<tokens:head />, spanAttribute(SpanMultiTermQueryWrapper(tokens:/@:type:top.*/)))",
+ sq.toString());
}
@Test