Correctly keep classes in repetition queries (fixes #59)
Change-Id: I7160ee479f37ca4d3c0c2be13a292b387462f8dc
diff --git a/Changes b/Changes
index f682bf0..d356bd8 100644
--- a/Changes
+++ b/Changes
@@ -22,6 +22,8 @@
(diewald)
- [feature] Uncomment krill.context.max.char property
(diewald; tests AI-assisted Claude Opus 4.6)
+ - [bugfix] Keep classes in repetition queries
+ (diewald; fixes #59; AI-assisted Claude Opus 4.6)
0.64.6 2026-03-09
- [performance] Add leaf cache. (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
index 2698133..71ff912 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
@@ -188,8 +188,6 @@
if (i == 1) {
try {
matchSpan = startSpan.clone();
- matchSpan.setPayloads(computeMatchPayload(adjacentSpans,
- 0, endIndex - 1));
if (DEBUG) {
log.debug("1. Add span to matchlist: {}-{} at {}",
@@ -210,8 +208,8 @@
startSpan.getStart(),
endSpan.getEnd(),
startSpan.getDoc(),
- computeMatchCost(adjacentSpans, 0, endIndex),
- computeMatchPayload(adjacentSpans, 0, endIndex)
+ computeMatchCost(adjacentSpans, j, endIndex),
+ computeMatchPayload(adjacentSpans, j, endIndex)
);
//System.out.println("c:"+matchSpan.getCost() +" p:"+ matchSpan.getPayloads().size());
//System.out.println(startSpan.getStart() +","+endSpan.getEnd());
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java
index c743157..7f4251b 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java
@@ -19,6 +19,7 @@
import de.ids_mannheim.korap.query.QueryBuilder;
import de.ids_mannheim.korap.Krill;
import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.query.SpanClassQuery;
import de.ids_mannheim.korap.query.SpanNextQuery;
import de.ids_mannheim.korap.query.SpanRepetitionQuery;
import de.ids_mannheim.korap.response.Match;
@@ -422,6 +423,28 @@
};
+ @Test
+ public void testRepetitionWithClassHighlights () throws IOException {
+ KrillIndex ki = new KrillIndex();
+ ki.addDoc(simpleFieldDoc("aab"));
+ ki.commit();
+
+ // spanRepetition({1: a}{1,2}) — repetition wrapping class ({a}{1,2})
+ SpanQuery sq = new SpanRepetitionQuery(
+ new SpanClassQuery(
+ new SpanTermQuery(new Term("base", "s:a")),
+ (byte) 1),
+ 1, 2, true);
+
+ Result kr = ki.search(sq, (short) 10);
+ assertEquals(3, kr.getTotalResults());
+ assertEquals("[[{1:a}]]ab", kr.getMatch(0).getSnippetBrackets());
+ // Each repeated 'a' keeps its own class 1 highlight; the two are not merged into one
+ assertEquals("[[{1:a}{1:a}]]b", kr.getMatch(1).getSnippetBrackets());
+ assertEquals("a[[{1:a}]]b", kr.getMatch(2).getSnippetBrackets());
+ };
+
+
/**
* This method creates a corpus using fuzzing to
* check for unexpected, failing constellations
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
index 4967eca..5947790 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
@@ -880,4 +880,39 @@
"spanNext(SpanMultiTermQueryWrapper(tokens:/tt/p:NN/), spanExpansion(SpanMultiTermQueryWrapper(tokens:/tt/p:NN/), []{0, 100}, left))",
sqwi.toQuery().toString());
};
+
+ @Test
+ public void queryJSONrepetitionClass () throws QueryException {
+ // {Wald}+ repetition wrapping class
+ SpanQueryWrapper sqwi = getJsonQuery(getClass()
+ .getResource("/queries/bugs/repetition_class.jsonld").getFile());
+
+ assertEquals(
+ "spanRepetition({1: tokens:s:Wald}{1,100})",
+ sqwi.toQuery().toString());
+ };
+
+
+ @Test
+ public void queryJSONclassRepetition () throws QueryException {
+ // {Wald+} class wrapping repetition
+ SpanQueryWrapper sqwi = getJsonQuery(getClass()
+ .getResource("/queries/bugs/class_repetition.jsonld").getFile());
+
+ assertEquals(
+ "{1: spanRepetition(tokens:s:Wald{1,100})}",
+ sqwi.toQuery().toString());
+ };
+
+
+ @Test
+ public void queryJSONrepetitionClassWithBounds () throws QueryException {
+ // {Wald}{2,3} repetition wrapping class with explicit bounds
+ SpanQueryWrapper sqwi = getJsonQuery(getClass()
+ .getResource("/queries/bugs/repetition_class_2.jsonld").getFile());
+
+ assertEquals(
+ "spanRepetition({2: tokens:s:Wald}{2,3})",
+ sqwi.toQuery().toString());
+ };
};
diff --git a/src/test/resources/queries/bugs/class_repetition.jsonld b/src/test/resources/queries/bugs/class_repetition.jsonld
new file mode 100644
index 0000000..40d5f23
--- /dev/null
+++ b/src/test/resources/queries/bugs/class_repetition.jsonld
@@ -0,0 +1,28 @@
+{
+ "query": {
+ "@type": "koral:group",
+ "classOut": 1,
+ "operands": [
+ {
+ "@type": "koral:group",
+ "boundary": {
+ "@type": "koral:boundary",
+ "min": 1
+ },
+ "operands": [
+ {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "Wald",
+ "layer": "orth",
+ "match": "match:eq"
+ }
+ }
+ ],
+ "operation": "operation:repetition"
+ }
+ ],
+ "operation": "operation:class"
+ }
+}
diff --git a/src/test/resources/queries/bugs/repetition_class.jsonld b/src/test/resources/queries/bugs/repetition_class.jsonld
new file mode 100644
index 0000000..e4a695d
--- /dev/null
+++ b/src/test/resources/queries/bugs/repetition_class.jsonld
@@ -0,0 +1,28 @@
+{
+ "query": {
+ "@type": "koral:group",
+ "boundary": {
+ "@type": "koral:boundary",
+ "min": 1
+ },
+ "operands": [
+ {
+ "@type": "koral:group",
+ "classOut": 1,
+ "operands": [
+ {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "Wald",
+ "layer": "orth",
+ "match": "match:eq"
+ }
+ }
+ ],
+ "operation": "operation:class"
+ }
+ ],
+ "operation": "operation:repetition"
+ }
+}
diff --git a/src/test/resources/queries/bugs/repetition_class_2.jsonld b/src/test/resources/queries/bugs/repetition_class_2.jsonld
new file mode 100644
index 0000000..9aa9278
--- /dev/null
+++ b/src/test/resources/queries/bugs/repetition_class_2.jsonld
@@ -0,0 +1,29 @@
+{
+ "query": {
+ "@type": "koral:group",
+ "boundary": {
+ "@type": "koral:boundary",
+ "min": 2,
+ "max": 3
+ },
+ "operands": [
+ {
+ "@type": "koral:group",
+ "classOut": 2,
+ "operands": [
+ {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "Wald",
+ "layer": "orth",
+ "match": "match:eq"
+ }
+ }
+ ],
+ "operation": "operation:class"
+ }
+ ],
+ "operation": "operation:repetition"
+ }
+}