Turn recursive call into loop to avoid stack overflow error (fixes #121)
Change-Id: I889a8eb7b02f76c46ce53afd7a81b2473dfee2c4
diff --git a/Changes b/Changes
index 0a56705..56eb739 100644
--- a/Changes
+++ b/Changes
@@ -5,6 +5,9 @@
(diewald; AI-assisted Claude Opus 4.6)
- [bugfix] Support regular expressions in attribute queries
(fixes #80; diewald; AI-assisted Claude Opus 4.6)
+ - [bugfix] Fix StackOverflowError in ExpandedSpans by turning
+ a recursive call into a loop
+ (fixes #121; diewald; AI-assisted Claude Opus 4.6)
0.64.6 2026-03-09
- [performance] Add leaf cache. (diewald)
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
index cb2bbd7..4349000 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
@@ -118,53 +118,63 @@
CandidateSpan cs;
int counter, start, end = 0;
+ boolean continueCandidateLoop = true;
+
if (direction < 0) { // left
- counter = max;
- while (counter >= min) {
- start = firstSpans.start() - counter;
- if (start >= 0) {
- cs = new CandidateSpan(start, firstSpans.end(),
- firstSpans.doc(), firstSpans.cost(),
- createPayloads(start, firstSpans.start()));
+ while (continueCandidateLoop) {
+ continueCandidateLoop = false;
- candidateSpans.add(cs);
+ counter = max;
+ while (counter >= min) {
+ start = firstSpans.start() - counter;
+ if (start >= 0) {
+ cs = new CandidateSpan(start, firstSpans.end(),
+ firstSpans.doc(), firstSpans.cost(),
+ createPayloads(start, firstSpans.start()));
+
+ candidateSpans.add(cs);
+ }
+ counter--;
}
- counter--;
- }
- int lastPosition = firstSpans.start();
- if (hasMoreSpans && (hasMoreSpans = firstSpans.next())) {
- start = Math.max(0, firstSpans.start() - max);
- if (DEBUG) {
- log.debug("next candidate start: " + start + ", lastPosition "
- + lastPosition);
- };
- if (start <= lastPosition) {
- setCandidateList();
+ int lastPosition = firstSpans.start();
+ if (hasMoreSpans && (hasMoreSpans = firstSpans.next())) {
+ start = Math.max(0, firstSpans.start() - max);
+ if (DEBUG) {
+ log.debug("next candidate start: " + start + ", lastPosition "
+ + lastPosition);
+ };
+ if (start <= lastPosition) {
+ continueCandidateLoop = true;
+ }
}
}
}
else {
- counter = min;
- while (counter <= max) {
- // TODO: How do I know if the end is already too far
- // (over the end of the doc)?
- end = firstSpans.end() + counter;
- cs = new CandidateSpan(firstSpans.start(), end,
- firstSpans.doc(), firstSpans.cost(),
- createPayloads(firstSpans.end(), end));
- candidateSpans.add(cs);
- counter++;
- }
+ while (continueCandidateLoop) {
+ continueCandidateLoop = false;
- int lastPosition = end;
- if (hasMoreSpans && (hasMoreSpans = firstSpans.next())) {
- if (DEBUG) {
- log.debug("next candidate start: " + firstSpans.start()
- + ", lastPosition " + lastPosition);
- };
- if (firstSpans.start() <= lastPosition) {
- setCandidateList();
+ counter = min;
+ while (counter <= max) {
+ // TODO: How do I know if the end is already too far
+ // (over the end of the doc)?
+ end = firstSpans.end() + counter;
+ cs = new CandidateSpan(firstSpans.start(), end,
+ firstSpans.doc(), firstSpans.cost(),
+ createPayloads(firstSpans.end(), end));
+ candidateSpans.add(cs);
+ counter++;
+ }
+
+ int lastPosition = end;
+ if (hasMoreSpans && (hasMoreSpans = firstSpans.next())) {
+ if (DEBUG) {
+ log.debug("next candidate start: " + firstSpans.start()
+ + ", lastPosition " + lastPosition);
+ };
+ if (firstSpans.start() <= lastPosition) {
+ continueCandidateLoop = true;
+ }
}
}
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
index 2f0fe80..dde8645 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
@@ -3,6 +3,8 @@
import static de.ids_mannheim.korap.TestSimple.getJsonString;
import static de.ids_mannheim.korap.TestSimple.simpleFieldDoc;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Arrays;
@@ -731,7 +733,51 @@
assertEquals(14, kr.getTotalResults());
}
-
+
+ // Issue #121:
+ // StackOverflowError in ExpandedSpans.setCandidateList()
+ // when searching [tt/p="NN"] []* [tt/p="NN"] on a large document.
+ @Test
+ public void testExpansionOverflowBug () throws IOException, QueryException {
+
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-overflow");
+
+ int tokenCount = 5000;
+ StringBuilder surface = new StringBuilder();
+ StringBuilder annotation = new StringBuilder();
+
+ for (int i = 0; i < tokenCount; i++) {
+ String word = "tok" + i;
+ surface.append(word);
+ annotation.append("[(").append(i).append("-").append(i + 1).append(")");
+ annotation.append("s:").append(word);
+ annotation.append("|tt/p:NN");
+ if (i == 0) {
+ annotation.append("|<>:base/s:t$<b>64<i>0<i>")
+ .append(tokenCount).append("<i>")
+ .append(tokenCount).append("<b>0");
+ }
+ annotation.append("|_").append(i).append("$<i>").append(i)
+ .append("<i>").append(i + 1);
+ annotation.append("]");
+ }
+
+ fd.addTV("tokens", surface.toString(), annotation.toString());
+
+ KrillIndex ki = new KrillIndex();
+ ki.addDoc(fd);
+ ki.commit();
+
+ String json = getJsonString(getClass()
+ .getResource("/queries/bugs/expansion_overflow.jsonld").getFile());
+ Krill krill = new Krill(json);
+
+ Result kr = krill.apply(ki);
+ assertFalse("Search should not produce errors", kr.hasErrors());
+ assertEquals("Should find matches", 499849, kr.getTotalResults());
+ }
+
private FieldDocument createFieldDoc6 () {
FieldDocument fd = new FieldDocument();
diff --git a/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java b/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
index 1d40690..a9758f0 100644
--- a/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
+++ b/src/test/java/de/ids_mannheim/korap/query/TestKrillQueryJSON.java
@@ -856,4 +856,15 @@
assertEquals("Query type is not supported", e.getMessage());
};
};
+
+ @Test
+ public void queryJSONexpansionOverflow () throws QueryException {
+ SpanQueryWrapper sqwi = getJsonQuery(
+ getClass().getResource("/queries/bugs/expansion_overflow.jsonld").getFile());
+
+ // [tt/p="NN"] []* [tt/p="NN"]
+ assertEquals(
+ "spanNext(SpanMultiTermQueryWrapper(tokens:/tt/p:NN/), spanExpansion(SpanMultiTermQueryWrapper(tokens:/tt/p:NN/), []{0, 100}, left))",
+ sqwi.toQuery().toString());
+ };
};
diff --git a/src/test/resources/queries/bugs/expansion_overflow.jsonld b/src/test/resources/queries/bugs/expansion_overflow.jsonld
new file mode 100644
index 0000000..ec001cd
--- /dev/null
+++ b/src/test/resources/queries/bugs/expansion_overflow.jsonld
@@ -0,0 +1,44 @@
+{
+ "query": {
+ "operands": [
+ {
+ "@type": "koral:token",
+ "wrap": {
+ "foundry": "tt",
+ "@type": "koral:term",
+ "match": "match:eq",
+ "type": "type:regex",
+ "key": "NN",
+ "layer": "p"
+ }
+ },
+ {
+ "operands": [
+ {
+ "@type": "koral:token"
+ }
+ ],
+ "boundary": {
+ "min": 0,
+ "@type": "koral:boundary"
+ },
+ "@type": "koral:group",
+ "operation": "operation:repetition"
+ },
+ {
+ "@type": "koral:token",
+ "wrap": {
+ "foundry": "tt",
+ "@type": "koral:term",
+ "match": "match:eq",
+ "type": "type:regex",
+ "key": "NN",
+ "layer": "p"
+ }
+ }
+ ],
+ "@type": "koral:group",
+ "operation": "operation:sequence"
+ },
+ "@context": "http://korap.ids-mannheim.de/ns/koral/0.3/context.jsonld"
+}