Fix focus query (fixes #78)
Change-Id: I6bcfb2304c3d77d362a9be272f3a2d1805cdce72
diff --git a/Changes b/Changes
index c02f7ff..dfc10cd 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,7 @@
+0.59.6 2021-10-26
+ - [bugfix] Fixed skipping of focus spans (fixed #78; margaretha,
+ diewald)
+
0.59.5 2021-09-30
- [bugfix] Fixed candidate settings in token distance spans
(margaretha, diewald)
diff --git a/pom.xml b/pom.xml
index 405301a..d0f48e1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
<groupId>de.ids_mannheim.korap</groupId>
<artifactId>Krill</artifactId>
- <version>0.59.5</version>
+ <version>0.59.6</version>
<packaging>jar</packaging>
<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
index b341c77..c694f26 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
@@ -14,6 +14,9 @@
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.Bits;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import de.ids_mannheim.korap.constants.RelationDirection;
import de.ids_mannheim.korap.query.SpanFocusQuery;
@@ -45,6 +48,9 @@
private List<Byte> classNumbers;
private SpanQuery query;
+ // Logger
+ private final Logger log = LoggerFactory.getLogger(FocusSpans.class);
+
// This advices the java compiler to ignore all loggings
public static final boolean DEBUG = false;
@@ -99,6 +105,11 @@
matchPayload.clear();
spanId = 0;
CandidateSpan cs;
+
+ if (DEBUG) {
+ log.debug("FirstSpan: ({}) {}-{}", firstSpans.doc(), firstSpans.start(), firstSpans.end());
+ }
+
while (hasMoreSpans || candidates.size() > 0) {
if (isSorted) {
@@ -266,7 +277,7 @@
}
}
}
- if (firstSpans.doc() == target) {
+ if (firstSpans.doc() == target || firstSpans.doc() > target) {
return next();
}
if (firstSpans.doc() < target && firstSpans.skipTo(target)) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
index ab5672b..1068e58 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
@@ -62,7 +62,7 @@
@Override
- public boolean next () throws IOException {
+ public boolean next () throws IOException {
isStartEnumeration = false;
matchPayload.clear();
return advance();
@@ -80,7 +80,6 @@
* @throws IOException
*/
private boolean advance () throws IOException {
-
while (hasMoreSpans || !matchList.isEmpty()
|| !candidateList.isEmpty()) {
@@ -119,7 +118,8 @@
hasMoreFirstSpan = firstSpans.next();
if (hasMoreFirstSpan){
if (DEBUG) {
- log.debug("FirstSpan "+firstSpans.start() +","+firstSpans.end());
+ log.debug("FirstSpan [{}]", firstSpans.toString());
+ log.debug("FirstSpan ({}) {}-{}", firstSpans.doc(), firstSpans.start(), firstSpans.end());
}
setMatchList();
}
@@ -175,7 +175,7 @@
*/
private void searchCandidates () throws IOException {
if (DEBUG) {
- log.debug(candidateList.toString());
+ log.debug("CandidateList: {}", candidateList.toString());
};
Iterator<CandidateSpan> i = candidateList.iterator();
CandidateSpan cs;
@@ -208,7 +208,8 @@
while (hasMoreSpans && candidateListDocNum == secondSpans.doc()) {
if (DEBUG) {
- log.debug("SecondSpan " +secondSpans.start() + "," + secondSpans.end());
+ log.debug("SecondSpan [{}]", secondSpans.toString());
+ log.debug("SecondSpan ({}) {}-{}", secondSpans.doc(), secondSpans.start(), secondSpans.end());
};
if (secondSpans.start() > firstSpans.end()) {
break;
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFocusIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestFocusIndex.java
index 36284ba..99e2a5a 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFocusIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFocusIndex.java
@@ -173,7 +173,6 @@
assertEquals(1, kr.getTotalResults());
}
-
@Test
public void testFocusInNext () throws QueryException, IOException {
ki = new KrillIndex();
@@ -192,10 +191,11 @@
ki.addDoc(simpleFieldDoc("dbc"));
ki.addDoc(simpleFieldDoc("bca"));
ki.addDoc(simpleFieldDoc("abcd")); // 1xMatch
+
ki.commit();
QueryBuilder kq = new QueryBuilder("base");
-
+
SpanQueryWrapper focus = kq.seq(kq.seg("s:b"),kq.focus(kq.seq(kq.seg("s:a"),kq.seg("s:b"),kq.nr(1, kq.seg("s:c")))));
assertEquals("spanNext(base:s:b, focus(1: spanNext(spanNext(base:s:a, base:s:b), {1: base:s:c})))", focus.toQuery().toString());
@@ -209,8 +209,47 @@
assertEquals(5, kr.getTotalResults());
}
-
-
+
+ @Test
+ public void testFocusInNextBug () throws QueryException, IOException {
+ ki = new KrillIndex();
+ // Regression test accompanying the focus-span skipping fix (#78): three small documents, one expected hit.
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-1"); // tokens s:b, s:b, s:c; a:d on the second token, a:b nowhere
+ fd.addTV("base", "bbc",
+ "[(0-1)s:b|<>:base/s:t$<b>64<i>0<i>3<i>3<b>0|_0$<i>0<i>1]"+
+ "[(1-2)s:b|a:d|a:d|_1$<i>1<i>2]"+
+ "[(2-3)s:c|_2$<i>2<i>3]"
+ );
+ ki.addDoc(fd);
+
+ fd = new FieldDocument();
+ fd.addString("ID", "doc-2"); // single token s:c annotated with a:b; no s:b token
+ fd.addTV("base", "c",
+ "[(0-1)s:c|<>:base/s:t$<b>64<i>0<i>1<i>1<b>0|a:b|_0$<i>0<i>1]"
+ );
+ ki.addDoc(fd);
+
+ fd = new FieldDocument();
+ fd.addString("ID", "doc-3"); // tokens s:b, s:c, s:c; a:b on the first and second token
+ fd.addTV("base", "bcc",
+ "[(0-1)s:b|<>:base/s:t$<b>64<i>0<i>3<i>3<b>0|a:b|_0$<i>0<i>1]"+
+ "[(1-2)s:c|a:d|a:b|_1$<i>1<i>2]"+
+ "[(2-3)s:c|_2$<i>2<i>3]"
+ );
+ ki.addDoc(fd);
+
+ ki.commit();
+
+ QueryBuilder kq = new QueryBuilder("base");
+ // Query: a:b in sequence with a focus over the sequence (s:b, class-1 s:c).
+ SpanQueryWrapper focus = kq.seq(kq.seg("a:b"),kq.focus(kq.seq(kq.seg("s:b"),kq.nr(1, kq.seg("s:c")))));
+ kr = ki.search(focus.toQuery(), (short) 10);
+ // NOTE(review): the single hit is presumably in doc-3 (a:b at 0-1, then s:c at 1-2 following s:b at 0-1) - confirm.
+ assertEquals(1, kr.getTotalResults());
+ }
+
+
public static FieldDocument createFieldDoc () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-0");