Fixed ensuring that spans are in the same document (solved #87)
Change-Id: I603db8dce99a36a430a47ca6188457573c7a096e
diff --git a/Changes b/Changes
index bf020da..74018eb 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,7 @@
+0.61.1 2023-02-14
+ - [bugfix] Fixed ensuring that spans are in the same document
+ (solved #87, margaretha)
+
0.61.0 2022-11-16
- [cleanup] Remove ehcache from dependencies (diewald)
- [security] Update to Java 11 for consistency.
diff --git a/pom.xml b/pom.xml
index 22dc3ed..3198f40 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,7 @@
<groupId>de.ids_mannheim.korap</groupId>
<artifactId>Krill</artifactId>
- <version>0.61.0</version>
+ <version>0.61.1</version>
<packaging>jar</packaging>
<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 7f807fa..cf2187d 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -3,12 +3,9 @@
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
// Java core classes
import java.util.ArrayList;
-import java.util.Base64;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -58,7 +55,6 @@
import com.fasterxml.jackson.databind.ObjectMapper;
-import de.ids_mannheim.korap.cache.VirtualCorpusCache;
// Krill classes
import de.ids_mannheim.korap.index.FieldDocument;
import de.ids_mannheim.korap.index.KeywordAnalyzer;
@@ -77,7 +73,6 @@
import de.ids_mannheim.korap.util.KrillDate;
import de.ids_mannheim.korap.util.KrillProperties;
import de.ids_mannheim.korap.util.QueryException;
-import de.ids_mannheim.korap.util.StatusCodes;
/**
* <p>KrillIndex implements a simple API for searching in and writing
@@ -1643,13 +1638,19 @@
catch (QueryException e) {
kr.addError(e.getErrorCode(),e.getLocalizedMessage());
- log.warn(e.getLocalizedMessage());
- }
- catch (Exception e) {
- // 104 ILLEGAL_ARGUMENT, see Kustvakt core
- // de.ids_mannheim.korap.exceptions.StatusCodes.ILLEGAL_ARGUMENT
- kr.addError(104,e.getLocalizedMessage());
- log.warn(e.getLocalizedMessage());
+ log.warn(e.getLocalizedMessage());
+ }
+ catch (IllegalArgumentException e) {
+ // 104 ILLEGAL_ARGUMENT, see Kustvakt core
+ // de.ids_mannheim.korap.exceptions.StatusCodes.ILLEGAL_ARGUMENT
+ kr.addError(104,e.getLocalizedMessage());
+ log.warn(e.getMessage());
+ }
+ catch (Exception e) {
+ // 100 GENERAL ERROR, see Kustvakt core StatusCodes
+ kr.addError(100,e.getMessage());
+ // Hand the exception itself to the logger so the stack trace ends up in the log rather than on stderr
+ log.error(e.getMessage(), e);
}
// Stop timer thread
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpans.java
index 9930843..60a0a19 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpans.java
@@ -81,7 +81,8 @@
@Override
protected void setCandidateList () throws IOException {
- if (hasMoreSpans && hasMoreElements) {
+ if (hasMoreSpans && hasMoreElements
+ && (hasMoreFirstSpans || !candidateList.isEmpty())) {
if (candidateListDocNum == elements.doc()
&& candidateListDocNum == secondSpans.doc()) {
candidateListIndex = -1;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
index 2bf24de..c01de91 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
@@ -81,7 +81,7 @@
* @return true iff such a document exists.
*/
protected boolean ensureSameDoc (Spans x, Spans y) throws IOException {
- while (x.doc() != y.doc()) {
+ while (hasMoreSpans && x.doc() != y.doc()) {
if (x.doc() < y.doc()) {
if (!x.skipTo(y.doc())) {
hasMoreSpans = false;
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
index 7bd2bb8..ea5aea5 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
@@ -40,14 +40,14 @@
private FieldDocument createFieldDoc0 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-0");
- fd.addTV("tokens", "bbcbbb",
- "[(0-1)s:b|s:c|_1$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]"
- + "[(1-2)s:b|_2$<i>1<i>2]"
- + "[(2-3)s:c|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>3<b>0]"
- + "[(3-4)s:b|_4$<i>3<i>4|<>:s$<b>64<i>3<i>4<i>4<b>0]"
- + "[(4-5)s:b|_5$<i>4<i>5|<>:s$<b>64<i>4<i>5<i>5<b>0]"
- + "[(5-6)s:b|_6$<i>5<i>6]"
- + "[(6-7)s:c|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0]");
+ fd.addTV("tokens", "bbcbbbc",
+ "[(0-1)s:b|s:c|_0$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>1<b>0]"
+ + "[(1-2)s:b|_1$<i>1<i>2]"
+ + "[(2-3)s:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>3<b>0]"
+ + "[(3-4)s:b|_3$<i>3<i>4|<>:s$<b>64<i>3<i>4<i>4<b>0]"
+ + "[(4-5)s:b|_4$<i>4<i>5|<>:s$<b>64<i>4<i>5<i>5<b>0]"
+ + "[(5-6)s:b|_5$<i>5<i>6]"
+ + "[(6-7)s:c|_6$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0]");
return fd;
}
@@ -55,12 +55,12 @@
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-1");
fd.addTV("tokens", "ecebdc",
- "[(0-1)s:e|_1$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>6<i>6<b>0|<>:s$<b>64<i>0<i>2<i>1<b>0]"
- + "[(1-2)s:c|s:b|_2$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]"
- + "[(2-3)s:e|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>3<b>0]"
- + "[(3-4)s:b|_4$<i>3<i>4|<>:s$<b>64<i>3<i>4<i>4<b>0]"
- + "[(4-5)s:c|_5$<i>4<i>5|<>:s$<b>64<i>4<i>5<i>5<b>0]"
- + "[(5-6)s:c|_6$<i>5<i>6|<>:s$<b>64<i>5<i>6<i>6<b>0]");
+ "[(0-1)s:e|_0$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>6<i>6<b>0|<>:s$<b>64<i>0<i>1<i>1<b>0]"
+ + "[(1-2)s:c|s:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]"
+ + "[(2-3)s:e|_2$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>3<b>0]"
+ + "[(3-4)s:b|_3$<i>3<i>4|<>:s$<b>64<i>3<i>4<i>4<b>0]"
+ + "[(4-5)s:c|_4$<i>4<i>5|<>:s$<b>64<i>4<i>5<i>5<b>0]"
+ + "[(5-6)s:c|_5$<i>5<i>6|<>:s$<b>64<i>5<i>6<i>6<b>0]");
return fd;
}
@@ -68,12 +68,12 @@
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-2");
fd.addTV("tokens", "bbbddd",
- "[(0-1)s:b|_1$<i>0<i>1|<>:p$<b>64<i>0<i>2<i>1<b>0]"
- + "[(1-2)s:b|_2$<i>1<i>2]"
- + "[(2-3)s:b|_3$<i>2<i>3|<>:p$<b>64<i>2<i>3<i>3<b>0]"
- + "[(3-4)s:d|_4$<i>3<i>4|<>:p$<b>64<i>3<i>4<i>4<b>0]"
- + "[(4-5)s:d|_5$<i>4<i>5|<>:p$<b>64<i>4<i>5<i>5<b>0]"
- + "[(5-6)s:d|_6$<i>5<i>6]");
+ "[(0-1)s:b|_0$<i>0<i>1|<>:p$<b>64<i>0<i>1<i>1<b>0]"
+ + "[(1-2)s:b|_1$<i>1<i>2]"
+ + "[(2-3)s:b|_2$<i>2<i>3|<>:p$<b>64<i>2<i>3<i>3<b>0]"
+ + "[(3-4)s:d|_3$<i>3<i>4|<>:p$<b>64<i>3<i>4<i>4<b>0]"
+ + "[(4-5)s:d|_4$<i>4<i>5|<>:p$<b>64<i>4<i>5<i>5<b>0]"
+ + "[(5-6)s:d|_5$<i>5<i>6]");
return fd;
}
@@ -81,12 +81,12 @@
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-3");
fd.addTV("tokens", "bdbcdd",
- "[(0-1)s:b|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>1<b>0]"
- + "[(1-2)s:d|_2$<i>1<i>2]"
- + "[(2-3)s:b|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>3<b>0]"
- + "[(3-4)s:c|_4$<i>3<i>4|<>:s$<b>64<i>3<i>5<i>5<b>0]"
- + "[(4-5)s:d|_5$<i>4<i>5|<>:s$<b>64<i>4<i>5<i>5<b>0]"
- + "[(5-6)s:d|_6$<i>5<i>6]");
+ "[(0-1)s:b|_0$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>2<b>0]"
+ + "[(1-2)s:d|_1$<i>1<i>2]"
+ + "[(2-3)s:b|_2$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>3<b>0]"
+ + "[(3-4)s:c|_3$<i>3<i>4|<>:s$<b>64<i>3<i>5<i>5<b>0]"
+ + "[(4-5)s:d|_4$<i>4<i>5|<>:s$<b>64<i>4<i>5<i>5<b>0]"
+ + "[(5-6)s:d|_5$<i>5<i>6]");
return fd;
}
@@ -94,12 +94,12 @@
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-4");
fd.addTV("tokens", "bdbcdd",
- "[(0-1)s:b|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>1<b>0]"
- + "[(1-2)s:d|_2$<i>1<i>2]"
- + "[(2-3)s:c|s:b|_3$<i>2<i>3|<>:s$<b>64<i>2<i>5<i>3<b>0]"
- + "[(3-4)s:c|_4$<i>3<i>4|<>:s$<b>64<i>3<i>5<i>4<b>0]"
- + "[(4-5)s:d|_5$<i>4<i>5|<>:s$<b>64<i>4<i>5<i>5<b>0]"
- + "[(5-6)s:d|_6$<i>5<i>6]");
+ "[(0-1)s:b|_0$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]"
+ + "[(1-2)s:d|_1$<i>1<i>2]"
+ + "[(2-3)s:c|s:b|_2$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>3<b>0]"
+ + "[(3-4)s:c|_3$<i>3<i>4|<>:s$<b>64<i>3<i>5<i>5<b>0]"
+ + "[(4-5)s:d|_4$<i>4<i>5|<>:s$<b>64<i>4<i>5<i>5<b>0]"
+ + "[(5-6)s:d|_5$<i>5<i>6]");
return fd;
}
@@ -107,12 +107,37 @@
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-5");
fd.addTV("tokens", "edef",
- "[(0-1)s:e|_1$<i>0<i>1]"
- + "[(1-2)s:d|_2$<i>1<i>2]"
- + "[(2-3)s:e|_3$<i>2<i>3]"
- + "[(3-4)s:f|_4$<i>3<i>4]");
+ "[(0-1)s:e|_0$<i>0<i>1]"
+ + "[(1-2)s:d|_1$<i>1<i>2]"
+ + "[(2-3)s:e|_2$<i>2<i>3]"
+ + "[(3-4)s:f|_3$<i>3<i>4]");
return fd;
}
+
+ private FieldDocument createFieldDoc6 () { // Test fixture "doc-6": six tokens b e b c e e
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-6");
+ fd.addTV("tokens", "bebcee", // carries two <>:s annotations (at tokens 0 and 4); presumably they span tokens 0-4 and 4-6 - TODO confirm payload layout
+ "[(0-1)s:b|_0$<i>0<i>1|<>:s$<b>64<i>0<i>4<i>4<b>0]"
+ + "[(1-2)s:e|_1$<i>1<i>2]"
+ + "[(2-3)s:b|_2$<i>2<i>3]"
+ + "[(3-4)s:c|_3$<i>3<i>4]"
+ + "[(4-5)s:e|_4$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0]"
+ + "[(5-6)s:e|_5$<i>5<i>6]");
+ return fd;
+ }
+
+ private FieldDocument createFieldDoc7 () { // Test fixture: four tokens b e e f (contains s:b, but no s:c and no s:d)
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-7"); // own ID; "doc-5" is already taken by another fixture in this class
+ fd.addTV("tokens", "beef", // one <>:s annotation at token 0; presumably it spans all four tokens - TODO confirm payload layout
+ "[(0-1)s:b|_0$<i>0<i>1|<>:s$<b>64<i>0<i>4<i>4<b>0]"
+ + "[(1-2)s:e|_1$<i>1<i>2]"
+ + "[(2-3)s:e|_2$<i>2<i>3]"
+ + "[(3-4)s:f|_3$<i>3<i>4]");
+ return fd;
+ }
+
public SpanQuery createQuery (String elementType, String x, String y,
int min, int max, boolean isOrdered) {
@@ -161,7 +186,7 @@
ki = new KrillIndex();
ki.addDoc(createFieldDoc4());
ki.commit();
-
+ // b~d~b/c~c~dd
SpanQuery sq;
sq = createQuery("s", "s:b", "s:c", 1, 1, true);
@@ -305,7 +330,7 @@
assertEquals(1, kr.getTotalResults()); // Is 1 correct or
// should it not be
// ordered?
- assertEquals("[[ec]]ebdc", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("e[[ce]]bdc", kr.getMatch(0).getSnippetBrackets());
}
@Test
@@ -328,7 +353,7 @@
new DistanceConstraint(e, 0, 0, true, false), true);
kr = ki.search(sdq, (short) 10);
- assertEquals(1, kr.getTotalResults());
+ assertEquals(2, kr.getTotalResults());
}
@Test
@@ -354,6 +379,8 @@
assertEquals(0, kr.getTotalResults());
}
+
+
@Test
public void testNoElementSpans () throws IOException {
ki = new KrillIndex();
@@ -374,5 +401,59 @@
new DistanceConstraint(e, 0, 0, true, false), true);
kr = ki.search(sdq, (short) 10);
+ assertEquals(1, kr.getTotalResults());
+ }
+
+ @Test
+ public void testNoMoreFirstSpanWithSpanOrQuery () throws IOException {
+ ki = new KrillIndex();
+ ki.addDoc(createFieldDoc3());
+ ki.addDoc(createFieldDoc5());
+ ki.commit();
+ // Scenario (per the test name): the first operand (s:b) presumably occurs only in the first document, while the second operand still matches in the later one
+ // Second operand: s:c OR s:d
+ SpanOrQuery soq = new SpanOrQuery(
+ new SpanTermQuery(new Term("tokens", "s:c")),
+ new SpanTermQuery(new Term("tokens", "s:d")));
+
+ // s:b combined with (c or d), distance counted in <s> elements; the 0, 0, true arguments are presumably min, max and "ordered" - TODO confirm
+ SpanElementQuery e = new SpanElementQuery("tokens", "s");
+ SpanDistanceQuery sdq = new SpanDistanceQuery(
+ new SpanTermQuery(new Term("tokens", "s:b")),
+ soq,
+ new DistanceConstraint(e, 0, 0, true, false), true);
+
+ kr = ki.search(sdq, (short) 10);
+ assertEquals(1, kr.getTotalResults());
+ }
+
+ @Test
+ public void testNoMoreSecondSpansOrQuery () throws IOException {
+ ki = new KrillIndex();
+ ki.addDoc(createFieldDoc6());
+ ki.addDoc(createFieldDoc5());
+ ki.addDoc(createFieldDoc7());
+ ki.commit();
+ // Three documents are indexed; the query below is the same "b within the same <s> as (c or d)" query used by the neighbouring tests
+ // Second operand: s:c OR s:d
+ SpanOrQuery soq = new SpanOrQuery(
+ new SpanTermQuery(new Term("tokens", "s:c")),
+ new SpanTermQuery(new Term("tokens", "s:d")));
+
+ // s:b combined with (c or d), distance counted in <s> elements; the 0, 0, true arguments are presumably min, max and "ordered" - TODO confirm
+ SpanElementQuery e = new SpanElementQuery("tokens", "s");
+ SpanDistanceQuery sdq = new SpanDistanceQuery(
+ new SpanTermQuery(new Term("tokens", "s:b")),
+ soq,
+ new DistanceConstraint(e, 0, 0, true, false), true);
+
+ kr = ki.search(sdq, (short) 10);
+
+ // Expected: 2 matches, presumably both from createFieldDoc6():
+ // b(0)..c(3) and b(2)..c(3), assuming its first <s> covers
+ // tokens 0-4 (TODO confirm against the payload layout).
+ // createFieldDoc7() has s:b but neither s:c nor s:d, so it cannot
+ // match; createFieldDoc5() presumably contains no s:b at all.
+ assertEquals(2, kr.getTotalResults());
+ }
}