Add test cases for token-distance exclusion; fix second-span forwarding and cross-document matching bugs in DistanceExclusionSpan
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpan.java
index cbf6b45..02102bb 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpan.java
@@ -36,9 +36,8 @@
@Override
protected boolean advance() throws IOException {
- while(hasMoreSpans){
- if (hasMoreSecondSpans && forward())
- continue;
+ while(hasMoreSpans){
+ if (hasMoreSecondSpans) forwardSecondSpans();
if (findMatch()){
hasMoreSpans = firstSpans.next();
@@ -49,27 +48,27 @@
return false;
}
- private boolean forward() throws IOException{
+ private void forwardSecondSpans() throws IOException{
- if (secondSpans.doc() <= firstSpans.doc() &&
+ if (secondSpans.doc() < firstSpans.doc()){
+ hasMoreSecondSpans = secondSpans.skipTo(firstSpans.doc());
+ }
+
+ // skip the secondSpan to the right side of the firstspan
+ while (hasMoreSecondSpans && secondSpans.doc() == firstSpans.doc() &&
firstSpans.start() >= secondSpans.end()){
if (isOrdered){
hasMoreSecondSpans = secondSpans.next();
- return true;
}
-
- else {
- int actualDistance = calculateActualDistance();
- if (actualDistance > maxDistance){
- hasMoreSecondSpans = secondSpans.next();
- return true;
- }
+ else if (calculateActualDistance() > maxDistance){
+ hasMoreSecondSpans = secondSpans.next();
}
-
- }
-
- return false;
+ // the firstspan is within maxDistance
+ //if (!isOrdered && calculateActualDistance() <= maxDistance){
+ else { break; }
+
+ }
}
private int calculateActualDistance(){
@@ -81,7 +80,7 @@
}
private boolean findMatch() throws IOException {
- if (!hasMoreSecondSpans){
+ if (!hasMoreSecondSpans || secondSpans.doc() > firstSpans.doc()){
setMatchProperties();
return true;
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java
index b822b34..7b0c0c1 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java
@@ -1,5 +1,7 @@
package de.ids_mannheim.korap.index;
+import static org.junit.Assert.*;
+
import java.io.IOException;
import org.apache.lucene.index.Term;
@@ -10,35 +12,120 @@
import de.ids_mannheim.korap.KorapIndex;
import de.ids_mannheim.korap.KorapResult;
import de.ids_mannheim.korap.query.SpanDistanceQuery;
+import de.ids_mannheim.korap.query.SpanElementQuery;
public class TestDistanceExclusionIndex {
private KorapIndex ki;
private KorapResult kr;
+ /** Ordered, unordered
+ * */
@Test
public void testCase1() throws IOException{
ki = new KorapIndex();
ki.addDoc(createFieldDoc0());
+ ki.commit();
+ SpanQuery sq;
+ // ---- Distance 0 to 1
+ sq = createQuery("s:c","s:e",0,1,true);
+ kr = ki.search(sq, (short) 10);
+ assertEquals(3, kr.getTotalResults());
+ assertEquals(2, kr.match(0).getStartPos());
+ assertEquals(3, kr.match(0).getEndPos());
+ assertEquals(3, kr.match(1).getStartPos());
+ assertEquals(4, kr.match(1).getEndPos());
+ assertEquals(5, kr.match(2).getStartPos());
+ assertEquals(6, kr.match(2).getEndPos());
+
+ // Unordered
+ sq = createQuery("s:c","s:e",0,1,false);
+ kr = ki.search(sq, (short) 10);
+ assertEquals(2, kr.getTotalResults());
+ }
+
+ /** Multiple docs, unordered
+ * No more secondSpans
+ * */
+ @Test
+ public void testCase2() throws IOException{
+ ki = new KorapIndex();
+ ki.addDoc(createFieldDoc0());
ki.addDoc(createFieldDoc1());
ki.commit();
SpanQuery sq;
// ---- Distance 0 to 1
sq = createQuery("s:c","s:e",0,1,false);
- kr = ki.search(sq, (short) 10);
+ kr = ki.search(sq, (short) 10);
+ assertEquals(5, kr.getTotalResults());
+ assertEquals(1, kr.match(3).getLocalDocID());
+ }
+
+ /** Secondspans' document number is bigger than firstspans'
+ * Actual distance is smaller than min distance.
+ * */
+ @Test
+ public void testCase3() throws IOException{
+ ki = new KorapIndex();
+ ki.addDoc(createFieldDoc1());
+ ki.addDoc(createFieldDoc0());
+ ki.commit();
-// System.out.println(sq.toString("base"));
-
-// System.out.print(kr.getTotalResults()+"\n");
-// for (int i=0; i< kr.getTotalResults(); i++){
-// System.out.println(
-// kr.match(i).getLocalDocID()+" "+
-// kr.match(i).startPos + " " +
-// kr.match(i).endPos
-// );
-// }
+ SpanQuery sq;
+ // Unordered
+ sq = createQuery("s:c","s:e",2,2,false);
+ kr = ki.search(sq, (short) 10);
+ assertEquals(5, kr.getTotalResults());
+ }
+
+ /** Unordered: firstspan is on the right side of the secondspan,
+ * but within max distance.
+ * */
+ @Test
+ public void testCase4() throws IOException{
+ ki = new KorapIndex();
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+
+ SpanQuery sq;
+ // Unordered
+ sq = createQuery("s:b","s:c",2,2,false);
+ kr = ki.search(sq, (short) 10);
+ assertEquals(1, kr.getTotalResults());
+ assertEquals(1, kr.match(0).getStartPos());
+ assertEquals(2, kr.match(0).getEndPos());
}
-
+
+ /** Element queries
+ * */
+ @Test
+ public void testCase5() throws IOException{
+ ki = new KorapIndex();
+ ki.addDoc(createFieldDoc0());
+ ki.commit();
+
+ SpanDistanceQuery sq;
+ sq = new SpanDistanceQuery(
+ new SpanElementQuery("base", "x"),
+ new SpanElementQuery("base", "y")
+ ,0,1,false,true);
+ sq.setExclusion(true);
+
+ kr = ki.search(sq, (short) 10);
+ assertEquals(1, kr.getTotalResults());
+ assertEquals(9, kr.match(0).getStartPos());
+ assertEquals(10, kr.match(0).getEndPos());
+
+ System.out.print(kr.getTotalResults()+"\n");
+ for (int i=0; i< kr.getTotalResults(); i++){
+ System.out.println(
+ kr.match(i).getLocalDocID()+" "+
+ kr.match(i).startPos + " " +
+ kr.match(i).endPos
+ );
+ }
+ }
+
private SpanQuery createQuery(String x, String y, int min, int max, boolean isOrdered){
SpanDistanceQuery sq = new SpanDistanceQuery(
new SpanTermQuery(new Term("base",x)),
@@ -52,6 +139,7 @@
return sq;
}
+
private FieldDocument createFieldDoc0(){
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-0");
@@ -65,8 +153,8 @@
"[(5-6)s:c|_6#5-6|<>:y#5-8$<i>8]" +
"[(6-7)s:d|_7#6-7]" +
"[(7-8)s:e|_8#7-8|<>:x#7-9$<i>9]" +
- "[(8-9)s:e|_9#8-9|<>:x#8-10$<i>10]" +
- "[(9-10)s:d|_10#9-10]");
+ "[(8-9)s:e|_9#8-9]" +
+ "[(9-10)s:d|_10#9-10|<>:x#9-10$<i>10]");
return fd;
}
@@ -83,4 +171,23 @@
"[(5-6)s:d|_6#5-6]");
return fd;
}
+
+ private FieldDocument createFieldDoc2() {
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-2");
+ fd.addTV("base",
+ "text",
+ "[(0-1)s:b|_1#0-1]" +
+ "[(1-2)s:b|_2#1-2]" +
+ "[(2-3)s:c|_3#2-3]" +
+ "[(3-4)s:c|_4#3-4]" +
+ "[(4-5)s:b|_5#4-5]" +
+ "[(5-6)s:d|_6#5-6]" +
+ "[(6-7)s:b|_7#6-7]" +
+ "[(7-8)s:d|_8#7-8]" +
+ "[(8-9)s:c|_9#8-9]" +
+ "[(9-10)s:d|_10#9-10]");
+ return fd;
+ }
+
}