Fix handling of invalid second spans in ElementDistanceSpan when a second span is not within an element.
Add test cases for DistanceSpan with two identical tokens.
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpan.java
index ca643a9..206702b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpan.java
@@ -24,7 +24,7 @@
public abstract class DistanceSpan extends SimpleSpans{
protected boolean hasMoreFirstSpans;
- protected boolean collectPayloads;
+ protected boolean collectPayloads;
protected int minDistance,maxDistance;
protected List<CandidateSpan> candidateList;
@@ -67,17 +67,27 @@
if (findMatch())
return true;
}
- // Forward secondspan
- if (hasMoreSpans = secondSpans.next())
+
+ do { // Forward secondspan
+ hasMoreSpans = secondSpans.next();
setCandidateList();
+ }
+ while (hasMoreSpans && !isSecondSpanValid());
}
return false;
}
+ /** Determine whether the current second span is valid. It is always valid
+ * in TokenDistanceSpan, but it can be invalid in ElementDistanceSpan,
+ * namely when it is not within an element of the distance unit (e.g. a
+ * sentence or a paragraph, depending on the element distance unit).
+ *
+ * */
+ protected abstract boolean isSecondSpanValid() throws IOException;
+
/** Collect all possible firstspan instances as candidate spans for
* the current secondspan. The candidate spans are within the max
* distance from the current secondspan.
- *
* */
protected abstract void setCandidateList() throws IOException;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpan.java
index 389e989..c097819 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpan.java
@@ -65,18 +65,18 @@
protected void setCandidateList() throws IOException{
if (candidateListDocNum == elements.doc() &&
candidateListDocNum == secondSpans.doc()){
- addNewCandidates();
- candidateListIndex = -1;
+ candidateListIndex = -1;
+ addNewCandidates();
}
else {
candidateList.clear();
if (hasMoreFirstSpans && findSameDoc()){
candidateListDocNum = firstSpans.doc();
elementPosition=0;
- addNewCandidates();
- candidateListIndex = -1;
+ candidateListIndex = -1;
+ addNewCandidates();
}
- }
+ }
}
/** Add new possible candidates. Candidates must be in an element
@@ -93,13 +93,17 @@
}
hasMoreFirstSpans = firstSpans.next();
}
-
+ }
+
+ @Override
+ protected boolean isSecondSpanValid() throws IOException{
if (advanceElementTo(secondSpans)){
secondSpanPostion = elementPosition;
filterCandidateList(secondSpanPostion);
+ return true;
}
// second span is not in an element
- else { candidateList.clear(); }
+ return false;
}
/** Advance elements until encountering a span.
@@ -113,8 +117,8 @@
if (span.start() >= elements.start() &&
span.end() <= elements.end()){
return true;
- }
-
+ }
+
hasMoreElements = elements.next();
elementPosition++;
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
index 8cdd7ef..90b1d8a 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
@@ -162,15 +162,26 @@
assertEquals(2, kr.match(0).startPos);
assertEquals(5, kr.match(0).endPos);
-// System.out.print(kr.getTotalResults()+"\n");
-// for (int i=0; i< kr.getTotalResults(); i++){
-// System.out.println(
-// kr.match(i).getLocalDocID()+" "+
-// kr.match(i).startPos + " " +
-// kr.match(i).endPos
-// );
-// }
-
}
+ /** Same tokens in different elements: "s:b" is used as both operands of the distance query. */
+ @Test
+ public void testCase4() throws IOException{
+ // Expects exactly two matches (positions 0-4 and 3-5) from the single indexed document.
+ ki = new KorapIndex();
+ ki.addDoc(createFieldDoc0());
+ ki.commit();
+
+ SpanQuery sq;
+ sq = createQuery("s", "s:b", "s:b", 1, 2,true);
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals(2, kr.totalResults());
+ assertEquals(0, kr.match(0).startPos);
+ assertEquals(4, kr.match(0).endPos);
+ assertEquals(3, kr.match(1).startPos);
+ assertEquals(5, kr.match(1).endPos);
+
+ }
+
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestElementIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestElementIndex.java
index 07bafb8..ac84b6a 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestElementIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestElementIndex.java
@@ -1,3 +1,5 @@
+package de.ids_mannheim.korap.index;
+
import java.util.*;
import java.io.*;