Fix handling of invalid second spans in ElementDistanceSpan when a second span is not within an element.
Add test cases for DistanceSpan with two identical tokens.
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpan.java
index ca643a9..206702b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpan.java
@@ -24,7 +24,7 @@
public abstract class DistanceSpan extends SimpleSpans{
protected boolean hasMoreFirstSpans;
- protected boolean collectPayloads;
+ protected boolean collectPayloads;
protected int minDistance,maxDistance;
protected List<CandidateSpan> candidateList;
@@ -67,17 +67,27 @@
if (findMatch())
return true;
}
- // Forward secondspan
- if (hasMoreSpans = secondSpans.next())
+
+ do { // Forward secondspan
+ hasMoreSpans = secondSpans.next();
setCandidateList();
+ }
+ while (hasMoreSpans && !isSecondSpanValid());
}
return false;
}
+ /** Determine if the current second span is valid. It is always valid in
+ * TokenDistanceSpan, but it can be invalid in the ElementDistanceSpan,
+ * namely when it is not within a particular element (a sentence or a
+ * paragraph, depending on the element distance unit).
+ *
+ * */
+ protected abstract boolean isSecondSpanValid() throws IOException;
+
/** Collect all possible firstspan instances as candidate spans for
* the current secondspan. The candidate spans are within the max
* distance from the current secondspan.
- *
* */
protected abstract void setCandidateList() throws IOException;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpan.java
index 389e989..c097819 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpan.java
@@ -65,18 +65,18 @@
protected void setCandidateList() throws IOException{
if (candidateListDocNum == elements.doc() &&
candidateListDocNum == secondSpans.doc()){
- addNewCandidates();
- candidateListIndex = -1;
+ candidateListIndex = -1;
+ addNewCandidates();
}
else {
candidateList.clear();
if (hasMoreFirstSpans && findSameDoc()){
candidateListDocNum = firstSpans.doc();
elementPosition=0;
- addNewCandidates();
- candidateListIndex = -1;
+ candidateListIndex = -1;
+ addNewCandidates();
}
- }
+ }
}
/** Add new possible candidates. Candidates must be in an element
@@ -93,13 +93,17 @@
}
hasMoreFirstSpans = firstSpans.next();
}
-
+ }
+
+ @Override
+ protected boolean isSecondSpanValid() throws IOException{
if (advanceElementTo(secondSpans)){
secondSpanPostion = elementPosition;
filterCandidateList(secondSpanPostion);
+ return true;
}
// second span is not in an element
- else { candidateList.clear(); }
+ return false;
}
/** Advance elements until encountering a span.
@@ -113,8 +117,8 @@
if (span.start() >= elements.start() &&
span.end() <= elements.end()){
return true;
- }
-
+ }
+
hasMoreElements = elements.next();
elementPosition++;
}