Compute element distance spans without counting gaps.
Add test cases for ElementDistanceSpan.
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpan.java
index 11ef837..d51a70d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpan.java
@@ -12,16 +12,15 @@
import de.ids_mannheim.korap.query.SpanDistanceQuery;
-/** Enumeration of element-based distance span matches.
- * The distance unit is an element position, including a gap.
- * For example:
- * X <p></p>
- * Z
- * <p></p> Y.
- * X and Y has a distance of 4.
+/** Span enumeration of element-based distance span matches.
+ * Each match consists of two child spans. The element-distance between
+ * the child spans is the difference between the element position numbers
+ * where the child spans are. The element-distance unit can be a sentence
+ * or a paragraph. Child span occurrences that are not inside
+ * a sentence or a paragraph (with respect to the element distance type
+ * currently used) are ignored.
*
- * @author margaretha
- * @deprecated
+ * @author margaretha
* */
public class ElementDistanceSpan extends DistanceSpan {
@@ -29,9 +28,6 @@
private Spans elements;
private int elementPosition, secondSpanPostion;
- private boolean isGap; // refers to the gap right before the current element
- private int gapPosition;
-
public ElementDistanceSpan(SpanDistanceQuery query,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts)
@@ -83,87 +79,51 @@
}
}
- /** Add new possible candidates
+ /** Add new possible candidates. Candidates must be in an element
+ * and not too far from the second span.
* */
- private void addNewCandidates() throws IOException{
- int position;
+ private void addNewCandidates() throws IOException{
while ( hasMoreFirstSpans &&
firstSpans.doc() == candidateListDocNum &&
firstSpans.start() < secondSpans.end()){
- advanceElementTo(firstSpans);
- position = findPosition(firstSpans);
- candidateList.add(new CandidateSpan(firstSpans,position));
- filterCandidateList(position);
+ if (advanceElementTo(firstSpans)){
+ candidateList.add(new CandidateSpan(firstSpans,elementPosition));
+ filterCandidateList(elementPosition);
+ }
hasMoreFirstSpans = firstSpans.next();
}
- secondSpanPostion = findSecondSpanPosition();
- filterCandidateList(secondSpanPostion);
+ if (advanceElementTo(secondSpans)){
+ secondSpanPostion = elementPosition;
+ filterCandidateList(secondSpanPostion);
+ }
+ // second span is not in an element
+ else { candidateList.clear(); }
}
- /** Advance elements until encountering the span.
- * Add the elementPosition while searching.
- * Add gap positions too.
+ /** Advance elements until encountering a span.
+ * @return true iff an element containing the span is found.
*/
- private void advanceElementTo(Spans span) throws IOException{
- isGap = false;
- int prevElementEnd = elements.end();
+ private boolean advanceElementTo(Spans span) throws IOException{
while (hasMoreElements &&
elements.doc() == candidateListDocNum &&
- elements.end() <= span.start()){
+ elements.start() < span.end()){
- // Find a gap
- if (prevElementEnd < elements.start()){
- elementPosition++;
+ if (span.start() >= elements.start() &&
+ span.end() <= elements.end()){
+ return true;
}
-
- prevElementEnd = elements.end();
+
hasMoreElements = elements.next();
elementPosition++;
}
-
- // Find the last gap between the prevElement and current element
- if (hasMoreElements &&
- elements.doc() == candidateListDocNum &&
- prevElementEnd < elements.start()){
- isGap = true;
- gapPosition = elementPosition;
- elementPosition++;
- }
-
+ return false;
}
-
- /** Find a span position which can be
- * in the current element,
- * in the gap between the previous and current element,
- * in the very last gap at the end of a doc.
- * @return the span position
+
+ /** Reduce the number of candidates by removing all candidates that are
+ * not within a max distance from the given element position.
* */
- private int findPosition(Spans span){
- int position = elementPosition;
-
- // in the gap
- if (isGap && span.end() <= elements.start())
- position = gapPosition;
-
- return position;
- }
-
-
- private int findSecondSpanPosition() throws IOException{
- // in the gap
- if (isGap && secondSpans.end() <= elements.start()){
- return gapPosition;
- }
- // in current element
- else if (secondSpans.end() <= elements.end()){
- return elementPosition;
- }
- advanceElementTo(secondSpans);
- return findPosition(secondSpans);
- }
-
private void filterCandidateList(int position){
Iterator<CandidateSpan> i = candidateList.iterator();
CandidateSpan cs;
@@ -175,6 +135,7 @@
}
i.remove();
}
+
//System.out.println("pos "+position+" " +candidateList.size());
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceSpan.java b/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceSpan.java
new file mode 100644
index 0000000..366db87
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceSpan.java
@@ -0,0 +1,138 @@
+package de.ids_mannheim.korap.index;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.query.SpanDistanceQuery;
+import de.ids_mannheim.korap.query.SpanElementQuery;
+
+@RunWith(JUnit4.class)
+public class TestElementDistanceSpan {
+
+ KorapResult kr;
+ KorapIndex ki;
+
+ private FieldDocument createFieldDoc0() {
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-0");
+ fd.addTV("base",
+ "text",
+ "[(0-1)s:b|s:c|_1#0-1|<>:s#0-2$<i>1]" +
+ "[(1-2)s:b|_2#1-2]" +
+ "[(2-3)s:c|_3#2-3|<>:s#2-3$<i>3]" +
+ "[(3-4)s:b|_4#3-4|<>:s#3-4$<i>4]" +
+ "[(4-5)s:b|_5#4-5|<>:s#4-5$<i>5]" +
+ "[(5-6)s:b|_6#5-6]" +
+ "[(6-7)s:c|_7#6-7]");
+ return fd;
+ }
+
+
+ private FieldDocument createFieldDoc1() {
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-1");
+ fd.addTV("base",
+ "text",
+ "[(0-1)s:e|_1#0-1|<>:s#0-2$<i>1]" +
+ "[(1-2)s:c|s:b|_2#1-2|<>:s#1-2$<i>2]" +
+ "[(2-3)s:e|_3#2-3|<>:s#2-3$<i>3]" +
+ "[(3-4)s:b|_4#3-4|<>:s#3-4$<i>4]" +
+ "[(4-5)s:d|_5#4-5|<>:s#4-5$<i>5]" +
+ "[(5-6)s:c|_6#5-6|<>:s#5-6$<i>6]");
+ return fd;
+ }
+
+ private FieldDocument createFieldDoc2() {
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-0");
+ fd.addTV("base",
+ "text",
+ "[(0-1)s:b|_1#0-1|<>:p#0-2$<i>1]" +
+ "[(1-2)s:b|_2#1-2]" +
+ "[(2-3)s:b|_3#2-3|<>:p#2-3$<i>3]" +
+ "[(3-4)s:d|_4#3-4|<>:p#3-4$<i>4]" +
+ "[(4-5)s:d|_5#4-5|<>:p#4-5$<i>5]" +
+ "[(5-6)s:d|_6#5-6]");
+ return fd;
+ }
+
+ public SpanQuery createQuery(String elementType, String x, String y,
+ int minDistance, int maxDistance){
+ return new SpanDistanceQuery(
+ new SpanElementQuery("base", elementType),
+ new SpanTermQuery(new Term("base",x)),
+ new SpanTermQuery(new Term("base",y)),
+ minDistance,
+ maxDistance,
+ true);
+ }
+
+
+ /** Multiple documents
+ * Ensure terms and elements are in the same doc
+ * Ensure terms are in elements
+ * Check filter candidate list
+ * */
+ @Test
+ public void testCase1() throws IOException{
+ System.out.println("testCase1");
+ ki = new KorapIndex();
+ ki.addDoc(createFieldDoc0());
+ ki.addDoc(createFieldDoc1());
+ ki.commit();
+
+ SpanQuery sq;
+ sq = createQuery("s", "s:b", "s:c", 1, 2);
+ kr = ki.search(sq, (short) 10);
+
+// assertEquals(4, kr.totalResults());
+// assertEquals(0, kr.match(0).startPos);
+// assertEquals(1, kr.match(0).endPos);
+// assertEquals(0, kr.match(1).startPos);
+// assertEquals(3, kr.match(1).endPos);
+
+// System.out.print(kr.getTotalResults()+"\n");
+// for (int i=0; i< kr.getTotalResults(); i++){
+// System.out.println(
+// kr.match(i).getLocalDocID()+" "+
+// kr.match(i).startPos + " " +
+// kr.match(i).endPos
+// );
+// }
+
+ }
+
+ /** Ensure terms and elements are in the same doc
+ * */
+ @Test
+ public void testCase2() throws IOException{
+ System.out.println("testCase2");
+ ki = new KorapIndex();
+ ki.addDoc(createFieldDoc0());
+ ki.addDoc(createFieldDoc1());
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+
+ SpanQuery sq;
+ sq = createQuery("p", "s:b", "s:d", 1, 1);
+ kr = ki.search(sq, (short) 10);
+
+// assertEquals(1, kr.totalResults());
+// assertEquals(2, kr.match(0).getLocalDocID());
+// assertEquals(2, kr.match(0).startPos);
+// assertEquals(4, kr.match(0).endPos);
+
+ }
+
+
+}