Fixed #53 - element distance query bug.
Change-Id: I7954bf696ced43da382123cbef698574b21ec4e7
diff --git a/Changes b/Changes
index b0aab67..a17eeca 100644
--- a/Changes
+++ b/Changes
@@ -11,6 +11,7 @@
- [feature] Instead of adding, the Indexer now upserts documents
to avoid multiple documents with the same text sigle
(diewald)
+ - [bugfix] Fixed #53 element distance query bug (margaretha)
0.58.4 2019-02-05
- [cleanup] Remove deprecated methods setLicense/getLicense,
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
index 06227e2..4db9212 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
@@ -8,6 +8,8 @@
import java.util.List;
import java.util.Map;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
@@ -32,7 +34,6 @@
private long matchCost;
protected int currentDocNum;
-
/**
* Constructs UnorderedDistanceSpans for the given
* {@link SpanDistanceQuery} .
@@ -61,7 +62,6 @@
hasMoreSpans = hasMoreFirstSpans && hasMoreSecondSpans;
}
-
@Override
protected boolean advance () throws IOException {
while (hasMoreSpans || !matchList.isEmpty()) {
@@ -69,13 +69,11 @@
setMatchProperties();
return true;
}
- if (prepareLists())
- setMatchList();
+ if (prepareLists()) setMatchList();
}
return false;
}
-
/**
* Updates the firstSpanList and secondSpanList by adding the next
* possible first and second spans. Both the spans must be in the
@@ -90,7 +88,6 @@
*/
protected abstract boolean prepareLists () throws IOException;
-
/**
* Sets the list of matches for the span having the smallest
* position (i.e. between the first and the second spans), and its
@@ -106,16 +103,16 @@
hasMoreFirstSpans, secondSpanList);
hasMoreSecondSpans = setCandidateList(secondSpanList, secondSpans,
hasMoreSecondSpans, firstSpanList);
- // System.out.println("--------------------");
- // System.out.println("firstSpanList:");
- // for (CandidateSpan cs : firstSpanList) {
- // System.out.println(cs.getStart() + " " + cs.getEnd());
- // }
- //
- // System.out.println("secondSpanList:");
- // for (CandidateSpan cs : secondSpanList) {
- // System.out.println(cs.getStart() + " " + cs.getEnd());
- // }
+ // System.out.println("--------------------");
+ // System.out.println("firstSpanList:");
+ // for (CandidateSpan cs : firstSpanList) {
+ // System.out.println(cs.getStart() + " " + cs.getEnd());
+ // }
+ //
+ // System.out.println("secondSpanList:");
+ // for (CandidateSpan cs : secondSpanList) {
+ // System.out.println(cs.getStart() + " " + cs.getEnd());
+ // }
CandidateSpan currentFirstSpan, currentSecondSpan;
if (!firstSpanList.isEmpty() && !secondSpanList.isEmpty()) {
@@ -126,26 +123,12 @@
if (currentFirstSpan.getStart() < currentSecondSpan.getStart()
|| isLastCandidateSmaller(currentFirstSpan,
currentSecondSpan)) {
-// log.trace("current target: " + firstSpanList.get(0).getStart()
-// + " " + firstSpanList.get(0).getEnd());
-// System.out.println("candidates:");
-// for (CandidateSpan cs : secondSpanList) {
-// System.out.println(cs.getStart() + " " + cs.getEnd());
-// }
-
matchList = findMatches(currentFirstSpan, secondSpanList, true);
updateList(firstSpanList);
}
else {
-// log.trace("current target: " + secondSpanList.get(0).getStart()
-// + " " + secondSpanList.get(0).getEnd());
-// System.out.println("candidates:");
-// for (CandidateSpan cs : firstSpanList) {
-// System.out.println(cs.getStart() + " " + cs.getEnd());
-// }
-
- matchList = findMatches(currentSecondSpan, firstSpanList,
- false);
+ matchList =
+ findMatches(currentSecondSpan, firstSpanList, false);
updateList(secondSpanList);
if (currentFirstSpan.getStart() == currentSecondSpan.getStart()
@@ -158,21 +141,14 @@
}
}
}
- else if (firstSpanList.isEmpty()) {
- // log.trace("current target: " + secondSpanList.get(0).getStart()
- // + " " + secondSpanList.get(0).getEnd());
- // log.trace("candidates: empty");
+ else if (!secondSpanList.isEmpty()) {
updateList(secondSpanList);
}
- else {
- // log.trace("current target: " + firstSpanList.get(0).getStart()
- // + " " + firstSpanList.get(0).getEnd());
- // log.trace("candidates: empty");
+ else if (!firstSpanList.isEmpty()) {
updateList(firstSpanList);
}
}
-
/**
* Tells if the last candidate from the secondSpanList has a
* smaller end position than the end position of the the last
@@ -189,8 +165,8 @@
private boolean isLastCandidateSmaller (CandidateSpan currentFirstSpan,
CandidateSpan currentSecondSpan) {
if (currentFirstSpan.getEnd() == currentSecondSpan.getEnd()) {
- int secondEnd = secondSpanList.get(secondSpanList.size() - 1)
- .getEnd();
+ int secondEnd =
+ secondSpanList.get(secondSpanList.size() - 1).getEnd();
int firstEnd = firstSpanList.get(firstSpanList.size() - 1).getEnd();
return (secondEnd < firstEnd ? true : false);
}
@@ -198,7 +174,6 @@
return false;
}
-
/**
* Performs an update based on the given candidateList. In
* {@link UnorderedTokenDistanceSpans}, the first candidate in the
@@ -211,7 +186,6 @@
*/
protected abstract void updateList (List<CandidateSpan> candidateList);
-
/**
* Sets the candidate list for the first element in the target
* list and
@@ -236,7 +210,6 @@
boolean hasMoreCandidates, List<CandidateSpan> targetList)
throws IOException;
-
/**
* Finds all matches between the target span and its candidates in
* the
@@ -254,7 +227,6 @@
protected abstract List<CandidateSpan> findMatches (CandidateSpan target,
List<CandidateSpan> candidateList, boolean isTargetFirstSpan);
-
/**
* Computes match properties and creates a candidate span match to
* be added to the match list.
@@ -262,7 +234,8 @@
* @return a candidate span match
*/
protected CandidateSpan createMatchCandidate (CandidateSpan target,
- CandidateSpan cs, boolean isDistanceZero, boolean isTargetFirstSpan) {
+ CandidateSpan cs, boolean isDistanceZero,
+ boolean isTargetFirstSpan) {
int start = Math.min(target.getStart(), cs.getStart());
int end = Math.max(target.getEnd(), cs.getEnd());
@@ -278,8 +251,8 @@
payloads.addAll(cs.getPayloads());
}
}
- CandidateSpan match = new CandidateSpan(start, end, doc, cost,
- payloads);
+ CandidateSpan match =
+ new CandidateSpan(start, end, doc, cost, payloads);
if (isTargetFirstSpan) {
match.setChildSpan(target);
match.setSecondChildSpan(cs);
@@ -288,11 +261,10 @@
match.setChildSpan(cs);
match.setSecondChildSpan(target);
}
- //match.setChildSpan(cs);
+ // match.setChildSpan(cs);
return match;
}
-
/**
* Assigns the first candidate span in the match list as the
* current span match, and removes it from the matchList.
@@ -311,13 +283,14 @@
// log.trace("Match doc#={} start={} end={}", matchDocNumber,
// matchStartPosition, matchEndPosition);
- // log.trace("firstspan " + getMatchFirstSpan().getStart() + " "
+ // log.trace("firstspan " + getMatchFirstSpan().getStart() + "
+ // "
// + getMatchFirstSpan().getEnd());
- // log.trace("secondspan " + getMatchSecondSpan().getStart() + " "
+ // log.trace("secondspan " + getMatchSecondSpan().getStart() +
+ // " "
// + getMatchSecondSpan().getEnd());
}
-
@Override
public boolean skipTo (int target) throws IOException {
if (hasMoreSpans && (secondSpans.doc() < target)) {
@@ -334,7 +307,6 @@
return advance();
}
-
@Override
public long cost () {
return matchCost;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
index 388e97b..f111c29 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
@@ -6,6 +6,8 @@
import java.util.List;
import java.util.Map;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
@@ -33,6 +35,8 @@
// target span
private List<CandidateSpan> elementList;
+ private Logger log = LogManager.getLogger(UnorderedElementDistanceSpans.class);
+ boolean DEBUG = false;
/**
* Constructs UnorderedElementDistanceSpans for the given
@@ -75,6 +79,11 @@
hasMoreFirstSpans);
hasMoreSecondSpans = addSpan(secondSpans, secondSpanList,
hasMoreSecondSpans);
+
+ if (DEBUG){
+ log.debug("prepare firstSpanList: " +firstSpanList.size());
+ log.debug("prepare secondSpanList: " +secondSpanList.size());
+ }
}
else {
hasMoreSpans = false;
@@ -263,6 +272,7 @@
@Override
protected void updateList (List<CandidateSpan> candidateList) {
+// if (DEBUG) log.debug("candidate list size: " +candidateList.size());
updateElementList(candidateList.get(0).getPosition());
candidateList.remove(0);
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestUnorderedElementDistanceIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestUnorderedElementDistanceIndex.java
index c625818..6f3a2a5 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestUnorderedElementDistanceIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestUnorderedElementDistanceIndex.java
@@ -17,6 +17,7 @@
import de.ids_mannheim.korap.query.SpanElementQuery;
import de.ids_mannheim.korap.query.SpanNextQuery;
import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.util.QueryException;
@RunWith(JUnit4.class)
public class TestUnorderedElementDistanceIndex {
@@ -93,7 +94,7 @@
private FieldDocument createFieldDoc5 () {
FieldDocument fd = new FieldDocument();
- fd.addString("ID", "doc-2");
+ fd.addString("ID", "doc-5");
fd.addTV("base", "text",
"[(0-1)s:b|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>2<b>0|<>:p$<b>64<i>0<i>4<i>4<b>0]"
+ "[(1-2)s:b|s:e|_2$<i>1<i>2]"
@@ -104,6 +105,20 @@
+ "[(6-7)s:b|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0|<>:p$<b>64<i>6<i>7<i>7<b>0]");
return fd;
}
+
+ private FieldDocument createFieldDoc6 () {
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-6");
+ fd.addTV("base", "text",
+ "[(0-1)s:b|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>2<b>0]"
+ + "[(1-2)s:b|s:e|_2$<i>1<i>2]"
+ + "[(2-3)s:e|_3$<i>2<i>3]"
+ + "[(3-4)s:b|s:c|_4$<i>3<i>4]"
+ + "[(4-5)s:a|_5$<i>4<i>5]"
+ + "[(5-6)s:a|_6$<i>5<i>6]"
+ + "[(6-7)s:b|_7$<i>6<i>7]");
+ return fd;
+ }
public SpanQuery createQuery (String elementType, String x, String y,
int minDistance, int maxDistance, boolean isOrdered) {
@@ -297,4 +312,21 @@
assertEquals(4, kr.getMatch(7).startPos);
assertEquals(7, kr.getMatch(7).endPos);
}
+
+
+ /** Both candidate lists may be empty because the spans are not in an element span.
+ *
+ * @throws IOException
+ * @throws QueryException
+ */
+ @Test
+ public void testBothCandidateListEmptyBug () throws IOException, QueryException {
+ ki = new KrillIndex();
+ ki.addDoc(createFieldDoc6());
+ ki.commit();
+
+ SpanQuery sq = createQuery("s", "s:a", "s:c", 1, 2, false);
+ kr = ki.search(sq, (short) 10);
+ assertEquals(0, kr.getMatches().size());
+ }
}