Fixed #53 - element distance query bug.
Change-Id: I7954bf696ced43da382123cbef698574b21ec4e7
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
index 06227e2..4db9212 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
@@ -8,6 +8,8 @@
import java.util.List;
import java.util.Map;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
@@ -32,7 +34,6 @@
private long matchCost;
protected int currentDocNum;
-
/**
* Constructs UnorderedDistanceSpans for the given
* {@link SpanDistanceQuery} .
@@ -61,7 +62,6 @@
hasMoreSpans = hasMoreFirstSpans && hasMoreSecondSpans;
}
-
@Override
protected boolean advance () throws IOException {
while (hasMoreSpans || !matchList.isEmpty()) {
@@ -69,13 +69,11 @@
setMatchProperties();
return true;
}
- if (prepareLists())
- setMatchList();
+ if (prepareLists()) setMatchList();
}
return false;
}
-
/**
* Updates the firstSpanList and secondSpanList by adding the next
* possible first and second spans. Both the spans must be in the
@@ -90,7 +88,6 @@
*/
protected abstract boolean prepareLists () throws IOException;
-
/**
* Sets the list of matches for the span having the smallest
* position (i.e. between the first and the second spans), and its
@@ -106,16 +103,16 @@
hasMoreFirstSpans, secondSpanList);
hasMoreSecondSpans = setCandidateList(secondSpanList, secondSpans,
hasMoreSecondSpans, firstSpanList);
- // System.out.println("--------------------");
- // System.out.println("firstSpanList:");
- // for (CandidateSpan cs : firstSpanList) {
- // System.out.println(cs.getStart() + " " + cs.getEnd());
- // }
- //
- // System.out.println("secondSpanList:");
- // for (CandidateSpan cs : secondSpanList) {
- // System.out.println(cs.getStart() + " " + cs.getEnd());
- // }
+ // System.out.println("--------------------");
+ // System.out.println("firstSpanList:");
+ // for (CandidateSpan cs : firstSpanList) {
+ // System.out.println(cs.getStart() + " " + cs.getEnd());
+ // }
+ //
+ // System.out.println("secondSpanList:");
+ // for (CandidateSpan cs : secondSpanList) {
+ // System.out.println(cs.getStart() + " " + cs.getEnd());
+ // }
CandidateSpan currentFirstSpan, currentSecondSpan;
if (!firstSpanList.isEmpty() && !secondSpanList.isEmpty()) {
@@ -126,26 +123,12 @@
if (currentFirstSpan.getStart() < currentSecondSpan.getStart()
|| isLastCandidateSmaller(currentFirstSpan,
currentSecondSpan)) {
-// log.trace("current target: " + firstSpanList.get(0).getStart()
-// + " " + firstSpanList.get(0).getEnd());
-// System.out.println("candidates:");
-// for (CandidateSpan cs : secondSpanList) {
-// System.out.println(cs.getStart() + " " + cs.getEnd());
-// }
-
matchList = findMatches(currentFirstSpan, secondSpanList, true);
updateList(firstSpanList);
}
else {
-// log.trace("current target: " + secondSpanList.get(0).getStart()
-// + " " + secondSpanList.get(0).getEnd());
-// System.out.println("candidates:");
-// for (CandidateSpan cs : firstSpanList) {
-// System.out.println(cs.getStart() + " " + cs.getEnd());
-// }
-
- matchList = findMatches(currentSecondSpan, firstSpanList,
- false);
+ matchList =
+ findMatches(currentSecondSpan, firstSpanList, false);
updateList(secondSpanList);
if (currentFirstSpan.getStart() == currentSecondSpan.getStart()
@@ -158,21 +141,14 @@
}
}
}
- else if (firstSpanList.isEmpty()) {
- // log.trace("current target: " + secondSpanList.get(0).getStart()
- // + " " + secondSpanList.get(0).getEnd());
- // log.trace("candidates: empty");
+ else if (!secondSpanList.isEmpty()) {
updateList(secondSpanList);
}
- else {
- // log.trace("current target: " + firstSpanList.get(0).getStart()
- // + " " + firstSpanList.get(0).getEnd());
- // log.trace("candidates: empty");
+ else if (!firstSpanList.isEmpty()) {
updateList(firstSpanList);
}
}
-
/**
* Tells if the last candidate from the secondSpanList has a
* smaller end position than the end position of the the last
@@ -189,8 +165,8 @@
private boolean isLastCandidateSmaller (CandidateSpan currentFirstSpan,
CandidateSpan currentSecondSpan) {
if (currentFirstSpan.getEnd() == currentSecondSpan.getEnd()) {
- int secondEnd = secondSpanList.get(secondSpanList.size() - 1)
- .getEnd();
+ int secondEnd =
+ secondSpanList.get(secondSpanList.size() - 1).getEnd();
int firstEnd = firstSpanList.get(firstSpanList.size() - 1).getEnd();
return (secondEnd < firstEnd ? true : false);
}
@@ -198,7 +174,6 @@
return false;
}
-
/**
* Performs an update based on the given candidateList. In
* {@link UnorderedTokenDistanceSpans}, the first candidate in the
@@ -211,7 +186,6 @@
*/
protected abstract void updateList (List<CandidateSpan> candidateList);
-
/**
* Sets the candidate list for the first element in the target
* list and
@@ -236,7 +210,6 @@
boolean hasMoreCandidates, List<CandidateSpan> targetList)
throws IOException;
-
/**
* Finds all matches between the target span and its candidates in
* the
@@ -254,7 +227,6 @@
protected abstract List<CandidateSpan> findMatches (CandidateSpan target,
List<CandidateSpan> candidateList, boolean isTargetFirstSpan);
-
/**
* Computes match properties and creates a candidate span match to
* be added to the match list.
@@ -262,7 +234,8 @@
* @return a candidate span match
*/
protected CandidateSpan createMatchCandidate (CandidateSpan target,
- CandidateSpan cs, boolean isDistanceZero, boolean isTargetFirstSpan) {
+ CandidateSpan cs, boolean isDistanceZero,
+ boolean isTargetFirstSpan) {
int start = Math.min(target.getStart(), cs.getStart());
int end = Math.max(target.getEnd(), cs.getEnd());
@@ -278,8 +251,8 @@
payloads.addAll(cs.getPayloads());
}
}
- CandidateSpan match = new CandidateSpan(start, end, doc, cost,
- payloads);
+ CandidateSpan match =
+ new CandidateSpan(start, end, doc, cost, payloads);
if (isTargetFirstSpan) {
match.setChildSpan(target);
match.setSecondChildSpan(cs);
@@ -288,11 +261,10 @@
match.setChildSpan(cs);
match.setSecondChildSpan(target);
}
- //match.setChildSpan(cs);
+ // match.setChildSpan(cs);
return match;
}
-
/**
* Assigns the first candidate span in the match list as the
* current span match, and removes it from the matchList.
@@ -311,13 +283,14 @@
// log.trace("Match doc#={} start={} end={}", matchDocNumber,
// matchStartPosition, matchEndPosition);
- // log.trace("firstspan " + getMatchFirstSpan().getStart() + " "
+ // log.trace("firstspan " + getMatchFirstSpan().getStart() + "
+ // "
// + getMatchFirstSpan().getEnd());
- // log.trace("secondspan " + getMatchSecondSpan().getStart() + " "
+ // log.trace("secondspan " + getMatchSecondSpan().getStart() +
+ // " "
// + getMatchSecondSpan().getEnd());
}
-
@Override
public boolean skipTo (int target) throws IOException {
if (hasMoreSpans && (secondSpans.doc() < target)) {
@@ -334,7 +307,6 @@
return advance();
}
-
@Override
public long cost () {
return matchCost;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
index 388e97b..f111c29 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
@@ -6,6 +6,8 @@
import java.util.List;
import java.util.Map;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
@@ -33,6 +35,8 @@
// target span
private List<CandidateSpan> elementList;
+ private Logger log = LogManager.getLogger(UnorderedElementDistanceSpans.class);
+ boolean DEBUG = false;
/**
* Constructs UnorderedElementDistanceSpans for the given
@@ -75,6 +79,11 @@
hasMoreFirstSpans);
hasMoreSecondSpans = addSpan(secondSpans, secondSpanList,
hasMoreSecondSpans);
+
+ if (DEBUG){
+ log.debug("prepare firstSpanList: " +firstSpanList.size());
+ log.debug("prepare secondSpanList: " +secondSpanList.size());
+ }
}
else {
hasMoreSpans = false;
@@ -263,6 +272,7 @@
@Override
protected void updateList (List<CandidateSpan> candidateList) {
+// if (DEBUG) log.debug("candidate list size: " +candidateList.size());
updateElementList(candidateList.get(0).getPosition());
candidateList.remove(0);
}