Update DistanceConstraint
Add childSpan field (with getter/setter) to CandidateSpan
Fix bugs in unordered distance queries (prepareLists)
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
index d8727f2..8f4273a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
@@ -13,6 +13,7 @@
private long cost;
private Collection<byte[]> payloads;
private int position;
+ private CandidateSpan childSpan; // used for multiple distance with unordered constraint
public CandidateSpan(Spans span) throws IOException {
this.doc = span.doc();
@@ -84,6 +85,14 @@
public void setPosition(int position) {
this.position = position;
}
+ /** Returns the span stored via setChildSpan(); presumably null if none was set (TODO confirm constructors). */
+ public CandidateSpan getChildSpan() {
+ return childSpan;
+ }
+ /** Attaches a child span to this span (used for multiple distance with unordered constraint). */
+ public void setChildSpan(CandidateSpan childSpan) {
+ this.childSpan = childSpan;
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpans.java
index cd751ab..b079c72 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementDistanceSpans.java
@@ -40,7 +40,7 @@
hasMoreElements = elements.next();
hasMoreSpans = hasMoreFirstSpans && hasMoreElements;
- elementPosition=0;
+ elementPosition=0;
}
@Override
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
index b4b71d8..3d9fd0a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
@@ -28,6 +28,7 @@
protected List<CandidateSpan> matchList;
private long matchCost;
private int matchListSpanNum;
+ protected int currentDocNum;
public UnorderedDistanceSpans(SpanDistanceQuery query,
AtomicReaderContext context, Bits acceptDocs,
@@ -35,7 +36,6 @@
super(query, context, acceptDocs, termContexts);
minDistance = query.getMinDistance();
maxDistance = query.getMaxDistance();
- collectPayloads = query.isCollectPayloads();
firstSpanList = new ArrayList<CandidateSpan>();
secondSpanList = new ArrayList<CandidateSpan>();
@@ -52,15 +52,8 @@
if (!matchList.isEmpty()){
setMatchProperties();
return true;
- }
-
- if (firstSpanList.isEmpty() && secondSpanList.isEmpty()){
- if (fillEmptyCandidateLists()){
- setMatchList();
- }
- else { hasMoreSpans = false; }
- }
- else { setMatchList(); }
+ }
+ if (prepareLists()) setMatchList();
}
return false;
}
@@ -72,7 +65,7 @@
*
* @return true iff at least one of the candidate lists can be filled.
* */
- protected abstract boolean fillEmptyCandidateLists() throws IOException;
+ protected abstract boolean prepareLists() throws IOException;
/** Set the list of matches between the span having the smallest position, and
* its candidates. Simply remove the span if it does not have any candidates.
@@ -83,6 +76,16 @@
hasMoreFirstSpans,secondSpanList);
hasMoreSecondSpans = setCandidateList(secondSpanList,secondSpans,
hasMoreSecondSpans,firstSpanList);
+// System.out.println("--------------------");
+// System.out.println("firstSpanList:");
+// for (CandidateSpan cs: firstSpanList) {
+// System.out.println(cs.getStart() +" "+ cs.getEnd());
+// }
+//
+// System.out.println("secondSpanList:");
+// for (CandidateSpan cs: secondSpanList) {
+// System.out.println(cs.getStart() +" "+ cs.getEnd());
+// }
CandidateSpan currentFirstSpan, currentSecondSpan;
if (!firstSpanList.isEmpty() && !secondSpanList.isEmpty()){
@@ -92,12 +95,24 @@
if (currentFirstSpan.getEnd() < currentSecondSpan.getEnd() ||
isLastCandidateSmaller(currentFirstSpan, currentSecondSpan)){
+// System.out.println("current target: "+firstSpanList.get(0).getStart() +" "+firstSpanList.get(0).getEnd());
+// System.out.println("candidates:");
+// for (CandidateSpan cs: secondSpanList) {
+// System.out.println(cs.getStart() +" "+ cs.getEnd());
+// }
+
matchList = findMatches(currentFirstSpan, secondSpanList);
setMatchFirstSpan(currentFirstSpan);
matchListSpanNum = 2;
updateList(firstSpanList);
}
else {
+// System.out.println("current target: "+secondSpanList.get(0).getStart() +" "+secondSpanList.get(0).getEnd());
+// System.out.println("candidates:");
+// for (CandidateSpan cs: firstSpanList) {
+// System.out.println(cs.getStart() +" "+ cs.getEnd());
+// }
+
matchList = findMatches(currentSecondSpan, firstSpanList);
setMatchSecondSpan(currentSecondSpan);
matchListSpanNum = 1;
@@ -105,9 +120,13 @@
}
}
else if (firstSpanList.isEmpty()){
+// System.out.println("current target: "+secondSpanList.get(0).getStart() +" "+secondSpanList.get(0).getEnd());
+// System.out.println("candidates: empty");
updateList(secondSpanList);
}
else{
+// System.out.println("current target: "+firstSpanList.get(0).getStart() +" "+firstSpanList.get(0).getEnd());
+// System.out.println("candidates: empty");
updateList(firstSpanList);
}
}
@@ -161,7 +180,10 @@
payloads.addAll(cs.getPayloads());
}
}
- return new CandidateSpan(start,end,doc,cost,payloads);
+
+ CandidateSpan match = new CandidateSpan(start,end,doc,cost,payloads);
+ match.setChildSpan(cs);
+ return match;
}
/** Assign the first candidate span in the match list as the current span match.
@@ -174,13 +196,15 @@
matchCost = cs.getCost();
matchPayload.addAll(cs.getPayloads());
matchList.remove(0);
-
+
if (matchListSpanNum == 1)
- setMatchFirstSpan(cs);
- else setMatchSecondSpan(cs);
+ setMatchFirstSpan(cs.getChildSpan());
+ else setMatchSecondSpan(cs.getChildSpan());
log.trace("Match doc#={} start={} end={}", matchDocNumber,
matchStartPosition,matchEndPosition);
+ //System.out.println("firstspan "+getMatchFirstSpan().getStart()+" "+ getMatchFirstSpan().getEnd());
+ //System.out.println("secondspan "+getMatchSecondSpan().getStart()+" "+ getMatchSecondSpan().getEnd());
}
@Override
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
index 21c22e3..b9ed42c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
@@ -29,8 +29,7 @@
// contains all previous elements whose position is greater than the last
// target span
- private List<CandidateSpan> elementList;
- private int currentDoc;
+ private List<CandidateSpan> elementList;
public UnorderedElementDistanceSpans(SpanDistanceQuery query,
AtomicReaderContext context, Bits acceptDocs,
@@ -44,34 +43,53 @@
}
@Override
- protected boolean fillEmptyCandidateLists() throws IOException {
- int position;
- while (firstSpanList.isEmpty() && secondSpanList.isEmpty()){
-
+ protected boolean prepareLists() throws IOException {
+ // Refills the empty candidate list(s); returns false only when the both-empty case cannot be reseeded.
+ if (firstSpanList.isEmpty() && secondSpanList.isEmpty()){
if (hasMoreFirstSpans && hasMoreSecondSpans && hasMoreElements &&
findSameDoc(firstSpans, secondSpans, elements)){
- if (currentDoc != firstSpans.doc()){
- currentDoc = firstSpans.doc();
+ if (currentDocNum != firstSpans.doc()){ // the spans are now in a different document than before
+ currentDocNum = firstSpans.doc(); // recorded before addSpan() advances the spans
elementList.clear();
- }
-
- position = findElementPosition(firstSpans);
- if (position != -1)
- firstSpanList.add(new CandidateSpan(firstSpans,position));
-
- position = findElementPosition(secondSpans);
- if (position != -1)
- secondSpanList.add(new CandidateSpan(secondSpans,position));
-
- hasMoreFirstSpans = firstSpans.next();
- hasMoreSecondSpans = secondSpans.next();
+ }
+ // addSpan() may add at most one span per list and returns the updated has-more flag.
+ hasMoreFirstSpans = addSpan(firstSpans,firstSpanList,hasMoreFirstSpans);
+ hasMoreSecondSpans = addSpan(secondSpans, secondSpanList, hasMoreSecondSpans);
}
- else { return false; }
+ else { // one of the has-more flags is false or findSameDoc() returned false
+ hasMoreSpans = false;
+ return false;
+ }
+ }
+ else if (firstSpanList.isEmpty() && hasMoreFirstSpans && // only the first list is empty
+ firstSpans.doc() == currentDocNum){
+ hasMoreFirstSpans = addSpan(firstSpans,firstSpanList,hasMoreFirstSpans);
}
+ else if (secondSpanList.isEmpty() && hasMoreSecondSpans && // only the second list is empty
+ secondSpans.doc() == currentDocNum){
+ hasMoreSecondSpans = addSpan(secondSpans, secondSpanList, hasMoreSecondSpans);
+ }
+ // Reached on every path except the failed reseed above, even when no span was added.
+ return true;
}
+ /** Scans {@code span} within the current document and adds the first span for which
+ * findElementPosition() is not -1 to {@code list}, then steps past that span.
+ * @return the result of the last span.next(), or hasMoreSpan unchanged if nothing was scanned */
+ private boolean addSpan(Spans span, List<CandidateSpan> list, boolean hasMoreSpan)
+ throws IOException {
+ while (hasMoreSpan && span.doc() == currentDocNum) {
+ int elementPos = findElementPosition(span);
+ boolean found = (elementPos != -1);
+ if (found) { list.add(new CandidateSpan(span, elementPos)); } // capture before advancing
+ hasMoreSpan = span.next();
+ if (found) { break; } // at most one span is added per call
+ }
+ return hasMoreSpan;
+ }
+
+
/** Find the element position of the span in the element list or by advancing
* the element spans until encountering the span.
*
@@ -99,7 +117,7 @@
* */
private boolean advanceElementTo(Spans span) throws IOException {
while (hasMoreElements &&
- elements.doc() == currentDoc &&
+ elements.doc() == currentDocNum &&
elements.start() < span.end()){
if (span.start() >= elements.start() &&
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedTokenDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedTokenDistanceSpans.java
index f7e6037..f733bbb 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedTokenDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedTokenDistanceSpans.java
@@ -28,16 +28,33 @@
}
@Override
- protected boolean fillEmptyCandidateLists() throws IOException {
- if (hasMoreFirstSpans && hasMoreSecondSpans &&
- ensureSameDoc(firstSpans, secondSpans)){
- firstSpanList.add(new CandidateSpan(firstSpans));
- secondSpanList.add(new CandidateSpan(secondSpans));
- hasMoreFirstSpans = firstSpans.next();
- hasMoreSecondSpans = secondSpans.next();
- return true;
+ protected boolean prepareLists() throws IOException {
+ // Refills the empty candidate list(s); returns false (and clears hasMoreSpans) when the both-empty case cannot be reseeded.
+ if (firstSpanList.isEmpty() && secondSpanList.isEmpty()){
+ if (hasMoreFirstSpans && hasMoreSecondSpans &&
+ ensureSameDoc(firstSpans, secondSpans)){
+ currentDocNum = firstSpans.doc(); // record the doc BEFORE next(); afterwards doc() refers to the following span
+ firstSpanList.add(new CandidateSpan(firstSpans));
+ secondSpanList.add(new CandidateSpan(secondSpans));
+ hasMoreFirstSpans = firstSpans.next();
+ hasMoreSecondSpans = secondSpans.next();
+ }
+ else { // one of the has-more flags is false or ensureSameDoc() returned false
+ hasMoreSpans = false;
+ return false;
+ }
}
- return false;
+ else if (firstSpanList.isEmpty() && hasMoreFirstSpans && // only the first list is empty
+ firstSpans.doc() == currentDocNum){ // stay within the document of the queued candidates
+ firstSpanList.add(new CandidateSpan(firstSpans));
+ hasMoreFirstSpans = firstSpans.next();
+ }
+ else if (secondSpanList.isEmpty() && hasMoreSecondSpans && // only the second list is empty
+ secondSpans.doc() == currentDocNum){ // stay within the document of the queued candidates
+ secondSpanList.add(new CandidateSpan(secondSpans));
+ hasMoreSecondSpans = secondSpans.next();
+ }
+ return true; // reached even when no span was added
}
@Override