Fixed sorting unordered element distance query results.
Change-Id: I0499a0c602c0d8d7a069477002bfd595ce24bac0
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
index 83e8ba4..fcdb6ff 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
@@ -8,11 +8,9 @@
/**
* CandidateSpan stores the current state of a Lucene {@link Spans},
- * which is an
- * enumeration. CandidateSpan is used for various purposes, such as
- * for
- * collecting spans which will be used in a latter process or next
- * matching.
+ * which is an enumeration. CandidateSpan is used for various
+ * purposes, such as for collecting spans which will be used in a
+ * latter process or next matching.
*
* @author margaretha
*/
@@ -21,8 +19,11 @@
private long cost;
private Collection<byte[]> payloads;
private int position;
- private CandidateSpan childSpan; // used for example for multiple distance
- // with unordered constraint
+
+ // Child spans are used in multiple distance queries with unordered constraint
+ private CandidateSpan childSpan;
+ private CandidateSpan secondChildSpan;
+
protected short spanId;
protected boolean hasSpanId;
@@ -270,6 +271,16 @@
}
+ public CandidateSpan getSecondChildSpan () {
+ return secondChildSpan;
+ }
+
+
+ public void setSecondChildSpan (CandidateSpan secondChildSpan) {
+ this.secondChildSpan = secondChildSpan;
+ }
+
+
/**
* Returns the span id of another Span related to the
* CandidateSpan. Only
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index 4dd9247..cb555c4 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -129,7 +129,8 @@
return;
}
- if (!payload.isEmpty()) {
+ if (!payload.isEmpty() && payload.get(0) != null){
+
// Get payload one by one
final int length = payload.get(0).length;
final ByteBuffer bb = ByteBuffer.allocate(length);
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
index 72876ab..e08db92 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
@@ -3,6 +3,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -17,10 +18,8 @@
/**
* Enumeration of span matches, whose two child spans have a specific
- * range of
- * distance (within a minimum and a maximum distance) and can occur in
- * any
- * order.
+ * range of distance (within a minimum and a maximum distance) and can
+ * occur in any order.
*
* @author margaretha
*/
@@ -31,7 +30,6 @@
protected List<CandidateSpan> firstSpanList, secondSpanList;
protected List<CandidateSpan> matchList;
private long matchCost;
- private int matchListSpanNum;
protected int currentDocNum;
@@ -80,14 +78,10 @@
/**
* Updates the firstSpanList and secondSpanList by adding the next
- * possible
- * first and second spans. Both the spans must be in the same
- * document. In
- * UnorderedElementDistanceSpans, a span that is not in an element
- * (distance
- * unit), is not added to its candidate list. The element must
- * also be in
- * the same document.
+ * possible first and second spans. Both the spans must be in the
+ * same document. In UnorderedElementDistanceSpans, a span that is
+ * not in an element (distance unit), is not added to its
+ * candidate list. The element must also be in the same document.
*
* @return <code>true</code> if at least one of the candidate
* lists can be
@@ -112,16 +106,16 @@
hasMoreFirstSpans, secondSpanList);
hasMoreSecondSpans = setCandidateList(secondSpanList, secondSpans,
hasMoreSecondSpans, firstSpanList);
- // System.out.println("--------------------");
- // System.out.println("firstSpanList:");
- // for (CandidateSpan cs : firstSpanList) {
- // System.out.println(cs.getStart() + " " + cs.getEnd());
- // }
- //
- // System.out.println("secondSpanList:");
- // for (CandidateSpan cs : secondSpanList) {
- // System.out.println(cs.getStart() + " " + cs.getEnd());
- // }
+ // System.out.println("--------------------");
+ // System.out.println("firstSpanList:");
+ // for (CandidateSpan cs : firstSpanList) {
+ // System.out.println(cs.getStart() + " " + cs.getEnd());
+ // }
+ //
+ // System.out.println("secondSpanList:");
+ // for (CandidateSpan cs : secondSpanList) {
+ // System.out.println(cs.getStart() + " " + cs.getEnd());
+ // }
CandidateSpan currentFirstSpan, currentSecondSpan;
if (!firstSpanList.isEmpty() && !secondSpanList.isEmpty()) {
@@ -140,9 +134,7 @@
// System.out.println(cs.getStart() +" "+ cs.getEnd());
// }
- matchList = findMatches(currentFirstSpan, secondSpanList);
- setMatchFirstSpan(currentFirstSpan);
- matchListSpanNum = 2;
+ matchList = findMatches(currentFirstSpan, secondSpanList, true);
updateList(firstSpanList);
}
else {
@@ -154,10 +146,18 @@
// System.out.println(cs.getStart() +" "+ cs.getEnd());
// }
- matchList = findMatches(currentSecondSpan, firstSpanList);
- setMatchSecondSpan(currentSecondSpan);
- matchListSpanNum = 1;
+ matchList = findMatches(currentSecondSpan, firstSpanList,
+ false);
updateList(secondSpanList);
+
+ if (currentFirstSpan.getStart() == currentSecondSpan.getStart()
+ && currentFirstSpan.getEnd() == currentSecondSpan
+ .getEnd()) {
+ matchList.addAll(findMatches(currentFirstSpan,
+ secondSpanList, false));
+ Collections.sort(matchList);
+ updateList(firstSpanList);
+ }
}
}
else if (firstSpanList.isEmpty()) {
@@ -248,16 +248,18 @@
* a target span
* @param candidateList
* a candidate list
+ * @param isTargetFirstSpan
+ * true is the target span is of the first span, false
+ * otherwise
* @return the matches in a list
*/
protected abstract List<CandidateSpan> findMatches (CandidateSpan target,
- List<CandidateSpan> candidateList);
+ List<CandidateSpan> candidateList, boolean isTargetFirstSpan);
/**
* Computes match properties and creates a candidate span match to
- * be added
- * to the match list.
+ * be added to the match list.
*
* @return a candidate span match
*/
@@ -280,15 +282,14 @@
}
CandidateSpan match = new CandidateSpan(start, end, doc, cost,
payloads);
- match.setChildSpan(cs);
+ //match.setChildSpan(cs);
return match;
}
/**
* Assigns the first candidate span in the match list as the
- * current span
- * match, and removes it from the matchList.
+ * current span match, and removes it from the matchList.
*/
private void setMatchProperties () {
CandidateSpan cs = matchList.get(0);
@@ -299,10 +300,8 @@
matchPayload.addAll(cs.getPayloads());
matchList.remove(0);
- if (matchListSpanNum == 1)
- setMatchFirstSpan(cs.getChildSpan());
- else
- setMatchSecondSpan(cs.getChildSpan());
+ setMatchFirstSpan(cs.getChildSpan());
+ setMatchSecondSpan(cs.getSecondChildSpan());
// log.trace("Match doc#={} start={} end={}", matchDocNumber,
// matchStartPosition, matchEndPosition);
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
index 24e277b..0e4b7fa 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
@@ -98,10 +98,8 @@
/**
* Adds all the spans occurring in the current document, as
- * CandidateSpans
- * to the specified candidate list, and tells if the enumeration
- * of the
- * spans has finished, or not.
+ * CandidateSpans to the specified candidate list, and tells if
+ * the enumeration of the spans has finished, or not.
*
* @param span
* a Span
@@ -133,8 +131,8 @@
/**
* Finds the element position of the specified span in the element
- * list or
- * by advancing the element spans until encountering the span.
+ * list or by advancing the element spans until encountering the
+ * span.
*
* @param span
* a Span
@@ -161,6 +159,7 @@
* Advances the element spans until encountering the given span.
*
* @param span
+ * a span
* @return <code>true</code> if such an element is found,
* <code>false</code>
* if the span is not in an element.
@@ -212,8 +211,7 @@
/**
* Tells if the target and candidate spans are not too far from
- * each other
- * (within the maximum distance).
+ * each other (within the maximum distance).
*
* @return <code>true</code> if the target and candidate spans are
* within
@@ -240,7 +238,7 @@
@Override
protected List<CandidateSpan> findMatches (CandidateSpan target,
- List<CandidateSpan> candidateList) {
+ List<CandidateSpan> candidateList, boolean isTargetFirstSpan) {
List<CandidateSpan> matches = new ArrayList<>();
@@ -251,17 +249,50 @@
actualDistance = Math.abs(targetPos - cs.getPosition());
if (minDistance == 0 && actualDistance == 0) {
- matches.add(createMatchCandidate(target, cs, true));
+ matches.add(createMatchCandidate(target, cs, true,
+ isTargetFirstSpan));
continue;
}
if (minDistance <= actualDistance && actualDistance <= maxDistance)
- matches.add(createMatchCandidate(target, cs, false));
+ matches.add(createMatchCandidate(target, cs, false,
+ isTargetFirstSpan));
}
return matches;
}
+ /**
+ * Creates a match from the two given spans (target and candidate)
+ *
+ * @param target
+ * the target span
+ * @param cs
+ * the candidate span
+ * @param isDistanceZero
+ * true if the distance between the two spans are zero,
+ * false otherwise
+ * @param isTargetFirstSpan
+ * true is the target span is of the first span, false
+ * otherwise
+ * @return a match
+ */
+ private CandidateSpan createMatchCandidate (CandidateSpan target,
+ CandidateSpan cs, boolean isDistanceZero,
+ boolean isTargetFirstSpan) {
+ CandidateSpan match = createMatchCandidate(target, cs, isDistanceZero);
+ if (isTargetFirstSpan) {
+ match.setChildSpan(target);
+ match.setSecondChildSpan(cs);
+ }
+ else {
+ match.setChildSpan(cs);
+ match.setSecondChildSpan(target);
+ }
+ return match;
+ }
+
+
@Override
protected void updateList (List<CandidateSpan> candidateList) {
updateElementList(candidateList.get(0).getPosition());
@@ -271,10 +302,8 @@
/**
* Reduces the number of elements kept in the element list by
- * removing the
- * elements whose position is smaller than or identical to the
- * position of
- * the last target span.
+ * removing the elements whose position is smaller than or
+ * identical to the position of the last target span.
*
* @param position
* the last target span position
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedTokenDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedTokenDistanceSpans.java
index 5c1350c..c0ac7f1 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedTokenDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedTokenDistanceSpans.java
@@ -123,7 +123,7 @@
@Override
protected List<CandidateSpan> findMatches (CandidateSpan target,
- List<CandidateSpan> candidateList) {
+ List<CandidateSpan> candidateList, boolean isTargetFirstSpan) {
List<CandidateSpan> matches = new ArrayList<>();
int actualDistance;
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 4af1618..56e7dd9 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -57,7 +57,7 @@
private final static Logger log = LoggerFactory.getLogger(Match.class);
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = true;
+ public static final boolean DEBUG = false;
// Mapper for JSON serialization
ObjectMapper mapper = new ObjectMapper();