Fixed sorting unordered element distance query results.
Change-Id: I0499a0c602c0d8d7a069477002bfd595ce24bac0
diff --git a/Changes b/Changes
index 518ddc5..55fd618 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,5 @@
0.55.7 2016-11-24
+ - [bugfix] Sorted results in unordered element distance query results (margaretha)
- [bugfix] Throw error on optional operands in distance
queries (diewald)
- [performance] Remember solved problematic queries in the
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
index 83e8ba4..fcdb6ff 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpan.java
@@ -8,11 +8,9 @@
/**
* CandidateSpan stores the current state of a Lucene {@link Spans},
- * which is an
- * enumeration. CandidateSpan is used for various purposes, such as
- * for
- * collecting spans which will be used in a latter process or next
- * matching.
+ * which is an enumeration. CandidateSpan is used for various
+ * purposes, such as for collecting spans which will be used in a
+ * latter process or next matching.
*
* @author margaretha
*/
@@ -21,8 +19,11 @@
private long cost;
private Collection<byte[]> payloads;
private int position;
- private CandidateSpan childSpan; // used for example for multiple distance
- // with unordered constraint
+
+ // Child spans are used in multiple distance queries with unordered constraint
+ private CandidateSpan childSpan;
+ private CandidateSpan secondChildSpan;
+
protected short spanId;
protected boolean hasSpanId;
@@ -270,6 +271,16 @@
}
+ public CandidateSpan getSecondChildSpan () {
+ return secondChildSpan;
+ }
+
+
+ public void setSecondChildSpan (CandidateSpan secondChildSpan) {
+ this.secondChildSpan = secondChildSpan;
+ }
+
+
/**
* Returns the span id of another Span related to the
* CandidateSpan. Only
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index 4dd9247..cb555c4 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -129,7 +129,8 @@
return;
}
- if (!payload.isEmpty()) {
+ if (!payload.isEmpty() && payload.get(0) != null){
+
// Get payload one by one
final int length = payload.get(0).length;
final ByteBuffer bb = ByteBuffer.allocate(length);
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
index 72876ab..e08db92 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
@@ -3,6 +3,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -17,10 +18,8 @@
/**
* Enumeration of span matches, whose two child spans have a specific
- * range of
- * distance (within a minimum and a maximum distance) and can occur in
- * any
- * order.
+ * range of distance (within a minimum and a maximum distance) and can
+ * occur in any order.
*
* @author margaretha
*/
@@ -31,7 +30,6 @@
protected List<CandidateSpan> firstSpanList, secondSpanList;
protected List<CandidateSpan> matchList;
private long matchCost;
- private int matchListSpanNum;
protected int currentDocNum;
@@ -80,14 +78,10 @@
/**
* Updates the firstSpanList and secondSpanList by adding the next
- * possible
- * first and second spans. Both the spans must be in the same
- * document. In
- * UnorderedElementDistanceSpans, a span that is not in an element
- * (distance
- * unit), is not added to its candidate list. The element must
- * also be in
- * the same document.
+ * possible first and second spans. Both the spans must be in the
+ * same document. In UnorderedElementDistanceSpans, a span that is
+ * not in an element (distance unit), is not added to its
+ * candidate list. The element must also be in the same document.
*
* @return <code>true</code> if at least one of the candidate
* lists can be
@@ -112,16 +106,16 @@
hasMoreFirstSpans, secondSpanList);
hasMoreSecondSpans = setCandidateList(secondSpanList, secondSpans,
hasMoreSecondSpans, firstSpanList);
- // System.out.println("--------------------");
- // System.out.println("firstSpanList:");
- // for (CandidateSpan cs : firstSpanList) {
- // System.out.println(cs.getStart() + " " + cs.getEnd());
- // }
- //
- // System.out.println("secondSpanList:");
- // for (CandidateSpan cs : secondSpanList) {
- // System.out.println(cs.getStart() + " " + cs.getEnd());
- // }
+ // System.out.println("--------------------");
+ // System.out.println("firstSpanList:");
+ // for (CandidateSpan cs : firstSpanList) {
+ // System.out.println(cs.getStart() + " " + cs.getEnd());
+ // }
+ //
+ // System.out.println("secondSpanList:");
+ // for (CandidateSpan cs : secondSpanList) {
+ // System.out.println(cs.getStart() + " " + cs.getEnd());
+ // }
CandidateSpan currentFirstSpan, currentSecondSpan;
if (!firstSpanList.isEmpty() && !secondSpanList.isEmpty()) {
@@ -140,9 +134,7 @@
// System.out.println(cs.getStart() +" "+ cs.getEnd());
// }
- matchList = findMatches(currentFirstSpan, secondSpanList);
- setMatchFirstSpan(currentFirstSpan);
- matchListSpanNum = 2;
+ matchList = findMatches(currentFirstSpan, secondSpanList, true);
updateList(firstSpanList);
}
else {
@@ -154,10 +146,18 @@
// System.out.println(cs.getStart() +" "+ cs.getEnd());
// }
- matchList = findMatches(currentSecondSpan, firstSpanList);
- setMatchSecondSpan(currentSecondSpan);
- matchListSpanNum = 1;
+ matchList = findMatches(currentSecondSpan, firstSpanList,
+ false);
updateList(secondSpanList);
+
+ if (currentFirstSpan.getStart() == currentSecondSpan.getStart()
+ && currentFirstSpan.getEnd() == currentSecondSpan
+ .getEnd()) {
+ matchList.addAll(findMatches(currentFirstSpan,
+ secondSpanList, false));
+ Collections.sort(matchList);
+ updateList(firstSpanList);
+ }
}
}
else if (firstSpanList.isEmpty()) {
@@ -248,16 +248,18 @@
* a target span
* @param candidateList
* a candidate list
+ * @param isTargetFirstSpan
+ * true is the target span is of the first span, false
+ * otherwise
* @return the matches in a list
*/
protected abstract List<CandidateSpan> findMatches (CandidateSpan target,
- List<CandidateSpan> candidateList);
+ List<CandidateSpan> candidateList, boolean isTargetFirstSpan);
/**
* Computes match properties and creates a candidate span match to
- * be added
- * to the match list.
+ * be added to the match list.
*
* @return a candidate span match
*/
@@ -280,15 +282,14 @@
}
CandidateSpan match = new CandidateSpan(start, end, doc, cost,
payloads);
- match.setChildSpan(cs);
+ //match.setChildSpan(cs);
return match;
}
/**
* Assigns the first candidate span in the match list as the
- * current span
- * match, and removes it from the matchList.
+ * current span match, and removes it from the matchList.
*/
private void setMatchProperties () {
CandidateSpan cs = matchList.get(0);
@@ -299,10 +300,8 @@
matchPayload.addAll(cs.getPayloads());
matchList.remove(0);
- if (matchListSpanNum == 1)
- setMatchFirstSpan(cs.getChildSpan());
- else
- setMatchSecondSpan(cs.getChildSpan());
+ setMatchFirstSpan(cs.getChildSpan());
+ setMatchSecondSpan(cs.getSecondChildSpan());
// log.trace("Match doc#={} start={} end={}", matchDocNumber,
// matchStartPosition, matchEndPosition);
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
index 24e277b..0e4b7fa 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedElementDistanceSpans.java
@@ -98,10 +98,8 @@
/**
* Adds all the spans occurring in the current document, as
- * CandidateSpans
- * to the specified candidate list, and tells if the enumeration
- * of the
- * spans has finished, or not.
+ * CandidateSpans to the specified candidate list, and tells if
+ * the enumeration of the spans has finished, or not.
*
* @param span
* a Span
@@ -133,8 +131,8 @@
/**
* Finds the element position of the specified span in the element
- * list or
- * by advancing the element spans until encountering the span.
+ * list or by advancing the element spans until encountering the
+ * span.
*
* @param span
* a Span
@@ -161,6 +159,7 @@
* Advances the element spans until encountering the given span.
*
* @param span
+ * a span
* @return <code>true</code> if such an element is found,
* <code>false</code>
* if the span is not in an element.
@@ -212,8 +211,7 @@
/**
* Tells if the target and candidate spans are not too far from
- * each other
- * (within the maximum distance).
+ * each other (within the maximum distance).
*
* @return <code>true</code> if the target and candidate spans are
* within
@@ -240,7 +238,7 @@
@Override
protected List<CandidateSpan> findMatches (CandidateSpan target,
- List<CandidateSpan> candidateList) {
+ List<CandidateSpan> candidateList, boolean isTargetFirstSpan) {
List<CandidateSpan> matches = new ArrayList<>();
@@ -251,17 +249,50 @@
actualDistance = Math.abs(targetPos - cs.getPosition());
if (minDistance == 0 && actualDistance == 0) {
- matches.add(createMatchCandidate(target, cs, true));
+ matches.add(createMatchCandidate(target, cs, true,
+ isTargetFirstSpan));
continue;
}
if (minDistance <= actualDistance && actualDistance <= maxDistance)
- matches.add(createMatchCandidate(target, cs, false));
+ matches.add(createMatchCandidate(target, cs, false,
+ isTargetFirstSpan));
}
return matches;
}
+ /**
+ * Creates a match from the two given spans (target and candidate)
+ *
+ * @param target
+ * the target span
+ * @param cs
+ * the candidate span
+ * @param isDistanceZero
+ * true if the distance between the two spans are zero,
+ * false otherwise
+ * @param isTargetFirstSpan
+ * true is the target span is of the first span, false
+ * otherwise
+ * @return a match
+ */
+ private CandidateSpan createMatchCandidate (CandidateSpan target,
+ CandidateSpan cs, boolean isDistanceZero,
+ boolean isTargetFirstSpan) {
+ CandidateSpan match = createMatchCandidate(target, cs, isDistanceZero);
+ if (isTargetFirstSpan) {
+ match.setChildSpan(target);
+ match.setSecondChildSpan(cs);
+ }
+ else {
+ match.setChildSpan(cs);
+ match.setSecondChildSpan(target);
+ }
+ return match;
+ }
+
+
@Override
protected void updateList (List<CandidateSpan> candidateList) {
updateElementList(candidateList.get(0).getPosition());
@@ -271,10 +302,8 @@
/**
* Reduces the number of elements kept in the element list by
- * removing the
- * elements whose position is smaller than or identical to the
- * position of
- * the last target span.
+ * removing the elements whose position is smaller than or
+ * identical to the position of the last target span.
*
* @param position
* the last target span position
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedTokenDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedTokenDistanceSpans.java
index 5c1350c..c0ac7f1 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedTokenDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedTokenDistanceSpans.java
@@ -123,7 +123,7 @@
@Override
protected List<CandidateSpan> findMatches (CandidateSpan target,
- List<CandidateSpan> candidateList) {
+ List<CandidateSpan> candidateList, boolean isTargetFirstSpan) {
List<CandidateSpan> matches = new ArrayList<>();
int actualDistance;
diff --git a/src/main/java/de/ids_mannheim/korap/response/Match.java b/src/main/java/de/ids_mannheim/korap/response/Match.java
index 4af1618..56e7dd9 100644
--- a/src/main/java/de/ids_mannheim/korap/response/Match.java
+++ b/src/main/java/de/ids_mannheim/korap/response/Match.java
@@ -57,7 +57,7 @@
private final static Logger log = LoggerFactory.getLogger(Match.class);
// This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = true;
+ public static final boolean DEBUG = false;
// Mapper for JSON serialization
ObjectMapper mapper = new ObjectMapper();
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
index 8ecc031..8594285 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
@@ -22,9 +22,14 @@
import de.ids_mannheim.korap.query.SpanElementQuery;
import de.ids_mannheim.korap.query.SpanNextQuery;
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.response.Match;
import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.util.QueryException;
+/**
+ * @author margaretha
+ *
+ */
@RunWith(JUnit4.class)
public class TestElementDistanceIndex {
@@ -87,7 +92,6 @@
return fd;
}
-
public SpanQuery createQuery (String elementType, String x, String y,
int min, int max, boolean isOrdered) {
@@ -246,8 +250,7 @@
kr = ki.search(sqwi.toQuery(), (short) 10);
assertEquals(1, kr.getTotalResults()); // Is 1 correct or should it not be ordered?
assertEquals("[[ec]]ebdc", kr.getMatch(0).getSnippetBrackets());
- };
-
+ }
public static String getString (String path) {
StringBuilder contentBuilder = new StringBuilder();
@@ -256,14 +259,14 @@
String str;
while ((str = in.readLine()) != null) {
contentBuilder.append(str);
- };
+ }
in.close();
}
catch (IOException e) {
fail(e.getMessage());
}
return contentBuilder.toString();
- };
+ }
public static SpanQueryWrapper jsonQuery (String jsonFile) {
@@ -276,7 +279,7 @@
catch (QueryException e) {
fail(e.getMessage());
sqwi = new QueryBuilder("tokens").seg("???");
- };
+ }
return sqwi;
- };
-};
+ }
+}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestUnorderedElementDistanceIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestUnorderedElementDistanceIndex.java
index be79a11..c625818 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestUnorderedElementDistanceIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestUnorderedElementDistanceIndex.java
@@ -91,6 +91,19 @@
return fd;
}
+ private FieldDocument createFieldDoc5 () {
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-2");
+ fd.addTV("base", "text",
+ "[(0-1)s:b|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>2<b>0|<>:p$<b>64<i>0<i>4<i>4<b>0]"
+ + "[(1-2)s:b|s:e|_2$<i>1<i>2]"
+ + "[(2-3)s:e|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>4<b>0]"
+ + "[(3-4)s:b|s:c|_4$<i>3<i>4]"
+ + "[(4-5)s:e|_5$<i>4<i>5|<>:s$<b>64<i>4<i>6<i>6<b>0|<>:p$<b>64<i>4<i>6<i>6<b>0]"
+ + "[(5-6)s:d|_6$<i>5<i>6]"
+ + "[(6-7)s:b|_7$<i>6<i>7|<>:s$<b>64<i>6<i>7<i>7<b>0|<>:p$<b>64<i>6<i>7<i>7<b>0]");
+ return fd;
+ }
public SpanQuery createQuery (String elementType, String x, String y,
int minDistance, int maxDistance, boolean isOrdered) {
@@ -251,4 +264,37 @@
// );
// }
}
+
+
+
+ /**
+ * Subspans occurrences are in the same positions.
+ */
+ @Test
+ public void testCase6 () throws IOException {
+ ki = new KrillIndex();
+ ki.addDoc(createFieldDoc5());
+ ki.commit();
+
+ SpanQuery sq = createQuery("s", "s:b", "s:e", 1, 2, false);
+ kr = ki.search(sq, (short) 10);
+
+ assertEquals(8, kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).startPos);
+ assertEquals(3, kr.getMatch(0).endPos);
+ assertEquals(0, kr.getMatch(1).startPos);
+ assertEquals(5, kr.getMatch(1).endPos);
+ assertEquals(1, kr.getMatch(2).startPos);
+ assertEquals(3, kr.getMatch(2).endPos);
+ assertEquals(1, kr.getMatch(3).startPos);
+ assertEquals(4, kr.getMatch(3).endPos);
+ assertEquals(1, kr.getMatch(4).startPos);
+ assertEquals(5, kr.getMatch(4).endPos);
+ assertEquals(2, kr.getMatch(5).startPos);
+ assertEquals(7, kr.getMatch(5).endPos);
+ assertEquals(3, kr.getMatch(6).startPos);
+ assertEquals(5, kr.getMatch(6).endPos);
+ assertEquals(4, kr.getMatch(7).startPos);
+ assertEquals(7, kr.getMatch(7).endPos);
+ }
}