Failing test for sorted subspans
Change-Id: Ic37621e0dd3b68c8a3e050cc0c9dc5be5fb735ff
diff --git a/Changes b/Changes
index e67a228..059b4de 100644
--- a/Changes
+++ b/Changes
@@ -4,6 +4,8 @@
(margaretha)
- [bugfix] Store lazy loading embedded spans for startsWith in
WithinSpans (diewald)
+ - [bugfix] Remove element payloads in SubSpans (similar to Focus)
+ to prevent wrong highlighting.
0.55.4 2016-04-22
- [bugfix] Wrap <base/s=t> boundaries around right extended queries
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
index 84f589d..1a1ab0a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
@@ -34,8 +34,7 @@
*
* In this example, the SpanSubspanQuery creates subspans, that are
* the first
- * two tokens of all sentences. It also collects all payloads from the
- * {@link ElementSpans} for the SubSpans.
+ * two tokens of all sentences.
*
* @author margaretha
* */
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java
index 64c2355..ca01080 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java
@@ -2,6 +2,7 @@
import java.io.IOException;
import java.util.Map;
+import java.util.ArrayList;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
@@ -10,6 +11,9 @@
import de.ids_mannheim.korap.query.SpanSubspanQuery;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* Enumeration of SubSpans, which are parts of another Spans. The
* SubSpans are specified with a start offset relative to the original
@@ -17,12 +21,18 @@
* position of the subspans is the same as that of the original spans.
*
* @author margaretha
+ * @author diewald
*
*/
public class SubSpans extends SimpleSpans {
- private int startOffset, length;
+ // Logger
+ private final Logger log = LoggerFactory.getLogger(SubSpans.class);
+ // This advises the Java compiler to ignore all logging calls
+ public static final boolean DEBUG = false;
+
+ private int startOffset, length;
/**
* Constructs SubSpans for the given {@link SpanSubspanQuery}
@@ -41,6 +51,11 @@
super(subspanQuery, context, acceptDocs, termContexts);
this.startOffset = subspanQuery.getStartOffset();
this.length = subspanQuery.getLength();
+ this.matchPayload = new ArrayList<byte[]>(6);
+
+ if (DEBUG) {
+ log.trace("Init SubSpan at {} with length {}", this.startOffset, this.length);
+ };
hasMoreSpans = firstSpans.next();
}
@@ -77,12 +92,15 @@
* @throws IOException
*/
public boolean findMatch () throws IOException {
+
+ // Negative offset: count the start position back from the span's end
if (this.startOffset < 0) {
matchStartPosition = firstSpans.end() + startOffset;
if (matchStartPosition < firstSpans.start()) {
matchStartPosition = firstSpans.start();
- }
+ };
}
+ // Non-negative offset: count the start position from the span's beginning
else {
matchStartPosition = firstSpans.start() + startOffset;
if (matchStartPosition >= firstSpans.end()) {
@@ -90,6 +108,7 @@
}
}
+ // Find end position of span
if (this.length > 0) {
matchEndPosition = matchStartPosition + this.length;
if (matchEndPosition > firstSpans.end()) {
@@ -99,8 +118,27 @@
else {
matchEndPosition = firstSpans.end();
}
- matchPayload = firstSpans.getPayload();
+
+ matchPayload.clear();
+
+ // Skip element payloads (first byte is 64) and copy all others
+ for (byte[] payload : firstSpans.getPayload()) {
+ if (payload[0] == (byte) 64) {
+ continue;
+ };
+
+ matchPayload.add(payload.clone());
+ };
+
matchDocNumber = firstSpans.doc();
+
+ if (DEBUG) {
+ log.trace("Start at absolute position {} " +
+ "and end at absolute position {}",
+ matchStartPosition,
+ matchEndPosition);
+ };
+
return true;
}
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index 78bf62e..1945bdb 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,4 +1,4 @@
-log4j.rootLogger = ERROR, stdout
+# log4j.rootLogger = ERROR, stdout
# Queries:
# log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
@@ -6,6 +6,7 @@
# Spans:
# log4j.logger.de.ids_mannheim.korap.query.spans.ElementSpans = TRACE, stdout
# log4j.logger.de.ids_mannheim.korap.query.spans.AttributeSpans = TRACE, stdout
+log4j.logger.de.ids_mannheim.korap.query.spans.SubSpans = TRACE, stdout
# log4j.logger.de.ids_mannheim.korap.query.spans.ElementAttributeSpans = TRACE, stdout
# log4j.logger.de.ids_mannheim.korap.query.spans.KorapTermSpan = TRACE, stdout
# log4j.logger.de.ids_mannheim.korap.query.spans.WithinSpans = TRACE, stdout
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
index fe59ad3..f09301f 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
@@ -12,21 +12,24 @@
import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.query.DistanceConstraint;
import de.ids_mannheim.korap.query.SpanDistanceQuery;
+import de.ids_mannheim.korap.query.SpanElementQuery;
import de.ids_mannheim.korap.query.SpanSubspanQuery;
+/**
+ * @author margaretha
+ * @author diewald
+ */
public class TestSubSpanIndex {
Result kr;
KrillIndex ki;
-
public TestSubSpanIndex () throws IOException {
ki = new KrillIndex();
ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
ki.commit();
}
-
@Test
public void testCase1 () throws IOException {
SpanDistanceQuery sdq = new SpanDistanceQuery(new SpanTermQuery(
@@ -118,4 +121,26 @@
// }
}
-}
+ // Negative SubSpanQuery
+ @Test
+ public void testCaseNegativeSubSpan () throws IOException {
+ KrillIndex ki = new KrillIndex();
+ FieldDocument fd = new FieldDocument();
+
+ fd.addTV(
+ "base",
+ // <x>a <x>b </x>c </x>
+ "a b c ",
+ "[(0-1)s:a|i:a|_0$<i>0<i>2|<>:x$<b>64<i>0<i>6<i>3<b>0]" +
+ "[(1-2)s:b|i:b|_1$<i>2<i>4|<>:x$<b>64<i>2<i>4<i>2<b>1]" +
+ "[(3-4)s:c|i:c|_2$<i>4<i>6]");
+ ki.addDoc(fd);
+ ki.commit();
+ SpanSubspanQuery ssq = new SpanSubspanQuery(new SpanElementQuery("base", "x"), -1, 1, true);
+ kr = ki.search(ssq, (short) 10);
+
+ assertEquals(2, kr.getTotalResults());
+ assertEquals("a [b ]c ", kr.getMatch(0).getSnippetBrackets());
+ assertEquals("a b [c ]", kr.getMatch(1).getSnippetBrackets());
+ };
+};