Fixed span expansion with negation.
Change-Id: Ib78b1a28055324bf3c9cd49edbcb71ed1777aadd
diff --git a/Changes b/Changes
index edf568f..e6d9a37 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,7 @@
-0.58.1 2018-10-22
+0.58.1 2018-10-23
- [bugfix] Security upgrade of Jackson for CVE-2017-17485 and
CVE-2018-7489 (diewald)
+ - [bugfix] Span expansion with negation (margaretha)
0.58.0 2018-09-03
- [feature] Implemented referencing cached collection (margaretha)
@@ -24,6 +25,7 @@
regular test suite (diewald)
- [bugfix] Fixed missing docs in VC cache (margaretha)
- [documentation] Added update hint to Readme (diewald)
+ - [bugfix] Span expansion with negation (margaretha)
0.57 2018-04-05
- [feature] Support text queries in metadata
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
index daea9e8..ecddc2a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
@@ -30,11 +30,9 @@
* to the
* <em>right</em> of the original span.
*
- * The expansion offsets, namely the start and end position of an
- * expansion
- * part, can be stored in payloads. A class number is assigned to the
- * offsets
- * grouping them altogether.
+ * The expansion offsets, namely the start and end positions of an
+ * expansion part, are stored in payloads. A class number is assigned
+ * to the offsets grouping them altogether.
*
* @author margaretha
*/
@@ -49,7 +47,6 @@
private long matchCost;
-
/**
* Constructs ExpandedExclusionSpans from the given
* {@link SpanExpansionQuery}.
@@ -91,7 +88,6 @@
hasMoreSpans = firstSpans.next();
}
-
@Override
public boolean next () throws IOException {
matchPayload.clear();
@@ -99,7 +95,6 @@
return advance();
}
-
/**
* Advances the ExpandedExclusionSpans to the next match.
*
@@ -131,7 +126,6 @@
return false;
}
-
/**
* Finds matches by expanding the firstspans either to the left or
* to the
@@ -154,38 +148,41 @@
}
}
-
/**
* Expands the firstspans to the left.
*
* @throws IOException
*/
private void expandLeft () throws IOException {
- //int counter = max;
+ // int counter = max;
int maxPos = max;
CandidateSpan lastNotClause = null;
- while (hasMoreNotClause && notClause.start() < firstSpans.start()) {
+ while (hasMoreNotClause &&
+ notClause.doc() == firstSpans.doc() &&
+ notClause.start() < firstSpans.start()) {
// between max and firstspan.start()
if (notClause.start() >= firstSpans.start() - maxPos) {
maxPos = firstSpans.start() - notClause.start() - 1;
lastNotClause = new CandidateSpan(notClause);
- //counter--;
+ // counter--;
}
if (!notClause.next()) {
hasMoreNotClause = false;
}
}
- // if a notClause is between max and firstspan.start,
+ // if a notClause is between max and firstspan.start,
// then maxPos = last NotClause pos -1
generateCandidates(min, maxPos, direction);
if (lastNotClause != null && hasMoreNotClause)
while ((hasMoreSpans = firstSpans.next())
- // the next notClause is not in between max and firstspan.start()
+ // the next notClause is not in between max and
+ // firstspan.start()
&& notClause.start() > firstSpans.start()
- // the last notClause is in between max and firstspan.start()
+ // the last notClause is in between max and
+ // firstspan.start()
&& lastNotClause.getStart() < firstSpans.start()
&& lastNotClause.getStart() >= firstSpans.start() - max) {
@@ -197,7 +194,6 @@
}
}
-
/**
* Expands the firstspans to the right.
*
@@ -209,7 +205,7 @@
boolean isFound = false;
CandidateSpan firstNotClause = null;
- //System.out.println("main start:"+firstSpans.start());
+ // System.out.println("main start:"+firstSpans.start());
while (hasMoreNotClause && notClause.start() < expansionEnd) {
// between firstspan.end() and expansionEnd
if (!isFound && notClause.start() >= firstSpans.end()) {
@@ -222,7 +218,7 @@
}
}
// if a notClause is between firstSpan.end and max
- // then maxPos = the first notClause pos -1
+ // then maxPos = the first notClause pos -1
generateCandidates(min, maxPos, direction);
if (firstNotClause != null) {
@@ -230,7 +226,9 @@
// in between
&& firstNotClause.getStart() < firstSpans.end() + max
&& firstNotClause.getStart() >= firstSpans.end()) {
- //System.out.println("first start:"+firstNotClause.getStart()+", main start:"+firstSpans.start());
+ // System.out.println("first
+ // start:"+firstNotClause.getStart()+", main
+ // start:"+firstSpans.start());
maxPos = firstNotClause.getStart() - firstSpans.end() - 1;
generateCandidates(min, maxPos, direction);
}
@@ -240,7 +238,6 @@
}
}
-
/**
* Creates new candidate matches for the given direction, minimum
* and
@@ -265,7 +262,7 @@
start = Math.max(0, firstSpans.start() - counter);
if (start > -1) {
end = firstSpans.end();
- //System.out.println(start+","+end);
+ // System.out.println(start+","+end);
cs = new CandidateSpan(start, end, firstSpans.doc(),
firstSpans.cost(),
createPayloads(start, firstSpans.start()));
@@ -279,7 +276,7 @@
while (counter <= maxPos) {
start = firstSpans.start();
end = firstSpans.end() + counter;
- //System.out.println(start+","+end);
+ // System.out.println(start+","+end);
cs = new CandidateSpan(start, end, firstSpans.doc(),
firstSpans.cost(),
@@ -290,7 +287,6 @@
}
}
-
/**
* Creates payloads for a candiate match by copying the payloads
* of the
@@ -314,13 +310,12 @@
payload.addAll(firstSpans.getPayload());
}
if (classNumber > 0) {
- //System.out.println("Extension offsets "+start+","+end);
+ // System.out.println("Extension offsets "+start+","+end);
payload.add(createExtensionPayloads(start, end));
}
return payload;
}
-
/**
* Generates a byte array of extension offsets and class number to
* be added
@@ -342,7 +337,6 @@
return buffer.array();
}
-
@Override
public boolean skipTo (int target) throws IOException {
if (hasMoreSpans && (firstSpans.doc() < target)) {
@@ -355,7 +349,6 @@
return advance();
}
-
@Override
public long cost () {
return matchCost;
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
index 81a6ef0..488067d 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
@@ -1,10 +1,9 @@
package de.ids_mannheim.korap.index;
+import static de.ids_mannheim.korap.TestSimple.getJsonString;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.fail;
-import java.io.*;
-import static de.ids_mannheim.korap.TestSimple.*;
+import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.RegexpQuery;
@@ -20,10 +19,8 @@
import de.ids_mannheim.korap.query.QueryBuilder;
import de.ids_mannheim.korap.query.SpanElementQuery;
import de.ids_mannheim.korap.query.SpanExpansionQuery;
-import de.ids_mannheim.korap.query.SpanNextQuery;
import de.ids_mannheim.korap.query.SpanRepetitionQuery;
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
-import de.ids_mannheim.korap.response.Match;
import de.ids_mannheim.korap.response.Result;
import de.ids_mannheim.korap.util.QueryException;
@@ -479,6 +476,71 @@
}
+
+ @Test
+ public void indexExpansionWithNegationDifferentFragments () throws Exception {
+ KrillIndex ki = new KrillIndex();
+
+ // Add each document followed by its own commit (different fragments)
+ FieldDocument fd = new FieldDocument();
+ fd.addTV("base",
+ "a B c",
+ "[(0-1)s:a|i:a|_1$<i>0<i>1]"
+ + "[(1-2)s:B|i:b|_2$<i>1<i>2|]"
+ + "[(2-3)s:c|i:c|_3$<i>2<i>3]");
+ ki.addDoc(fd);
+ ki.commit();
+ fd.addTV("base",
+ "a b c",
+ "[(0-1)s:a|i:a|_1$<i>0<i>1]"
+ + "[(1-2)s:b|i:b|_2$<i>1<i>2|]"
+ + "[(2-3)s:c|i:c|_3$<i>2<i>3]");
+ ki.addDoc(fd);
+ ki.commit();
+
+ QueryBuilder kq = new QueryBuilder("base");
+ SpanQuery sq = kq.seq(kq.seg("s:a")).append(kq.seg().without("s:B")).append(kq.seg("s:c")).toQuery();
+ assertEquals("spanNext(base:s:a, spanExpansion(base:s:c, !base:s:B{1, 1}, left))", sq.toString());
+ Krill ks = new Krill(sq);
+ ks.getMeta().getContext().left.setToken(true).setLength(0);
+ ks.getMeta().getContext().right.setToken(true).setLength(0);
+
+ Result kr = ki.search(ks);
+ assertEquals((long) 1, kr.getTotalResults());
+ };
+
+ @Test
+ public void indexExpansionWithNegationSameFragmentBug () throws Exception {
+ KrillIndex ki = new KrillIndex();
+
+ // Add both documents before a single commit (same fragment)
+ FieldDocument fd = new FieldDocument();
+ fd.addTV("base",
+ "a B c",
+ "[(0-1)s:a|i:a|_1$<i>0<i>1]"
+ + "[(1-2)s:B|i:b|_2$<i>1<i>2|]"
+ + "[(2-3)s:c|i:c|_3$<i>2<i>3]");
+ ki.addDoc(fd);
+ fd.addTV("base",
+ "a b c",
+ "[(0-1)s:a|i:a|_1$<i>0<i>1]"
+ + "[(1-2)s:b|i:b|_2$<i>1<i>2|]"
+ + "[(2-3)s:c|i:c|_3$<i>2<i>3]");
+ ki.addDoc(fd);
+ ki.commit();
+
+ QueryBuilder kq = new QueryBuilder("base");
+ SpanQuery sq = kq.seq(kq.seg("s:a")).append(kq.seg().without("s:B")).append(kq.seg("s:c")).toQuery();
+ assertEquals("spanNext(base:s:a, spanExpansion(base:s:c, !base:s:B{1, 1}, left))", sq.toString());
+ Krill ks = new Krill(sq);
+ ks.getMeta().getContext().left.setToken(true).setLength(0);
+ ks.getMeta().getContext().right.setToken(true).setLength(0);
+
+ Result kr = ki.search(ks);
+ assertEquals((long) 1, kr.getTotalResults());
+ };
+
+
private FieldDocument createFieldDoc6 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-6");