Added a few more tests
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
index fe1a416..f406ef2 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
@@ -28,6 +28,8 @@
private boolean hasMoreNotClause;
private Spans notClause;
+ private long matchCost;
+
public ExpandedExclusionSpans(SpanExpansionQuery spanExpansionQuery,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
@@ -72,6 +74,7 @@
matchStartPosition = cs.getStart();
matchEndPosition = cs.getEnd();
matchPayload = cs.getPayloads();
+ matchCost = cs.getCost() + notClause.cost();
candidateSpans.remove(0);
return true;
}
@@ -213,7 +216,7 @@
payload.addAll(firstSpans.getPayload());
}
if (classNumber > 0 ){
- // System.out.println("Extension offsets "+start+","+end);
+ //System.out.println("Extension offsets "+start+","+end);
payload.add(calculateExtensionOffsets(start, end));
}
return payload;
@@ -241,7 +244,6 @@
@Override
public long cost() {
- // TODO Auto-generated method stub
- return 0;
+ return matchCost;
}
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
index 68ae7f2..9000d4e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
@@ -13,7 +13,7 @@
import de.ids_mannheim.korap.query.SpanExpansionQuery;
-/**
+/** Spans expanded by a minimum of m tokens and a maximum of n tokens.
* @author margaretha
* */
public class ExpandedSpans extends SimpleSpans{
@@ -45,16 +45,13 @@
}
private boolean advance() throws IOException {
- while (hasMoreSpans || candidateSpans.size() > 0) {
+ while (candidateSpans.size() > 0 || (hasMoreSpans = firstSpans.next())) {
if (candidateSpans.size() > 0 ){
setMatch(candidateSpans.get(0));
candidateSpans.remove(0);
return true;
}
- else {
- hasMoreSpans = firstSpans.next();
- setCandidateList();
- }
+ else { setCandidateList(); }
}
return false;
}
@@ -101,12 +98,10 @@
throws IOException{
ArrayList<byte[]> payload = new ArrayList<byte[]>();
-
- if (classNumber > 0 ){
- if (firstSpans.isPayloadAvailable()){
- payload.addAll(firstSpans.getPayload());
- }
-
+ if (firstSpans.isPayloadAvailable()){
+ payload.addAll(firstSpans.getPayload());
+ }
+ if (classNumber > 0 ){
//System.out.println("Extension offsets "+start+","+end);
payload.add(calculateExtensionOffsets(start, end));
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
index 1e575ef..b5379dd 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
@@ -38,7 +38,7 @@
SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 2, -1, true);
kr = ki.search(seq, (short) 10);
- assertEquals(72,kr.getTotalResults());
+ assertEquals(69,kr.getTotalResults());
assertEquals(5, kr.getMatch(0).getStartPos());
assertEquals(8, kr.getMatch(0).getEndPos());
assertEquals(6, kr.getMatch(1).getStartPos());
@@ -136,6 +136,7 @@
}
/** Left expansion with exclusion
+ * No expansion
* */
@Test
public void testCase4() throws IOException {
@@ -163,4 +164,115 @@
}
+ /** Expansion beyond the start and beyond the end of a document.
+ * start => an expansion that would begin before position 0 is cut to 0
+ * TODO: end => an expansion past the last token is to be handled in the rendering process
+ * @throws IOException
+ * */
+ @Test
+ public void testCase5() throws IOException{
+ KorapIndex ki = new KorapIndex();
+ ki.addDoc(createFieldDoc0()); // s:e occurs at token positions 1, 4, 7 and 8
+ ki.commit();
+
+ SpanTermQuery stq = new SpanTermQuery(new Term("base","s:e") );
+ // left expansion (direction -1) of the first s:e at 1-2 would start before position 0
+ SpanExpansionQuery seq = new SpanExpansionQuery(stq,2,2, -1, true);
+ kr = ki.search(seq, (short) 10);
+
+ assertEquals(4,kr.getTotalResults());
+ assertEquals(0, kr.getMatch(0).getStartPos());
+ assertEquals(2, kr.getMatch(0).getEndPos());
+
+ // right expansion (direction 0) of the last two s:e runs past the ten tokens of doc-0
+ seq = new SpanExpansionQuery(stq,3,3,0, true);
+ kr = ki.search(seq, (short) 10);
+
+ assertEquals(4,kr.getTotalResults());
+ assertEquals(7, kr.getMatch(2).getStartPos());
+ assertEquals(11, kr.getMatch(2).getEndPos());
+ assertEquals(8, kr.getMatch(3).getStartPos());
+ assertEquals(12, kr.getMatch(3).getEndPos());
+
+ /*for (KorapMatch km : kr.getMatches()){
+ System.out.println(km.getStartPos() +","+km.getEndPos()+" "
+ //+km.getSnippetBrackets()
+ );
+ }*/
+ }
+
+ /** Expansion with exclusion across multiple documents
+ * @throws IOException
+ * */
+ @Test
+ public void testCase6() throws IOException{
+ KorapIndex ki = new KorapIndex();
+ ki.addDoc(createFieldDoc0()); // contains both the main clause term s:e and the not clause term s:d
+ ki.addDoc(createFieldDoc1()); // contains only the not clause term s:d
+ ki.addDoc(createFieldDoc2()); // contains only the main clause term s:e
+ ki.commit();
+
+ SpanTermQuery stq = new SpanTermQuery(new Term("base","s:e"));
+ SpanTermQuery notQuery = new SpanTermQuery(new Term("base","s:d"));
+
+ SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery,
+ 2, 3, 0, true);
+ kr = ki.search(seq, (short) 20);
+
+ // covers notClause.doc() > firstSpans.doc(): in doc-0 no s:d follows the s:e at 7-8
+ assertEquals(7, kr.getMatch(0).getStartPos());
+ assertEquals(10, kr.getMatch(0).getEndPos());
+ assertEquals(7, kr.getMatch(1).getStartPos());
+ assertEquals(11, kr.getMatch(1).getEndPos());
+ // covers !hasMoreNotClause: doc-2 contains no s:d at all
+ assertEquals(2, kr.getMatch(4).getLocalDocID());
+ assertEquals(1, kr.getMatch(4).getStartPos());
+ assertEquals(4, kr.getMatch(4).getEndPos());
+ }
+
+ private FieldDocument createFieldDoc0(){ // builds doc-0: ten tokens holding both s:e and s:d
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-0");
+ fd.addTV("base",
+ "ceccecdeec", // primary text, one character per token
+ "[(0-1)s:c|_0#0-1]" +
+ "[(1-2)s:e|_1#1-2]" + // s:e
+ "[(2-3)s:c|_2#2-3]" +
+ "[(3-4)s:c|s:d|_3#3-4]" + // s:d shares this token with s:c
+ "[(4-5)s:e|_4#4-5]" + // s:e
+ "[(5-6)s:c|_5#5-6]" +
+ "[(6-7)s:d|_6#6-7]" + // s:d
+ "[(7-8)s:e|_7#7-8]" + // s:e
+ "[(8-9)s:e|_8#8-9]" + // s:e
+ "[(9-10)s:c|_9#9-10]");
+ return fd;
+ }
+
+ private FieldDocument createFieldDoc1() { // builds doc-1: six tokens, s:d in the last two, no s:e
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-1");
+ fd.addTV("base",
+ "bbccdd", // primary text, one character per token
+ "[(0-1)s:b|s:c|_1#0-1]" + // NOTE(review): _N markers start at _1 here but at _0 in createFieldDoc0 - confirm intended
+ "[(1-2)s:b|_2#1-2]" +
+ "[(2-3)s:c|_3#2-3]" +
+ "[(3-4)s:c|_4#3-4]" +
+ "[(4-5)s:d|_5#4-5]" + // s:d
+ "[(5-6)s:d|_6#5-6]"); // s:d
+ return fd;
+ }
+
+ private FieldDocument createFieldDoc2() { // builds doc-2: six tokens, s:e in the 2nd and 5th, no s:d
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-2");
+ fd.addTV("base",
+ "text", // NOTE(review): primary text has 4 characters but the token offsets run to 6 - confirm intended
+ "[(0-1)s:b|s:c|_1#0-1]" + // NOTE(review): _N markers start at _1 here but at _0 in createFieldDoc0 - confirm intended
+ "[(1-2)s:e|_2#1-2]" + // s:e
+ "[(2-3)s:c|_3#2-3]" +
+ "[(3-4)s:c|_4#3-4]" +
+ "[(4-5)s:e|_5#4-5]" + // s:e
+ "[(5-6)s:a|_6#5-6]");
+ return fd;
+ }
}