Added class number to expanded exclusion query
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
index 80904f2..11b6fda 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
@@ -1,6 +1,7 @@
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -22,6 +23,7 @@
private int min, max;
private int direction;
+ private byte classNumber;
private List<CandidateSpan> candidateSpans;
private boolean hasMoreNotClause;
private Spans notClause;
@@ -45,6 +47,7 @@
this.min = spanExpansionQuery.getMin();
this.max = spanExpansionQuery.getMax();
this.direction = spanExpansionQuery.getDirection();
+ this.classNumber = spanExpansionQuery.getClassNumber();
this.notClause = secondSpans;
this.hasMoreNotClause = notClause.next();
@@ -167,17 +170,19 @@
throws IOException {
int counter;
int start, end;
-
+ CandidateSpan cs;
if (direction < 0 ) { // left
counter = maxPos;
while (counter >= min){
- start = firstSpans.start() - counter;
- if (start > -1 ){
-
- end = firstSpans.end();
- //System.out.println(start+","+end);
- candidateSpans.add(new CandidateSpan(start, end, firstSpans.doc(),
- firstSpans.cost(), firstSpans.getPayload()));
+ start = Math.max(0,firstSpans.start() - counter);
+ if (start > -1 ){
+ end = firstSpans.end();
+ //System.out.println(start+","+end);
+ cs = new CandidateSpan(start, end, firstSpans.doc(),
+ firstSpans.cost(),
+ calculatePayload(start,firstSpans.start())
+ );
+ candidateSpans.add(cs);
}
counter --;
}
@@ -188,13 +193,39 @@
start = firstSpans.start();
end = firstSpans.end() + counter;
//System.out.println(start+","+end);
- candidateSpans.add(new CandidateSpan(start, end, firstSpans.doc(),
- firstSpans.cost(), firstSpans.getPayload()));
+
+ cs = new CandidateSpan(start, end, firstSpans.doc(),
+ firstSpans.cost(),
+ calculatePayload(firstSpans.end(), end)
+ );
+ candidateSpans.add(cs);
counter++;
}
}
}
+	/** Builds the payload list for one candidate span: the first span's payloads
+	 *  (if available) plus, when classNumber > 0, the encoded expansion offsets. */
+	private ArrayList<byte[]> calculatePayload(int start, int end)
+			throws IOException{
+		ArrayList<byte[]> payload = new ArrayList<byte[]>();
+		if (firstSpans.isPayloadAvailable()){
+			payload.addAll(firstSpans.getPayload());
+		}
+		if (classNumber > 0 ){
+			// No stdout trace here; print the offsets from a test if needed.
+			payload.add(calculateExtensionOffsets(start, end));
+		}
+		return payload;
+	}
+
+	private byte[] calculateExtensionOffsets(int start, int end) {
+		// Layout: class number (1 byte), then start and end (4 bytes each).
+		return ByteBuffer.allocate(9)
+				.put(classNumber)
+				.putInt(start).putInt(end)
+				.array();
+	}
@Override
public boolean skipTo(int target) throws IOException {
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
index 3acfc8a..a4df2d6 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
@@ -65,22 +65,46 @@
}
/** Classnumber
- * Cannot check the correctness directly
+ * Cannot check the expansion offset correctness directly
* */
@Test
public void testCase2() {
byte classNumber = 1;
-
+ SpanExpansionQuery sq;
// create new payload for the expansion offsets
SpanTermQuery stq = new SpanTermQuery(new Term("tokens","s:Kaiser") );
- SpanExpansionQuery sq = new SpanExpansionQuery(stq, 0, 2, -1, classNumber, true);
+ sq = new SpanExpansionQuery(stq, 0, 2, -1, classNumber, true);
kr = ki.search(sq, (short) 10);
-
+
+ assertEquals(5, kr.getMatch(0).getStartPos()); // expansion 5,7
+ assertEquals(8, kr.getMatch(0).getEndPos());
+ assertEquals(6, kr.getMatch(1).getStartPos()); // expansion 6,9
+ assertEquals(8, kr.getMatch(1).getEndPos());
+ assertEquals(7, kr.getMatch(2).getStartPos()); // expansion 7,7
+ assertEquals(8, kr.getMatch(2).getEndPos());
+ assertEquals(13, kr.getMatch(3).getStartPos()); // expansion 13,15
+ assertEquals(16, kr.getMatch(3).getEndPos());
+
+ /*for (KorapMatch km : kr.getMatches()){
+ System.out.println(km.getStartPos() +","+km.getEndPos()+" "
+ +km.getSnippetBrackets());
+ }*/
+
// add expansion offsets to the existing payload
SpanElementQuery seq = new SpanElementQuery("tokens", "cnx/c:np");
sq = new SpanExpansionQuery(seq, 1, 2, 0, classNumber, true);
kr = ki.search(sq, (short) 10);
+ assertEquals(1, kr.getMatch(0).getStartPos());
+ assertEquals(3, kr.getMatch(0).getEndPos());
+ assertEquals(1, kr.getMatch(1).getStartPos());
+ assertEquals(4, kr.getMatch(1).getEndPos());
+ assertEquals(6, kr.getMatch(2).getStartPos()); // expansion 8,9
+ assertEquals(9, kr.getMatch(2).getEndPos());
+ assertEquals(6, kr.getMatch(3).getStartPos()); // expansion 8,10
+ assertEquals(10, kr.getMatch(3).getEndPos());
+
+
/*for (KorapMatch km : kr.getMatches()){
System.out.println(km.getStartPos() +","+km.getEndPos()+" "
+km.getSnippetBrackets());
@@ -88,40 +112,60 @@
}
- /** Expansion with exclusion
+ /** Right expansion with exclusion
* */
@Test
public void testCase3() throws IOException {
-
+ byte classNumber = 1; // class number handed to the expansion query below
SpanTermQuery stq = new SpanTermQuery(new Term("tokens","cnx/p:N") );
SpanTermQuery notQuery = new SpanTermQuery(new Term("tokens","s:September"));
- SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 2, 3, 0, true);
+ SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 2, 3, 0,
+ classNumber, true);
kr = ki.search(seq, (short) 20);
+ assertEquals(13, kr.getMatch(11).getStartPos()); // expected expansion offsets 14,17 (not asserted)
+ assertEquals(17, kr.getMatch(11).getEndPos());
+ assertEquals(18, kr.getMatch(12).getStartPos()); // expected expansion offsets 19,21 (not asserted)
+ assertEquals(21, kr.getMatch(12).getEndPos());
+ assertEquals(18, kr.getMatch(13).getStartPos()); // expected expansion offsets 19,22 (not asserted)
+ assertEquals(22, kr.getMatch(13).getEndPos());
+ assertEquals(20, kr.getMatch(14).getStartPos()); // expected expansion offsets 21,23 (not asserted)
+ assertEquals(23, kr.getMatch(14).getEndPos());
+ // Disabled dump of every match: positions and getSnippetBrackets() output.
/*for (KorapMatch km : kr.getMatches()){
System.out.println(km.getStartPos() +","+km.getEndPos()+" "
+km.getSnippetBrackets());
}*/
}
- /** Expansion with exclusion
+ /** Left expansion with exclusion
* */
@Test
public void testCase4() throws IOException {
-
+ byte classNumber = 1;
SpanTermQuery stq = new SpanTermQuery(new Term("tokens","cnx/p:N") );
SpanTermQuery notQuery = new SpanTermQuery(new Term("tokens","cnx/p:A"));
- SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 0, 2, -1, true);
+ SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 0, 2, -1,
+ classNumber, true);
kr = ki.search(seq, (short) 10);
- /*
+ assertEquals(7, kr.getMatch(3).getStartPos()); // expansion 7,7
+ assertEquals(8, kr.getMatch(3).getEndPos());
+ assertEquals(7, kr.getMatch(4).getStartPos()); // expansion 7,8
+ assertEquals(9, kr.getMatch(4).getEndPos());
+ assertEquals(8, kr.getMatch(5).getStartPos()); // expansion 8,8 (zero-width; TODO: is a class payload needed here?)
+ assertEquals(9, kr.getMatch(5).getEndPos());
+ assertEquals(8, kr.getMatch(6).getStartPos()); // expansion 8,10
+ assertEquals(11, kr.getMatch(6).getEndPos());
+ // Match dump for local debugging only; keep it commented out.
+ /*
for (KorapMatch km : kr.getMatches()){
System.out.println(km.getStartPos() +","+km.getEndPos()+" "
+km.getSnippetBrackets());
}
- */
+ */
}
}