Added class and extension offsets to payload in SpanExpansionQuery
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
index 33c5b5e..cab5683 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
@@ -19,7 +19,8 @@
* */
public class SpanExpansionQuery extends SimpleSpanQuery{
- int min, max;
+ int min, max;
+ byte classNumber;
boolean isBefore;
public SpanExpansionQuery(SpanQuery firstClause, int min, int max,
@@ -29,6 +30,12 @@
this.max = max;
this.isBefore = isBefore;
}
+
+ /**
+ * Constructs an expansion query that also carries a class number.
+ * Delegates to the five-argument constructor and then stores
+ * {@code classNumber} on this query.
+ *
+ * @param firstClause the span query whose matches are expanded
+ * @param min the minimum number of positions to expand by
+ * @param max the maximum number of positions to expand by
+ * @param classNumber the class number for the expansion; ExpandedSpans
+ * adds an extension-offset payload only when it is greater than 0
+ * @param isBefore if true the expansion moves the span start to the left,
+ * otherwise it moves the span end to the right
+ * @param collectPayloads forwarded unchanged to the delegated constructor
+ */
+ public SpanExpansionQuery(SpanQuery firstClause, int min, int max,
+ byte classNumber, boolean isBefore, boolean collectPayloads) {
+ this(firstClause, min,max,isBefore,collectPayloads);
+ this.classNumber = classNumber;
+ }
@Override
public SimpleSpanQuery clone() {
@@ -87,4 +94,12 @@
public void setBefore(boolean isBefore) {
this.isBefore = isBefore;
}
+
+ /**
+ * Returns the class number stored on this query.
+ *
+ * @return the current value of {@code classNumber}
+ */
+ public byte getClassNumber() {
+ return classNumber;
+ }
+
+ /**
+ * Replaces the class number stored on this query.
+ *
+ * @param classNumber the new class number
+ */
+ public void setClassNumber(byte classNumber) {
+ this.classNumber = classNumber;
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
index a15d80d..7274826 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
@@ -24,8 +24,6 @@
*/
public class SpanNextQuery extends SimpleSpanQuery implements Cloneable {
- private String spanName;
-
// Constructor
public SpanNextQuery(SpanQuery firstClause, SpanQuery secondClause) {
this(firstClause, secondClause, true);
@@ -35,10 +33,14 @@
public SpanNextQuery(SpanQuery firstClause, SpanQuery secondClause,
boolean collectPayloads) {
super(firstClause, secondClause, collectPayloads);
- this.spanName = "spanNext";
};
-
+ /**
+ * Constructs a next-query from two clauses, forwarding both clauses and
+ * {@code collectPayloads} to the superclass constructor.
+ *
+ * NOTE(review): {@code isFirstNegated} is accepted but neither stored nor
+ * passed on in this constructor, so it currently has no effect here.
+ * Confirm whether the negation handling is still to be implemented.
+ */
+ public SpanNextQuery(SpanQuery firstClause, SpanQuery secondClause,
+ boolean isFirstNegated, boolean collectPayloads) {
+ super(firstClause, secondClause, collectPayloads);
+ }
+
+
@Override
public Spans getSpans (final AtomicReaderContext context, Bits acceptDocs,
Map<Term,TermContext> termContexts) throws IOException {
@@ -59,8 +61,7 @@
@Override
public String toString(String field) {
StringBuilder sb = new StringBuilder();
- sb.append(this.spanName);
- sb.append("(");
+ sb.append("spanNext(");
sb.append(firstClause.toString(field));
sb.append(", ");
sb.append(secondClause.toString(field));
diff --git a/src/main/java/de/ids_mannheim/korap/query/SubspanQuery.java b/src/main/java/de/ids_mannheim/korap/query/SubspanQuery.java
index b01a37d..4af5e69 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SubspanQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SubspanQuery.java
@@ -51,6 +51,8 @@
public String toString(String field) {
StringBuilder sb = new StringBuilder();
sb.append("subspan(");
+ sb.append(this.firstClause.toString());
+ sb.append(",");
sb.append(this.startOffset);
sb.append(",");
sb.append(this.length);
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
index 29c2d14..2f8ebed 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
@@ -1,6 +1,7 @@
package de.ids_mannheim.korap.query.spans;
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -12,9 +13,13 @@
import de.ids_mannheim.korap.query.SpanExpansionQuery;
+/**
+ * @author margaretha
+ * */
public class ExpandedSpans extends SimpleSpans{
private int min, max;
+ private byte classNumber;
private boolean isBefore;
private List<CandidateSpan> candidateSpans;
private long matchCost;
@@ -25,6 +30,7 @@
super(spanExpansionQuery, context, acceptDocs, termContexts);
this.min = spanExpansionQuery.getMin();
this.max = spanExpansionQuery.getMax();
+ this.classNumber = spanExpansionQuery.getClassNumber();
this.isBefore = spanExpansionQuery.isBefore();
candidateSpans = new ArrayList<CandidateSpan>();
@@ -42,7 +48,7 @@
while (hasMoreSpans || candidateSpans.size() > 0) {
if (candidateSpans.size() > 0 ){
setMatch(candidateSpans.get(0));
- candidateSpans.remove(0);
+ candidateSpans.remove(0);
return true;
}
else {
@@ -55,17 +61,20 @@
private void setCandidateList() throws IOException {
CandidateSpan cs;
- int counter;
+ int counter, start, end;
+
if (isBefore){
counter = max;
while (counter >= min ){
+ start = Math.max(0,firstSpans.start() - counter);
cs = new CandidateSpan(
- firstSpans.start() - counter,
+ start,
firstSpans.end(),
firstSpans.doc(),
firstSpans.cost(),
- firstSpans.getPayload()
+ calculatePayload(start, firstSpans.start())
);
+
candidateSpans.add(cs);
counter--;
}
@@ -73,18 +82,44 @@
else{
counter = min;
while (counter <= max){
+ // TODO: detect when the expanded end lies beyond the end of the document; it is not checked here.
+ end = firstSpans.end() + counter;
cs = new CandidateSpan(
firstSpans.start(),
- firstSpans.end() + counter,
+ end,
firstSpans.doc(),
firstSpans.cost(),
- firstSpans.getPayload()
+ calculatePayload(firstSpans.end(), end)
);
candidateSpans.add(cs);
counter++;
}
}
}
+
+ /**
+ * Builds the payload list for one expanded candidate span.
+ *
+ * The payloads of the underlying span are always carried over, so that an
+ * expansion without a class number does not drop them. The extension
+ * offsets are appended only when a positive class number is set.
+ *
+ * @param start the start position of the extension
+ * @param end the end position of the extension
+ * @return a new list holding the payloads of the underlying span (if any),
+ * followed by the extension-offset entry when {@code classNumber > 0}
+ * @throws IOException if reading the payload of the underlying span fails
+ */
+ private ArrayList<byte[]> calculatePayload(int start, int end)
+ throws IOException{
+
+ ArrayList<byte[]> payload = new ArrayList<byte[]>();
+
+ // Keep existing payloads regardless of the class number; previously
+ // they were lost whenever classNumber was 0.
+ if (firstSpans.isPayloadAvailable()){
+ payload.addAll(firstSpans.getPayload());
+ }
+
+ // Only a positive class number requests an extension-offset entry.
+ if (classNumber > 0 ){
+ payload.add(calculateExtensionOffsets(start, end));
+ }
+ return payload;
+ }
+
+ /**
+ * Encodes the class number and the extension offsets as one payload entry.
+ * The entry is 9 bytes long: the class number (1 byte), then {@code start}
+ * (4 bytes), then {@code end} (4 bytes), in the buffer's default byte order.
+ *
+ * @param start the start position of the extension
+ * @param end the end position of the extension
+ * @return the backing array of the filled 9-byte buffer
+ */
+ private byte[] calculateExtensionOffsets(int start, int end) {
+ return ByteBuffer.allocate(9)
+ .put(classNumber)
+ .putInt(start)
+ .putInt(end)
+ .array();
+ }
private void setMatch(CandidateSpan candidateSpan) {
matchDocNumber = candidateSpan.getDoc();
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
index 9651cba..4936eaa 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
@@ -20,7 +20,7 @@
*
* Update: allow multiple matches at the same firstspan position
*
- * @author margaretha
+ * @author margaretha, diewald
* */
public class NextSpans extends SimpleSpans {
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
index d81878b..4c1704e 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
@@ -11,6 +11,7 @@
import de.ids_mannheim.korap.KorapIndex;
import de.ids_mannheim.korap.KorapMatch;
import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.query.SpanElementQuery;
import de.ids_mannheim.korap.query.SpanExpansionQuery;
public class TestSpanExpansionIndex {
@@ -18,14 +19,19 @@
KorapResult kr;
KorapIndex ki;
- @Test
- public void testCase1() throws IOException {
- ki = new KorapIndex();
+ /**
+ * Builds the index used by the tests in this class: creates a new
+ * KorapIndex, adds the resource file /wiki/AAA-12402.json.gz to it and
+ * commits the index.
+ *
+ * @throws IOException declared by the constructor; which of the calls
+ * below can raise it is not visible here
+ */
+ public TestSpanExpansionIndex() throws IOException {
+ ki = new KorapIndex();
for (String i : new String[] {"AAA-12402"}) {
ki.addDocFile(
getClass().getResource("/wiki/" + i + ".json.gz").getFile(),true);
};
ki.commit();
+ }
+
+ /** Left and right expansions
+ * */
+ @Test
+ public void testCase1() throws IOException {
SpanTermQuery stq = new SpanTermQuery(new Term("tokens","s:Kaiser") );
SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 2, true, true);
@@ -57,4 +63,28 @@
// }
}
+
+ /**
+ * Runs expansion queries that carry a class number.
+ * The correctness of the resulting payloads cannot be checked directly,
+ * so this test only executes the searches and makes no assertions.
+ */
+ @Test
+ public void testCase2() {
+ final byte classNumber = 1;
+
+ // Expansion before a term span: the expansion offsets are stored
+ // as a newly created payload.
+ SpanTermQuery kaiserTerm = new SpanTermQuery(new Term("tokens","s:Kaiser"));
+ SpanExpansionQuery leftExpansion =
+ new SpanExpansionQuery(kaiserTerm, 0, 2, classNumber, true, true);
+ kr = ki.search(leftExpansion, (short) 10);
+
+ // Expansion after an element span: the expansion offsets are added
+ // to the existing payload.
+ SpanElementQuery npElement = new SpanElementQuery("tokens", "cnx/c:np");
+ SpanExpansionQuery rightExpansion =
+ new SpanExpansionQuery(npElement, 1, 2, classNumber, false, true);
+ kr = ki.search(rightExpansion, (short) 10);
+
+ /*for (KorapMatch km : kr.getMatches()){
+ System.out.println(km.getStartPos() +","+km.getEndPos()+" "
+ +km.getSnippetBrackets());
+ }*/
+ }
+
}