Added SubspanQuery
diff --git a/src/main/java/de/ids_mannheim/korap/query/SubspanQuery.java b/src/main/java/de/ids_mannheim/korap/query/SubspanQuery.java
new file mode 100644
index 0000000..b01a37d
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/SubspanQuery.java
@@ -0,0 +1,101 @@
+package de.ids_mannheim.korap.query;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ToStringUtils;
+
+import de.ids_mannheim.korap.query.spans.SubSpans;
+
+/**
+ * This query extracts a subspan from each span of a wrapped query. The
+ * subspan starts at a startOffset and ends at startOffset + length. A
+ * positive startOffset is counted from the start of the span, while a
+ * negative startOffset is counted from the end of the span.
+ *
+ * @author margaretha
+ */
+public class SubspanQuery extends SimpleSpanQuery {
+
+    private int startOffset, length;
+
+    /**
+     * Creates a query cutting a subspan out of the spans of firstClause.
+     *
+     * @param firstClause the query whose spans are cut
+     * @param startOffset where the subspan starts: counted from the span
+     *        start if zero or positive, from the span end if negative
+     * @param length the number of positions the subspan covers
+     * @param collectPayloads handed to SimpleSpanQuery unchanged
+     */
+    public SubspanQuery(SpanQuery firstClause, int startOffset, int length,
+            boolean collectPayloads) {
+        super(firstClause, collectPayloads);
+        this.startOffset = startOffset;
+        this.length = length;
+    }
+
+    /**
+     * Returns a copy with the same offsets, payload flag and boost. The
+     * wrapped clause is cloned too, so the copy shares no mutable query
+     * object with this instance.
+     */
+    @Override
+    public SimpleSpanQuery clone() {
+        SubspanQuery sq = new SubspanQuery(
+                (SpanQuery) this.getFirstClause().clone(),
+                this.startOffset,
+                this.length,
+                this.collectPayloads);
+        sq.setBoost(this.getBoost());
+        return sq;
+    }
+
+    /** Creates the SubSpans enumeration evaluating this query. */
+    @Override
+    public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
+            Map<Term, TermContext> termContexts) throws IOException {
+        return new SubSpans(this, context, acceptDocs, termContexts);
+    }
+
+    /**
+     * Renders the query as subspan(clause,startOffset,length), followed by
+     * the boost unless it is 1. The wrapped clause is included so that
+     * subspan queries over different clauses can be told apart.
+     */
+    @Override
+    public String toString(String field) {
+        StringBuilder sb = new StringBuilder();
+        sb.append("subspan(");
+        sb.append(this.getFirstClause().toString(field));
+        sb.append(",");
+        sb.append(this.startOffset);
+        sb.append(",");
+        sb.append(this.length);
+        sb.append(")");
+        sb.append(ToStringUtils.boost(this.getBoost()));
+        return sb.toString();
+    }
+
+    public int getStartOffset() {
+        return startOffset;
+    }
+
+    public void setStartOffset(int startOffset) {
+        this.startOffset = startOffset;
+    }
+
+    public int getLength() {
+        return length;
+    }
+
+    public void setLength(int length) {
+        this.length = length;
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java
new file mode 100644
index 0000000..2fe6b2b
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java
@@ -0,0 +1,126 @@
+package de.ids_mannheim.korap.query.spans;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.util.Bits;
+
+import de.ids_mannheim.korap.query.SubspanQuery;
+
+/**
+ * Enumerates the subspans requested by a SubspanQuery: each span of the
+ * wrapped clause is cut to the window given by startOffset and length. The
+ * window is kept inside the span it is cut from.
+ */
+public class SubSpans extends SimpleSpans {
+
+    private int startOffset, length;
+
+    /**
+     * Reads the window from the query and moves firstSpans to its first
+     * span, so that firstSpans always points at the next candidate.
+     */
+    public SubSpans(SubspanQuery subspanQuery,
+            AtomicReaderContext context, Bits acceptDocs,
+            Map<Term, TermContext> termContexts) throws IOException {
+        super(subspanQuery, context, acceptDocs, termContexts);
+        this.startOffset = subspanQuery.getStartOffset();
+        this.length = subspanQuery.getLength();
+        hasMoreSpans = firstSpans.next();
+    }
+
+    @Override
+    public boolean next() throws IOException {
+        matchPayload.clear();
+        isStartEnumeration = false;
+        return advance();
+    }
+
+    /**
+     * Turns the next usable candidate into the current match and moves
+     * firstSpans one span ahead. Candidates too short to contain a
+     * non-negative startOffset are skipped.
+     *
+     * @return true if a match was set, false if the spans are exhausted
+     */
+    private boolean advance() throws IOException {
+        while (hasMoreSpans) {
+            boolean matched = startsWithinSpan();
+            if (matched) {
+                setMatch();
+            }
+            hasMoreSpans = firstSpans.next();
+            if (matched) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Tells whether a start counted from the span start still lies inside
+     * the current span. A negative startOffset always qualifies, because
+     * setMatch() clamps it to the span start.
+     */
+    private boolean startsWithinSpan() {
+        return startOffset < 0
+                || firstSpans.start() + startOffset < firstSpans.end();
+    }
+
+    /**
+     * Sets the current match from the span firstSpans points at.
+     *
+     * @throws IOException propagated from reading the payloads of firstSpans
+     */
+    public void setMatch() throws IOException {
+        int spanStart = firstSpans.start();
+        int spanEnd = firstSpans.end();
+
+        if (this.startOffset < 0) {
+            // Counted from the span end, but never before the span start.
+            matchStartPosition = Math.max(spanStart, spanEnd + startOffset);
+        }
+        else {
+            matchStartPosition = spanStart + startOffset;
+        }
+        // The subspan must not reach beyond the span it is cut from.
+        matchEndPosition = Math.min(spanEnd, matchStartPosition + this.length);
+
+        // Copy the payloads instead of adopting the collection returned by
+        // firstSpans: advance() moves firstSpans on right after this call
+        // and next()/skipTo() clear matchPayload, so a shared collection
+        // could be emptied or overwritten, and an unmodifiable one would
+        // make clear() fail.
+        matchPayload.clear();
+        if (firstSpans.isPayloadAvailable()) {
+            matchPayload.addAll(firstSpans.getPayload());
+        }
+        matchDocNumber = firstSpans.doc();
+    }
+
+    /**
+     * Moves to the next match in a document whose number is at least target.
+     * firstSpans is only skipped when the pending candidate lies before
+     * target; otherwise the pending candidate itself is used.
+     */
+    @Override
+    public boolean skipTo(int target) throws IOException {
+        if (hasMoreSpans && (firstSpans.doc() < target)) {
+            if (!firstSpans.skipTo(target)) {
+                hasMoreSpans = false;
+                return false;
+            }
+        }
+        matchPayload.clear();
+        return advance();
+    }
+
+    @Override
+    public long cost() {
+        return firstSpans.cost() + 1;
+    }
+
+}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
new file mode 100644
index 0000000..ae2cef4
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
@@ -0,0 +1,64 @@
+package de.ids_mannheim.korap.index;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.junit.Test;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.query.DistanceConstraint;
+import de.ids_mannheim.korap.query.SpanDistanceQuery;
+import de.ids_mannheim.korap.query.SubspanQuery;
+
+
+/** Tests SubspanQuery on the spans of a distance query. */
+public class TestSubSpanIndex {
+
+    KorapResult kr;
+    KorapIndex ki;
+
+    /**
+     * Cuts two positions out of each distance span, first counted from the
+     * span start and then counted from the span end.
+     */
+    @Test
+    public void testCase1() throws IOException {
+        ki = new KorapIndex();
+        for (String i : new String[] {"AAA-12402"}) {
+            ki.addDocFile(
+                getClass().getResource("/wiki/" + i + ".json.gz").getFile(), true
+            );
+        }
+        ki.commit();
+
+        SpanDistanceQuery sdq = new SpanDistanceQuery(
+                new SpanTermQuery(new Term("tokens","cnx/p:N")),
+                new SpanTermQuery(new Term("tokens","cnx/p:V")),
+                new DistanceConstraint(5, 5, true, false),
+                true);
+
+        // Offset 0: the subspan starts at the span start
+        SubspanQuery ssq = new SubspanQuery(sdq, 0, 2, true);
+        kr = ki.search(ssq, (short) 10);
+
+        assertEquals(27, kr.getTotalResults());
+        assertEquals(30, kr.getMatch(0).getStartPos());
+        assertEquals(32, kr.getMatch(0).getEndPos());
+        assertEquals(81, kr.getMatch(1).getStartPos());
+        assertEquals(83, kr.getMatch(1).getEndPos());
+
+        // Negative offset: the subspan starts two positions before the
+        // span end and, like above, ends at start + length
+        ssq = new SubspanQuery(sdq, -2, 2, true);
+        kr = ki.search(ssq, (short) 10);
+
+        assertEquals(34, kr.getMatch(0).getStartPos());
+        assertEquals(36, kr.getMatch(0).getEndPos());
+        assertEquals(85, kr.getMatch(1).getStartPos());
+        assertEquals(87, kr.getMatch(1).getEndPos());
+    }
+}