Added SpanExpansionQuery
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
new file mode 100644
index 0000000..33c5b5e
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
@@ -0,0 +1,112 @@
+package de.ids_mannheim.korap.query;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+
+import de.ids_mannheim.korap.query.spans.ExpandedSpans;
+
+/**
+ * Query to make a span longer by stretching out the start or
+ * the end position of the span.
+ *
+ * <p>The number of positions added lies between {@code min} and {@code max};
+ * the expanded spans themselves are enumerated by {@link ExpandedSpans}.
+ * The bounds are stored as given and are not validated here.
+ *
+ * @author margaretha
+ */
+public class SpanExpansionQuery extends SimpleSpanQuery {
+
+    int min, max;
+    boolean isBefore;
+
+    /**
+     * @param firstClause the query whose spans are expanded
+     * @param min lower bound of the number of positions to add
+     * @param max upper bound of the number of positions to add
+     * @param isBefore {@code true} to stretch the start position (left),
+     *        {@code false} to stretch the end position (right)
+     * @param collectPayloads passed through to {@link SimpleSpanQuery}
+     */
+    public SpanExpansionQuery(SpanQuery firstClause, int min, int max,
+            boolean isBefore, boolean collectPayloads) {
+        super(firstClause, collectPayloads);
+        this.min = min;
+        this.max = max;
+        this.isBefore = isBefore;
+    }
+
+    /**
+     * Returns a copy of this query. The wrapped clause is cloned as well and
+     * the boost is carried over, so the copy can be changed independently.
+     */
+    @Override
+    public SimpleSpanQuery clone() {
+        SpanExpansionQuery copy = new SpanExpansionQuery(
+                (SpanQuery) firstClause.clone(),
+                min,
+                max,
+                isBefore,
+                collectPayloads);
+        // Carry over the boost of this query to the copy.
+        copy.setBoost(getBoost());
+        return copy;
+    }
+
+    /** Creates the {@link ExpandedSpans} for the given reader context. */
+    @Override
+    public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
+            Map<Term, TermContext> termContexts) throws IOException {
+        return new ExpandedSpans(this, context, acceptDocs, termContexts);
+    }
+
+    /**
+     * Renders the query as {@code spanExpansion(<clause>,[min,max],left|right)}.
+     *
+     * @param field the default field, forwarded to the wrapped clause
+     */
+    @Override
+    public String toString(String field) {
+        StringBuilder sb = new StringBuilder();
+        sb.append("spanExpansion(");
+        sb.append(firstClause.toString(field));
+        sb.append(",[");
+        sb.append(min);
+        sb.append(",");
+        sb.append(max);
+        sb.append("],");
+        sb.append(isBefore ? "left)" : "right)");
+        return sb.toString();
+    }
+
+    public int getMin() {
+        return min;
+    }
+
+    public void setMin(int min) {
+        this.min = min;
+    }
+
+    public int getMax() {
+        return max;
+    }
+
+    public void setMax(int max) {
+        this.max = max;
+    }
+
+    public boolean isBefore() {
+        return isBefore;
+    }
+
+    public void setBefore(boolean isBefore) {
+        this.isBefore = isBefore;
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
new file mode 100644
index 0000000..29c2d14
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
@@ -0,0 +1,136 @@
+package de.ids_mannheim.korap.query.spans;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.util.Bits;
+
+import de.ids_mannheim.korap.query.SpanExpansionQuery;
+
+/**
+ * Enumerates the spans of a {@link SpanExpansionQuery}: every span of the
+ * wrapped clause is widened by each length between min and max, either by
+ * moving its start to the left or its end to the right.
+ */
+public class ExpandedSpans extends SimpleSpans {
+
+    private int min, max;
+    private boolean isBefore;
+    // Expansions of the current firstSpans position not yet returned as match
+    private List<CandidateSpan> candidateSpans;
+    private long matchCost;
+
+    public ExpandedSpans(SpanExpansionQuery spanExpansionQuery,
+            AtomicReaderContext context, Bits acceptDocs,
+            Map<Term, TermContext> termContexts) throws IOException {
+        super(spanExpansionQuery, context, acceptDocs, termContexts);
+        this.min = spanExpansionQuery.getMin();
+        this.max = spanExpansionQuery.getMax();
+        this.isBefore = spanExpansionQuery.isBefore();
+
+        candidateSpans = new ArrayList<CandidateSpan>();
+        hasMoreSpans = true;
+    }
+
+    /** Moves to the next expanded span; returns false when none is left. */
+    @Override
+    public boolean next() throws IOException {
+        matchPayload.clear();
+        isStartEnumeration = false;
+        return advance();
+    }
+
+    /**
+     * Sets the next buffered candidate as the current match, refilling the
+     * buffer from the next span of firstSpans whenever it runs empty.
+     *
+     * @return true if a match was set, false if firstSpans is exhausted
+     */
+    private boolean advance() throws IOException {
+        while (hasMoreSpans || !candidateSpans.isEmpty()) {
+            if (!candidateSpans.isEmpty()) {
+                setMatch(candidateSpans.remove(0));
+                return true;
+            }
+            hasMoreSpans = firstSpans.next();
+            // Only read firstSpans when it actually moved to a span
+            if (hasMoreSpans) {
+                setCandidateList();
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Buffers one candidate per expansion length for the span firstSpans is
+     * currently positioned on. Left expansions are added widest first, right
+     * expansions narrowest first. Left expansions that would start before
+     * position 0 are left out.
+     */
+    private void setCandidateList() throws IOException {
+        if (isBefore) {
+            // Cap the widest expansion so the start never becomes negative
+            int widest = Math.min(max, firstSpans.start());
+            for (int length = widest; length >= min; length--) {
+                candidateSpans.add(new CandidateSpan(
+                        firstSpans.start() - length,
+                        firstSpans.end(),
+                        firstSpans.doc(),
+                        firstSpans.cost(),
+                        firstSpans.getPayload()));
+            }
+        }
+        else {
+            for (int length = min; length <= max; length++) {
+                candidateSpans.add(new CandidateSpan(
+                        firstSpans.start(),
+                        firstSpans.end() + length,
+                        firstSpans.doc(),
+                        firstSpans.cost(),
+                        firstSpans.getPayload()));
+            }
+        }
+    }
+
+    /** Copies the values of the given candidate into the match fields. */
+    private void setMatch(CandidateSpan candidateSpan) {
+        matchDocNumber = candidateSpan.getDoc();
+        matchStartPosition = candidateSpan.getStart();
+        matchEndPosition = candidateSpan.getEnd();
+        matchPayload = candidateSpan.getPayloads();
+        matchCost = candidateSpan.getCost();
+    }
+
+    /**
+     * Skips firstSpans to the target document if it is still before it and
+     * then sets the next expanded span as the current match.
+     */
+    @Override
+    public boolean skipTo(int target) throws IOException {
+        if (hasMoreSpans && (firstSpans.doc() < target)) {
+            // Buffered candidates stem from the document being skipped
+            candidateSpans.clear();
+            if (!firstSpans.skipTo(target)) {
+                hasMoreSpans = false;
+                return false;
+            }
+            // firstSpans now rests on a span that has not been expanded yet;
+            // buffer it here, since advance() would step past it
+            setCandidateList();
+        }
+        matchPayload.clear();
+        return advance();
+    }
+
+    /** Returns the cost recorded for the current match. */
+    @Override
+    public long cost() {
+        return matchCost;
+    }
+
+}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
new file mode 100644
index 0000000..d81878b
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
@@ -0,0 +1,55 @@
+package de.ids_mannheim.korap.index;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.junit.Test;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapMatch;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.query.SpanExpansionQuery;
+
+/** Index test for {@link SpanExpansionQuery} on a single document. */
+public class TestSpanExpansionIndex {
+
+    KorapResult kr;
+    KorapIndex ki;
+
+    /** Expands "s:Kaiser" to the left by 0-2 and to the right by 3-4 positions. */
+    @Test
+    public void testCase1() throws IOException {
+        ki = new KorapIndex();
+        String[] docIds = {"AAA-12402"};
+        for (String docId : docIds) {
+            String path = getClass().getResource("/wiki/" + docId + ".json.gz").getFile();
+            ki.addDocFile(path, true);
+        }
+        ki.commit();
+
+        SpanTermQuery kaiser = new SpanTermQuery(new Term("tokens", "s:Kaiser"));
+
+        // Left expansion
+        kr = ki.search(new SpanExpansionQuery(kaiser, 0, 2, true, true), (short) 10);
+        assertEquals(72, kr.getTotalResults());
+        assertSpan(0, 5, 8);
+        assertSpan(1, 6, 8);
+        assertSpan(2, 7, 8);
+
+        // Right expansion
+        kr = ki.search(new SpanExpansionQuery(kaiser, 3, 4, false, true), (short) 10);
+        assertSpan(0, 7, 11);
+        assertSpan(1, 7, 12);
+        assertSpan(2, 15, 19);
+        assertSpan(3, 15, 20);
+    }
+
+    /** Checks start and end position of the match at the given index of {@code kr}. */
+    private void assertSpan(int index, int start, int end) {
+        assertEquals(start, kr.getMatch(index).getStartPos());
+        assertEquals(end, kr.getMatch(index).getEndPos());
+    }
+}