Add SpanSegmentQuery. Add abstract base classes shared by SpanSegmentQuery and SpanNextQuery.
diff --git a/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java b/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java
new file mode 100644
index 0000000..f0d8589
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java
@@ -0,0 +1,118 @@
+package de.ids_mannheim.korap.query;
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.util.ToStringUtils;
+
+/**
+ * Base class for span queries that combine exactly two clauses on the
+ * same field.
+ *
+ * @author margaretha
+ */
+public abstract class SimpleSpanQuery extends SpanQuery implements Cloneable {
+
+    private SpanQuery firstClause, secondClause;
+    private String field;
+    private String spanName;
+
+    /**
+     * @param firstClause  the first clause
+     * @param secondClause the second clause; its field becomes the field of this query
+     * @param spanName     the name printed in front of the clauses by toString
+     * @throws IllegalArgumentException if the clauses are on different fields
+     */
+    public SimpleSpanQuery(SpanQuery firstClause, SpanQuery secondClause,
+            String spanName) {
+        this.field = secondClause.getField();
+        if (!firstClause.getField().equals(field)) {
+            throw new IllegalArgumentException("Clauses must have the same field.");
+        }
+        // Assign directly: a constructor should not call overridable setters.
+        this.firstClause = firstClause;
+        this.secondClause = secondClause;
+        this.spanName = spanName;
+    }
+
+    @Override
+    public String getField() {
+        return field;
+    }
+
+    /** Renders the query as {@code spanName(first, second)} plus the boost suffix. */
+    @Override
+    public String toString(String field) {
+        StringBuilder sb = new StringBuilder();
+        sb.append(this.spanName);
+        sb.append("(");
+        sb.append(firstClause.toString(field));
+        sb.append(", ");
+        sb.append(secondClause.toString(field));
+        sb.append(")");
+        sb.append(ToStringUtils.boost(getBoost()));
+        return sb.toString();
+    }
+
+    public SpanQuery getFirstClause() {
+        return firstClause;
+    }
+
+    public void setFirstClause(SpanQuery firstClause) {
+        this.firstClause = firstClause;
+    }
+
+    public SpanQuery getSecondClause() {
+        return secondClause;
+    }
+
+    public void setSecondClause(SpanQuery secondClause) {
+        this.secondClause = secondClause;
+    }
+
+    /** Collects the terms of both clauses into {@code terms}. */
+    @Override
+    public void extractTerms(Set<Term> terms) {
+        firstClause.extractTerms(terms);
+        secondClause.extractTerms(terms);
+    }
+
+    /**
+     * Rewrites both clauses; needed for clauses such as wildcard and regex
+     * queries.
+     *
+     * @return this query if neither clause changed, otherwise a clone that
+     *         holds the rewritten clauses
+     */
+    @Override
+    public Query rewrite(IndexReader reader) throws IOException {
+        SimpleSpanQuery clone = null;
+
+        SpanQuery rewritten = (SpanQuery) firstClause.rewrite(reader);
+        if (rewritten != firstClause) {
+            clone = clone();
+            clone.firstClause = rewritten;
+        }
+
+        rewritten = (SpanQuery) secondClause.rewrite(reader);
+        if (rewritten != secondClause) {
+            if (clone == null) {
+                clone = clone();
+            }
+            clone.secondClause = rewritten;
+        }
+
+        // Hand back this very instance when nothing changed, so that a
+        // caller rewriting until the result stops changing can terminate.
+        return (clone != null) ? clone : this;
+    }
+
+    /** Returns a copy of this query; implemented by each concrete subclass. */
+    @Override
+    public abstract SimpleSpanQuery clone();
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
index 0fd90e8..32a90d9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
@@ -30,99 +30,35 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-/** Matches spans which are directly next to each other.
+/** Matches spans which are directly next to each other.
+ * This is identical to a phrase query with exactly two clauses.
*/
-public class SpanNextQuery extends SpanQuery implements Cloneable {
- private SpanQuery firstClause, secondClause;
- public String field;
+public class SpanNextQuery extends SimpleSpanQuery implements Cloneable {
+ private SpanQuery firstClause;
+ private SpanQuery secondClause;
private boolean collectPayloads;
- // Logger
- private final static Logger log = LoggerFactory.getLogger(SpanNextQuery.class);
-
// Constructor
public SpanNextQuery(SpanQuery firstClause, SpanQuery secondClause) {
- this(firstClause, secondClause, true);
+ this(firstClause, secondClause, true);
};
// Constructor
- public SpanNextQuery(SpanQuery firstClause,
- SpanQuery secondClause,
- boolean collectPayloads) {
-
- this.field = secondClause.getField();
- if (!firstClause.getField().equals(field)) {
- throw new IllegalArgumentException("Clauses must have same field");
- };
-
- this.collectPayloads = collectPayloads;
- this.firstClause = firstClause;
- this.secondClause = secondClause;
+ public SpanNextQuery(SpanQuery firstClause, SpanQuery secondClause,
+ boolean collectPayloads) {
+ super(firstClause, secondClause, "spanNext");
+ this.collectPayloads = collectPayloads;
+ this.firstClause = firstClause;
+ this.secondClause = secondClause;
};
@Override
- public String getField() { return field; }
-
- public SpanQuery firstClause() { return firstClause; };
-
- public SpanQuery secondClause() { return secondClause; };
-
- @Override
- public void extractTerms(Set<Term> terms) {
- firstClause.extractTerms(terms);
- secondClause.extractTerms(terms);
+ public Spans getSpans (final AtomicReaderContext context, Bits acceptDocs,
+ Map<Term,TermContext> termContexts) throws IOException {
+ return (Spans) new NextSpans (this, context, acceptDocs,
+ termContexts, collectPayloads);
};
-
-
- @Override
- public String toString(String field) {
- StringBuilder sb = new StringBuilder();
- sb.append("spanNext(")
- .append(firstClause.toString(field))
- .append(", ")
- .append(secondClause.toString(field))
- .append(")")
- .append(ToStringUtils.boost(getBoost()));
- return sb.toString();
- };
-
- @Override
- public Spans getSpans (final AtomicReaderContext context,
- Bits acceptDocs,
- Map<Term,TermContext> termContexts) throws IOException {
-
- log.trace("Get Spans");
- return (Spans) new NextSpans (
- this, context, acceptDocs, termContexts, collectPayloads
- );
- };
-
- @Override
- public Query rewrite (IndexReader reader) throws IOException {
- SpanNextQuery clone = null;
-
- SpanQuery query = (SpanQuery) firstClause.rewrite(reader);
-
- if (query != firstClause) {
- if (clone == null)
- clone = this.clone();
- clone.firstClause = query;
- };
-
- query = (SpanQuery) secondClause.rewrite(reader);
- if (query != secondClause) {
- if (clone == null)
- clone = this.clone();
- clone.secondClause = query;
- };
-
- if (clone != null)
- return clone;
-
- return this;
- };
-
@Override
public SpanNextQuery clone() {
@@ -145,7 +81,7 @@
final SpanNextQuery spanNextQuery = (SpanNextQuery) o;
if (collectPayloads != spanNextQuery.collectPayloads) return false;
- if (!firstClause.equals(spanNextQuery.firstClause)) return false;
+ if (!firstClause.equals(spanNextQuery.firstClause)) return false;
if (!secondClause.equals(spanNextQuery.secondClause)) return false;
return getBoost() == spanNextQuery.getBoost();
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
new file mode 100644
index 0000000..3e52cbf
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
@@ -0,0 +1,87 @@
+package de.ids_mannheim.korap.query;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+
+import de.ids_mannheim.korap.query.spans.SegmentSpans;
+
+/**
+ * Span query with two clauses whose matches are enumerated by
+ * {@link SegmentSpans}.
+ */
+public class SpanSegmentQuery extends SimpleSpanQuery {
+
+    private boolean collectPayloads;
+
+    /** Creates the query with payload collection switched on. */
+    public SpanSegmentQuery(SpanQuery firstClause, SpanQuery secondClause) {
+        this(firstClause, secondClause, true);
+    }
+
+    /**
+     * @param firstClause     the first clause
+     * @param secondClause    the second clause
+     * @param collectPayloads passed on to {@link SegmentSpans}
+     */
+    public SpanSegmentQuery(SpanQuery firstClause, SpanQuery secondClause,
+            boolean collectPayloads) {
+        super(firstClause, secondClause, "spanSegment");
+        this.collectPayloads = collectPayloads;
+    }
+
+    @Override
+    public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
+            Map<Term, TermContext> termContexts) throws IOException {
+        return new SegmentSpans(this, context, acceptDocs, termContexts,
+                collectPayloads);
+    }
+
+    /**
+     * Copies this query together with clones of both clauses.
+     * The clauses are read through the superclass getters so that a copy
+     * of an already rewritten query keeps the rewritten clauses.
+     */
+    @Override
+    public SimpleSpanQuery clone() {
+        SpanSegmentQuery copy = new SpanSegmentQuery(
+                (SpanQuery) getFirstClause().clone(),
+                (SpanQuery) getSecondClause().clone(),
+                this.collectPayloads);
+        copy.setBoost(getBoost());
+        return copy;
+    }
+
+    /**
+     * Two segment queries are equal if their clauses, payload setting and
+     * boost are equal.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof SpanSegmentQuery)) return false;
+
+        final SpanSegmentQuery other = (SpanSegmentQuery) o;
+        if (collectPayloads != other.collectPayloads) return false;
+        if (!getFirstClause().equals(other.getFirstClause())) return false;
+        if (!getSecondClause().equals(other.getSecondClause())) return false;
+
+        return Float.floatToIntBits(getBoost()) == Float.floatToIntBits(other.getBoost());
+    }
+
+    /** Hash code consistent with {@link #equals(Object)}. */
+    @Override
+    public int hashCode() {
+        int result = getFirstClause().hashCode();
+        result = 31 * result + getSecondClause().hashCode();
+        result = 31 * result + (collectPayloads ? 1 : 0);
+        result = 31 * result + Float.floatToIntBits(getBoost());
+        return result;
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
index b6080fe..e594c7f 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
@@ -1,408 +1,52 @@
package de.ids_mannheim.korap.query.spans;
-/* Inspired by NearSpansOrdered
- *
- * REIMPLEMENTATION
- *
- */
+import java.io.IOException;
+
+import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
-import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.search.spans.SpanQuery;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Collection;
-import java.util.Map;
-import java.util.Set;
-
-import de.ids_mannheim.korap.query.SpanNextQuery;
-
-// Todo: Disable the option to discard payloads
-
-import java.util.*;
-import java.io.*;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/** From Spans.java:
- * Expert: an enumeration of span matches. Used to implement span searching.
- * Each span represents a range of term positions within a document. Matches
- * are enumerated in order, by increasing document number, within that by
- * increasing start position and finally by increasing end position. */
-public class NextSpans extends Spans {
- private boolean firstTime = true;
- private boolean more = false;
-
- // Initialize as invalid
- private int matchDoc = -1;
- private int matchStart = -1;
- private int matchEnd = -1;
-
- /** Indicates that all both spans have the same doc() */
- private boolean inSameDoc = false;
-
- // First span
- private final Spans firstSpans;
- private final Spans firstSpansByDoc;
-
- // Second span
- private final Spans secondSpans;
- private final Spans secondSpansByDoc;
-
- private SpanNextQuery query;
-
- private List<byte[]> matchPayload;
- private boolean collectPayloads = true;
-
- private final static Logger log = LoggerFactory.getLogger(NextSpans.class);
-
- // Constructor
- public NextSpans (SpanNextQuery spanNextQuery,
- AtomicReaderContext context,
- Bits acceptDocs,
- Map<Term,TermContext> termContexts) throws IOException {
- this(spanNextQuery, context, acceptDocs, termContexts, true);
- };
-
- // Constructor
- public NextSpans (SpanNextQuery spanNextQuery,
- AtomicReaderContext context,
- Bits acceptDocs,
- Map<Term,TermContext> termContexts,
- boolean collectPayloads) throws IOException {
-
- log.trace("Init NextSpans");
-
- // this.collectPayloads = collectPayloads;
-
- // Init copies
- matchPayload = new LinkedList<byte[]>();
-
- firstSpans = spanNextQuery.firstClause().getSpans(
- context, acceptDocs, termContexts
- );
- firstSpansByDoc = firstSpans; // used in toSameDoc()
-
- secondSpans = spanNextQuery.secondClause().getSpans(
- context, acceptDocs, termContexts
- );
- secondSpansByDoc = secondSpans; // used in toSameDoc()
-
- /*
- if (DEBUG) {
- System.err.println("***");
- while (subSpans[i].next()) {
- StringBuffer payloadString = new StringBuffer();
- int docid = subSpans[i].doc();
- System.err.println("Span: "+i+" Doc: " + docid + " with " + subSpans[i].start() + "-" + subSpans[i].end() + " || " + payloadString.toString());
- };
- };
- */
- query = spanNextQuery; // kept for toString() only.
- };
- /** Move to the next match, returning true iff any such exists. */
- @Override
- public boolean next () throws IOException {
- log.trace("Next with doc {}", matchDoc);
+import de.ids_mannheim.korap.query.SimpleSpanQuery;
- // Check for init next
- if (firstTime) {
- log.trace("First retrieval of NextSpans");
- firstTime = false;
- if (!firstSpans.next() || !secondSpans.next()) {
- log.trace("No next in firstSpan nor in secondSpan");
- more = false;
- return false;
- };
- log.trace("Spans are initialized");
- more = true;
- };
-
- // if (collectPayloads)
- matchPayload.clear();
-
- return advance();
- };
-
-
- /** Skips to the first match beyond the current, whose document number is
- * greater than or equal to <i>target</i>. <p>Returns true iff there is such
- * a match. <p>Behaves as if written: <pre class="prettyprint">
- * boolean skipTo(int target) {
- * do {
- * if (!next())
- * return false;
- * } while (target > doc());
- * return true;
- * }
- * </pre>
- * Most implementations are considerably more efficient than that.
- */
- public boolean skipTo (int target) throws IOException {
- log.trace("skipTo {}", target);
-
- // Check for init next
- if (firstTime) {
- firstTime = false;
- if (!firstSpans.next() && !secondSpans.next()) {
- more = false;
- return false;
- };
- more = true;
- }
-
- // There are more spans, but the doc has to be skipped to target
- // Warning: This only skips firstSpans!
- // Maybe that's wrong ...
- else if (more && (firstSpans.doc() < target)) {
- if (firstSpans.skipTo(target)) {
- inSameDoc = false;
- }
-
- else {
- more = false;
- return false;
- };
- };
-
- // if (collectPayloads)
- matchPayload.clear();
-
- return advance();
- };
-
-
- /** Advance the subSpans to the same document */
- private boolean toSameDoc() throws IOException {
- log.trace("toSameDoc");
-
- if (firstSpansByDoc.doc() < secondSpansByDoc.doc()) {
- if (!firstSpansByDoc.skipTo(secondSpansByDoc.doc())) {
- more = false;
- inSameDoc = false;
- return false;
- };
- }
- else if (firstSpansByDoc.doc() > secondSpansByDoc.doc()) {
- if (!secondSpansByDoc.skipTo( firstSpansByDoc.doc() )) {
- more = false;
- inSameDoc = false;
- return false;
- };
- };
- inSameDoc = true;
- return true;
- };
-
-
- /** Advances the subSpans to just after an ordered match with a minimum slop
- * that is smaller than the slop allowed by the SpanNearQuery.
- * @return true iff there is such a match.
- */
- private boolean advance() throws IOException {
- log.trace("advance");
- boolean match = false;
-
- // There are more spans, and both spans are either in the
- // same doc or can be forwarded to the same doc.
- while (more && (inSameDoc || toSameDoc())) {
-
- log.trace("More spans in the same Doc: {}", firstSpansByDoc.doc());
-
- /* spans are in the same doc and in the correct order next to each other */
- if (match()) {
-
- // start and end position of last span
- matchStart = firstSpans.start();
- matchEnd = secondSpans.end();
-
- log.trace("Matching: {}-{}", matchStart, matchEnd);
-
- log.trace("Check for payloads");
-
-
- // if (collectPayloads) {
- log.trace("copy payloads");
-
- if (firstSpans.isPayloadAvailable()) {
- Collection<byte[]> payload = firstSpans.getPayload();
- log.trace("Found {} payloads in firstSpans", payload.size());
- matchPayload.addAll(payload);
- };
- if (secondSpans.isPayloadAvailable()) {
- Collection<byte[]> payload = secondSpans.getPayload();
- log.trace("Found {} payloads in secondSpans", payload.size());
- matchPayload.addAll(payload);
- };
- // };
-
- log.trace("=> MATCH");
- match = true;
- break;
- };
- };
-
- log.trace("Forward secondSpans");
- if (!secondSpans.next()) {
- log.trace("No more secondSpans");
- more = false;
- };
- inSameDoc = false;
- return match;
- };
-
-
- /** Returns the document number of the current match. Initially invalid. */
- @Override
- public int doc () {
- return matchDoc;
- };
-
- /** Returns the start position of the current match. Initially invalid. */
- @Override
- public int start () {
- return matchStart;
- };
-
- /** Returns the end position of the current match. Initially invalid. */
- @Override
- public int end () {
- return matchEnd;
- };
-
- /**
- * Returns the payload data for the current span.
- * This is invalid until {@link #next()} is called for
- * the first time.
- * This method must not be called more than once after each call
- * of {@link #next()}. However, most payloads are loaded lazily,
- * so if the payload data for the current position is not needed,
- * this method may not be called at all for performance reasons. An ordered
- * SpanQuery does not lazy load, so if you have payloads in your index and
- * you do not want ordered SpanNearQuerys to collect payloads, you can
- * disable collection with a constructor option.<br>
- * <br>
- * Note that the return type is a collection, thus the ordering should not be relied upon.
- * <br/>
- * @lucene.experimental
- *
- * @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false
- * @throws IOException if there is a low-level I/O error
- */
- // public abstract Collection<byte[]> getPayload() throws IOException;
- @Override
- public Collection<byte[]> getPayload() throws IOException {
- log.trace("Payload is requested with payload count {}", matchPayload.size());
- return matchPayload;
- };
+/** NextSpans is an enumeration of Span matches, which ensures that
+ * a span is immediately followed by another span.
+ *
+ * @author margaretha
+ * */
+public class NextSpans extends SimpleSpans {
+
+ public NextSpans (SimpleSpanQuery simpleSpanQuery,
+ AtomicReaderContext context,
+ Bits acceptDocs,
+ Map<Term,TermContext> termContexts) throws IOException {
+ this(simpleSpanQuery, context, acceptDocs, termContexts, true);
+ }
-
- /**
- * Checks if a payload can be loaded at this position.
- * <p/>
- * Payloads can only be loaded once per call to
- * {@link #next()}.
- *
- * @return true if there is a payload available at this position that can be loaded
- */
- @Override
- public boolean isPayloadAvailable() {
- log.trace("Check for payload emptyness: {}", matchPayload.isEmpty());
-
- return matchPayload.isEmpty() == false;
- };
-
-
- // Todo: This may be in the wrong version
- @Override
- public long cost() {
- return Math.min(firstSpans.cost(), secondSpans.cost());
- };
-
-
- @Override
- public String toString() {
- return getClass().getName() + "("+query.toString()+")@"+
- (firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
- };
-
-
- public boolean match () throws IOException {
- matchDoc = firstSpans.doc();
- log.trace("Check for next match");
-
- byte check;
- while (inSameDoc && ((check = docNext(firstSpans, secondSpans)) != (byte) 0)) {
-
- log.trace("There's no match");
-
- if ((check == (byte) -1) && !secondSpans.next()) {
- log.trace("No more secondSpans");
- inSameDoc = false;
- more = false;
- break;
- }
- else if (check == (byte) 1 && !firstSpans.next()) {
- log.trace("No more firstSpans");
- inSameDoc = false;
- more = false;
- break;
- }
- else if (matchDoc != secondSpans.doc()) {
- log.trace("secondSpans has another doc");
- inSameDoc = false;
- break;
- };
- };
- return inSameDoc;
- };
-
-
- /** Check whether two Spans in the same document are ordered.
- * @return true iff spans1 starts before spans2
- * or the spans start at the same position,
- * and spans1 ends before spans2.
- */
- static final byte docNext (Spans spans1, Spans spans2) {
- // check does
- int start1 = spans1.start();
- int start2 = spans2.start();
-
- // boolean val = (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2);
- byte val;
- if (start1 >= start2) {
- val = (byte) -1;
+ public NextSpans (SimpleSpanQuery simpleSpanQuery,
+ AtomicReaderContext context,
+ Bits acceptDocs,
+ Map<Term,TermContext> termContexts,
+ boolean collectPayloads) throws IOException {
+ super(simpleSpanQuery, context, acceptDocs, termContexts,collectPayloads);
}
- else {
- int end1 = spans1.end();
- if (end1 == start2) {
- val = (byte) 0;
- }
- else if (end1 > start2) {
- val = (byte) -1;
- }
- else {
- val = (byte) 1;
- };
- }
- // -1: forward secondSpans
- // 1: forward firstSpans
- log.trace("{}-{} next to {}-{}", start1, spans1.end(), start2, spans2.end());
- log.trace("docSpansOrdered: {}", val);
-
- return val;
- };
-};
+    /** Checks whether the end position of the current firstspan equals
+     *  the start position of the secondspan.
+     * */
+    protected int findMatch() {
+        final int firstEnd = firstSpans.end();
+        final int secondStart = secondSpans.start();
+        if (firstEnd != secondStart) {
+            // 1 forwards the secondspan, -1 forwards the firstspan
+            return (firstEnd > secondStart) ? 1 : -1;
+        }
+        matchDocNumber = firstSpans.doc();
+        matchStartPosition = firstSpans.start();
+        matchEndPosition = secondSpans.end();
+        return 0;
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java
new file mode 100644
index 0000000..a401b0f
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java
@@ -0,0 +1,68 @@
+package de.ids_mannheim.korap.query.spans;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.util.Bits;
+
+import de.ids_mannheim.korap.query.SimpleSpanQuery;
+
+/**
+ * SegmentSpans is an enumeration of span matches in which the firstspan
+ * and the secondspan have exactly the same start and end positions.
+ *
+ * @author margaretha
+ */
+public class SegmentSpans extends SimpleSpans {
+
+    /** Creates the enumeration with payload collection switched on. */
+    public SegmentSpans (SimpleSpanQuery simpleSpanQuery,
+            AtomicReaderContext context,
+            Bits acceptDocs,
+            Map<Term,TermContext> termContexts) throws IOException {
+        this(simpleSpanQuery, context, acceptDocs, termContexts, true);
+    }
+
+    public SegmentSpans (SimpleSpanQuery simpleSpanQuery,
+            AtomicReaderContext context,
+            Bits acceptDocs,
+            Map<Term,TermContext> termContexts,
+            boolean collectPayloads) throws IOException {
+        super(simpleSpanQuery, context, acceptDocs, termContexts, collectPayloads);
+    }
+
+    /**
+     * Checks whether the start and end positions of the current firstspan
+     * and secondspan are identical.
+     *
+     * <p>On a mismatch the span that sorts first by (start, end) is the one
+     * to forward. Comparing the start alone would forward the secondspan
+     * past a match whenever both start together and the firstspan is the
+     * shorter one.
+     *
+     * @return 0 on a match, -1 to forward the firstspan, 1 to forward the
+     *         secondspan
+     */
+    @Override
+    protected int findMatch() {
+        final int firstStart = firstSpans.start();
+        final int secondStart = secondSpans.start();
+        final int firstEnd = firstSpans.end();
+        final int secondEnd = secondSpans.end();
+
+        if (firstStart == secondStart && firstEnd == secondEnd) {
+            matchDocNumber = firstSpans.doc();
+            matchStartPosition = firstStart;
+            matchEndPosition = firstEnd;
+            return 0;
+        }
+        if (firstStart < secondStart
+                || (firstStart == secondStart && firstEnd < secondEnd)) {
+            return -1;
+        }
+        return 1;
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
new file mode 100644
index 0000000..4169729
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
@@ -0,0 +1,220 @@
+package de.ids_mannheim.korap.query.spans;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import de.ids_mannheim.korap.query.SimpleSpanQuery;
+
+/**
+ * An abstract class for Span enumeration whose two child spans are matched
+ * by their positions and do not have a partial overlap.
+ *
+ * @author margaretha
+ */
+public abstract class SimpleSpans extends Spans {
+    private static final Logger log = LoggerFactory.getLogger(SimpleSpans.class);
+
+    private boolean isStartEnumeration;
+    private boolean hasMoreSpans;
+    protected int matchDocNumber, matchStartPosition, matchEndPosition;
+    private final List<byte[]> matchPayload;
+    private final boolean collectPayloads;
+
+    // Warning: enumeration of Spans
+    protected Spans firstSpans, secondSpans;
+    private final SimpleSpanQuery query;
+
+    /**
+     * Fetches the spans of both clauses and moves the secondspan to its
+     * first position.
+     *
+     * @param collectPayloads whether the payloads of the two child spans
+     *        are copied into each match
+     */
+    public SimpleSpans (SimpleSpanQuery simpleSpanQuery,
+            AtomicReaderContext context,
+            Bits acceptDocs,
+            Map<Term,TermContext> termContexts,
+            boolean collectPayloads) throws IOException {
+
+        // Initialize as invalid
+        matchDocNumber = -1;
+        matchStartPosition = -1;
+        matchEndPosition = -1;
+
+        this.collectPayloads = collectPayloads;
+        // Allocated unconditionally: next(), skipTo() and
+        // isPayloadAvailable() touch the list even when payloads are off.
+        matchPayload = new LinkedList<byte[]>();
+
+        // Get the enumeration of the two spans to match
+        firstSpans = simpleSpanQuery.getFirstClause().
+            getSpans(context, acceptDocs, termContexts);
+        secondSpans = simpleSpanQuery.getSecondClause().
+            getSpans(context, acceptDocs, termContexts);
+
+        query = simpleSpanQuery;
+        hasMoreSpans = secondSpans.next();
+        isStartEnumeration = true;
+    }
+
+    /** Forwards the firstspan and searches for the next match. */
+    @Override
+    public boolean next() throws IOException {
+        // Warning: this does not work for overlapping spans
+        // e.g. get multiple second spans in a firstspan
+        // Short-circuit so that exhausted spans are not forwarded again.
+        hasMoreSpans = hasMoreSpans && firstSpans.next();
+        isStartEnumeration = false;
+        matchPayload.clear();
+        return advance();
+    }
+
+    /**
+     * Searches for the next match, starting at the current positions of
+     * the firstspan and the secondspan.
+     */
+    private boolean advance() throws IOException {
+        // The complexity is linear for searching in a document.
+        // It's better if we can skip to >= position in a document.
+        while (hasMoreSpans && ensureSameDoc()) {
+            int matchCase = findMatch();
+            if (matchCase == 0) {
+                log.trace("Match doc#: {}", matchDocNumber);
+                log.trace("Match positions: {}-{}", matchStartPosition,
+                        matchEndPosition);
+                doCollectPayloads();
+                return true;
+            }
+            else if (matchCase == 1) {
+                hasMoreSpans = secondSpans.next();
+            }
+            else {
+                hasMoreSpans = firstSpans.next();
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Specify the condition for a match
+     * @return 0 iff match is found,
+     *         -1 to advance the firstspan,
+     *         1 to advance the secondspan
+     */
+    protected abstract int findMatch();
+
+    /**
+     * If the current firstspan and secondspan are not in the same document,
+     * try to skip the span with the smaller document number, to the same
+     * OR a greater document number than, the document number of the other
+     * span. Do this until the firstspan and the secondspan are in the same
+     * doc, OR until reaching the last document.
+     * @return true iff such a document exists.
+     */
+    private boolean ensureSameDoc() throws IOException {
+        while (firstSpans.doc() != secondSpans.doc()) {
+            if (firstSpans.doc() < secondSpans.doc()) {
+                if (!firstSpans.skipTo(secondSpans.doc())) {
+                    hasMoreSpans = false;
+                    return false;
+                }
+            }
+            else {
+                if (!secondSpans.skipTo(firstSpans.doc())) {
+                    hasMoreSpans = false;
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    /** Collecting available payloads from the current first and second spans */
+    private void doCollectPayloads() throws IOException {
+        if (collectPayloads) {
+            log.trace("Collect payloads");
+            if (firstSpans.isPayloadAvailable()) {
+                Collection<byte[]> payload = firstSpans.getPayload();
+                log.trace("Found {} payloads in firstSpans", payload.size());
+                matchPayload.addAll(payload);
+            }
+            if (secondSpans.isPayloadAvailable()) {
+                Collection<byte[]> payload = secondSpans.getPayload();
+                log.trace("Found {} payloads in secondSpans", payload.size());
+                matchPayload.addAll(payload);
+            }
+        }
+    }
+
+    /**
+     * Moves to the first match beyond the current one whose document
+     * number is greater than or equal to {@code target}.
+     */
+    @Override
+    public boolean skipTo(int target) throws IOException {
+        if (hasMoreSpans) {
+            if (isStartEnumeration || firstSpans.doc() < target) {
+                hasMoreSpans = firstSpans.skipTo(target);
+            }
+            else {
+                // Already at or past target: leave the current match
+                // instead of reporting it a second time.
+                hasMoreSpans = firstSpans.next();
+            }
+        }
+        isStartEnumeration = false;
+        matchPayload.clear();
+        return advance();
+    }
+
+    @Override
+    public int doc() {
+        return matchDocNumber;
+    }
+
+    @Override
+    public int start() {
+        return matchStartPosition;
+    }
+
+    @Override
+    public int end() {
+        return matchEndPosition;
+    }
+
+    @Override
+    public Collection<byte[]> getPayload() throws IOException {
+        return matchPayload;
+    }
+
+    @Override
+    public boolean isPayloadAvailable() throws IOException {
+        return !matchPayload.isEmpty();
+    }
+
+    @Override
+    public long cost() {
+        return firstSpans.cost() + secondSpans.cost();
+    }
+
+    @Override
+    public String toString() { // who does call this?
+        return getClass().getName() + "(" + query.toString() + ")@" +
+            (isStartEnumeration ? "START" : (hasMoreSpans ? (doc() + ":" +
+            start() + "-" + end()) : "END"));
+    }
+
+}