Added javadoc comments
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
index c135656..98dd204 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
@@ -8,174 +8,291 @@
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
-// Temporary:
import de.ids_mannheim.korap.query.spans.ExpandedExclusionSpans;
import de.ids_mannheim.korap.query.spans.ExpandedSpans;
-/** Query to make a span longer by stretching out the start or the end
- * position of the span. The constraints of the expansion, such as how
- * large the expansion should be (min and max position) and the
- * direction of the expansion with respect to the "main" span, are
- * specified in ExpansionConstraint.
+/**
+ * SpanExpansionQuery makes a span longer by stretching out the start or the end
+ * position of the span. The constraints of the expansion, such as how large the
+ * expansion should be (min and max position) and the direction of the expansion
+ * with respect to the original span, are specified in ExpansionConstraint.
*
- * The expansion can be specified to not contain any direct/immediate
- * /adjacent occurrence(s) of another span. Examples:
- * [orth=der][orth!=Baum] "der" cannot be followed by "Baum"
- * [pos!=ADJ]{1,2}[orth=Baum] one or two adjectives cannot precedes
- * "Baum"
+ * <pre>
+ * SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "s:lightning"));
+ * SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 2, -1, true);
+ * </pre>
*
- * The offsets of the expansion parts can be collected by using a class
- * number.
+ * In the example above, the SpanExpansionQuery describes that the
+ * {@link TermSpans} of "lightning" may be expanded up to two token positions to
+ * the left.
*
- * @author margaretha
+ * <pre>
+ * "Trees are often struck by lightning because they are natural lightning conductors to the ground."
+ * </pre>
+ *
+ * The matches for the sample text are:
+ *
+ * <pre>
+ * [struck by lightning]
+ * [by lightning]
+ * [lightning]
+ * [are natural lightning]
+ * [natural lightning]
+ * [lightning]
+ * </pre>
+ *
+ * The expansion can also be specified to <em>not</em> contain any
+ * direct/immediate/adjacent occurrence(s) of another span. Examples in
+ * Poliqarp:
+ *
+ * <pre>
+ * [orth=the][orth!=lightning] "the" must not be followed by "lightning"
+ * [pos!=ADJ]{1,2}[orth=lightning] one or two adjectives cannot precede "lightning"
+ * </pre>
+ *
+ * The positions of the expansion parts can be stored in payloads by using a
+ * class number (optional).
+ *
+ * @author margaretha
* */
-public class SpanExpansionQuery extends SimpleSpanQuery{
-
- private int min, max; // min, max expansion position
-
- // if > 0, collect expansion offsets using this label
- private byte classNumber;
-
- // expansion direction with regard to the main span:
- // < 0 to the left of main span
- // >= 0 to the right of main span
- private int direction;
-
- // if true, no occurrence of another span
- final boolean isExclusion;
-
- /** Simple expansion for any/empty token. Use
- * {@link #SpanExpansionQuery(SpanQuery, SpanQuery, ExpansionConstraint,
- * boolean)} for expansion with exclusions of a specific spanquery.
- * */
- public SpanExpansionQuery(SpanQuery firstClause, int min, int max, int direction,
- boolean collectPayloads) {
- super(firstClause, collectPayloads);
- if (max < min){
- throw new IllegalArgumentException("The max position has to be " +
- "bigger than or the same as min position.");
- }
- this.min = min;
- this.max = max;
- this.direction = direction;
- this.isExclusion = false;
- }
-
- public SpanExpansionQuery(SpanQuery firstClause, int min, int max, int direction,
- byte classNumber, boolean collectPayloads) {
- this(firstClause, min, max, direction, collectPayloads);
- this.classNumber = classNumber;
- }
-
- /** Expansion with exclusions of the spanquery specified as the second
- * parameter.
- * */
- public SpanExpansionQuery(SpanQuery firstClause, SpanQuery notClause, int min,
- int max, int direction, boolean collectPayloads) {
- super(firstClause, notClause, collectPayloads);
- if (max < min){
- throw new IllegalArgumentException("The max position has to be " +
- "bigger than or the same as min position.");
- }
- this.min = min;
- this.max = max;
- this.direction = direction;
- this.isExclusion = true;
- }
-
- public SpanExpansionQuery(SpanQuery firstClause, SpanQuery notClause, int min,
- int max, int direction, byte classNumber, boolean collectPayloads) {
- this(firstClause, notClause, min, max, direction, collectPayloads);
- this.classNumber = classNumber;
- }
-
-
- @Override
- public SimpleSpanQuery clone() {
- SpanExpansionQuery sq = null;
- if (isExclusion){
- sq = new SpanExpansionQuery(firstClause, secondClause, min, max, direction,
- classNumber, collectPayloads);
- }
- else{
- sq = new SpanExpansionQuery(firstClause, min, max, direction, classNumber,
- collectPayloads);
- }
- //sq.setBoost(sq.getBoost());
- return sq;
- }
+public class SpanExpansionQuery extends SimpleSpanQuery {
- @Override
- public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
-
-// Temporary:
- if (isExclusion)
- return new ExpandedExclusionSpans(this, context, acceptDocs, termContexts);
- else
-
- return new ExpandedSpans(this, context, acceptDocs, termContexts);
- }
+ private int min, max; // min, max expansion position
- @Override
- public String toString(String field) {
- StringBuilder sb = new StringBuilder();
- sb.append("spanExpansion(");
- sb.append(firstClause.toString());
- if (isExclusion && secondClause != null){
- sb.append(", !");
- sb.append(secondClause.toString());
- }
- else{
- sb.append(", []");
- }
- sb.append("{");
- sb.append(min);
- sb.append(", ");
- sb.append(max);
- sb.append("}, ");
- if (direction < 0)
- sb.append("left");
- else sb.append("right");
- if (classNumber > 0){
- sb.append(", class:");
- sb.append(classNumber);
- }
- sb.append(")");
- return sb.toString();
- }
+ // if > 0, collect expansion offsets using this label
+ private byte classNumber;
- public int getMin() {
- return min;
- }
+ // expansion direction with regard to the main span:
+ // < 0 to the left of main span
+ // >= 0 to the right of main span
+ private int direction;
- public void setMin(int min) {
- this.min = min;
- }
+ // if true, no occurrence of another span
+ final boolean isExclusion;
- public int getMax() {
- return max;
- }
+ /**
+ * Constructs a SpanExpansionQuery for simple expansion of the specified
+ * {@link SpanQuery}.
+ *
+ * @param firstClause a {@link SpanQuery}
+ * @param min the minimum length of the expansion
+ * @param max the maximum length of the expansion
+ * @param direction the direction of the expansion
+ * @param collectPayloads a boolean flag representing the value
+ * <code>true</code> if payloads are to be collected, otherwise
+ * <code>false</code>.
+ */
+ public SpanExpansionQuery(SpanQuery firstClause, int min, int max,
+ int direction, boolean collectPayloads) {
+ super(firstClause, collectPayloads);
+ if (max < min) {
+ throw new IllegalArgumentException("The max position has to be "
+ + "bigger than or the same as min position.");
+ }
+ this.min = min;
+ this.max = max;
+ this.direction = direction;
+ this.isExclusion = false;
+ }
- public void setMax(int max) {
- this.max = max;
- }
+ /**
+ * Constructs a SpanExpansionQuery for simple expansion of the specified
+ * {@link SpanQuery} and stores expansion offsets in payloads associated
+ * with the given class number.
+ *
+ * @param firstClause a {@link SpanQuery}
+ * @param min the minimum length of the expansion
+ * @param max the maximum length of the expansion
+ * @param direction the direction of the expansion
+ * @param classNumber the class number for storing expansion offsets in
+ * payloads
+ * @param collectPayloads a boolean flag representing the value
+ * <code>true</code> if payloads are to be collected, otherwise
+ * <code>false</code>.
+ */
+ public SpanExpansionQuery(SpanQuery firstClause, int min, int max,
+ int direction, byte classNumber, boolean collectPayloads) {
+ this(firstClause, min, max, direction, collectPayloads);
+ this.classNumber = classNumber;
+ }
- public byte getClassNumber() {
- return classNumber;
- }
+ /**
+ * Constructs a SpanExpansionQuery for expansion of the first
+ * {@link SpanQuery} with exclusions of the second {@link SpanQuery}.
+ *
+ * @param firstClause the SpanQuery to be expanded
+ * @param notClause the SpanQuery to be excluded
+ * @param min the minimum length of the expansion
+ * @param max the maximum length of the expansion
+ * @param direction the direction of the expansion
+ * @param collectPayloads a boolean flag representing the value
+ * <code>true</code> if payloads are to be collected, otherwise
+ * <code>false</code>.
+ */
+ public SpanExpansionQuery(SpanQuery firstClause, SpanQuery notClause,
+ int min, int max, int direction, boolean collectPayloads) {
+ super(firstClause, notClause, collectPayloads);
+ if (max < min) {
+ throw new IllegalArgumentException("The max position has to be "
+ + "bigger than or the same as min position.");
+ }
+ this.min = min;
+ this.max = max;
+ this.direction = direction;
+ this.isExclusion = true;
+ }
- public void setClassNumber(byte classNumber) {
- this.classNumber = classNumber;
- }
+ /**
+ * Constructs a SpanExpansionQuery for expansion of the first
+ * {@link SpanQuery} with exclusions of the second {@link SpanQuery}, and
+ * stores expansion offsets in payloads associated with the given class
+ * number.
+ *
+ * @param firstClause the SpanQuery to be expanded
+ * @param notClause the SpanQuery to be excluded
+ * @param min the minimum length of the expansion
+ * @param max the maximum length of the expansion
+ * @param direction the direction of the expansion
+ * @param classNumber the class number for storing expansion offsets in
+ * payloads
+ * @param collectPayloads a boolean flag representing the value
+ * <code>true</code> if payloads are to be collected, otherwise
+ * <code>false</code>.
+ */
+ public SpanExpansionQuery(SpanQuery firstClause, SpanQuery notClause,
+ int min, int max, int direction, byte classNumber,
+ boolean collectPayloads) {
+ this(firstClause, notClause, min, max, direction, collectPayloads);
+ this.classNumber = classNumber;
+ }
- public int getDirection() {
- return direction;
- }
+ @Override
+ public SimpleSpanQuery clone() {
+ SpanExpansionQuery sq = null;
+ if (isExclusion) {
+ sq = new SpanExpansionQuery(firstClause, secondClause, min, max,
+ direction, classNumber, collectPayloads);
+ } else {
+ sq = new SpanExpansionQuery(firstClause, min, max, direction,
+ classNumber, collectPayloads);
+ }
+ //sq.setBoost(sq.getBoost());
+ return sq;
+ }
- public void setDirection(int direction) {
- this.direction = direction;
- }
+ @Override
+ public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+
+ // Temporary:
+ if (isExclusion)
+ return new ExpandedExclusionSpans(this, context, acceptDocs,
+ termContexts);
+ else
+
+ return new ExpandedSpans(this, context, acceptDocs, termContexts);
+ }
+
+ @Override
+ public String toString(String field) {
+ StringBuilder sb = new StringBuilder();
+ sb.append("spanExpansion(");
+ sb.append(firstClause.toString());
+ if (isExclusion && secondClause != null) {
+ sb.append(", !");
+ sb.append(secondClause.toString());
+ } else {
+ sb.append(", []");
+ }
+ sb.append("{");
+ sb.append(min);
+ sb.append(", ");
+ sb.append(max);
+ sb.append("}, ");
+ if (direction < 0)
+ sb.append("left");
+ else
+ sb.append("right");
+ if (classNumber > 0) {
+ sb.append(", class:");
+ sb.append(classNumber);
+ }
+ sb.append(")");
+ return sb.toString();
+ }
+
+ /**
+ * Returns the minimum length of the expansion.
+ *
+ * @return the minimum length of the expansion
+ */
+ public int getMin() {
+ return min;
+ }
+
+ /**
+ * Sets the minimum length of the expansion.
+ *
+ * @param min the minimum length of the expansion
+ */
+ public void setMin(int min) {
+ this.min = min;
+ }
+
+ /**
+ * Returns the maximum length of the expansion.
+ *
+ * @return the maximum length of the expansion
+ */
+ public int getMax() {
+ return max;
+ }
+
+ /**
+ * Sets the maximum length of the expansion.
+ *
+ * @param max the maximum length of the expansion
+ */
+ public void setMax(int max) {
+ this.max = max;
+ }
+
+ /**
+ * Returns the class number associated with the expansion offsets
+ *
+ * @return the class number associated with the expansion offsets
+ */
+ public byte getClassNumber() {
+ return classNumber;
+ }
+
+ /**
+ * Sets the class number associated with the expansion offsets
+ *
+ * @param classNumber the class number associated with the expansion offsets
+ */
+ public void setClassNumber(byte classNumber) {
+ this.classNumber = classNumber;
+ }
+
+ /**
+ * Returns the direction of the expansion
+ *
+ * @return the direction of the expansion
+ */
+ public int getDirection() {
+ return direction;
+ }
+
+ /**
+ * Sets the direction of the expansion
+ *
+ * @param direction the direction of the expansion
+ */
+ public void setDirection(int direction) {
+ this.direction = direction;
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
index 122a65b..d77a4bb 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
@@ -3,133 +3,160 @@
// Based on SpanNearQuery
/*
- Todo: Make one Spanarray and switch between the results of A and B.
-*/
+ * Todo: Make one Spanarray and switch between the results of A and B.
+ */
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.Query;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
-
import de.ids_mannheim.korap.query.spans.NextSpans;
/**
- * Matches spans which are directly next to each other.
- * this is identical to a phrase query with exactly two clauses.
+ * SpanNextQuery matches two spans which are directly next to each other. It is
+ * identical to a phrase query with exactly two clauses.
+ *
+ * In the example below, the SpanNextQuery retrieves {@link NextSpans} starting
+ * from the start position of {@link TermSpans} "turn" and ending at the end
+ * position of {@link TermSpans} "off" occurring immediately after the
+ * {@link TermSpans} "turn".
+ *
+ * <pre>
+ * SpanNextQuery sq = new SpanNextQuery(
+ * new SpanTermQuery(new Term("tokens","s:turn")),
+ * new SpanTermQuery(new Term("tokens", "s:off")));
+ * </pre>
+ *
+ * @author diewald, margaretha
+ *
*/
public class SpanNextQuery extends SimpleSpanQuery implements Cloneable {
- // Constructor
+ /**
+ * Constructs a SpanNextQuery for the two specified {@link SpanQuery
+ * SpanQueries} whose payloads are to be collected for the resulting
+ * {@link NextSpans}. The first SpanQuery is immediately followed by the
+ * second SpanQuery.
+ *
+ * @param firstClause the first SpanQuery
+ * @param secondClause the second SpanQuery
+ */
public SpanNextQuery(SpanQuery firstClause, SpanQuery secondClause) {
- this(firstClause, secondClause, true);
+ this(firstClause, secondClause, true);
};
- // Constructor
+ /**
+ * Constructs a SpanNextQuery for the two specified {@link SpanQuery
+ * SpanQueries} where the first SpanQuery is immediately followed by the
+ * second SpanQuery.
+ *
+ * @param firstClause the first SpanQuery
+ * @param secondClause the second SpanQuery
+ * @param collectPayloads a boolean flag representing the value
+ * <code>true</code> if payloads are to be collected, otherwise
+ * <code>false</code>.
+ */
public SpanNextQuery(SpanQuery firstClause, SpanQuery secondClause,
- boolean collectPayloads) {
- super(firstClause, secondClause, collectPayloads);
+ boolean collectPayloads) {
+ super(firstClause, secondClause, collectPayloads);
};
- public SpanNextQuery(SpanQuery firstClause, SpanQuery secondClause,
- boolean isFirstNegated, boolean collectPayloads) {
- super(firstClause, secondClause, collectPayloads);
- }
-
-
@Override
- public Spans getSpans (final AtomicReaderContext context, Bits acceptDocs,
- Map<Term,TermContext> termContexts) throws IOException {
- return (Spans) new NextSpans (this, context, acceptDocs, termContexts);
+ public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ return (Spans) new NextSpans(this, context, acceptDocs, termContexts);
};
@Override
public SpanNextQuery clone() {
- SpanNextQuery spanNextQuery = new SpanNextQuery(
- (SpanQuery) firstClause.clone(),
- (SpanQuery) secondClause.clone(),
- collectPayloads
- );
- spanNextQuery.setBoost(getBoost());
- return spanNextQuery;
+ SpanNextQuery spanNextQuery = new SpanNextQuery(
+ (SpanQuery) firstClause.clone(),
+ (SpanQuery) secondClause.clone(), collectPayloads);
+ spanNextQuery.setBoost(getBoost());
+ return spanNextQuery;
};
-
/*
* Rewrite query in case it includes regular expressions or wildcards
*/
@Override
- public Query rewrite (IndexReader reader) throws IOException {
- SpanNextQuery clone = null;
+ public Query rewrite(IndexReader reader) throws IOException {
+ SpanNextQuery clone = null;
- // Does the first clause needs a rewrite?
- SpanQuery query = (SpanQuery) firstClause.rewrite(reader);
- if (query != firstClause) {
- if (clone == null)
- clone = this.clone();
- clone.firstClause = query;
- };
+ // Does the first clause need a rewrite?
+ SpanQuery query = (SpanQuery) firstClause.rewrite(reader);
+ if (query != firstClause) {
+ if (clone == null)
+ clone = this.clone();
+ clone.firstClause = query;
+ }
+ ;
- // Does the second clause needs a rewrite?
- query = (SpanQuery) secondClause.rewrite(reader);
- if (query != secondClause) {
- if (clone == null)
- clone = this.clone();
- clone.secondClause = query;
- };
+ // Does the second clause need a rewrite?
+ query = (SpanQuery) secondClause.rewrite(reader);
+ if (query != secondClause) {
+ if (clone == null)
+ clone = this.clone();
+ clone.secondClause = query;
+ }
+ ;
- // There is a clone and it is important
- if (clone != null)
- return clone;
+ // There is a clone and it is important
+ if (clone != null)
+ return clone;
- return this;
+ return this;
};
-
@Override
- public String toString(String field) {
- StringBuilder sb = new StringBuilder();
- sb.append("spanNext(");
- sb.append(firstClause.toString(field));
- sb.append(", ");
- sb.append(secondClause.toString(field));
- sb.append(")");
- sb.append(ToStringUtils.boost(getBoost()));
- return sb.toString();
+ public String toString(String field) {
+ StringBuilder sb = new StringBuilder();
+ sb.append("spanNext(");
+ sb.append(firstClause.toString(field));
+ sb.append(", ");
+ sb.append(secondClause.toString(field));
+ sb.append(")");
+ sb.append(ToStringUtils.boost(getBoost()));
+ return sb.toString();
}
-
/** Returns true iff <code>o</code> is equal to this. */
@Override
public boolean equals(Object o) {
- if (this == o) return true;
- if (!(o instanceof SpanNextQuery)) return false;
-
- final SpanNextQuery spanNextQuery = (SpanNextQuery) o;
-
- if (collectPayloads != spanNextQuery.collectPayloads) return false;
- if (!firstClause.equals(spanNextQuery.firstClause)) return false;
- if (!secondClause.equals(spanNextQuery.secondClause)) return false;
+ if (this == o)
+ return true;
+ if (!(o instanceof SpanNextQuery))
+ return false;
- return getBoost() == spanNextQuery.getBoost();
+ final SpanNextQuery spanNextQuery = (SpanNextQuery) o;
+
+ if (collectPayloads != spanNextQuery.collectPayloads)
+ return false;
+ if (!firstClause.equals(spanNextQuery.firstClause))
+ return false;
+ if (!secondClause.equals(spanNextQuery.secondClause))
+ return false;
+
+ return getBoost() == spanNextQuery.getBoost();
};
-
// I don't know what I am doing here
@Override
public int hashCode() {
- int result;
- result = firstClause.hashCode() + secondClause.hashCode();
- result ^= (result << 31) | (result >>> 2); // reversible
- result += Float.floatToRawIntBits(getBoost());
- return result;
+ int result;
+ result = firstClause.hashCode() + secondClause.hashCode();
+ result ^= (result << 31) | (result >>> 2); // reversible
+ result += Float.floatToRawIntBits(getBoost());
+ return result;
};
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java
index 94b273b..9ea420f 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java
@@ -13,68 +13,119 @@
import de.ids_mannheim.korap.query.spans.RepetitionSpans;
-/** SpanRepetitionQuery means that the given query can appears
- * multiple times specified by the minimum and the maximum number
- * of repetitions parameters.
+/**
+ * SpanRepetitionQuery means that the given SpanQuery must appear multiple
+ * times in a sequence. The number of repetitions depends on the minimum and
+ * the maximum number parameters. <br />
+ * <br />
+ *
+ * In the example below, SpanRepetitionQuery retrieves {@link RepetitionSpans}
+ * consisting of the TermSpans "tt:p/ADJ" that must appear at least once or
+ * consecutively two times. What appears after the RepetitionSpans is not
+ * considered, so it is possible that it is another "tt:p/ADJ". <br />
+ * <br />
+ *
+ * <pre>
+ * SpanRepetitionQuery sq = new SpanRepetitionQuery(new SpanTermQuery(new Term(
+ * "tokens", "tt:p/ADJ")), 1, 2, true);
+ * </pre>
+ *
+ * For instance, "a large black leather jacket" contains the following matches.
+ *
+ * <pre>
+ * [large]
+ * [large black]
+ * [black]
+ * [black leather]
+ * [leather]
+ * </pre>
*
* @author margaretha
* */
-public class SpanRepetitionQuery extends SimpleSpanQuery{
-
- private int min, max;
-
- public SpanRepetitionQuery(SpanQuery sq, int min, int max,
- boolean collectPayloads) {
- super(sq, collectPayloads);
- this.min = min;
- this.max = max;
- }
+public class SpanRepetitionQuery extends SimpleSpanQuery {
- @Override
- public SimpleSpanQuery clone() {
- SpanRepetitionQuery sq = new SpanRepetitionQuery(
- (SpanQuery) this.firstClause.clone(),
- this.min,
- this.max,
- this.collectPayloads);
- sq.setBoost(getBoost());
- return sq;
- }
+ private int min, max;
- @Override
- public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
- return new RepetitionSpans(this, context, acceptDocs, termContexts);
- }
+ /**
+ * Constructs a SpanRepetitionQuery for the given {@link SpanQuery}.
+ *
+ * @param sq a SpanQuery
+ * @param min the minimum number of required repetitions
+ * @param max the maximum number of required repetitions
+ * @param collectPayloads a boolean flag representing the value
+ * <code>true</code> if payloads are to be collected, otherwise
+ * <code>false</code>.
+ */
+ public SpanRepetitionQuery(SpanQuery sq, int min, int max,
+ boolean collectPayloads) {
+ super(sq, collectPayloads);
+ this.min = min;
+ this.max = max;
+ }
- @Override
- public String toString(String field) {
- StringBuilder sb = new StringBuilder();
- sb.append("spanRepetition(");
- sb.append(firstClause.toString(field));
- sb.append("{");
- sb.append(min);
- sb.append(",");
- sb.append(max);
- sb.append("})");
- sb.append(ToStringUtils.boost(getBoost()));
- return sb.toString();
- }
+ @Override
+ public SimpleSpanQuery clone() {
+ SpanRepetitionQuery sq = new SpanRepetitionQuery(
+ (SpanQuery) this.firstClause.clone(), this.min, this.max,
+ this.collectPayloads);
+ sq.setBoost(getBoost());
+ return sq;
+ }
- public int getMin() {
- return min;
- }
+ @Override
+ public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ return new RepetitionSpans(this, context, acceptDocs, termContexts);
+ }
- public void setMin(int min) {
- this.min = min;
- }
+ @Override
+ public String toString(String field) {
+ StringBuilder sb = new StringBuilder();
+ sb.append("spanRepetition(");
+ sb.append(firstClause.toString(field));
+ sb.append("{");
+ sb.append(min);
+ sb.append(",");
+ sb.append(max);
+ sb.append("})");
+ sb.append(ToStringUtils.boost(getBoost()));
+ return sb.toString();
+ }
- public int getMax() {
- return max;
- }
+ /**
+ * Returns the minimum number of required repetitions.
+ *
+ * @return the minimum number of required repetitions
+ */
+ public int getMin() {
+ return min;
+ }
- public void setMax(int max) {
- this.max = max;
- }
-
+ /**
+ * Sets the minimum number of required repetitions.
+ *
+ * @param min the minimum number of required repetitions
+ */
+ public void setMin(int min) {
+ this.min = min;
+ }
+
+ /**
+ * Returns the maximum number of required repetitions.
+ *
+ * @return the maximum number of required repetitions
+ */
+ public int getMax() {
+ return max;
+ }
+
+ /**
+ * Sets the maximum number of required repetitions.
+ *
+ * @param max the maximum number of required repetitions
+ */
+ public void setMax(int max) {
+ this.max = max;
+ }
+
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
index 286e239..57d3b76 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
@@ -18,17 +18,16 @@
* positions, for instance:
*
* <pre>
- * sq = new SpanSegmentQuery(
- new SpanTermQuery(new Term("tokens","s:Hund")),
- new SpanTermQuery(new Term("tokens","tt/p:NN"))
- );
+ * sq = new SpanSegmentQuery(new SpanTermQuery(new Term("tokens", "s:Hund")),
+ * new SpanTermQuery(new Term("tokens", "tt/p:NN")));
* </pre>
+ *
* @author margaretha
* */
public class SpanSegmentQuery extends SimpleSpanQuery {
/**
- * Creates a SpanSegmentQuery from the two given SpanQueries, by default
+ * Constructs a SpanSegmentQuery from the two given SpanQueries, by default
* payloads are to be collected.
*
* @param firstClause a {@link SpanQuery}
@@ -39,6 +38,8 @@
}
/**
+ * Constructs a SpanSegmentQuery from the two given SpanQueries.
+ *
* @param firstClause a {@link SpanQuery}
* @param secondClause a {@link SpanQuery}
* @param collectPayloads a boolean flag representing the value
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
index 182942a..2d79132 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
@@ -35,7 +35,7 @@
/**
* Constructs NextSpans for the given {@link SpanNextQuery}.
*
- * @param spanNextQuery
+ * @param spanNextQuery a SpanNextQuery
* @param context
* @param acceptDocs
* @param termContexts
@@ -75,11 +75,6 @@
matchEndPosition = matchList.get(0).getEnd();
if (collectPayloads)
matchPayload.addAll(matchList.get(0).getPayloads());
- //System.out.println(this.toString());
- /*
- * System.out.println("Match positions "+
- * matchStartPosition+","+ matchEndPosition);
- */
matchList.remove(0);
return true;
}
@@ -115,8 +110,9 @@
}
/**
- * Finds all candidates whose start position is the same as the firstspan's
- * end position.
+ * Removes all second span candidates whose start position is not the same
+ * as the firstspan's end position; for each remaining candidate, creates a
+ * match and adds it to the matchlist.
*
* @throws IOException
*/
@@ -128,13 +124,17 @@
if (cs.getStart() == firstSpans.end()) {
addMatch(cs);
} else {
- //System.out.println(cs.getStart() + " " +firstSpans.end());
i.remove();
}
}
}
/**
+ * Finds all secondspans whose start position is the same as the end
+ * position of the firstspans, until the secondspans' start position is
+ * bigger than the firstspans' end position. Adds those secondspans to the
+ * candidateList and creates matches.
+ *
* @throws IOException
*/
private void searchMatches() throws IOException {
@@ -151,6 +151,14 @@
}
}
+ /**
+ * Creates a match from the given CandidateSpan representing a secondspan
+ * state whose start position is identical to the end position of the
+ * current firstspan, and adds it to the matchlist.
+ *
+ * @param cs a CandidateSpan
+ * @throws IOException
+ */
private void addMatch(CandidateSpan cs) throws IOException {
int start = firstSpans.start();
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
index 6019125..f66d78c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RepetitionSpans.java
@@ -11,184 +11,208 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.query.SpanRepetitionQuery;
-/** Enumeration of spans occurring multiple times in a sequence.
- * The number of min and max repetition can be set.
- *
- * @author margaretha
+/**
+ * Enumeration of spans occurring multiple times in a sequence. The number of
+ * repetitions depends on the min and max parameters.
+ *
+ * @author margaretha
* */
-public class RepetitionSpans extends SimpleSpans{
-
- private int min,max;
- private long matchCost;
- private List<CandidateSpan> matchList;
- private Logger log = LoggerFactory.getLogger(RepetitionSpans.class);
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
+public class RepetitionSpans extends SimpleSpans {
+ private int min, max;
+ private long matchCost;
+ private List<CandidateSpan> matchList;
- public RepetitionSpans(SpanRepetitionQuery query,
- AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts)
- throws IOException {
- super(query, context, acceptDocs, termContexts);
- this.min = query.getMin();
- this.max = query.getMax();
- matchList = new ArrayList<CandidateSpan>();
- hasMoreSpans = firstSpans.next();
- }
+ /**
+ * Constructs RepetitionSpans from the given {@link SpanRepetitionQuery}.
+ *
+ * @param query a SpanRepetitionQuery
+ * @param context
+ * @param acceptDocs
+ * @param termContexts
+ * @throws IOException
+ */
+ public RepetitionSpans(SpanRepetitionQuery query,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ super(query, context, acceptDocs, termContexts);
+ this.min = query.getMin();
+ this.max = query.getMax();
+ matchList = new ArrayList<CandidateSpan>();
+ hasMoreSpans = firstSpans.next();
+ }
- @Override
- public boolean next() throws IOException {
- isStartEnumeration = false;
- matchPayload.clear();
- return advance();
- }
+ @Override
+ public boolean next() throws IOException {
+ isStartEnumeration = false;
+ matchPayload.clear();
+ return advance();
+ }
- /** Get the next span from the candidate match list, or set it first when
- * it is empty.
- * */
- private boolean advance() throws IOException {
+ /**
+     * Advances the RepetitionSpans to the next match by setting the first
+     * element in the matchlist as the current match. When the matchlist is
+     * empty, it is first refilled from the next sequence of adjacent spans.
+ *
+ * @return <code>true</code> if a match is found, <code>false</code>
+ * otherwise.
+ * @throws IOException
+ */
+ private boolean advance() throws IOException {
- while (hasMoreSpans || !matchList.isEmpty()){
- if (!matchList.isEmpty()){
- setMatchProperties(matchList.get(0));
- matchList.remove(0);
- return true;
- }
- matchCost = 0;
-
- List<CandidateSpan> adjacentSpans = collectAdjacentSpans();
- setMatchList(adjacentSpans);
- }
- return false;
- }
-
- /** Collect all adjacent spans occurring in a sequence.
- * @return a list of the adjacent spans
- * */
- private List<CandidateSpan> collectAdjacentSpans() throws IOException {
-
- CandidateSpan startSpan = new CandidateSpan(firstSpans);
-
- List<CandidateSpan> adjacentSpans = new ArrayList<CandidateSpan>();
- adjacentSpans.add(startSpan);
-
- CandidateSpan prevSpan = startSpan;
-
- while ((hasMoreSpans = firstSpans.next()) &&
- startSpan.getDoc() == firstSpans.doc() ){
-
- if (firstSpans.start() > prevSpan.getEnd()){
- break;
- }
- else if (firstSpans.start() == prevSpan.getEnd()){
- prevSpan = new CandidateSpan(firstSpans);
- adjacentSpans.add(prevSpan);
- }
- }
- return adjacentSpans;
- }
-
- /** Generate all possible repetition candidate spans from the adjacent spans
- * and add them to the match list.
- * */
- private void setMatchList(List<CandidateSpan> adjacentSpans){
- CandidateSpan startSpan, endSpan, matchSpan;
- for (int i=min; i<max+1; i++){
- //System.out.println("num: "+i);
- int j=0;
- int endIndex;
- while ((endIndex = j+i-1) < adjacentSpans.size()){
- startSpan = adjacentSpans.get(j);
- if (i == 1){
- try {
- matchSpan = startSpan.clone();
- matchSpan.setPayloads(computeMatchPayload(adjacentSpans, 0, endIndex-1));
- matchList.add(matchSpan);
- } catch (CloneNotSupportedException e) {
- e.printStackTrace();
- }
- }
- else {
- endSpan = adjacentSpans.get(endIndex);
- matchSpan = new CandidateSpan(
- startSpan.getStart(),
- endSpan.getEnd(),
- startSpan.getDoc(),
- computeMatchCost(adjacentSpans, 0, endIndex),
- computeMatchPayload(adjacentSpans, 0, endIndex));
-
- //System.out.println("c:"+matchSpan.getCost() +" p:"+ matchSpan.getPayloads().size());
- //System.out.println(startSpan.getStart() +","+endSpan.getEnd());
-
- matchList.add(matchSpan);
- }
- j++;
- }
- }
-
- Collections.sort(matchList);
- }
-
- /** Add all the payloads of a candidate span
- * */
- private Collection<byte[]> computeMatchPayload(
- List<CandidateSpan> adjacentSpans, int start, int end) {
- Collection<byte[]> payload = new ArrayList<byte[]>();
- for (int i=start; i<= end; i++){
- payload.addAll(adjacentSpans.get(i).getPayloads());
- }
- return payload;
- }
+ while (hasMoreSpans || !matchList.isEmpty()) {
+ if (!matchList.isEmpty()) {
+ setMatchProperties(matchList.get(0));
+ matchList.remove(0);
+ return true;
+ }
+ matchCost = 0;
- /** Add all the cost of a candidate span
- * */
- private long computeMatchCost(List<CandidateSpan> adjacentSpans,
- int start, int end){
- long matchCost = 0;
- for (int i=start; i<= end; i++){
- CandidateSpan c = adjacentSpans.get(i);
- matchCost += adjacentSpans.get(i).getCost();
- }
- return matchCost;
- }
-
-
- /** Setting match properties from the candidate span
- * */
- private void setMatchProperties(CandidateSpan candidateSpan)
- throws IOException {
- matchDocNumber = candidateSpan.getDoc();
- matchStartPosition = candidateSpan.getStart();
- matchEndPosition = candidateSpan.getEnd();
- if (collectPayloads && candidateSpan.getPayloads() != null) {
- matchPayload.addAll(candidateSpan.getPayloads());
- }
-
- if (DEBUG)
- log.trace("doc# {}, start {}, end {}",matchDocNumber,matchStartPosition,
- matchEndPosition);
- }
+ List<CandidateSpan> adjacentSpans = collectAdjacentSpans();
+ setMatchList(adjacentSpans);
+ }
+ return false;
+ }
- @Override
- public boolean skipTo(int target) throws IOException {
- if (hasMoreSpans && firstSpans.doc() < target){
- if (!firstSpans.skipTo(target)){
- hasMoreSpans = false;
- return false;
- }
- }
- matchList.clear();
- return advance();
- }
+ /**
+ * Collects all adjacent firstspans occurring in a sequence.
+ *
+ * @return a list of the adjacent spans
+ * @throws IOException
+ */
+ private List<CandidateSpan> collectAdjacentSpans() throws IOException {
- @Override
- public long cost() {
- return matchCost;
- }
+ CandidateSpan startSpan = new CandidateSpan(firstSpans);
+
+ List<CandidateSpan> adjacentSpans = new ArrayList<CandidateSpan>();
+ adjacentSpans.add(startSpan);
+
+ CandidateSpan prevSpan = startSpan;
+
+ while ((hasMoreSpans = firstSpans.next())
+ && startSpan.getDoc() == firstSpans.doc()) {
+
+ if (firstSpans.start() > prevSpan.getEnd()) {
+ break;
+ } else if (firstSpans.start() == prevSpan.getEnd()) {
+ prevSpan = new CandidateSpan(firstSpans);
+ adjacentSpans.add(prevSpan);
+ }
+ }
+ return adjacentSpans;
+ }
+
+ /**
+     * Generates all possible repetition match spans from the given list of
+     * adjacent spans and adds them to the match list.
+ *
+ * @param adjacentSpans
+ */
+ private void setMatchList(List<CandidateSpan> adjacentSpans) {
+ CandidateSpan startSpan, endSpan, matchSpan;
+ for (int i = min; i < max + 1; i++) {
+ int j = 0;
+ int endIndex;
+ while ((endIndex = j + i - 1) < adjacentSpans.size()) {
+ startSpan = adjacentSpans.get(j);
+ if (i == 1) {
+ try {
+ matchSpan = startSpan.clone();
+ matchSpan.setPayloads(computeMatchPayload(
+ adjacentSpans, 0, endIndex - 1));
+ matchList.add(matchSpan);
+ } catch (CloneNotSupportedException e) {
+ e.printStackTrace();
+ }
+ } else {
+ endSpan = adjacentSpans.get(endIndex);
+ matchSpan = new CandidateSpan(startSpan.getStart(),
+ endSpan.getEnd(), startSpan.getDoc(),
+ computeMatchCost(adjacentSpans, 0, endIndex),
+ computeMatchPayload(adjacentSpans, 0, endIndex));
+ //System.out.println("c:"+matchSpan.getCost() +" p:"+ matchSpan.getPayloads().size());
+ //System.out.println(startSpan.getStart() +","+endSpan.getEnd());
+ matchList.add(matchSpan);
+ }
+ j++;
+ }
+ }
+
+ Collections.sort(matchList);
+ }
+
+ /**
+     * Creates payloads by collecting the payloads of all adjacent spans in the
+     * given list whose index lies between the start and end index (both
+     * inclusive).
+ *
+ * @param adjacentSpans a list of adjacentSpans
+ * @param start the start index representing the first adjacent span in the
+ * list to be computed
+ * @param end the end index representing the last adjacent span in the list
+ * to be computed
+ * @return payloads
+ */
+ private Collection<byte[]> computeMatchPayload(
+ List<CandidateSpan> adjacentSpans, int start, int end) {
+ Collection<byte[]> payload = new ArrayList<byte[]>();
+ for (int i = start; i <= end; i++) {
+ payload.addAll(adjacentSpans.get(i).getPayloads());
+ }
+ return payload;
+ }
+
+ /**
+     * Computes the matchcost by summing the costs of the adjacent spans between
+     * the start and end index (both inclusive) in the given list.
+ *
+ * @param adjacentSpans a list of adjacent spans
+ * @param start the start index
+ * @param end the end index
+     * @return the sum of the costs of the adjacent spans from start to end
+ */
+ private long computeMatchCost(List<CandidateSpan> adjacentSpans, int start,
+ int end) {
+ long matchCost = 0;
+ for (int i = start; i <= end; i++) {
+ matchCost += adjacentSpans.get(i).getCost();
+ }
+ return matchCost;
+ }
+
+ /**
+ * Sets properties for the current match from the given candidate span.
+ *
+ * @param candidateSpan the match candidate span
+ * @throws IOException
+ */
+ private void setMatchProperties(CandidateSpan candidateSpan)
+ throws IOException {
+ matchDocNumber = candidateSpan.getDoc();
+ matchStartPosition = candidateSpan.getStart();
+ matchEndPosition = candidateSpan.getEnd();
+ if (collectPayloads && candidateSpan.getPayloads() != null) {
+ matchPayload.addAll(candidateSpan.getPayloads());
+ }
+ }
+
+ @Override
+ public boolean skipTo(int target) throws IOException {
+ if (hasMoreSpans && firstSpans.doc() < target) {
+ if (!firstSpans.skipTo(target)) {
+ hasMoreSpans = false;
+ return false;
+ }
+ }
+ matchList.clear();
+ return advance();
+ }
+
+ @Override
+ public long cost() {
+ return matchCost;
+ }
}