Added javadoc comments
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
index 98dd204..2a0c80e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
@@ -18,7 +18,10 @@
* SpanExpansionQuery makes a span longer by stretching out the start or the end
* position of the span. The constraints of the expansion, such as how large the
* expansion should be (min and max position) and the direction of the expansion
- * with respect to the original span, are specified in ExpansionConstraint.
+ * with respect to the original span, are specified in ExpansionConstraint. The
+ * direction is designated with the sign of a number, namely a negative number
+ * signifies the left direction, and a positive number (including 0) signifies
+ * the right direction.
*
* <pre>
* SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "s:lightning"));
@@ -50,7 +53,26 @@
*
* <pre>
* [orth=the][orth!=lightning] "the" must not be followed by "lightning"
- * [pos!=ADJ]{1,2}[orth=lightning] one or two adjectives cannot precedes "lightning"
+ * [pos!=ADJ]{1,2}[orth=jacket] one or two adjectives cannot precede "jacket"
+ * </pre>
+ *
+ * The SpanExpansionQuery for the latter Poliqarp query, expanding to the left
+ * of "jacket", is:
+ *
+ * <pre>
+ * SpanTermQuery notQuery = new SpanTermQuery(new Term("tokens", "tt:p:/ADJ"));
+ * SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "s:jacket"));
+ * SpanExpansionQuery seq = new SpanExpansionQuery(stq, notQuery, 1, 2, -1, true);
+ * </pre>
+ *
+ * Matches and non matches example:
+ *
+ * <pre>
+ * [a jacket] match
+ * [such a jacket] non match, where such is an ADJ
+ * [leather jacket] non match
+ * [black leather jacket] non match
+ * [large black leather jacket] non match
* </pre>
*
* The positions of the expansion parts can be stored in payloads by using a
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java
index 5cc4d28..b8ab9fe 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java
@@ -14,108 +14,172 @@
import de.ids_mannheim.korap.query.spans.MultipleDistanceSpans;
-/** Match two spans with respect to a list of distance constraints.
- * No repetition of constraints of the same type is allowed. For example,
- * there must only exactly one constraint for word/token-based distance.
- *
- * @author margaretha
+/**
+ * SpanMultipleDistanceQuery matches two spans with respect to a list of
+ * distance constraints. No repetition of constraints of the same unit type
+ * (e.g. word, sentence, paragraph) is allowed. For example, there must be
+ * exactly one constraint for word/token-based distance. A SpanDistanceQuery is
+ * created for each constraint.<br />
+ * <br />
+ * Examples:
+ * <ul>
+ *
+ * <li>
+ * Search two terms x and y which are separated by minimum two and maximum three
+ * other words within the same sentence. The order of x and y does not matter.
+ *
+ * <pre>
+ * List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>();
+ * constraints.add(new DistanceConstraint(2, 3, false, false));
+ * constraints.add(new DistanceConstraint(new SpanElementQuery("tokens", "s"), 0, 0,
+ * false, false));
+ *
+ * SpanMultipleDistanceQuery mdq = new SpanMultipleDistanceQuery(x, y, constraints,
+ * false, true);
+ * </pre>
+ *
+ * </li>
+ *
+ * <li>
+ * Search term x which does <em>not</em> occur with term y in minimum two and
+ * maximum three other words and <em>not</em> in the same sentence. X must
+ * precede y.
+ *
+ * <pre>
+ * List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>();
+ * constraints.add(new DistanceConstraint(2, 3, false, true));
+ * constraints.add(new DistanceConstraint(new SpanElementQuery("tokens", "s"), 0, 0,
+ * false, true));
+ *
+ * SpanMultipleDistanceQuery mdq = new SpanMultipleDistanceQuery(x, y, constraints,
+ * true, true);
+ * </pre>
+ *
+ * </li>
+ * </ul>
+ *
+ * @author margaretha
* */
-public class SpanMultipleDistanceQuery extends SimpleSpanQuery{
-
- private List<DistanceConstraint> constraints;
- private boolean isOrdered;
- private String spanName;
-
- public SpanMultipleDistanceQuery(SpanQuery firstClause, SpanQuery secondClause,
- List<DistanceConstraint> constraints, boolean isOrdered,
- boolean collectPayloads) {
- super(firstClause, secondClause, collectPayloads);
- this.constraints = constraints;
- this.isOrdered = isOrdered;
- spanName = "spanMultipleDistance";
- }
+public class SpanMultipleDistanceQuery extends SimpleSpanQuery {
- @Override
- public SpanMultipleDistanceQuery clone() {
- SpanMultipleDistanceQuery query = new SpanMultipleDistanceQuery(
- (SpanQuery) firstClause.clone(),
- (SpanQuery) secondClause.clone(),
- this.constraints,
- this.isOrdered,
- collectPayloads
- );
-
- query.setBoost(getBoost());
- return query;
- }
-
- @Override
- public String toString(String field) {
- StringBuilder sb = new StringBuilder();
- sb.append(this.spanName);
- sb.append("(");
- sb.append(firstClause.toString(field));
- sb.append(", ");
- sb.append(secondClause.toString(field));
- sb.append(", ");
- sb.append("[");
-
- DistanceConstraint c;
- int size = constraints.size();
- for (int i=0; i < size; i++){
- c = constraints.get(i);
- sb.append("(");
- sb.append(c.getUnit());
- sb.append("[");
- sb.append(c.getMinDistance());
- sb.append(":");
- sb.append(c.getMaxDistance());
- sb.append("], ");
- sb.append(c.isOrdered() ? "ordered, " : "notOrdered, ");
- sb.append(c.isExclusion() ? "excluded)]" : "notExcluded)");
- if (i < size-1) sb.append(", ");
- }
- sb.append("])");
- sb.append(ToStringUtils.boost(getBoost()));
- return sb.toString();
+ private List<DistanceConstraint> constraints;
+ private boolean isOrdered;
+ private String spanName;
+
+ /**
+ * Constructs a SpanMultipleDistanceQuery for the two given SpanQueries.
+ *
+ * @param firstClause the first SpanQuery
+ * @param secondClause the second SpanQuery
+ * @param constraints the list of distance constraints
+ * @param isOrdered a boolean representing the value <code>true</code>, if
+ * the firstspans must occur before the secondspans, otherwise
+ * <code>false</code>.
+ * @param collectPayloads a boolean flag representing the value
+ * <code>true</code> if payloads are to be collected, otherwise
+ * <code>false</code>.
+ */
+ public SpanMultipleDistanceQuery(SpanQuery firstClause,
+ SpanQuery secondClause, List<DistanceConstraint> constraints,
+ boolean isOrdered, boolean collectPayloads) {
+ super(firstClause, secondClause, collectPayloads);
+ this.constraints = constraints;
+ this.isOrdered = isOrdered;
+ spanName = "spanMultipleDistance";
}
-
- /** Filter the span matches of each constraint, returning only the matches
- * meeting all the constraints.
- * @return only the span matches meeting all the constraints.
- * */
- @Override
- public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
-
- SpanDistanceQuery sdq,sdq2;
- Spans ds,ds2;
- MultipleDistanceSpans mds = null;
- boolean exclusion;
-
- sdq = new SpanDistanceQuery(firstClause, secondClause, constraints.get(0), collectPayloads);
- ds = sdq.getSpans(context, acceptDocs, termContexts);
-
- for (int i=1; i< constraints.size(); i++){
- sdq2 = new SpanDistanceQuery(firstClause, secondClause, constraints.get(i),
- collectPayloads);
- ds2 = sdq2.getSpans(context, acceptDocs, termContexts);
-
- exclusion = sdq.isExclusion() && sdq2.isExclusion();
- mds = new MultipleDistanceSpans(this, context, acceptDocs,
- termContexts, ds, ds2, isOrdered, exclusion);
- ds = mds;
- }
-
- return mds;
- }
- public List<DistanceConstraint> getConstraints() {
- return constraints;
- }
+ @Override
+ public SpanMultipleDistanceQuery clone() {
+ SpanMultipleDistanceQuery query = new SpanMultipleDistanceQuery(
+ (SpanQuery) firstClause.clone(),
+ (SpanQuery) secondClause.clone(), this.constraints,
+ this.isOrdered, collectPayloads);
- public void setConstraints(List<DistanceConstraint> constraints) {
- this.constraints = constraints;
- }
-
+ query.setBoost(getBoost());
+ return query;
+ }
+
+ @Override
+ public String toString(String field) {
+ StringBuilder sb = new StringBuilder();
+ sb.append(this.spanName);
+ sb.append("(");
+ sb.append(firstClause.toString(field));
+ sb.append(", ");
+ sb.append(secondClause.toString(field));
+ sb.append(", ");
+ sb.append("[");
+
+ DistanceConstraint c;
+ int size = constraints.size();
+ for (int i = 0; i < size; i++) {
+ c = constraints.get(i);
+ sb.append("(");
+ sb.append(c.getUnit());
+ sb.append("[");
+ sb.append(c.getMinDistance());
+ sb.append(":");
+ sb.append(c.getMaxDistance());
+ sb.append("], ");
+ sb.append(c.isOrdered() ? "ordered, " : "notOrdered, ");
+ sb.append(c.isExclusion() ? "excluded)]" : "notExcluded)");
+ if (i < size - 1)
+ sb.append(", ");
+ }
+ sb.append("])");
+ sb.append(ToStringUtils.boost(getBoost()));
+ return sb.toString();
+ }
+
+ /**
+ * Filters the span matches of each constraint, returning only the matches
+ * meeting all the constraints.
+ *
+ * @return only the span matches meeting all the constraints.
+ * */
+ @Override
+ public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+
+ SpanDistanceQuery sdq, sdq2;
+ Spans ds, ds2;
+ MultipleDistanceSpans mds = null;
+ boolean exclusion;
+
+ sdq = new SpanDistanceQuery(firstClause, secondClause,
+ constraints.get(0), collectPayloads);
+ ds = sdq.getSpans(context, acceptDocs, termContexts);
+
+ for (int i = 1; i < constraints.size(); i++) {
+ sdq2 = new SpanDistanceQuery(firstClause, secondClause,
+ constraints.get(i), collectPayloads);
+ ds2 = sdq2.getSpans(context, acceptDocs, termContexts);
+
+ exclusion = sdq.isExclusion() && sdq2.isExclusion();
+ mds = new MultipleDistanceSpans(this, context, acceptDocs,
+ termContexts, ds, ds2, isOrdered, exclusion);
+ ds = mds;
+ }
+
+ return mds;
+ }
+
+ /**
+ * Returns the list of distance constraints.
+ *
+ * @return the list of distance constraints
+ */
+ public List<DistanceConstraint> getConstraints() {
+ return constraints;
+ }
+
+ /**
+ * Sets the list of distance constraints.
+ *
+ * @param constraints the list of distance constraints
+ */
+ public void setConstraints(List<DistanceConstraint> constraints) {
+ this.constraints = constraints;
+ }
+
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
index 79c6db4..2f3857d 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedExclusionSpans.java
@@ -14,236 +14,304 @@
import de.ids_mannheim.korap.query.SpanExpansionQuery;
-/** Spans expanded with min m tokens and max n tokens, and throughout all
- * the expansions do not contain the notClause.
+/**
+ * Enumeration of Spans expanded with minimum <code>m</code> and maximum
+ * <code>n</code> tokens, and throughout all the expansions do <em>not</em>
+ * contain a specific Spans (notClause). See examples in
+ * {@link SpanExpansionQuery}.
*
- * @author margaretha
+ * The expansion direction is designated with the sign of a number, namely a
+ * negative number signifies the expansion to the <em>left</em> of the original
+ * span, and a positive number (including 0) signifies the expansion to the
+ * <em>right</em> of the original span.
+ *
+ * @author margaretha
* */
-public class ExpandedExclusionSpans extends SimpleSpans{
-
- private int min, max;
- private int direction;
- private byte classNumber;
- private List<CandidateSpan> candidateSpans;
- private boolean hasMoreNotClause;
- private Spans notClause;
-
- private long matchCost;
-
- public ExpandedExclusionSpans(SpanExpansionQuery spanExpansionQuery,
- AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
- super(spanExpansionQuery, context, acceptDocs, termContexts);
-
- if (spanExpansionQuery.getSecondClause() == null){
- throw new IllegalArgumentException("The SpanExpansionQuery " +
- "is not valid. The spanquery to exclude (notClause) cannot " +
- "be null.");
- }
-
- /*if (spanExpansionQuery.getMin() < 1){
- throw new IllegalArgumentException("Min occurrence for notClause " +
- "must be at least 1.");
- }*/
-
- this.min = spanExpansionQuery.getMin();
- this.max = spanExpansionQuery.getMax();
- this.direction = spanExpansionQuery.getDirection();
- this.classNumber = spanExpansionQuery.getClassNumber();
-
- this.notClause = secondSpans;
- this.hasMoreNotClause = notClause.next();
-
- candidateSpans = new ArrayList<CandidateSpan>();
- hasMoreSpans = firstSpans.next();
- }
+public class ExpandedExclusionSpans extends SimpleSpans {
- @Override
- public boolean next() throws IOException {
- matchPayload.clear();
- isStartEnumeration = false;
- return advance();
- }
+ private int min, max;
+ private int direction;
+ private byte classNumber;
+ private List<CandidateSpan> candidateSpans;
+ private boolean hasMoreNotClause;
+ private Spans notClause;
- private boolean advance() throws IOException {
- while (hasMoreSpans || candidateSpans.size() > 0){
- if (candidateSpans.size() > 0){
- // set a candidate span as a match
- CandidateSpan cs = candidateSpans.get(0);
- matchDocNumber = cs.getDoc();
- matchStartPosition = cs.getStart();
- matchEndPosition = cs.getEnd();
- matchPayload = cs.getPayloads();
- matchCost = cs.getCost() + notClause.cost();
- candidateSpans.remove(0);
- return true;
- }
- else if (!hasMoreNotClause || notClause.doc() > firstSpans.doc()){
- generateCandidates(min, max, direction);
- hasMoreSpans = firstSpans.next();
- }
- else findMatches();
- }
- return false;
- }
-
- private void findMatches() throws IOException {
- while (hasMoreNotClause && notClause.doc() <= firstSpans.doc()){
- if (notClause.doc() == firstSpans.doc()){
- if (direction < 0 ){ // left
- expandLeft();
- } // right
- else { expandRight(); }
- break;
- }
- else if (!notClause.next()) hasMoreNotClause = false;
- }
- }
-
- private void expandLeft() throws IOException{
- //int counter = max;
- int maxPos = max;
- CandidateSpan lastNotClause = null;
- while (hasMoreNotClause &&
- notClause.start() < firstSpans.start()){
-
- // between max and firstspan.start()
- if (notClause.start() >= firstSpans.start() - maxPos){
- maxPos = firstSpans.start() - notClause.start() -1;
- lastNotClause = new CandidateSpan(notClause);
- //counter--;
- }
- if (!notClause.next()) hasMoreNotClause = false;
- }
-
- // if a notClause is between max and firstspan.start,
- // then maxPos = last NotClause pos -1
- generateCandidates(min, maxPos, direction);
-
- if (lastNotClause != null)
- while ((hasMoreSpans = firstSpans.next())
- // the next notClause is not in between max and firstspan.start()
- && notClause.start() > firstSpans.start()
- // the last notClause is in between max and firstspan.start()
- && lastNotClause.getStart() < firstSpans.start()
- && lastNotClause.getStart() >= firstSpans.start() - max
- ){
-
- maxPos = firstSpans.start() - lastNotClause.getStart() -1;
- generateCandidates(min, maxPos, direction);
- }
- else hasMoreSpans = firstSpans.next();
- }
-
- private void expandRight() throws IOException{
- int expansionEnd = firstSpans.end() + max;
- int maxPos = max;
- boolean isFound = false;
-
- CandidateSpan firstNotClause = null;
- //System.out.println("main start:"+firstSpans.start());
- while (hasMoreNotClause && notClause.start() < expansionEnd){
- // between firstspan.end() and expansionEnd
- if (!isFound && notClause.start() >= firstSpans.end()){
- maxPos = notClause.start() - firstSpans.end() -1;
- firstNotClause = new CandidateSpan(notClause);
- isFound = true;
- }
- if (!notClause.next()) hasMoreNotClause = false;
- }
- // if a notClause is between firstSpan.end and max
- // then maxPos = the first notClause pos -1
- generateCandidates(min, maxPos, direction);
-
- if (firstNotClause !=null){
- while ((hasMoreSpans = firstSpans.next())
- // in between
- && firstNotClause.getStart() < firstSpans.end() + max
- && firstNotClause.getStart() >= firstSpans.end())
- {
- //System.out.println("first start:"+firstNotClause.getStart()+", main start:"+firstSpans.start());
- maxPos = firstNotClause.getStart() - firstSpans.end() -1;
- generateCandidates(min, maxPos, direction);
- }
- }
- else hasMoreSpans = firstSpans.next();
- }
-
- private void generateCandidates(int minPos, int maxPos, int direction)
- throws IOException {
- int counter;
- int start, end;
- CandidateSpan cs;
- if (direction < 0 ) { // left
- counter = maxPos;
- while (counter >= min){
- start = Math.max(0,firstSpans.start() - counter);
- if (start > -1 ){
- end = firstSpans.end();
- //System.out.println(start+","+end);
- cs = new CandidateSpan(start, end, firstSpans.doc(),
- firstSpans.cost(),
- calculatePayload(start,firstSpans.start())
- );
- candidateSpans.add(cs);
- }
- counter --;
- }
- }
- else { // right
- counter = minPos;
- while(counter <= maxPos){
- start = firstSpans.start();
- end = firstSpans.end() + counter;
- //System.out.println(start+","+end);
-
- cs = new CandidateSpan(start, end, firstSpans.doc(),
- firstSpans.cost(),
- calculatePayload(firstSpans.end(), end)
- );
- candidateSpans.add(cs);
- counter++;
- }
- }
- }
-
- private ArrayList<byte[]> calculatePayload(int start, int end)
- throws IOException{
-
- ArrayList<byte[]> payload = new ArrayList<byte[]>();
-
- if (firstSpans.isPayloadAvailable()){
- payload.addAll(firstSpans.getPayload());
- }
- if (classNumber > 0 ){
- //System.out.println("Extension offsets "+start+","+end);
- payload.add(calculateExtensionOffsets(start, end));
- }
- return payload;
- }
-
- private byte[] calculateExtensionOffsets(int start, int end) {
- ByteBuffer buffer = ByteBuffer.allocate(9);
- buffer.putInt(start);
- buffer.putInt(end);
- buffer.put(classNumber);
- return buffer.array();
- }
-
- @Override
- public boolean skipTo(int target) throws IOException {
- if (hasMoreSpans && (firstSpans.doc() < target)){
- if (!firstSpans.skipTo(target)){
- hasMoreSpans = false;
- return false;
- }
- }
- matchPayload.clear();
- return advance();
- }
+ private long matchCost;
- @Override
- public long cost() {
- return matchCost;
- }
+ /**
+ * Constructs ExpandedExclusionSpans from the given
+ * {@link SpanExpansionQuery}.
+ *
+ * @param spanExpansionQuery a SpanExpansionQuery
+ * @param context
+ * @param acceptDocs
+ * @param termContexts
+ * @throws IOException
+ */
+ public ExpandedExclusionSpans(SpanExpansionQuery spanExpansionQuery,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ super(spanExpansionQuery, context, acceptDocs, termContexts);
+
+ if (spanExpansionQuery.getSecondClause() == null) {
+ throw new IllegalArgumentException(
+ "The SpanExpansionQuery "
+ + "is not valid. The spanquery to exclude (notClause) cannot "
+ + "be null.");
+ }
+
+ /*
+ * if (spanExpansionQuery.getMin() < 1){ throw new
+ * IllegalArgumentException("Min occurrence for notClause " +
+ * "must be at least 1."); }
+ */
+
+ this.min = spanExpansionQuery.getMin();
+ this.max = spanExpansionQuery.getMax();
+ this.direction = spanExpansionQuery.getDirection();
+ this.classNumber = spanExpansionQuery.getClassNumber();
+
+ this.notClause = secondSpans;
+ this.hasMoreNotClause = notClause.next();
+
+ candidateSpans = new ArrayList<CandidateSpan>();
+ hasMoreSpans = firstSpans.next();
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ matchPayload.clear();
+ isStartEnumeration = false;
+ return advance();
+ }
+
+ /**
+ * Advances the ExpandedExclusionSpans to the next match.
+ *
+ * @return <code>true</code> if a match is found, <code>false</code>
+ * otherwise.
+ * @throws IOException
+ */
+ private boolean advance() throws IOException {
+ while (hasMoreSpans || candidateSpans.size() > 0) {
+ if (candidateSpans.size() > 0) {
+ // set a candidate span as a match
+ CandidateSpan cs = candidateSpans.get(0);
+ matchDocNumber = cs.getDoc();
+ matchStartPosition = cs.getStart();
+ matchEndPosition = cs.getEnd();
+ matchPayload = cs.getPayloads();
+ matchCost = cs.getCost() + notClause.cost();
+ candidateSpans.remove(0);
+ return true;
+ } else if (!hasMoreNotClause || notClause.doc() > firstSpans.doc()) {
+ generateCandidates(min, max, direction);
+ hasMoreSpans = firstSpans.next();
+ } else
+ findMatches();
+ }
+ return false;
+ }
+
+ /**
+ * Finds matches by expanding the firstspans either to the left or to the
+ * right.
+ *
+ * @throws IOException
+ */
+ private void findMatches() throws IOException {
+ while (hasMoreNotClause && notClause.doc() <= firstSpans.doc()) {
+ if (notClause.doc() == firstSpans.doc()) {
+ if (direction < 0) { // left
+ expandLeft();
+ } // right
+ else {
+ expandRight();
+ }
+ break;
+ } else if (!notClause.next())
+ hasMoreNotClause = false;
+ }
+ }
+
+ /**
+ * Expands the firstspans to the left.
+ *
+ * @throws IOException
+ */
+ private void expandLeft() throws IOException {
+ //int counter = max;
+ int maxPos = max;
+ CandidateSpan lastNotClause = null;
+ while (hasMoreNotClause && notClause.start() < firstSpans.start()) {
+
+ // between max and firstspan.start()
+ if (notClause.start() >= firstSpans.start() - maxPos) {
+ maxPos = firstSpans.start() - notClause.start() - 1;
+ lastNotClause = new CandidateSpan(notClause);
+ //counter--;
+ }
+ if (!notClause.next())
+ hasMoreNotClause = false;
+ }
+
+ // if a notClause is between max and firstspan.start,
+ // then maxPos = last NotClause pos -1
+ generateCandidates(min, maxPos, direction);
+
+ if (lastNotClause != null)
+ while ((hasMoreSpans = firstSpans.next())
+ // the next notClause is not in between max and firstspan.start()
+ && notClause.start() > firstSpans.start()
+ // the last notClause is in between max and firstspan.start()
+ && lastNotClause.getStart() < firstSpans.start()
+ && lastNotClause.getStart() >= firstSpans.start() - max) {
+
+ maxPos = firstSpans.start() - lastNotClause.getStart() - 1;
+ generateCandidates(min, maxPos, direction);
+ }
+ else
+ hasMoreSpans = firstSpans.next();
+ }
+
+ /**
+ * Expands the firstspans to the right.
+ *
+ * @throws IOException
+ */
+ private void expandRight() throws IOException {
+ int expansionEnd = firstSpans.end() + max;
+ int maxPos = max;
+ boolean isFound = false;
+
+ CandidateSpan firstNotClause = null;
+ //System.out.println("main start:"+firstSpans.start());
+ while (hasMoreNotClause && notClause.start() < expansionEnd) {
+ // between firstspan.end() and expansionEnd
+ if (!isFound && notClause.start() >= firstSpans.end()) {
+ maxPos = notClause.start() - firstSpans.end() - 1;
+ firstNotClause = new CandidateSpan(notClause);
+ isFound = true;
+ }
+ if (!notClause.next())
+ hasMoreNotClause = false;
+ }
+ // if a notClause is between firstSpan.end and max
+ // then maxPos = the first notClause pos -1
+ generateCandidates(min, maxPos, direction);
+
+ if (firstNotClause != null) {
+ while ((hasMoreSpans = firstSpans.next())
+ // in between
+ && firstNotClause.getStart() < firstSpans.end() + max
+ && firstNotClause.getStart() >= firstSpans.end()) {
+ //System.out.println("first start:"+firstNotClause.getStart()+", main start:"+firstSpans.start());
+ maxPos = firstNotClause.getStart() - firstSpans.end() - 1;
+ generateCandidates(min, maxPos, direction);
+ }
+ } else
+ hasMoreSpans = firstSpans.next();
+ }
+
+ /**
+ * Creates new candidate matches for the given direction, minimum and
+ * maximum positions.
+ *
+ * @param minPos minimum position
+ * @param maxPos maximum position
+ * @param direction the expansion direction
+ * @throws IOException
+ */
+ private void generateCandidates(int minPos, int maxPos, int direction)
+ throws IOException {
+ int counter;
+ int start, end;
+ CandidateSpan cs;
+ if (direction < 0) { // left
+ counter = maxPos;
+ while (counter >= min) {
+ start = Math.max(0, firstSpans.start() - counter);
+ if (start > -1) {
+ end = firstSpans.end();
+ //System.out.println(start+","+end);
+ cs = new CandidateSpan(start, end, firstSpans.doc(),
+ firstSpans.cost(), createPayloads(start,
+ firstSpans.start()));
+ candidateSpans.add(cs);
+ }
+ counter--;
+ }
+ } else { // right
+ counter = minPos;
+ while (counter <= maxPos) {
+ start = firstSpans.start();
+ end = firstSpans.end() + counter;
+ //System.out.println(start+","+end);
+
+ cs = new CandidateSpan(start, end, firstSpans.doc(),
+ firstSpans.cost(), createPayloads(firstSpans.end(),
+ end));
+ candidateSpans.add(cs);
+ counter++;
+ }
+ }
+ }
+
+ /**
+ * Creates payloads for a candidate match by copying the payloads of the
+ * firstspans, and adds expansion offsets with the given start and end
+ * positions to the payloads, if the class number is set.
+ *
+ * @param start the start offset
+ * @param end the end offset
+ * @return payloads
+ * @throws IOException
+ */
+ private ArrayList<byte[]> createPayloads(int start, int end)
+ throws IOException {
+
+ ArrayList<byte[]> payload = new ArrayList<byte[]>();
+
+ if (firstSpans.isPayloadAvailable()) {
+ payload.addAll(firstSpans.getPayload());
+ }
+ if (classNumber > 0) {
+ //System.out.println("Extension offsets "+start+","+end);
+ payload.add(createExtensionPayloads(start, end));
+ }
+ return payload;
+ }
+
+ /**
+ * Generates a byte array of extension offsets and class number to be added
+ * into the payloads.
+ *
+ * @param start the start offset
+ * @param end the end offset
+ * @return a byte array of extension offsets and class number
+ */
+ private byte[] createExtensionPayloads(int start, int end) {
+ ByteBuffer buffer = ByteBuffer.allocate(9);
+ buffer.putInt(start);
+ buffer.putInt(end);
+ buffer.put(classNumber);
+ return buffer.array();
+ }
+
+ @Override
+ public boolean skipTo(int target) throws IOException {
+ if (hasMoreSpans && (firstSpans.doc() < target)) {
+ if (!firstSpans.skipTo(target)) {
+ hasMoreSpans = false;
+ return false;
+ }
+ }
+ matchPayload.clear();
+ return advance();
+ }
+
+ @Override
+ public long cost() {
+ return matchCost;
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
index 81032a6..4e4f551 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
@@ -13,134 +13,178 @@
import de.ids_mannheim.korap.query.SpanExpansionQuery;
-/** Spans expanded with min m tokens and max n tokens.
+/**
+ * Enumeration of spans expanded with minimum <code>m</code> and maximum
+ * <code>n</code> token positions to either left or right direction from the
+ * original spans. See examples in {@link SpanExpansionQuery}.
+ *
* @author margaretha
* */
-public class ExpandedSpans extends SimpleSpans{
-
- private int min, max;
- private byte classNumber;
- private int direction;
- private List<CandidateSpan> candidateSpans;
- private long matchCost;
-
- public ExpandedSpans(SpanExpansionQuery spanExpansionQuery,
- AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
- super(spanExpansionQuery, context, acceptDocs, termContexts);
- this.min = spanExpansionQuery.getMin();
- this.max = spanExpansionQuery.getMax();
- this.direction = spanExpansionQuery.getDirection();
- this.classNumber = spanExpansionQuery.getClassNumber();
-
- candidateSpans = new ArrayList<CandidateSpan>();
- hasMoreSpans = true;
- }
+public class ExpandedSpans extends SimpleSpans {
- @Override
- public boolean next() throws IOException {
- matchPayload.clear();
- isStartEnumeration = false;
- if (candidateSpans.size() == 0 && hasMoreSpans)
- hasMoreSpans = firstSpans.next();
- return advance();
- }
+ private int min, max;
+ private byte classNumber;
+ private int direction;
+ private List<CandidateSpan> candidateSpans;
+ private long matchCost;
- private boolean advance() throws IOException {
- while (candidateSpans.size() > 0 || hasMoreSpans) {
- if (candidateSpans.size() > 0 ){
- setMatch(candidateSpans.get(0));
- candidateSpans.remove(0);
- return true;
- }
- else { setCandidateList(); }
- }
- return false;
- }
-
- private void setCandidateList() throws IOException {
- CandidateSpan cs;
- int counter, start, end;
-
- if (direction < 0 ){
- counter = max;
- while (counter >= min ){
- start = Math.max(0,firstSpans.start() - counter);
- cs = new CandidateSpan(
- start,
- firstSpans.end(),
- firstSpans.doc(),
- firstSpans.cost(),
- calculatePayload(start, firstSpans.start())
- );
-
- candidateSpans.add(cs);
- counter--;
- }
- }
- else{
- counter = min;
- while (counter <= max){
- // TODO: How do I know if the end is already too far (over the end of the doc)?
- end = firstSpans.end() + counter;
- cs = new CandidateSpan(
- firstSpans.start(),
- end,
- firstSpans.doc(),
- firstSpans.cost(),
- calculatePayload(firstSpans.end(), end)
- );
- candidateSpans.add(cs);
- counter++;
- }
- }
- }
-
- private ArrayList<byte[]> calculatePayload(int start, int end)
- throws IOException{
-
- ArrayList<byte[]> payload = new ArrayList<byte[]>();
- if (firstSpans.isPayloadAvailable()){
- payload.addAll(firstSpans.getPayload());
- }
- if (classNumber > 0 ){
- //System.out.println("Extension offsets "+start+","+end);
- payload.add(calculateExtensionOffsets(start, end));
- }
- return payload;
- }
-
- private byte[] calculateExtensionOffsets(int start, int end) {
- ByteBuffer buffer = ByteBuffer.allocate(9);
- buffer.putInt(start);
- buffer.putInt(end);
- buffer.put(classNumber);
- return buffer.array();
- }
+ /**
+ * Constructs ExpandedSpans from the given {@link SpanExpansionQuery}.
+ *
+ * @param spanExpansionQuery a SpanExpansionQuery
+ * @param context
+ * @param acceptDocs
+ * @param termContexts
+ * @throws IOException
+ */
+ public ExpandedSpans(SpanExpansionQuery spanExpansionQuery,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ super(spanExpansionQuery, context, acceptDocs, termContexts);
+ this.min = spanExpansionQuery.getMin();
+ this.max = spanExpansionQuery.getMax();
+ this.direction = spanExpansionQuery.getDirection();
+ this.classNumber = spanExpansionQuery.getClassNumber();
- private void setMatch(CandidateSpan candidateSpan) {
- matchDocNumber = candidateSpan.getDoc();
- matchStartPosition = candidateSpan.getStart();
- matchEndPosition = candidateSpan.getEnd();
- matchPayload = candidateSpan.getPayloads();
- matchCost = candidateSpan.getCost();
- }
+ candidateSpans = new ArrayList<CandidateSpan>();
+ hasMoreSpans = true;
+ }
- @Override
- public boolean skipTo(int target) throws IOException {
- if (hasMoreSpans && (firstSpans.doc() < target)){
- if (!firstSpans.skipTo(target)){
- hasMoreSpans = false;
- return false;
- }
- }
- matchPayload.clear();
- return advance();
- }
+ @Override
+ public boolean next() throws IOException {
+ matchPayload.clear();
+ isStartEnumeration = false;
+ if (candidateSpans.size() == 0 && hasMoreSpans)
+ hasMoreSpans = firstSpans.next();
+ return advance();
+ }
- @Override
- public long cost() {
- return matchCost;
- }
+ /**
+ * Advances the ExpandedSpans to the next match by setting the first element
+ * in the candidateList as the match. Sets the candidateList if it is empty.
+ *
+ * @return <code>true</code> if a match is found, <code>false</code>
+ * otherwise.
+ * @throws IOException
+ */
+ private boolean advance() throws IOException {
+ while (candidateSpans.size() > 0 || hasMoreSpans) {
+ if (candidateSpans.size() > 0) {
+ setMatch(candidateSpans.get(0));
+ candidateSpans.remove(0);
+ return true;
+ } else {
+ setCandidateList();
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Sets the candidateList by adding new candidate match spans for all
+ * possible expansion with respect to the expansion length (min,max)
+ * variables.
+ *
+ * @throws IOException
+ */
+ private void setCandidateList() throws IOException {
+ CandidateSpan cs;
+ int counter, start, end;
+
+ if (direction < 0) {
+ counter = max;
+ while (counter >= min) {
+ start = Math.max(0, firstSpans.start() - counter);
+ cs = new CandidateSpan(start, firstSpans.end(),
+ firstSpans.doc(), firstSpans.cost(), createPayloads(
+ start, firstSpans.start()));
+
+ candidateSpans.add(cs);
+ counter--;
+ }
+ } else {
+ counter = min;
+ while (counter <= max) {
+ // TODO: How do I know if the end is already too far (over the end of the doc)?
+ end = firstSpans.end() + counter;
+ cs = new CandidateSpan(firstSpans.start(), end,
+ firstSpans.doc(), firstSpans.cost(), createPayloads(
+ firstSpans.end(), end));
+ candidateSpans.add(cs);
+ counter++;
+ }
+ }
+ }
+
+ /**
+ * Prepares the payloads for a candidate match (ExpandedSpans). If the class
+ * number is set, the extension offsets with the given start and end
+ * positions are to be stored in the payloads.
+ *
+ * @param start
+ * @param end
+ * @return the payloads for a candidate match
+ * @throws IOException
+ */
+ private ArrayList<byte[]> createPayloads(int start, int end)
+ throws IOException {
+
+ ArrayList<byte[]> payload = new ArrayList<byte[]>();
+ if (firstSpans.isPayloadAvailable()) {
+ payload.addAll(firstSpans.getPayload());
+ }
+ if (classNumber > 0) {
+ //System.out.println("Extension offsets "+start+","+end);
+ payload.add(createExtensionPayloads(start, end));
+ }
+ return payload;
+ }
+
+ /**
+ * Prepares a byte array of extension offsets with the given start and end
+ * positions and the class number, to be stored in payloads.
+ *
+ * @param start
+ * @param end
+ * @return a byte array of extension offsets and the class number
+ */
+ private byte[] createExtensionPayloads(int start, int end) {
+ ByteBuffer buffer = ByteBuffer.allocate(9);
+ buffer.putInt(start);
+ buffer.putInt(end);
+ buffer.put(classNumber);
+ return buffer.array();
+ }
+
+ /**
+ * Sets the properties of the given candidate match span as the current
+ * match (state of ExpandedSpans).
+ *
+ * @param candidateSpan
+ */
+ private void setMatch(CandidateSpan candidateSpan) {
+ matchDocNumber = candidateSpan.getDoc();
+ matchStartPosition = candidateSpan.getStart();
+ matchEndPosition = candidateSpan.getEnd();
+ matchPayload = candidateSpan.getPayloads();
+ matchCost = candidateSpan.getCost();
+ }
+
+ @Override
+ public boolean skipTo(int target) throws IOException {
+ if (hasMoreSpans && (firstSpans.doc() < target)) {
+ if (!firstSpans.skipTo(target)) {
+ hasMoreSpans = false;
+ return false;
+ }
+ }
+ matchPayload.clear();
+ return advance();
+ }
+
+ @Override
+ public long cost() {
+ return matchCost;
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java
index 0dbd06e..f2edd23 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java
@@ -8,151 +8,177 @@
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import de.ids_mannheim.korap.query.SpanMultipleDistanceQuery;
-/** Span enumeration of matches whose two sub-spans have exactly the same
- * first and second sub-sub-spans. This class basically filters the span
- * matches of its child spans.
+/**
+ * Span enumeration of matches whose two sub-spans have exactly the same first
+ * and second sub-sub-spans. To obtain these matches, the span matches of the
+ * child spans are filtered.
*
- * This accommodates distance constraint with exclusion.
- * Case 1: return the match from another non-exclusion constraint.
- * Case 2: return only the first-span when all constraints are exclusions.
- * Case 3: spans are not in the same doc
- *
- * @author margaretha
+ * MultipleDistanceSpans accommodates distance constraints with exclusion. <br />
+ * <br />
+ *
+ * This class deals with the following cases:
+ * <ol>
+ * <li>return the match from another non-exclusion constraint.</li>
+ * <li>return only the first-span when all constraints are exclusions.</li>
+ * <li>spans are not in the same doc</li>
+ * </ol>
+ *
+ * @author margaretha
* */
-public class MultipleDistanceSpans extends DistanceSpans{
+public class MultipleDistanceSpans extends DistanceSpans {
- private final static Logger log = LoggerFactory.getLogger(MultipleDistanceSpans.class);
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
+ private DistanceSpans x, y;
+ private boolean isOrdered;
- private DistanceSpans x,y;
- private boolean isOrdered;
-
- public MultipleDistanceSpans(SpanMultipleDistanceQuery query,
- AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts, Spans firstSpans,
- Spans secondSpans, boolean isOrdered, boolean exclusion)
- throws IOException {
- super(query, context, acceptDocs, termContexts);
- this.isOrdered = isOrdered;
- this.exclusion = exclusion;
- x = (DistanceSpans) firstSpans;
- y = (DistanceSpans) secondSpans;
- hasMoreSpans = x.next() && y.next();
- }
+ /**
+ * Constructs MultipleDistanceSpans for the two given Spans with the given
+ * {@link SpanMultipleDistanceQuery}.
+ *
+ * @param query a SpanMultipleDistanceQuery
+ * @param context
+ * @param acceptDocs
+ * @param termContexts
+ * @param firstSpans the firstspans
+ * @param secondSpans the secondspans
+ * @param isOrdered <code>true</code> if the spans must occur in order,
+ * <code>false</code> otherwise.
+ * @param exclusion <code>true</code> if the secondspans must <em>not</em>
+ * occur together with the firstspans.
+ * @throws IOException
+ */
+ public MultipleDistanceSpans(SpanMultipleDistanceQuery query,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts, Spans firstSpans,
+ Spans secondSpans, boolean isOrdered, boolean exclusion)
+ throws IOException {
+ super(query, context, acceptDocs, termContexts);
+ this.isOrdered = isOrdered;
+ this.exclusion = exclusion;
+ x = (DistanceSpans) firstSpans;
+ y = (DistanceSpans) secondSpans;
+ hasMoreSpans = x.next() && y.next();
+ }
- @Override
- public boolean next() throws IOException {
- isStartEnumeration=false;
- matchPayload.clear();
- return advance();
- }
-
- /** Find the next match.
- * */
- protected boolean advance() throws IOException {
- while (hasMoreSpans && ensureSameDoc(x, y)){
- if (findMatch()){
- moveForward();
- return true;
- }
- moveForward();
- }
- return false;
- }
-
- /** Find the next match of one of the sub/child-span.
- * */
- private void moveForward() throws IOException{
- if (isOrdered){
- if (x.end() < y.end() ||
- (x.end() == y.end() && x.start() < y.start()) )
- hasMoreSpans = x.next();
- else hasMoreSpans = y.next();
- }
- // The matches of unordered distance spans are ordered by the
- // start position
- else {
- if (x.start() < y.start() ||
- (x.start() == y.start() && x.end() < y.end()) )
- hasMoreSpans = x.next();
- else hasMoreSpans = y.next();
- }
- }
-
- /** Check if the sub-spans of x and y having exactly the same position.
- * This is basically an AND operation.
- * @return true iff the sub-spans are identical.
- * */
- protected boolean findMatch() throws IOException {
-
- CandidateSpan xf = x.getMatchFirstSpan();
- CandidateSpan xs = x.getMatchSecondSpan();
-
- CandidateSpan yf = y.getMatchFirstSpan();
- CandidateSpan ys = y.getMatchSecondSpan();
-
- if (x.isExclusion() || y.isExclusion()){
- if (xf.getStart() == yf.getStart() && xf.getEnd() == yf.getEnd()){
- // case 2
- if (x.isExclusion() && y.isExclusion()){
- // set x or y doesnt matter
- setMatchProperties(x,true);
- }
- // case 1
- else if (x.isExclusion()){
- // set y, the usual match
- setMatchProperties(y,true);
- }
- // case 1
- else { setMatchProperties(x,true); }
- return true;
- }
- }
- else if (xf.getStart() == yf.getStart() &&
- xf.getEnd() == yf.getEnd() &&
- xs.getStart() == ys.getStart() &&
- xs.getEnd() == ys.getEnd()){
- setMatchProperties(x,false);
- return true;
- }
- return false;
- }
-
+ @Override
+ public boolean next() throws IOException {
+ isStartEnumeration = false;
+ matchPayload.clear();
+ return advance();
+ }
- private void setMatchProperties(DistanceSpans span, boolean exclusion) {
- matchStartPosition = span.start();
- matchEndPosition = span.end();
- matchDocNumber = span.doc();
- matchPayload = span.matchPayload;
-
- setMatchFirstSpan(span.getMatchFirstSpan());
- if (!exclusion) setMatchSecondSpan(span.getMatchSecondSpan());
- if (DEBUG)
- log.trace("doc# {}, start {}, end {}",matchDocNumber,
- matchStartPosition,matchEndPosition);
- }
+ /**
+ * Finds the next match.
+ * */
+ protected boolean advance() throws IOException {
+ while (hasMoreSpans && ensureSameDoc(x, y)) {
+ if (findMatch()) {
+ moveForward();
+ return true;
+ }
+ moveForward();
+ }
+ return false;
+ }
- @Override
- public boolean skipTo(int target) throws IOException {
- if (hasMoreSpans && (y.doc() < target)){
- if (!y.skipTo(target)){
- return false;
- }
- }
- matchPayload.clear();
- isStartEnumeration=false;
- return advance();
- }
+ /**
+	 * Finds the next match of one of the sub/child-spans.
+ *
+ * @throws IOException
+ */
+ private void moveForward() throws IOException {
+ if (isOrdered) {
+ if (x.end() < y.end()
+ || (x.end() == y.end() && x.start() < y.start()))
+ hasMoreSpans = x.next();
+ else
+ hasMoreSpans = y.next();
+ }
+ // The matches of unordered distance spans are ordered by the
+ // start position
+ else {
+ if (x.start() < y.start()
+ || (x.start() == y.start() && x.end() < y.end()))
+ hasMoreSpans = x.next();
+ else
+ hasMoreSpans = y.next();
+ }
+ }
- @Override
- public long cost() {
- return x.cost() + y.cost();
- }
+ /**
+	 * Checks if the sub-spans of x and y have exactly the same position. This
+ * is basically an AND operation.
+ *
+ * @return true iff the sub-spans are identical.
+ * @throws IOException
+ */
+ protected boolean findMatch() throws IOException {
+
+ CandidateSpan xf = x.getMatchFirstSpan();
+ CandidateSpan xs = x.getMatchSecondSpan();
+
+ CandidateSpan yf = y.getMatchFirstSpan();
+ CandidateSpan ys = y.getMatchSecondSpan();
+
+ if (x.isExclusion() || y.isExclusion()) {
+ if (xf.getStart() == yf.getStart() && xf.getEnd() == yf.getEnd()) {
+ // case 2
+ if (x.isExclusion() && y.isExclusion()) {
+				// set x or y doesn't matter
+ setMatchProperties(x, true);
+ }
+ // case 1
+ else if (x.isExclusion()) {
+ // set y, the usual match
+ setMatchProperties(y, true);
+ }
+ // case 1
+ else {
+ setMatchProperties(x, true);
+ }
+ return true;
+ }
+ } else if (xf.getStart() == yf.getStart() && xf.getEnd() == yf.getEnd()
+ && xs.getStart() == ys.getStart() && xs.getEnd() == ys.getEnd()) {
+ setMatchProperties(x, false);
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * @param span a DistanceSpan
+ * @param exclusion <code>true</code> if the spans must <em>not</em> occur
+ * together, <code>false</code> otherwise.
+ */
+ private void setMatchProperties(DistanceSpans span, boolean exclusion) {
+ matchStartPosition = span.start();
+ matchEndPosition = span.end();
+ matchDocNumber = span.doc();
+ matchPayload = span.matchPayload;
+
+ setMatchFirstSpan(span.getMatchFirstSpan());
+ if (!exclusion)
+ setMatchSecondSpan(span.getMatchSecondSpan());
+ }
+
+ @Override
+ public boolean skipTo(int target) throws IOException {
+ if (hasMoreSpans && (y.doc() < target)) {
+ if (!y.skipTo(target)) {
+ return false;
+ }
+ }
+ matchPayload.clear();
+ isStartEnumeration = false;
+ return advance();
+ }
+
+ @Override
+ public long cost() {
+ return x.cost() + y.cost();
+ }
}