customize toString() for various spanqueries,
add DistanceExclusionSpan,
update DistanceConstraint (also in SpanSequenceQueryWrapper)
diff --git a/src/main/java/de/ids_mannheim/korap/query/DistanceConstraint.java b/src/main/java/de/ids_mannheim/korap/query/DistanceConstraint.java
index 686f3dc..3617066 100644
--- a/src/main/java/de/ids_mannheim/korap/query/DistanceConstraint.java
+++ b/src/main/java/de/ids_mannheim/korap/query/DistanceConstraint.java
@@ -1,22 +1,22 @@
package de.ids_mannheim.korap.query;
-import org.apache.lucene.search.spans.SpanQuery;
-
public class DistanceConstraint {
- int minDistance, maxDistance;
- String unit;
- SpanQuery elementQuery;
+ private int minDistance, maxDistance;
+ private String unit;
+ private SpanElementQuery elementQuery;
+ private boolean exclusion;
+ /** Creates a distance constraint without an element query.
+  *
+  * @param unit the distance unit
+  * @param min the minimum distance
+  * @param max the maximum distance
+  * @param exclusion the exclusion flag stored on this constraint
+  */
- public DistanceConstraint(String unit, int min, int max) {
+ public DistanceConstraint(String unit, int min, int max, boolean exclusion) {
this.unit = unit;
this.minDistance = min;
this.maxDistance = max;
+ this.exclusion = exclusion;
}
- public DistanceConstraint(SpanQuery elementQuery, String unit,
- int min, int max) {
- this(unit, min, max);
- this.elementQuery = elementQuery;
+ /** Creates a distance constraint that additionally carries an element query.
+  * Delegates to the (unit, min, max, exclusion) constructor and then stores
+  * the element query.
+  *
+  * @param elementQuery the element query stored on this constraint
+  * @param unit the distance unit
+  * @param min the minimum distance
+  * @param max the maximum distance
+  * @param exclusion the exclusion flag stored on this constraint
+  */
+ public DistanceConstraint(SpanElementQuery elementQuery, String unit,
+ int min, int max, boolean exclusion) {
+ this(unit, min, max, exclusion);
+ this.elementQuery = elementQuery;
}
public int getMinDistance() {
@@ -37,10 +37,18 @@
public void setUnit(String unit) {
this.unit = unit;
}
+ /** @return the element query of this constraint; may be null when the
+  * constraint was created without one */
- public SpanQuery getElementQuery() {
+ public SpanElementQuery getElementQuery() {
return elementQuery;
}
+ /** @param elementQuery the element query to store on this constraint */
- public void setElementQuery(SpanQuery elementQuery) {
+ public void setElementQuery(SpanElementQuery elementQuery) {
this.elementQuery = elementQuery;
}
+
+ /** @return the exclusion flag of this constraint */
+ public boolean isExclusion() {
+     return this.exclusion;
+ }
+
+ /** @param exclusion the new value of the exclusion flag */
+ public void setExclusion(final boolean exclusion) {
+     this.exclusion = exclusion;
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java b/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java
index d7dcc02..ba398be 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SimpleSpanQuery.java
@@ -17,18 +17,16 @@
protected SpanQuery firstClause, secondClause;
private String field;
- private String spanName;
protected boolean collectPayloads;
+ /** Creates a query over two clauses that must share the same field.
+  * The field is taken from the second clause.
+  *
+  * @param firstClause the first sub-query
+  * @param secondClause the second sub-query
+  * @param collectPayloads the payload-collection flag stored on this query
+  * @throws IllegalArgumentException if the two clauses have different fields
+  */
public SimpleSpanQuery(SpanQuery firstClause, SpanQuery secondClause,
- String spanName, boolean collectPayloads) {
+ boolean collectPayloads) {
this.field = secondClause.getField();
if (!firstClause.getField().equals(field)){
throw new IllegalArgumentException("Clauses must have the same field.");
}
this.setFirstClause(firstClause);
- this.setSecondClause(secondClause);
- this.spanName=spanName;
+ this.setSecondClause(secondClause);
this.collectPayloads = collectPayloads;
}
@@ -36,20 +34,7 @@
public String getField() {
return field;
}
-
- @Override
- public String toString(String field) {
- StringBuilder sb = new StringBuilder();
- sb.append(this.spanName);
- sb.append("(");
- sb.append(firstClause.toString(field));
- sb.append(", ");
- sb.append(secondClause.toString(field));
- sb.append(")");
- sb.append(ToStringUtils.boost(getBoost()));
- return sb.toString();
- }
-
+
public SpanQuery getFirstClause() {
return firstClause;
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanDistanceQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanDistanceQuery.java
index 1915336..95ff5cb 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanDistanceQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanDistanceQuery.java
@@ -9,7 +9,9 @@
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ToStringUtils;
+import de.ids_mannheim.korap.query.spans.DistanceExclusionSpan;
import de.ids_mannheim.korap.query.spans.ElementDistanceSpans;
import de.ids_mannheim.korap.query.spans.TokenDistanceSpans;
import de.ids_mannheim.korap.query.spans.UnorderedElementDistanceSpans;
@@ -23,29 +25,61 @@
* */
public class SpanDistanceQuery extends SimpleSpanQuery {
- public boolean isOrdered;
- protected int minDistance, maxDistance;
- private SpanQuery elementQuery; // element distance unit
+ private boolean exclusion;
+ private boolean isOrdered;
+ private int minDistance, maxDistance;
+ private SpanElementQuery elementQuery; // element distance unit
+ private String distanceUnit;
+ private String spanName;
+ /** Creates a token-based distance query: the distance unit is "w" and the
+  * name used by toString() is "spanDistance". The exclusion flag is set to
+  * false by init().
+  *
+  * @param firstClause the first sub-query
+  * @param secondClause the second sub-query
+  * @param minDistance the minimum distance
+  * @param maxDistance the maximum distance
+  * @param isOrdered the ordering flag stored on this query
+  * @param collectPayloads the payload-collection flag passed to the superclass
+  */
public SpanDistanceQuery(SpanQuery firstClause, SpanQuery secondClause,
int minDistance, int maxDistance, boolean isOrdered,
boolean collectPayloads) {
- super(firstClause, secondClause, "spanDistance",collectPayloads);
- this.minDistance =minDistance;
- this.maxDistance = maxDistance;
- this.isOrdered = isOrdered;
+ super(firstClause, secondClause, collectPayloads);
+ init(minDistance, maxDistance, isOrdered);
+ distanceUnit = "w";
+ spanName = "spanDistance";
}
+ /** Creates an element-based distance query: the distance unit is read from
+  * elementQuery.getElementStr() and the name used by toString() is
+  * "spanElementDistance". elementQuery must not be null here, because it is
+  * dereferenced to obtain the unit.
+  *
+  * @param elementQuery the element query defining the distance unit
+  * @param firstClause the first sub-query
+  * @param secondClause the second sub-query
+  * @param minDistance the minimum distance
+  * @param maxDistance the maximum distance
+  * @param isOrdered the ordering flag stored on this query
+  * @param collectPayloads the payload-collection flag passed to the superclass
+  */
- public SpanDistanceQuery(SpanQuery elementQuery, SpanQuery firstClause,
+ public SpanDistanceQuery(SpanElementQuery elementQuery, SpanQuery firstClause,
SpanQuery secondClause, int minDistance, int maxDistance,
boolean isOrdered, boolean collectPayloads) {
- super(firstClause, secondClause, "spanElementDistance",collectPayloads);
- this.minDistance =minDistance;
+ super(firstClause, secondClause, collectPayloads);
+ init(minDistance, maxDistance, isOrdered);
+ this.elementQuery = elementQuery;
+ distanceUnit = elementQuery.getElementStr();
+ spanName = "spanElementDistance";
+ }
+
+ /** Shared constructor part: stores the distance range and the ordering
+  * flag, and resets the exclusion flag to false. */
+ private void init(int minDistance, int maxDistance,boolean isOrdered){
+ this.minDistance = minDistance;
this.maxDistance = maxDistance;
this.isOrdered = isOrdered;
- this.elementQuery = elementQuery;
+ this.exclusion = false;
}
-
+
+ /** Renders this query as
+  * {@code name(first, second, [(unit[min:max], ordered|notOrdered, excluded|notExcluded)])}
+  * followed by the boost suffix produced by ToStringUtils.boost().
+  *
+  * @param field the field name passed on to both clauses' toString(field)
+  * @return the string representation of this query
+  */
+ @Override
+ public String toString(String field) {
+     StringBuilder sb = new StringBuilder();
+     sb.append(this.spanName);
+     sb.append("(");
+     sb.append(firstClause.toString(field));
+     sb.append(", ");
+     sb.append(secondClause.toString(field));
+     sb.append(", ");
+     sb.append("[(");
+     sb.append(distanceUnit);
+     sb.append("[");
+     sb.append(minDistance);
+     sb.append(":");
+     sb.append(maxDistance);
+     sb.append("], ");
+     sb.append( isOrdered ? "ordered, " : "notOrdered, " );
+     // Both branches must close the constraint "(", the list "[" and the
+     // query "(" so that the output is balanced in either case.
+     sb.append( exclusion ? "excluded)])" : "notExcluded)])");
+     sb.append(ToStringUtils.boost(getBoost()));
+     return sb.toString();
+ }
+
@Override
public SpanDistanceQuery clone() {
SpanDistanceQuery spanDistanceQuery = new SpanDistanceQuery(
@@ -56,29 +90,41 @@
this.isOrdered,
this.collectPayloads
);
-
+
if (this.elementQuery != null) {
spanDistanceQuery.setElementQuery(this.elementQuery);
}
-
+ spanDistanceQuery.setExclusion(this.exclusion);
spanDistanceQuery.setBoost(getBoost());
return spanDistanceQuery;
}
+ /** Selects the span enumeration matching this query's configuration:
+  * element-based vs. token-based distance, exclusion, and ordering.
+  *
+  * @throws UnsupportedOperationException if exclusion is requested together
+  *         with an element query, a combination that has no span
+  *         implementation yet
+  */
@Override
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
+ Map<Term, TermContext> termContexts) throws IOException {
- if (isOrdered){
- if (this.elementQuery != null) {
- return new ElementDistanceSpans(this, context, acceptDocs, termContexts);
+ if (this.elementQuery != null) {
+     if (isExclusion()){
+         // Fail loudly instead of falling through to the unordered
+         // element spans, which would silently ignore the exclusion.
+         throw new UnsupportedOperationException(
+                 "Distance exclusion is not supported for element-based distances.");
+     }
+     else if (isOrdered){
+         return new ElementDistanceSpans(this, context, acceptDocs,
+                 termContexts);
}
- return new TokenDistanceSpans(this, context, acceptDocs, termContexts);
+     return new UnorderedElementDistanceSpans(this, context, acceptDocs,
+             termContexts);
+
}
- else if (this.elementQuery != null) {
- return new UnorderedElementDistanceSpans(this, context, acceptDocs, termContexts);
+ else if (isExclusion()){
+     return new DistanceExclusionSpan(this, context, acceptDocs,
+             termContexts, isOrdered);
}
- return new UnorderedTokenDistanceSpans(this, context, acceptDocs, termContexts);
+ else if (isOrdered) {
+     return new TokenDistanceSpans(this, context, acceptDocs,
+             termContexts);
+ }
+ return new UnorderedTokenDistanceSpans(this, context, acceptDocs,
+         termContexts);
}
public int getMinDistance() {
@@ -97,12 +143,21 @@
this.maxDistance = maxDistance;
}
+ /** @return the element query defining the distance unit, or null for a
+  * query built with the token-based constructor */
- public SpanQuery getElementQuery() {
+ public SpanElementQuery getElementQuery() {
return elementQuery;
}
+ /** Sets the element query and keeps the values printed by toString()
+  * (distance unit and query name) consistent with it. A null element query
+  * switches back to the token-based unit "w".
+  *
+  * @param elementQuery the element query defining the distance unit, or null
+  */
- public void setElementQuery(SpanQuery elementQuery) {
+ public void setElementQuery(SpanElementQuery elementQuery) {
this.elementQuery = elementQuery;
+     // Without this, a query created through the token constructor and then
+     // given an element query would still print as "spanDistance" with unit "w".
+     if (elementQuery != null) {
+         distanceUnit = elementQuery.getElementStr();
+         spanName = "spanElementDistance";
+     }
+     else {
+         distanceUnit = "w";
+         spanName = "spanDistance";
+     }
}
+ /** @return the exclusion flag of this query; getSpans() consults it when
+  * choosing the span implementation */
+ public boolean isExclusion() {
+     return this.exclusion;
+ }
+
+ /** @param exclusion the new value of the exclusion flag */
+ public void setExclusion(final boolean exclusion) {
+     this.exclusion = exclusion;
+ }
+
+
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
index 86c5d56..e21910e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
@@ -124,5 +124,13 @@
// element does exist, but has no positions
throw new IllegalStateException("field \"" + element.field() + "\" was indexed without position data; cannot run SpanElementQuery (element=" + element.text() + ")");
- };
+ }
+
+ /** @return the element string held by this query */
+ public String getElementStr() {
+     return this.elementStr;
+ }
+
+ /** @param elementStr the element string to store on this query */
+ public void setElementStr(final String elementStr) {
+     this.elementStr = elementStr;
+ };
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java
index 60e9a2c..08628ff 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanMultipleDistanceQuery.java
@@ -10,6 +10,7 @@
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ToStringUtils;
import de.ids_mannheim.korap.query.spans.MultipleDistanceSpans;
@@ -23,13 +24,15 @@
private List<DistanceConstraint> constraints;
private boolean isOrdered;
+ private String spanName;
+ /** Creates a distance query with several constraints; the name used by
+  * toString() is "spanMultipleDistance".
+  *
+  * @param firstClause the first sub-query
+  * @param secondClause the second sub-query
+  * @param constraints the list of distance constraints (stored as given, not copied)
+  * @param isOrdered the ordering flag stored on this query
+  * @param collectPayloads the payload-collection flag passed to the superclass
+  */
public SpanMultipleDistanceQuery(SpanQuery firstClause, SpanQuery secondClause,
List<DistanceConstraint> constraints, boolean isOrdered,
boolean collectPayloads) {
- super(firstClause, secondClause, "spanMultipleDistance",collectPayloads);
+ super(firstClause, secondClause, collectPayloads);
this.constraints = constraints;
this.isOrdered = isOrdered;
+ spanName = "spanMultipleDistance";
}
@Override
@@ -44,7 +47,38 @@
query.setBoost(getBoost());
return query;
- }
+ }
+
+ /** Renders this query as
+  * {@code name(first, second, [(unit[min:max], ordered|notOrdered, excluded|notExcluded), ...])}
+  * with one parenthesised entry per constraint, followed by the boost
+  * suffix produced by ToStringUtils.boost().
+  *
+  * @param field the field name passed on to both clauses' toString(field)
+  * @return the string representation of this query
+  */
+ @Override
+ public String toString(String field) {
+     StringBuilder sb = new StringBuilder();
+     sb.append(this.spanName);
+     sb.append("(");
+     sb.append(firstClause.toString(field));
+     sb.append(", ");
+     sb.append(secondClause.toString(field));
+     sb.append(", ");
+     sb.append("[");
+
+     DistanceConstraint c;
+     int size = constraints.size();
+     for (int i=0; i < size; i++){
+         c = constraints.get(i);
+         sb.append("(");
+         sb.append(c.getUnit());
+         sb.append("[");
+         sb.append(c.getMinDistance());
+         sb.append(":");
+         sb.append(c.getMaxDistance());
+         sb.append("], ");
+         sb.append( isOrdered ? "ordered, " : "notOrdered, " );
+         // Each entry closes only its own "("; the list's "]" is appended
+         // once after the loop.
+         sb.append( c.isExclusion() ? "excluded)" : "notExcluded)");
+         if (i < size-1) sb.append(", ");
+     }
+     sb.append("])");
+     sb.append(ToStringUtils.boost(getBoost()));
+     return sb.toString();
+ }
/** Filter the span matches of each constraint, returning only the matches
* meeting all the constraints.
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
index 68d35f8..22c55e7 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanNextQuery.java
@@ -15,6 +15,7 @@
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ToStringUtils;
import de.ids_mannheim.korap.query.spans.NextSpans;
@@ -23,7 +24,9 @@
*/
public class SpanNextQuery extends SimpleSpanQuery implements Cloneable {
- // Constructor
+ private String spanName;
+
+ // Constructor
public SpanNextQuery(SpanQuery firstClause, SpanQuery secondClause) {
this(firstClause, secondClause, true);
};
@@ -31,7 +34,8 @@
// Constructor
+ /** Creates a next-query over two clauses; the name used by toString() is
+  * "spanNext".
+  *
+  * @param firstClause the first sub-query
+  * @param secondClause the second sub-query
+  * @param collectPayloads the payload-collection flag passed to the superclass
+  */
public SpanNextQuery(SpanQuery firstClause, SpanQuery secondClause,
boolean collectPayloads) {
- super(firstClause, secondClause, "spanNext",collectPayloads);
+ super(firstClause, secondClause, collectPayloads);
+ this.spanName = "spanNext";
};
@@ -52,6 +56,19 @@
spanNextQuery.setBoost(getBoost());
return spanNextQuery;
};
+
+ /** Renders this query as {@code name(first, second)} followed by the
+  * boost suffix produced by ToStringUtils.boost().
+  *
+  * @param field the field name passed on to both clauses' toString(field)
+  */
+ @Override
+ public String toString(String field) {
+     String first = firstClause.toString(field);
+     String second = secondClause.toString(field);
+     return this.spanName + "(" + first + ", " + second + ")"
+             + ToStringUtils.boost(getBoost());
+ }
/** Returns true iff <code>o</code> is equal to this. */
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
index 632c6f6..9e0741e 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanSegmentQuery.java
@@ -4,13 +4,12 @@
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
-import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ToStringUtils;
import de.ids_mannheim.korap.query.spans.SegmentSpans;
@@ -19,13 +18,16 @@
* */
public class SpanSegmentQuery extends SimpleSpanQuery{
+ private String spanName;
+
public SpanSegmentQuery(SpanQuery firstClause, SpanQuery secondClause) {
this(firstClause,secondClause,true);
}
+ /** Creates a segment query over two clauses; the name used by toString()
+  * is "spanSegment".
+  *
+  * @param firstClause the first sub-query
+  * @param secondClause the second sub-query
+  * @param collectPayloads the payload-collection flag passed to the superclass
+  */
public SpanSegmentQuery(SpanQuery firstClause, SpanQuery secondClause,
boolean collectPayloads) {
- super(firstClause,secondClause,"spanSegment",collectPayloads);
+ super(firstClause,secondClause,collectPayloads);
+ spanName = "spanSegment";
}
@Override
@@ -46,6 +48,19 @@
return spanSegmentQuery;
}
+ /** Renders this query as {@code name(first, second)} followed by the
+  * boost suffix produced by ToStringUtils.boost().
+  *
+  * @param field the field name passed on to both clauses' toString(field)
+  */
+ @Override
+ public String toString(String field) {
+     return new StringBuilder()
+             .append(this.spanName)
+             .append("(")
+             .append(firstClause.toString(field))
+             .append(", ")
+             .append(secondClause.toString(field))
+             .append(")")
+             .append(ToStringUtils.boost(getBoost()))
+             .toString();
+ }
+
//TODO: Where is the hashmap?
@Override
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpan.java b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpan.java
new file mode 100644
index 0000000..cbf6b45
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpan.java
@@ -0,0 +1,134 @@
+package de.ids_mannheim.korap.query.spans;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.util.Bits;
+
+import de.ids_mannheim.korap.query.SpanDistanceQuery;
+
+/** Span enumeration of the first spans which do NOT occur
+ * together with the second spans within a range of distance.
+ *
+ * Every match is a first span; positions of first and second spans are
+ * only compared when both are in the same document.
+ *
+ * @author margaretha
+ * */
+public class DistanceExclusionSpan extends DistanceSpans{
+
+    private int minDistance, maxDistance;
+    private boolean isOrdered;
+    // whether secondSpans currently points to a valid span
+    private boolean hasMoreSecondSpans;
+
+    /** Creates the enumeration and positions both child spans on their
+     * first span.
+     *
+     * @param query the distance query supplying the distance range
+     * @param isOrdered whether only second spans on the right side of a
+     *        first span are taken into account
+     */
+    public DistanceExclusionSpan(SpanDistanceQuery query,
+            AtomicReaderContext context, Bits acceptDocs,
+            Map<Term, TermContext> termContexts, boolean isOrdered)
+            throws IOException {
+        super(query, context, acceptDocs, termContexts);
+        minDistance = query.getMinDistance();
+        maxDistance = query.getMaxDistance();
+        this.isOrdered = isOrdered;
+        hasMoreSpans = firstSpans.next();
+        hasMoreSecondSpans = secondSpans.next();
+    }
+
+    /** Moves to the next first span that qualifies as a match.
+     *
+     * @return true if a match was found and the match properties were set,
+     *         false once the first spans are exhausted
+     */
+    @Override
+    protected boolean advance() throws IOException {
+
+        while (hasMoreSpans) {
+            // Drop second spans that can no longer affect the current
+            // (or any later) first span before testing for a match.
+            if (hasMoreSecondSpans && forward())
+                continue;
+
+            // findMatch() reads the current first span, so it must run
+            // before the first spans are moved on.
+            boolean matched = findMatch();
+            hasMoreSpans = firstSpans.next();
+            if (matched)
+                return true;
+        }
+        return false;
+    }
+
+    /** Advances the second spans by one if the current second span is
+     * irrelevant for the current first span.
+     *
+     * @return true if the second spans were advanced
+     */
+    private boolean forward() throws IOException {
+
+        // A second span in an earlier document can never be compared
+        // with the current first span by position.
+        if (secondSpans.doc() < firstSpans.doc()) {
+            hasMoreSecondSpans = secondSpans.next();
+            return true;
+        }
+
+        // Same document and the second span ends at or before the start of
+        // the first span, i.e. it lies on the left side.
+        if (secondSpans.doc() == firstSpans.doc() &&
+                firstSpans.start() >= secondSpans.end()) {
+
+            // Ordered: left-side second spans never count.
+            // Unordered: they only count while within the maximum distance.
+            if (isOrdered || calculateActualDistance() > maxDistance) {
+                hasMoreSecondSpans = secondSpans.next();
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /** Computes the distance between the current first and second span.
+     * For a second span starting at or after the end of the first span this
+     * is secondStart - firstEnd + 1, otherwise firstStart - secondEnd + 1
+     * (which is not positive when the two spans overlap).
+     */
+    private int calculateActualDistance(){
+        // right secondSpan
+        if (firstSpans.end() <= secondSpans.start())
+            return secondSpans.start() - firstSpans.end() +1;
+        // left secondSpan
+        return firstSpans.start() - secondSpans.end() +1;
+    }
+
+    /** Decides whether the current first span is a match, i.e. the current
+     * second span is not within the distance range of it.
+     *
+     * @return true if the first span matches (match properties are set)
+     */
+    private boolean findMatch() throws IOException {
+        // No second span left, or the next one is in a later document:
+        // nothing in this document can exclude the first span.
+        if (!hasMoreSecondSpans || secondSpans.doc() > firstSpans.doc()){
+            setMatchProperties();
+            return true;
+        }
+        // Overlapping spans count as distance 0.
+        if (minDistance == 0 &&
+                firstSpans.start() < secondSpans.end() &&
+                secondSpans.start() < firstSpans.end()){
+            return false;
+        }
+
+        int actualDistance = calculateActualDistance();
+        if (actualDistance < minDistance || actualDistance > maxDistance){
+            setMatchProperties();
+            return true;
+        }
+
+        return false;
+    }
+
+    /** Copies document number, positions and (optionally) payloads of the
+     * current first span into the match fields.
+     */
+    private void setMatchProperties() throws IOException{
+        matchDocNumber = firstSpans.doc();
+        matchStartPosition = firstSpans.start();
+        matchEndPosition = firstSpans.end();
+
+        if (collectPayloads && firstSpans.isPayloadAvailable())
+            matchPayload.addAll(firstSpans.getPayload());
+
+        setMatchFirstSpan(new CandidateSpan(firstSpans));
+        // NOTE(review): the second span recorded here is whatever secondSpans
+        // currently points to (possibly exhausted or in another document);
+        // confirm that consumers of matchSecondSpan can cope with that.
+        setMatchSecondSpan(new CandidateSpan(secondSpans));
+
+        log.trace("doc# {}, start {}, end {}",matchDocNumber,matchStartPosition,
+                matchEndPosition);
+    }
+
+    /** Skips the first spans to the target document if they are still
+     * before it, then looks for the next match from there.
+     */
+    @Override
+    public boolean skipTo(int target) throws IOException {
+        if (hasMoreSpans && firstSpans.doc() < target){
+            if (!firstSpans.skipTo(target)){
+                hasMoreSpans = false;
+                return false;
+            }
+        }
+        return advance();
+    }
+
+    /** @return the sum of the costs of both child spans */
+    @Override
+    public long cost() {
+        return firstSpans.cost() + secondSpans.cost();
+    }
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java
index b2f923e..f2169df 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java
@@ -11,7 +11,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import de.ids_mannheim.korap.query.SimpleSpanQuery;
+import de.ids_mannheim.korap.query.SpanDistanceQuery;
+import de.ids_mannheim.korap.query.SpanMultipleDistanceQuery;
/** DistanceSpan is a base class for enumeration of span matches,
* whose two child spans have a specific range of distance (within
@@ -25,7 +26,13 @@
protected CandidateSpan matchFirstSpan,matchSecondSpan;
protected Logger log = LoggerFactory.getLogger(DistanceSpans.class);
+ /** Creates distance spans for a single-constraint distance query; all
+  * arguments are passed straight to the superclass constructor. */
- public DistanceSpans(SimpleSpanQuery query,
+ public DistanceSpans(SpanDistanceQuery query,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ super(query, context, acceptDocs, termContexts);
+ }
+
+ public DistanceSpans(SpanMultipleDistanceQuery query,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
super(query, context, acceptDocs, termContexts);
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java
index f9c5e68..698b88f 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/MultipleDistanceSpans.java
@@ -9,7 +9,8 @@
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
-import de.ids_mannheim.korap.query.SimpleSpanQuery;
+import de.ids_mannheim.korap.query.SpanDistanceQuery;
+import de.ids_mannheim.korap.query.SpanMultipleDistanceQuery;
/** Span enumeration of matches whose two sub-spans has exactly the same
* first and second sub-sub-spans. This class basically filters the span
@@ -22,12 +23,26 @@
private DistanceSpans x,y;
private boolean isOrdered;
+ /** Creates multiple-distance spans for a SpanDistanceQuery; the two child
+  * spans and the ordering flag are handed to init(). */
- public MultipleDistanceSpans(SimpleSpanQuery query,
+ public MultipleDistanceSpans(SpanDistanceQuery query,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts,
Spans firstSpans, Spans secondSpans, boolean isOrdered)
throws IOException {
super(query, context, acceptDocs, termContexts);
+ init(firstSpans, secondSpans, isOrdered);
+ }
+
+ /** Creates multiple-distance spans for a SpanMultipleDistanceQuery; the
+  * two child spans and the ordering flag are handed to init(). */
+ public MultipleDistanceSpans(SpanMultipleDistanceQuery query,
+         AtomicReaderContext context,
+         Bits acceptDocs,
+         Map<Term, TermContext> termContexts,
+         Spans firstSpans,
+         Spans secondSpans,
+         boolean isOrdered) throws IOException {
+     super(query, context, acceptDocs, termContexts);
+     this.init(firstSpans, secondSpans, isOrdered);
+ }
+
+ private void init(Spans firstSpans, Spans secondSpans,
+ boolean isOrdered) throws IOException{
this.isOrdered =isOrdered;
x = (DistanceSpans) firstSpans;
y = (DistanceSpans) secondSpans;
@@ -47,17 +62,17 @@
while (hasMoreSpans && ensureSameDoc(x, y)){
if (findMatch()){
- advanceChild();
+ moveForward();
return true;
}
- advanceChild();
+ moveForward();
}
return false;
}
/** Find the next match of one of the sub/child-span.
* */
- private void advanceChild() throws IOException{
+ private void moveForward() throws IOException{
if (isOrdered){
if (x.end() < y.end() ||
(x.end() == y.end() && x.start() < y.start()) )
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
index 50fd643..e435d50 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
@@ -21,7 +21,6 @@
public abstract class NonPartialOverlappingSpans extends SimpleSpans{
private Logger log = LoggerFactory.getLogger(NonPartialOverlappingSpans.class);
- protected boolean collectPayloads;
public NonPartialOverlappingSpans(SimpleSpanQuery simpleSpanQuery,
AtomicReaderContext context, Bits acceptDocs,
@@ -30,7 +29,7 @@
// Warning: not implemented, results in errors for SpanNextQuery
// This.collectPayloads = simpleSpanQuery.isCollectPayloads()
- this.collectPayloads = true;
+ collectPayloads = true;
hasMoreSpans = secondSpans.next();
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/OrderedDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/OrderedDistanceSpans.java
index 0d4e459..ef29174 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/OrderedDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/OrderedDistanceSpans.java
@@ -18,7 +18,6 @@
public abstract class OrderedDistanceSpans extends DistanceSpans {
protected boolean hasMoreFirstSpans;
- protected boolean collectPayloads;
protected int minDistance,maxDistance;
protected List<CandidateSpan> candidateList;
@@ -34,7 +33,6 @@
minDistance = query.getMinDistance();
maxDistance = query.getMaxDistance();
- collectPayloads = query.isCollectPayloads();
hasMoreFirstSpans = firstSpans.next();
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
index 0bacd04..17bb6ba 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
@@ -23,6 +23,7 @@
public abstract class SimpleSpans extends Spans{
private SimpleSpanQuery query;
protected boolean isStartEnumeration;
+ protected boolean collectPayloads;
protected boolean hasMoreSpans;
// Warning: enumeration of Spans
@@ -37,6 +38,8 @@
Map<Term,TermContext> termContexts) throws IOException {
query = simpleSpanQuery;
+ collectPayloads = query.isCollectPayloads();
+
matchDocNumber= -1;
matchStartPosition= -1;
matchEndPosition= -1;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
index 3a03560..b4b71d8 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/UnorderedDistanceSpans.java
@@ -23,8 +23,6 @@
public abstract class UnorderedDistanceSpans extends DistanceSpans{
protected int minDistance, maxDistance;
- private boolean collectPayloads;
-
protected boolean hasMoreFirstSpans, hasMoreSecondSpans;
protected List<CandidateSpan> firstSpanList, secondSpanList;
protected List<CandidateSpan> matchList;
diff --git a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
index b746315..d57eb0c 100644
--- a/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
+++ b/src/main/java/de/ids_mannheim/korap/query/wrap/SpanSequenceQueryWrapper.java
@@ -92,14 +92,14 @@
+ /** Adds a non-exclusion distance constraint with unit "w", creating the
+  * constraint list on first use.
+  *
+  * @return this wrapper, for chaining
+  */
public SpanSequenceQueryWrapper withConstraint (int min, int max) {
if (this.constraints == null)
this.constraints = new ArrayList<DistanceConstraint>(1);
- this.constraints.add(new DistanceConstraint("w", min, max));
+ this.constraints.add(new DistanceConstraint("w", min, max,false));
return this;
};
+ /** Adds a non-exclusion distance constraint with the given unit, creating
+  * the constraint list on first use.
+  *
+  * @return this wrapper, for chaining
+  */
public SpanSequenceQueryWrapper withConstraint (int min, int max, String unit) {
if (this.constraints == null)
this.constraints = new ArrayList<DistanceConstraint>(1);
- this.constraints.add(new DistanceConstraint(unit, min, max));
+ this.constraints.add(new DistanceConstraint(unit, min, max,false));
return this;
};
@@ -129,7 +129,7 @@
if (!constraint.getUnit().equals("w")) {
for (int i = 1; i < this.segments.size(); i++) {
query = new SpanDistanceQuery(
- (SpanQuery) new SpanElementQuery(this.field, constraint.getUnit()),
+ new SpanElementQuery(this.field, constraint.getUnit()),
query,
this.segments.get(i),
constraint.getMinDistance(),
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java
new file mode 100644
index 0000000..b822b34
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java
@@ -0,0 +1,86 @@
+package de.ids_mannheim.korap.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.junit.Test;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.query.SpanDistanceQuery;
+
+/** Index test for token-distance queries with the exclusion flag set.
+ *
+ * NOTE(review): testCase1 currently only builds the index and runs the
+ * search; it contains no assertions, so it can only fail by throwing.
+ */
+public class TestDistanceExclusionIndex {
+
+ private KorapIndex ki;
+ private KorapResult kr;
+
+ /** Indexes two documents and searches "s:c" excluded from "s:e" within
+  * distance 0 to 1, unordered. */
+ @Test
+ public void testCase1() throws IOException{
+ ki = new KorapIndex();
+ ki.addDoc(createFieldDoc0());
+ ki.addDoc(createFieldDoc1());
+ ki.commit();
+ SpanQuery sq;
+ // ---- Distance 0 to 1
+ sq = createQuery("s:c","s:e",0,1,false);
+ kr = ki.search(sq, (short) 10);
+
+// System.out.println(sq.toString("base"));
+
+// System.out.print(kr.getTotalResults()+"\n");
+// for (int i=0; i< kr.getTotalResults(); i++){
+// System.out.println(
+// kr.match(i).getLocalDocID()+" "+
+// kr.match(i).startPos + " " +
+// kr.match(i).endPos
+// );
+// }
+ }
+
+ /** Builds a token-distance query on field "base" between terms x and y
+  * (payload collection enabled) and switches its exclusion flag on. */
+ private SpanQuery createQuery(String x, String y, int min, int max, boolean isOrdered){
+ SpanDistanceQuery sq = new SpanDistanceQuery(
+ new SpanTermQuery(new Term("base",x)),
+ new SpanTermQuery(new Term("base",y)),
+ min,
+ max,
+ isOrdered,
+ true
+ );
+ sq.setExclusion(true);
+ return sq;
+ }
+
+ /** Test document "doc-0": ten tokens, containing both "s:c" and "s:e". */
+ private FieldDocument createFieldDoc0(){
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-0");
+ fd.addTV("base",
+ "text",
+ "[(0-1)s:c|_1#0-1]" +
+ "[(1-2)s:e|_2#1-2]" +
+ "[(2-3)s:c|_3#2-3|<>:y#2-4$<i>4]" +
+ "[(3-4)s:c|_4#3-4|<>:x#3-7$<i>7]" +
+ "[(4-5)s:d|_5#4-5|<>:y#4-6$<i>6]" +
+ "[(5-6)s:c|_6#5-6|<>:y#5-8$<i>8]" +
+ "[(6-7)s:d|_7#6-7]" +
+ "[(7-8)s:e|_8#7-8|<>:x#7-9$<i>9]" +
+ "[(8-9)s:e|_9#8-9|<>:x#8-10$<i>10]" +
+ "[(9-10)s:d|_10#9-10]");
+ return fd;
+ }
+
+ /** Test document "doc-1": six tokens, containing "s:c" but no "s:e". */
+ private FieldDocument createFieldDoc1() {
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-1");
+ fd.addTV("base",
+ "text",
+ "[(0-1)s:b|s:c|_1#0-1]" +
+ "[(1-2)s:b|_2#1-2]" +
+ "[(2-3)s:c|_3#2-3]" +
+ "[(3-4)s:c|_4#3-4]" +
+ "[(4-5)s:d|_5#4-5]" +
+ "[(5-6)s:d|_6#5-6]");
+ return fd;
+ }
+}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestDistanceIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestDistanceIndex.java
index 7200bf3..2273881 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestDistanceIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestDistanceIndex.java
@@ -105,7 +105,7 @@
// ---- Distance 0 to 1
sq = createQuery("s:b","s:c",0,1,true);
kr = ki.search(sq, (short) 10);
-
+// System.out.println(sq);
assertEquals(2, kr.totalResults());
assertEquals(0, kr.match(0).startPos);
assertEquals(1, kr.match(0).endPos);
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMultipleDistanceIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestMultipleDistanceIndex.java
index a4f8ca6..61035f7 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestMultipleDistanceIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestMultipleDistanceIndex.java
@@ -37,10 +37,10 @@
+ /** Builds a non-exclusion constraint: a plain one for unit "w", otherwise
+  * one carrying a SpanElementQuery for the unit on field "base". */
public DistanceConstraint createConstraint(String unit, int min, int max){
if (unit.equals("w")){
- return new DistanceConstraint(unit, min, max);
+ return new DistanceConstraint(unit, min, max, false);
}
- SpanQuery sq = new SpanElementQuery("base", unit);
- return new DistanceConstraint(sq, unit, min, max);
+ SpanElementQuery sq = new SpanElementQuery("base", unit);
+ return new DistanceConstraint(sq, unit, min, max, false);
}
private FieldDocument createFieldDoc0() {
@@ -116,6 +116,7 @@
SpanQuery mdq;
mdq = createQuery("s:b", "s:c", constraints, false);
kr = ki.search(mdq, (short) 10);
+ System.out.println(mdq);
assertEquals(3, kr.getTotalResults());
assertEquals(0, kr.getMatch(0).getStartPos());