Added Span expansion query with exclusion
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
index c752776..150f060 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanAttributeQuery.java
@@ -54,9 +54,10 @@
public String toString(String field) {
StringBuilder sb = new StringBuilder();
sb.append("spanAttribute(");
- sb.append(firstClause.toString(field));
- sb.append(",");
- sb.append(isNegation ? "negated)" : "notNegated)");
+ sb.append(firstClause.toString(field));
+ if (isNegation)
+ sb.append( ", not");
+ sb.append(")");
sb.append(ToStringUtils.boost(getBoost()));
return sb.toString();
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
index cab5683..654cb52 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanExpansionQuery.java
@@ -10,49 +10,95 @@
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
+import de.ids_mannheim.korap.query.spans.ExpandedExclusionSpans;
import de.ids_mannheim.korap.query.spans.ExpandedSpans;
-/** Query to make a span longer by stretching out the start or
- * the end position of the span.
+/** Query to make a span longer by stretching out the start or the end
+ * position of the span. The constraints of the expansion, i.e. how
+ * large the expansion should be (min and max position) and the
+ * direction of the expansion with respect to the "main" span, are
+ * given to the constructors as min, max and direction.
+ *
+ * The expansion can be specified to not contain any directly
+ * adjacent occurrence(s) of another span. Examples:
+ * [orth=der][orth!=Baum] "der" cannot be followed by "Baum"
+ * [pos!=ADJ]{1,2}[orth=Baum] one or two adjectives cannot precede
+ * "Baum"
+ *
+ * The offsets of the expansion parts can be collected by using a class
+ * number.
*
* @author margaretha
* */
public class SpanExpansionQuery extends SimpleSpanQuery{
-
- int min, max;
- byte classNumber;
- boolean isBefore;
+
+ private int min, max; // min, max expansion position
- public SpanExpansionQuery(SpanQuery firstClause, int min, int max,
- boolean isBefore, boolean collectPayloads) {
+ // if > 0, collect expansion offsets using this label
+ private byte classNumber;
+
+ // expansion direction with regard to the main span:
+ // < 0 to the left of main span
+ // >= 0 to the right of main span
+ private int direction;
+
+ // if true, the expansion excludes occurrences of the second (not) clause
+ final boolean isExclusion;
+
+ /** Simple expansion for any/empty token. Use
+ * {@link #SpanExpansionQuery(SpanQuery, SpanQuery, int, int, int,
+ * boolean)} for expansion with exclusions of a specific span query.
+ * */
+ public SpanExpansionQuery(SpanQuery firstClause, int min, int max, int direction,
+ boolean collectPayloads) {
super(firstClause, collectPayloads);
this.min = min;
this.max = max;
- this.isBefore = isBefore;
+ this.direction = direction;
+ this.isExclusion = false;
}
- public SpanExpansionQuery(SpanQuery firstClause, int min, int max,
- byte classNumber, boolean isBefore, boolean collectPayloads) {
- this(firstClause, min,max,isBefore,collectPayloads);
+ public SpanExpansionQuery(SpanQuery firstClause, int min, int max, int direction,
+ byte classNumber, boolean collectPayloads) {
+ this(firstClause, min, max, direction, collectPayloads);
this.classNumber = classNumber;
}
-
+
+ /** Expansion with exclusions of the spanquery specified as the second
+ * parameter.
+ * */
+ public SpanExpansionQuery(SpanQuery firstClause, SpanQuery notClause, int min,
+ int max, int direction, boolean collectPayloads) {
+ super(firstClause, notClause, collectPayloads);
+ this.min = min;
+ this.max = max;
+ this.direction = direction;
+ this.isExclusion = true;
+ }
+
+
@Override
public SimpleSpanQuery clone() {
- SpanExpansionQuery sq = new SpanExpansionQuery(
- firstClause,
- min,
- max,
- isBefore,
- collectPayloads);
+ SpanExpansionQuery sq = null;
+ if (isExclusion){
+ sq = new SpanExpansionQuery(firstClause, secondClause, min, max,
+ direction, collectPayloads);
+ }
+ else{
+ sq = new SpanExpansionQuery(firstClause, min, max, direction, classNumber,
+ collectPayloads);
+ }
//sq.setBoost(sq.getBoost());
return sq;
}
@Override
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
- return new ExpandedSpans(this, context, acceptDocs, termContexts);
+ Map<Term, TermContext> termContexts) throws IOException {
+ if (isExclusion)
+ return new ExpandedExclusionSpans(this, context, acceptDocs, termContexts);
+ else
+ return new ExpandedSpans(this, context, acceptDocs, termContexts);
}
@Override
@@ -60,14 +106,26 @@
StringBuilder sb = new StringBuilder();
sb.append("spanExpansion(");
sb.append(firstClause.toString());
- sb.append(",[");
+ if (isExclusion && secondClause != null){
+ sb.append(", !");
+ sb.append(secondClause.toString());
+ }
+ else{
+ sb.append(", []");
+ }
+ sb.append("{");
sb.append(min);
- sb.append(",");
+ sb.append(", ");
sb.append(max);
- sb.append("],");
- if (isBefore)
- sb.append("left)");
- else sb.append("right)");
+ sb.append("}, ");
+ if (direction < 0)
+ sb.append("left");
+ else sb.append("right");
+ if (classNumber > 0){
+ sb.append(", class:");
+ sb.append(classNumber);
+ }
+ sb.append(")");
return sb.toString();
}
@@ -87,14 +145,6 @@
this.max = max;
}
- public boolean isBefore() {
- return isBefore;
- }
-
- public void setBefore(boolean isBefore) {
- this.isBefore = isBefore;
- }
-
public byte getClassNumber() {
return classNumber;
}
@@ -102,4 +152,12 @@
public void setClassNumber(byte classNumber) {
this.classNumber = classNumber;
}
+
+ public int getDirection() {
+ return direction;
+ }
+
+ public void setDirection(int direction) {
+ this.direction = direction;
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java
index 97ecd0a..94b273b 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRepetitionQuery.java
@@ -50,13 +50,13 @@
@Override
public String toString(String field) {
StringBuilder sb = new StringBuilder();
- sb.append("spanQuantifier(");
+ sb.append("spanRepetition(");
sb.append(firstClause.toString(field));
- sb.append("[");
+ sb.append("{");
sb.append(min);
- sb.append(":");
+ sb.append(",");
sb.append(max);
- sb.append("])");
+ sb.append("})");
sb.append(ToStringUtils.boost(getBoost()));
return sb.toString();
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
index 9a1c13b..a378e1a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementAttributeSpans.java
@@ -122,7 +122,8 @@
* */
private boolean checkNotElementRef() throws IOException{
for (AttributeSpans notAttribute: notAttributeList){
- if (elements.start() == notAttribute.start() &&
+ if (!notAttribute.isFinish() &&
+ elements.start() == notAttribute.start() &&
elements.getElementRef() == notAttribute.getElementRef()){
if (DEBUG)
logger.info("not attribute ref exists");
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
index 2f8ebed..68ae7f2 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ExpandedSpans.java
@@ -20,7 +20,7 @@
private int min, max;
private byte classNumber;
- private boolean isBefore;
+ private int direction;
private List<CandidateSpan> candidateSpans;
private long matchCost;
@@ -30,8 +30,8 @@
super(spanExpansionQuery, context, acceptDocs, termContexts);
this.min = spanExpansionQuery.getMin();
this.max = spanExpansionQuery.getMax();
+ this.direction = spanExpansionQuery.getDirection();
this.classNumber = spanExpansionQuery.getClassNumber();
- this.isBefore = spanExpansionQuery.isBefore();
candidateSpans = new ArrayList<CandidateSpan>();
hasMoreSpans = true;
@@ -63,7 +63,7 @@
CandidateSpan cs;
int counter, start, end;
- if (isBefore){
+ if (direction < 0 ){
counter = max;
while (counter >= min ){
start = Math.max(0,firstSpans.start() - counter);