Added a window option for relation spans with variable query.
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
index 22ef490..cbbab44 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanElementQuery.java
@@ -19,7 +19,7 @@
*/
/** Matches spans wrapped by an element. */
-public class SpanElementQuery extends SimpleSpanQuery {
+public class SpanElementQuery extends SpanWithIdQuery {
//private SpanTermQuery termQuery;
private static Term elementTerm;
private String elementStr;
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
index 1a0ce77..27061b9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
@@ -21,7 +21,7 @@
*
* @author margaretha
* */
-public class SpanRelationQuery extends SimpleSpanQuery {
+public class SpanRelationQuery extends SpanWithIdQuery {
public SpanRelationQuery(SpanQuery firstClause, boolean collectPayloads) {
super(firstClause, collectPayloads);
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRelationWithVariableQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRelationWithVariableQuery.java
index c031c59..359ca75 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanRelationWithVariableQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRelationWithVariableQuery.java
@@ -6,7 +6,6 @@
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
-import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
@@ -18,36 +17,23 @@
*
* @author margaretha
* */
-public class SpanRelationWithVariableQuery extends SpanRelationQuery{
+public class SpanRelationWithVariableQuery extends SpanWithIdQuery{
private static String elementStr = "s"; // default element interval type
private SpanElementQuery elementQuery;
private boolean matchRight; // if false, match left
+ private int window;
public SpanRelationWithVariableQuery(SpanRelationQuery spanRelationQuery,
- SpanElementQuery secondClause, // match span
+ SpanWithIdQuery secondClause, // match tokenWithIdQuery, ElementQuery, RelationQuery
boolean matchRight,
boolean collectPayloads) {
this(spanRelationQuery, secondClause, elementStr, matchRight, collectPayloads);
}
public SpanRelationWithVariableQuery(SpanRelationQuery spanRelationQuery,
- SpanTermWithIdQuery secondClause, // match token
- boolean matchRight,
- boolean collectPayloads) {
- this(spanRelationQuery, secondClause, elementStr, matchRight, collectPayloads);
- }
-
- public SpanRelationWithVariableQuery(SpanRelationQuery spanRelationQuery,
- SpanRelationQuery secondClause, // match span
- boolean matchRight,
- boolean collectPayloads) {
- this(spanRelationQuery, secondClause, elementStr, matchRight, collectPayloads);
- }
-
- public SpanRelationWithVariableQuery(SpanRelationQuery spanRelationQuery,
- SpanQuery secondClause, // match span
+ SpanWithIdQuery secondClause,
String elementStr,
boolean matchRight,
boolean collectPayloads) {
@@ -56,6 +42,16 @@
elementQuery = new SpanElementQuery(spanRelationQuery.getField(), elementStr);
}
+ public SpanRelationWithVariableQuery(SpanRelationQuery spanRelationQuery,
+ SpanWithIdQuery secondClause, // match tokenWithIdQuery, ElementQuery, RelationQuery
+ int window,
+ boolean matchRight,
+ boolean collectPayloads) {
+ super(spanRelationQuery, secondClause, collectPayloads);
+ this.matchRight = matchRight;
+ this.window = window;
+ }
+
@Override
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
@@ -66,7 +62,7 @@
public SimpleSpanQuery clone() {
SpanRelationWithVariableQuery sq = new SpanRelationWithVariableQuery(
(SpanRelationQuery) this.firstClause,
- this.secondClause,
+ (SpanWithIdQuery) this.secondClause,
this.elementQuery.getElementStr(),
this.matchRight,
this.collectPayloads
@@ -84,8 +80,14 @@
sb.append(",");
sb.append( matchRight ? "matchRight, " : "matchLeft, " );
sb.append(",");
- sb.append("element:");
- sb.append(elementQuery.getElementStr());
+ if (elementQuery != null){
+ sb.append("element:");
+ sb.append(elementQuery.getElementStr());
+ }
+ else {
+ sb.append("window:");
+ sb.append(this.window);
+ }
sb.append(")");
sb.append(ToStringUtils.boost(getBoost()));
return sb.toString();
@@ -106,4 +108,12 @@
public void setElementQuery(SpanElementQuery root) {
this.elementQuery = root;
}
+
+ public int getWindow() {
+ return window;
+ }
+
+ public void setWindow(int window) {
+ this.window = window;
+ }
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java
index 42e8305..230728a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java
@@ -18,7 +18,7 @@
*
* @author margaretha
* */
-public class SpanTermWithIdQuery extends SimpleSpanQuery{
+public class SpanTermWithIdQuery extends SpanWithIdQuery{
public SpanTermWithIdQuery(Term term, boolean collectPayloads) {
super(new SpanTermQuery(term), collectPayloads);
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanWithIdQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanWithIdQuery.java
new file mode 100644
index 0000000..d117f98
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanWithIdQuery.java
@@ -0,0 +1,15 @@
+package de.ids_mannheim.korap.query;
+
+import org.apache.lucene.search.spans.SpanQuery;
+
+public abstract class SpanWithIdQuery extends SimpleSpanQuery{
+
+ public SpanWithIdQuery(SpanQuery firstClause, boolean collectPayloads) {
+ super(firstClause, collectPayloads);
+ }
+
+ public SpanWithIdQuery(SpanQuery firstClause, SpanQuery secondClause,
+ boolean collectPayloads) {
+ super(firstClause, secondClause, collectPayloads);
+ }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpansWithVariable.java b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpansWithVariable.java
index 63c5278..871f45a 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpansWithVariable.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpansWithVariable.java
@@ -17,9 +17,9 @@
* whose left side token/element positions matching the second spans,
* or vice versa.
*
- * Relations within a certain interval, e.g element-based or token-
- * distance-based, are sorted to resolve reference within that interval.
- * Resolution is limited only within an interval.
+ * Relations within a certain window, e.g. element-based or token-
+ * distance-based, are sorted to resolve reference within that window.
+ * Resolution is limited only within a window.
*
* @author margaretha
* */
@@ -34,21 +34,29 @@
private boolean hasMoreMatchees;
private short leftId, rightId;
+ private int window;
public RelationSpansWithVariable(SpanRelationWithVariableQuery query,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
- super(query, context, acceptDocs, termContexts);
- element = (ElementSpans) query.getElementQuery().getSpans(context, acceptDocs,
- termContexts);
+ super(query, context, acceptDocs, termContexts);
+ if (query.getElementQuery() != null){
+ element = (ElementSpans) query.getElementQuery().getSpans(context, acceptDocs,
+ termContexts);
+ }
+ else{
+ window = query.getWindow();
+ }
relationSpans = (RelationSpans) firstSpans;
- matcheeSpans = (SpansWithId) secondSpans;
-
+ matcheeSpans = (SpansWithId) secondSpans;
// hack
matcheeSpans.hasSpanId = true;
hasMoreMatchees = matcheeSpans.next();
- hasMoreSpans = element.next() && relationSpans.next() && hasMoreMatchees;
+ hasMoreSpans = relationSpans.next() && hasMoreMatchees;
+ if (element != null){
+ hasMoreSpans &= element.next();
+ }
candidateRelations = new ArrayList<CandidateRelationSpan>();
matchRight = query.isMatchRight();
}
@@ -79,17 +87,40 @@
candidateRelations.remove(0);
return true;
}
- else { setCandidateList(); }
+ else if (element != null){
+ setCandidateList();
+ }
+ else { setCandidateListWithWindow(); }
}
return false;
}
+ /** A window starts at the same token position as a relation span,
+ * and ends at the start + window length.
+ * */
+ private void setCandidateListWithWindow() throws IOException {
+ if (hasMoreSpans && ensureSameDoc(relationSpans, matcheeSpans) ){
+ int windowEnd = relationSpans.start() + window;
+ if (relationSpans.end() > windowEnd){
+ throw new IllegalArgumentException("The window length "+window
+ +" is too small. The relation span ("+relationSpans.start()+
+ ","+relationSpans.end()+") is longer than " +"the window " +
+ "length.");
+ }
+ else {
+ collectRelations(relationSpans.doc(), windowEnd);
+ // sort results
+ Collections.sort(candidateRelations);
+ }
+ }
+ }
+
private void setCandidateList() throws IOException {
while (hasMoreSpans && findSameDoc(element, relationSpans, matcheeSpans) ){
// if the relation is within a sentence
if (relationSpans.start() >= element.start() &&
relationSpans.end() <= element.end()){
- collectRelations();
+ collectRelations(element.doc(),element.end());
// sort results
Collections.sort(candidateRelations);
}
@@ -104,14 +135,14 @@
/** Collect all relations within an element whose left side matching the secondspans.
* */
- private void collectRelations() throws IOException {
+ private void collectRelations(int currentDoc, int windowEnd) throws IOException {
List<CandidateRelationSpan> temp = new ArrayList<CandidateRelationSpan>();
boolean sortRight = false;
if (matchRight) sortRight = true;
// collect all relations within an element
while (hasMoreSpans &&
- relationSpans.doc() == element.doc() &&
- relationSpans.end() <= element.end()){
+ relationSpans.doc() == currentDoc &&
+ relationSpans.end() <= windowEnd){
temp.add(new CandidateRelationSpan(relationSpans,sortRight));
hasMoreSpans = relationSpans.next();
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithId.java b/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithId.java
index 465d11f..b851e04 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithId.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithId.java
@@ -8,9 +8,7 @@
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
-import de.ids_mannheim.korap.query.SpanElementQuery;
-import de.ids_mannheim.korap.query.SpanRelationQuery;
-import de.ids_mannheim.korap.query.SpanTermWithIdQuery;
+import de.ids_mannheim.korap.query.SpanWithIdQuery;
/** Base class for span enumeration with spanid property.
* @author margaretha
@@ -20,22 +18,10 @@
protected short spanId;
protected boolean hasSpanId = false; // A dummy flag
- public SpansWithId(SpanElementQuery spanElementQuery,
+ public SpansWithId(SpanWithIdQuery spanWithIdQuery,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
- super(spanElementQuery, context, acceptDocs, termContexts);
- }
-
- public SpansWithId(SpanRelationQuery spanRelationQuery,
- AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
- super(spanRelationQuery, context, acceptDocs, termContexts);
- }
-
- public SpansWithId(SpanTermWithIdQuery spanTermWithIdQuery,
- AtomicReaderContext context, Bits acceptDocs,
- Map<Term, TermContext> termContexts) throws IOException {
- super(spanTermWithIdQuery, context, acceptDocs, termContexts);
+ super(spanWithIdQuery, context, acceptDocs, termContexts);
}
public short getSpanId() {
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
index 23f9467..cccbd23 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
@@ -579,4 +579,38 @@
assertEquals(4,kr.getMatch(2).getStartPos());
assertEquals(5,kr.getMatch(2).getEndPos());
}
+
+ /** Window
+ * */
+ @Test
+ public void testCase9() throws IOException {
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+
+ SpanRelationWithVariableQuery rv = new SpanRelationWithVariableQuery(
+ new SpanRelationQuery(
+ new SpanTermQuery(new Term("base",">:child-of")),true
+ ),
+ new SpanElementQuery("base","np"),
+ 6, true, true);
+
+ kr = ki.search(rv,(short) 10);
+ assertEquals(7, kr.getTotalResults());
+
+ rv =new SpanRelationWithVariableQuery(
+ new SpanRelationQuery(
+ new SpanTermQuery(new Term("base","<:dep")),true
+ ),
+ new SpanTermWithIdQuery(new Term("base","pos:NN"),true),
+ 3, false, true);
+ kr = ki.search(rv,(short) 10);
+ assertEquals(3, kr.getTotalResults());
+
+ /*for (KorapMatch km : kr.getMatches()){
+ System.out.println(km.getStartPos() +","+km.getEndPos()
+ //+" "+km.getSnippetBrackets()
+ );
+ }*/
+ }
}
+