Added a span relation query with a variable, which matches either the left-side or the right-side token/element of span relations
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
index 5bee05f..1789a54 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
@@ -18,6 +18,12 @@
public SpanRelationQuery(SpanQuery firstClause, boolean collectPayloads) {
super(firstClause, collectPayloads);
}
+
+ // for spanRelationWithVariable
+ public SpanRelationQuery(SpanRelationQuery spanRelationQuery,
+ SpanQuery secondClause, boolean collectPayloads) {
+ super(spanRelationQuery, secondClause, collectPayloads);
+ }
@Override
public SimpleSpanQuery clone() {
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRelationWithVariableQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRelationWithVariableQuery.java
new file mode 100644
index 0000000..cddc326
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRelationWithVariableQuery.java
@@ -0,0 +1,69 @@
+package de.ids_mannheim.korap.query;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+
+import de.ids_mannheim.korap.query.spans.RelationSpansWithVariable;
+
+/** This query returns one side of a relation whose other side has the same
+ * positions as the spans of another span query: the left side if matchRight
+ * is true, the right side otherwise.
+ *
+ * */
+public class SpanRelationWithVariableQuery extends SpanRelationQuery{
+
+ private String rootElementStr = "s";
+ private SpanElementQuery root;
+ private boolean matchRight; // if false, match left
+
+ public SpanRelationWithVariableQuery(SpanRelationQuery spanRelationQuery,
+ SpanQuery secondClause, // span to match
+ boolean matchRight,
+ boolean collectPayloads) {
+ super(spanRelationQuery, secondClause, collectPayloads);
+ this.matchRight = matchRight;
+ root = new SpanElementQuery(spanRelationQuery.getField(), rootElementStr);
+ }
+
+ @Override
+ public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ return new RelationSpansWithVariable(this, context, acceptDocs, termContexts);
+ }
+
+ @Override
+ public SimpleSpanQuery clone() {
+ // FIXME: not implemented yet; returns null instead of a copy of this query.
+ return null;
+ }
+
+ @Override
+ public String toString(String field) {
+ // FIXME: not implemented yet; returns null instead of a string form of this query.
+ return null;
+ }
+
+ public boolean isMatchRight() {
+ return matchRight;
+ }
+
+ public void setMatchRight(boolean matchRight) {
+ this.matchRight = matchRight;
+ }
+
+ public SpanElementQuery getRoot() {
+ return root;
+ }
+
+ public void setRoot(SpanElementQuery root) {
+ this.root = root;
+ }
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java
index fb5ce66..4ce39c3 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceSpans.java
@@ -52,9 +52,9 @@
* */
protected abstract boolean advance() throws IOException;
- /** Find the same doc shared by element, firstspan and secondspan.
+/* *//** Find the same doc shared by element, firstspan and secondspan.
* @return true iff such a doc is found.
- * */
+ * *//*
protected boolean findSameDoc(Spans x,
Spans y, Spans e) throws IOException{
@@ -68,7 +68,7 @@
};
}
return false;
- }
+ }*/
public CandidateSpan getMatchFirstSpan() {
return matchFirstSpan;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
index 6a1c3d8..92801a1 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NonPartialOverlappingSpans.java
@@ -55,11 +55,8 @@
while (hasMoreSpans && ensureSameDoc(firstSpans,secondSpans)){
int matchCase = findMatch();
if (matchCase == 0){
- if (DEBUG) {
- log.trace("Match doc#: {}",matchDocNumber);
- log.trace("Match positions: {}-{}", matchStartPosition,
- matchEndPosition);
- };
+ //log.trace("Match doc#: {}",matchDocNumber);
+ //log.trace("Match positions: {}-{}", matchStartPosition,matchEndPosition);
doCollectPayloads();
return true;
}
@@ -82,19 +79,14 @@
/** Collecting available payloads from the current first and second spans */
private void doCollectPayloads() throws IOException {
+ Collection<byte[]> payload;
if (collectPayloads){
- if (DEBUG)
- log.trace("Collect payloads");
if (firstSpans.isPayloadAvailable()) {
- Collection<byte[]> payload = firstSpans.getPayload();
- if (DEBUG)
- log.trace("Found {} payloads in firstSpans", payload.size());
+ payload = firstSpans.getPayload();
matchPayload.addAll(payload);
}
if (secondSpans.isPayloadAvailable()) {
- Collection<byte[]> payload = secondSpans.getPayload();
- if (DEBUG)
- log.trace("Found {} payloads in secondSpans", payload.size());
+ payload = secondSpans.getPayload();
matchPayload.addAll(payload);
}
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
index 72d59c8..757c7e8 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
@@ -19,7 +19,7 @@
import de.ids_mannheim.korap.query.SpanRelationQuery;
/** Enumeration of spans denoting relations between two tokens/elements. The start and end of
- * a RelationSpan always denote the start and end of the source token/element.
+ * a RelationSpan always denote the start and end of the left-side token/element.
*
* There are 4 types of relations, which is differentiated by the payload length in bytes.
* 1. Token to token relation (1 int & 1 short, length: 6)
@@ -41,8 +41,8 @@
public class RelationSpans extends SpansWithId{
//short relationId;
- int targetStart, targetEnd;
- int currentDoc, currentPosition;
+ private int rightStart, rightEnd;
+ private int currentDoc, currentPosition;
private TermSpans relationTermSpan;
@@ -71,7 +71,8 @@
this.matchDocNumber = cs.getDoc();
this.matchStartPosition = cs.getStart();
this.matchEndPosition = cs.getEnd();
- this.matchPayload = cs.getPayloads();
+ this.setRightStart(cs.getRightStart());
+ this.setRightEnd(cs.getRightEnd());
this.spanId = cs.getSpanId(); // relation id
candidateList.remove(0);
return true;
@@ -112,30 +113,31 @@
switch (length) {
case 6: // Token to token
i = PayloadReader.readInteger(payloadBytesRef,0);
- cs.setTargetStart(i);
- cs.setTargetEnd(i);
+ cs.setRightStart(i-1);
+ cs.setRightEnd(i);
break;
case 10: // Token to span
- cs.setTargetStart(PayloadReader.readInteger(payloadBytesRef,0));
- cs.setTargetEnd(PayloadReader.readInteger(payloadBytesRef,4));
+ cs.setRightStart(PayloadReader.readInteger(payloadBytesRef,0));
+ cs.setRightEnd(PayloadReader.readInteger(payloadBytesRef,4));
break;
case 11: // Span to token
cs.setEnd(PayloadReader.readInteger(payloadBytesRef,0));
i = PayloadReader.readInteger(payloadBytesRef,5);
- cs.setTargetStart(i);
- cs.setTargetEnd(i);
+ cs.setRightStart(i-1);
+ cs.setRightEnd(i);
break;
case 14: // Span to span
cs.setEnd(PayloadReader.readInteger(payloadBytesRef,0));
- cs.setTargetStart(PayloadReader.readInteger(payloadBytesRef,4));
- cs.setTargetEnd(PayloadReader.readInteger(payloadBytesRef,8));
+ cs.setRightStart(PayloadReader.readInteger(payloadBytesRef,4));
+ cs.setRightEnd(PayloadReader.readInteger(payloadBytesRef,8));
break;
}
cs.setSpanId(PayloadReader.readShort(payloadBytesRef, length-2)); //relation id
+ // Payload is cleared.
}
@Override
@@ -156,35 +158,27 @@
public long cost() {
return firstSpans.cost();
}
-/*
- public short getRelationId() {
- return relationId;
+
+ public int getRightStart() {
+ return rightStart;
}
- public void setRelationId(short relationId) {
- this.relationId = relationId;
- }*/
-
- public int getTargetStart() {
- return targetStart;
+ public void setRightStart(int rightStart) {
+ this.rightStart = rightStart;
}
- public void setTargetStart(int targetStart) {
- this.targetStart = targetStart;
+ public int getRightEnd() {
+ return rightEnd;
}
- public int getTargetEnd() {
- return targetEnd;
- }
-
- public void setTargetEnd(int targetEnd) {
- this.targetEnd = targetEnd;
+ public void setRightEnd(int rightEnd) {
+ this.rightEnd = rightEnd;
}
class CandidateRelationSpan extends CandidateSpan implements Comparable<CandidateSpan>{
- private int targetStart, targetEnd;
+ private int rightStart, rightEnd;
public CandidateRelationSpan(Spans span) throws IOException{
super(span);
@@ -197,14 +191,14 @@
CandidateRelationSpan cs = (CandidateRelationSpan) o;
if (sourcePositionComparison == 0){
- if (this.getTargetStart() == cs.getTargetStart()){
- if (this.getTargetEnd() == cs.getTargetEnd())
+ if (this.getRightStart() == cs.getRightStart()){
+ if (this.getRightEnd() == cs.getRightEnd())
return 0;
- if (this.getTargetEnd() > cs.getTargetEnd() )
+ if (this.getRightEnd() > cs.getRightEnd() )
return 1;
else return -1;
}
- else if (this.getTargetStart() < cs.getTargetStart())
+ else if (this.getRightStart() < cs.getRightStart())
return -1;
else return 1;
}
@@ -212,20 +206,20 @@
return sourcePositionComparison;
}
- public int getTargetEnd() {
- return targetEnd;
+ public int getRightEnd() {
+ return rightEnd;
}
- public void setTargetEnd(int targetEnd) {
- this.targetEnd = targetEnd;
+ public void setRightEnd(int rightEnd) {
+ this.rightEnd = rightEnd;
}
- public int getTargetStart() {
- return targetStart;
+ public int getRightStart() {
+ return rightStart;
}
- public void setTargetStart(int targetStart) {
- this.targetStart = targetStart;
+ public void setRightStart(int rightStart) {
+ this.rightStart = rightStart;
}
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpansWithVariable.java b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpansWithVariable.java
new file mode 100644
index 0000000..4c661da
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpansWithVariable.java
@@ -0,0 +1,223 @@
+package de.ids_mannheim.korap.query.spans;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.util.Bits;
+
+import de.ids_mannheim.korap.query.SpanRelationWithVariableQuery;
+
+/** This span enumeration returns one side of the relation spans (the left side if
+ * matchRight, otherwise the right side) whose other side's positions match the second spans.
+ *
+ * @author margaretha
+ * */
+public class RelationSpansWithVariable extends SimpleSpans{
+
+ private RelationSpans relationSpans;
+ private ElementSpans element;
+ private List<CandidateRelationSpan> candidateRelations;
+
+ private boolean matchRight;
+ private boolean hasMoreSecondSpans;
+
+ public RelationSpansWithVariable(SpanRelationWithVariableQuery query,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ super(query, context, acceptDocs, termContexts);
+ element = (ElementSpans) query.getRoot().getSpans(context, acceptDocs,
+ termContexts);
+ relationSpans = (RelationSpans) firstSpans;
+ hasMoreSecondSpans = secondSpans.next();
+ hasMoreSpans = element.next() && relationSpans.next() && hasMoreSecondSpans;
+ candidateRelations = new ArrayList<CandidateRelationSpan>();
+
+ matchRight = query.isMatchRight();
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ isStartEnumeration=false;
+ matchPayload.clear();
+ return advance();
+ }
+
+ protected boolean advance() throws IOException {
+ while (candidateRelations.size() > 0 || hasMoreSpans){
+ if (candidateRelations.size() > 0){
+ CandidateRelationSpan relationSpan = candidateRelations.get(0);
+ matchDocNumber = relationSpan.getDoc();
+ if (matchRight){
+ matchStartPosition = relationSpan.getStart();
+ matchEndPosition = relationSpan.getEnd();
+ }
+ else{
+ matchStartPosition = relationSpan.getRightStart();
+ matchEndPosition = relationSpan.getRightEnd();
+ }
+ candidateRelations.remove(0);
+ return true;
+ }
+ else { setCandidateList(); }
+ }
+ return false;
+ }
+
+ private void setCandidateList() throws IOException {
+ while (hasMoreSpans && findSameDoc(element, relationSpans, secondSpans) ){
+ // if the relation is within a sentence
+ if (relationSpans.start() >= element.start() &&
+ relationSpans.end() <= element.end()){
+ collectRelations();
+ // sort results
+ Collections.sort(candidateRelations);
+ }
+ else if (relationSpans.end() < element.end()){
+ hasMoreSpans = relationSpans.next();
+ }
+ else {
+ hasMoreSpans = element.next();
+ }
+ }
+ }
+
+ /** Collects all relations within the current element and keeps those whose matched
+ * side (the right side if matchRight, otherwise the left side) has exactly the same
+ * start and end positions as a span of the second spans.
+ * @throws IOException if advancing one of the underlying spans fails
+ * */
+ private void collectRelations() throws IOException {
+ List<CandidateRelationSpan> temp = new ArrayList<CandidateRelationSpan>();
+ // collect all relations within an element; when matching the right side they
+ // are created with sortRight set, so that sorting temp orders them by right side
+ while (hasMoreSpans &&
+ relationSpans.doc() == element.doc() &&
+ relationSpans.end() <= element.end()){
+ temp.add(new CandidateRelationSpan(relationSpans, matchRight));
+ hasMoreSpans = relationSpans.next();
+ }
+
+ // temp follows the order of relationSpans; for right-side matching it is
+ // re-sorted by right side before being compared with the second spans
+ if (matchRight) Collections.sort(temp);
+
+ // do the matching for each relation
+ int i=0;
+ CandidateRelationSpan r;
+ while (hasMoreSecondSpans && i < temp.size()){
+ r = temp.get(i);
+ if (matchRight){
+ i = matchRelation(i, r,r.getRightStart(), r.getRightEnd());
+ }
+ else{
+ i = matchRelation(i, r,r.getStart(), r.getEnd());
+ }
+ }
+
+ // once the second spans are exhausted no further relation can match,
+ // so the whole enumeration is marked as finished
+ hasMoreSpans &= hasMoreSecondSpans;
+ }
+
+ private int matchRelation(int i, CandidateRelationSpan r, int startPos, int endPos) throws IOException {
+ if(startPos == secondSpans.start() ){
+ if (endPos == secondSpans.end()){
+ if (matchRight) r.sortRight = false;
+ else r.sortRight = true;
+
+ candidateRelations.add(r);
+ i++;
+ }
+ else if (endPos <= secondSpans.end()){
+ i++;
+ }
+ else { hasMoreSecondSpans = secondSpans.next(); }
+ }
+ else if (startPos < secondSpans.start()){
+ i++;
+ }
+ else { hasMoreSecondSpans = secondSpans.next(); }
+ return i;
+ }
+
+ @Override
+ public boolean skipTo(int target) throws IOException {
+ // Drop buffered candidates of documents before target; otherwise advance()
+ // would return them first although they precede the requested document.
+ for (int i = candidateRelations.size() - 1; i >= 0; i--)
+ if (candidateRelations.get(i).getDoc() < target) candidateRelations.remove(i);
+ // Record exhaustion in hasMoreSpans so that later calls stop as well.
+ if (hasMoreSpans && firstSpans.doc() < target) hasMoreSpans = firstSpans.skipTo(target);
+ setCandidateList();
+ matchPayload.clear();
+ isStartEnumeration=false;
+ return advance();
+ }
+
+ @Override
+ public long cost() {
+ // Sum of the sub-span costs, since all three enumerations are advanced by this class.
+ return firstSpans.cost() + secondSpans.cost() + element.cost();
+ }
+
+ class CandidateRelationSpan extends CandidateSpan implements Comparable<CandidateSpan>{
+
+ private int rightStart, rightEnd;
+ private boolean sortRight;
+ public CandidateRelationSpan(RelationSpans span, boolean sortRight)
+ throws IOException {
+ super(span);
+ this.rightStart = span.getRightStart();
+ this.rightEnd = span.getRightEnd();
+ this.sortRight = sortRight;
+ }
+
+ @Override
+ public int compareTo(CandidateSpan o) {
+ CandidateRelationSpan cs = (CandidateRelationSpan) o;
+ if (sortRight)
+ return sortByRight(cs);
+
+ return super.compareTo(o);
+ }
+
+ private int sortByRight(CandidateRelationSpan cs) {
+ if (this.getRightStart() == cs.getRightStart()){
+ if (this.getRightEnd() == cs.getRightEnd())
+ return 0;
+ if (this.getRightEnd() > cs.getRightEnd() )
+ return 1;
+ else return -1;
+ }
+ else if (this.getRightStart() < cs.getRightStart())
+ return -1;
+ else return 1;
+ }
+
+ /*private void sortByLeft(CandidateSpan o) {
+ super.compareTo(o);
+ }*/
+
+ public int getRightStart() {
+ return rightStart;
+ }
+
+ public void setRightStart(int rightStart) {
+ this.rightStart = rightStart;
+ }
+
+ public int getRightEnd() {
+ return rightEnd;
+ }
+
+ public void setRightEnd(int rightEnd) {
+ this.rightEnd = rightEnd;
+ }
+ }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java
index 136bdd7..d4a0e58 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SegmentSpans.java
@@ -30,7 +30,6 @@
* */
@Override
protected int findMatch() {
-
if (firstSpans.start() == secondSpans.start() &&
firstSpans.end() == secondSpans.end() ){
matchDocNumber = firstSpans.doc();
@@ -38,7 +37,8 @@
matchEndPosition = firstSpans.end();
return 0;
}
- else if (firstSpans.end() < secondSpans.end())
+ else if (firstSpans.start() < secondSpans.start() ||
+ firstSpans.end() < secondSpans.end())
return -1;
return 1;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
index 265d9af..276cee1 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SimpleSpans.java
@@ -82,6 +82,24 @@
return true;
}
+ /** Find a doc shared by all three given spans x, y and e.
+ * @return true iff such a doc is found.
+ * */
+ protected boolean findSameDoc(Spans x,
+ Spans y, Spans e) throws IOException{
+
+ while (hasMoreSpans) {
+ if (ensureSameDoc(x, y) &&
+ e.doc() == x.doc()){
+ return true;
+ }
+ if (!ensureSameDoc(e,y)){
+ return false;
+ };
+ }
+ return false;
+ }
+
@Override
public int doc() {
return matchDocNumber;