Added matching relations with id
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
index 1789a54..1a0ce77 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
@@ -13,6 +13,14 @@
import de.ids_mannheim.korap.query.spans.RelationSpans;
+/** Query for spans representing a relation between tokens, elements,
+ * or a token and an element. This class provides two types of queries:
+ * 1. querying any relations
+ * 2. querying relations matching a certain type of sources/targets,
+ * that are the left or the right sides of the relations.
+ *
+ * @author margaretha
+ * */
public class SpanRelationQuery extends SimpleSpanQuery {
public SpanRelationQuery(SpanQuery firstClause, boolean collectPayloads) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRelationWithVariableQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRelationWithVariableQuery.java
index cddc326..c031c59 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanRelationWithVariableQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRelationWithVariableQuery.java
@@ -9,28 +9,52 @@
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ToStringUtils;
import de.ids_mannheim.korap.query.spans.RelationSpansWithVariable;
-/** This query returns the tokens/elements of the left-side of a relation
- * whose right-side tokens/elements' positions match the spans of another
- * spanquery.
+/** This query matches one side of a relation (either left or right) to certain
+ * elements or terms, and returns the other side of the relation.
*
+ * @author margaretha
* */
public class SpanRelationWithVariableQuery extends SpanRelationQuery{
-
- private String rootElementStr = "s";
- private SpanElementQuery root;
+
+ private static String elementStr = "s"; // default element interval type
+
+ private SpanElementQuery elementQuery;
private boolean matchRight; // if false, match left
public SpanRelationWithVariableQuery(SpanRelationQuery spanRelationQuery,
- SpanQuery secondClause, // span to match
+ SpanElementQuery secondClause, // match span
+ boolean matchRight,
+ boolean collectPayloads) {
+ this(spanRelationQuery, secondClause, elementStr, matchRight, collectPayloads);
+ }
+
+ public SpanRelationWithVariableQuery(SpanRelationQuery spanRelationQuery,
+ SpanTermWithIdQuery secondClause, // match token
+ boolean matchRight,
+ boolean collectPayloads) {
+ this(spanRelationQuery, secondClause, elementStr, matchRight, collectPayloads);
+ }
+
+ public SpanRelationWithVariableQuery(SpanRelationQuery spanRelationQuery,
+ SpanRelationQuery secondClause, // match span
+ boolean matchRight,
+ boolean collectPayloads) {
+ this(spanRelationQuery, secondClause, elementStr, matchRight, collectPayloads);
+ }
+
+ public SpanRelationWithVariableQuery(SpanRelationQuery spanRelationQuery,
+ SpanQuery secondClause, // match span
+ String elementStr,
boolean matchRight,
boolean collectPayloads) {
super(spanRelationQuery, secondClause, collectPayloads);
this.matchRight = matchRight;
- root = new SpanElementQuery(spanRelationQuery.getField(), rootElementStr);
- }
+ elementQuery = new SpanElementQuery(spanRelationQuery.getField(), elementStr);
+ }
@Override
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
@@ -40,14 +64,31 @@
@Override
public SimpleSpanQuery clone() {
- // TODO Auto-generated method stub
- return null;
+ SpanRelationWithVariableQuery sq = new SpanRelationWithVariableQuery(
+ (SpanRelationQuery) this.firstClause,
+ this.secondClause,
+ this.elementQuery.getElementStr(),
+ this.matchRight,
+ this.collectPayloads
+ );
+ return sq;
}
@Override
public String toString(String field) {
- // TODO Auto-generated method stub
- return null;
+ StringBuilder sb = new StringBuilder();
+ sb.append("spanRelationWithVariable(");
+ sb.append(firstClause.toString(field));
+ sb.append(",");
+ sb.append(secondClause.toString(field));
+ sb.append(",");
+ sb.append( matchRight ? "matchRight, " : "matchLeft, " );
+ sb.append(",");
+ sb.append("element:");
+ sb.append(elementQuery.getElementStr());
+ sb.append(")");
+ sb.append(ToStringUtils.boost(getBoost()));
+ return sb.toString();
}
public boolean isMatchRight() {
@@ -58,12 +99,11 @@
this.matchRight = matchRight;
}
- public SpanElementQuery getRoot() {
- return root;
+ public SpanElementQuery getElementQuery() {
+ return elementQuery;
}
- public void setRoot(SpanElementQuery root) {
- this.root = root;
+ public void setElementQuery(SpanElementQuery root) {
+ this.elementQuery = root;
}
-
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java
new file mode 100644
index 0000000..42e8305
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanTermWithIdQuery.java
@@ -0,0 +1,50 @@
+package de.ids_mannheim.korap.query;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ToStringUtils;
+
+import de.ids_mannheim.korap.query.spans.TermSpansWithId;
+
+/** This query wraps the usual SpanTermQuery and returns TermSpans with a spanid property.
+ * This query is used in other spanqueries that require spans with id as their child spans,
+ * for example span relation with variable query.
+ *
+ * @author margaretha
+ * */
+public class SpanTermWithIdQuery extends SimpleSpanQuery{
+
+ public SpanTermWithIdQuery(Term term, boolean collectPayloads) {
+ super(new SpanTermQuery(term), collectPayloads);
+ }
+
+ @Override
+ public SimpleSpanQuery clone() {
+ SpanTermQuery sq = (SpanTermQuery) this.firstClause;
+ return new SpanTermWithIdQuery(sq.getTerm(),
+ this.collectPayloads
+ );
+ }
+
+ @Override
+ public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ return new TermSpansWithId(this, context, acceptDocs, termContexts);
+ }
+
+ @Override
+ public String toString(String field) {
+ StringBuilder sb = new StringBuilder();
+ sb.append("spanTermWithId(");
+ sb.append(firstClause.toString(field));
+ sb.append(")");
+ return sb.toString();
+ }
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index 6789382..30f2b76 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -111,7 +111,9 @@
* @author margaretha
* */
private void readPayload(CandidateElementSpans cs) throws IOException {
- BytesRef payload = termSpans.getPostings().getPayload();
+ List<byte[]> payloadList = (List<byte[]>) termSpans.getPayload();
+ BytesRef payload = new BytesRef(payloadList.get(0));
+ //.getPostings().getPayload();
//ByteBuffer payloadBuffer = ByteBuffer.allocate(128);
if (payload != null) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
index 757c7e8..5e3c0b7 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
@@ -22,19 +22,20 @@
* a RelationSpan always denote the start and end of the left-side token/element.
*
* There are 4 types of relations, which is differentiated by the payload length in bytes.
- * 1. Token to token relation (1 int & 1 short, length: 6)
- * 2. Token to span (2 int & 1 short, length: 10)
- * 3. Span to token (int, byte, int, short, length: 11)
- * 4. Span to Span (3 int & 1 short, length: 14)
+ * 1. Token to token relation (1 int & 3 short, length: 10)
+ * 2. Token to span (2 int & 3 short, length: 14)
+ * 3. Span to token (int, byte, int, 3 short, length: 15)
+ * 4. Span to Span (3 int & 3 short, length: 18)
*
* Every integer value denotes the start/end position of the start/target of a relation,
* in this format: (sourceEndPos?, startTargetPos, endTargetPos?). The end position of a token is
* identical to its start position, and therefore not is saved in a payload.
- *
- * A short value denote the relation id, used for matching relation-attributes.
+ *
+ * The short values denote the relation id, left id, and right id.
* The byte in relation #3 is just a dummy to create a different length from the relation #2.
*
- * NOTE: Sorting of the candidate spans can alternatively be done in indexing, instead of here.
+ * NOTE: Sorting of the candidate spans can alternatively be done in indexing, instead of here.
+ * (first by left positions and then by right positions)
*
* @author margaretha
* */
@@ -43,6 +44,7 @@
//short relationId;
private int rightStart, rightEnd;
private int currentDoc, currentPosition;
+ private short leftId, rightId;
private TermSpans relationTermSpan;
@@ -74,6 +76,8 @@
this.setRightStart(cs.getRightStart());
this.setRightEnd(cs.getRightEnd());
this.spanId = cs.getSpanId(); // relation id
+ this.leftId = cs.getLeftId();
+ this.rightId = cs.getRightId();
candidateList.remove(0);
return true;
}
@@ -97,10 +101,10 @@
}
Collections.sort(candidateList);
- /*for (CandidateRelationSpan cs:candidateList){
- System.out.println(cs.getStart()+","+cs.getEnd() //+" <size:" +payload.get(0).length
- +" target "+cs.getTargetStart()+","+cs.getTargetEnd() +" id:"+cs.getRelationId());
- }*/
+// for (CandidateRelationSpan cs:candidateList){
+// System.out.println(cs.getStart()+","+cs.getEnd() //+" <size:" +payload.get(0).length
+// +" target "+cs.getRightStart()+","+cs.getRightEnd() +" id:"+cs.getSpanId());
+// }
}
private void readPayload(CandidateRelationSpan cs) {
@@ -111,33 +115,35 @@
int i;
switch (length) {
- case 6: // Token to token
+ case 10: // Token to token
i = PayloadReader.readInteger(payloadBytesRef,0);
cs.setRightStart(i-1);
cs.setRightEnd(i);
break;
- case 10: // Token to span
+ case 14: // Token to span
cs.setRightStart(PayloadReader.readInteger(payloadBytesRef,0));
cs.setRightEnd(PayloadReader.readInteger(payloadBytesRef,4));
break;
- case 11: // Span to token
+ case 15: // Span to token
cs.setEnd(PayloadReader.readInteger(payloadBytesRef,0));
i = PayloadReader.readInteger(payloadBytesRef,5);
cs.setRightStart(i-1);
cs.setRightEnd(i);
break;
- case 14: // Span to span
+ case 18: // Span to span
cs.setEnd(PayloadReader.readInteger(payloadBytesRef,0));
cs.setRightStart(PayloadReader.readInteger(payloadBytesRef,4));
cs.setRightEnd(PayloadReader.readInteger(payloadBytesRef,8));
break;
- }
+ }
- cs.setSpanId(PayloadReader.readShort(payloadBytesRef, length-2)); //relation id
- // Payload is cleared.
+ cs.setRightId(PayloadReader.readShort(payloadBytesRef, length-2)); //right id
+ cs.setLeftId(PayloadReader.readShort(payloadBytesRef, length-4)); //left id
+ cs.setSpanId(PayloadReader.readShort(payloadBytesRef, length-6)); //relation id
+ // Payload is cleared.
}
@Override
@@ -174,11 +180,33 @@
public void setRightEnd(int rightEnd) {
this.rightEnd = rightEnd;
}
-
-
+
+
+
+ public short getLeftId() {
+ return leftId;
+ }
+
+ public void setLeftId(short leftId) {
+ this.leftId = leftId;
+ }
+
+
+
+ public short getRightId() {
+ return rightId;
+ }
+
+ public void setRightId(short rightId) {
+ this.rightId = rightId;
+ }
+
+
+
class CandidateRelationSpan extends CandidateSpan implements Comparable<CandidateSpan>{
private int rightStart, rightEnd;
+ private short leftId, rightId;
public CandidateRelationSpan(Spans span) throws IOException{
super(span);
@@ -221,6 +249,23 @@
public void setRightStart(int rightStart) {
this.rightStart = rightStart;
}
+
+ public short getLeftId() {
+ return leftId;
+ }
+
+ public void setLeftId(short leftId) {
+ this.leftId = leftId;
+ }
+
+ public short getRightId() {
+ return rightId;
+ }
+
+ public void setRightId(short rightId) {
+ this.rightId = rightId;
+ }
+
}
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpansWithVariable.java b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpansWithVariable.java
index 88595d4..63c5278 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpansWithVariable.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpansWithVariable.java
@@ -14,30 +14,42 @@
import de.ids_mannheim.korap.query.SpanRelationWithVariableQuery;
/** This span enumeration returns the right side of relation spans
- * whose left side token/element positions matching the second spans.
+ * whose left side token/element positions match the second spans,
+ * or vice versa.
+ *
+ * Relations within a certain interval, e.g. element-based or token-
+ * distance-based, are sorted to resolve references within that interval.
+ * Resolution is limited to a single interval.
*
* @author margaretha
* */
-public class RelationSpansWithVariable extends SimpleSpans{
+public class RelationSpansWithVariable extends SpansWithId{
private RelationSpans relationSpans;
+ private SpansWithId matcheeSpans;
private ElementSpans element;
private List<CandidateRelationSpan> candidateRelations;
private boolean matchRight;
- private boolean hasMoreSecondSpans;
+ private boolean hasMoreMatchees;
+
+ private short leftId, rightId;
public RelationSpansWithVariable(SpanRelationWithVariableQuery query,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
super(query, context, acceptDocs, termContexts);
- element = (ElementSpans) query.getRoot().getSpans(context, acceptDocs,
+ element = (ElementSpans) query.getElementQuery().getSpans(context, acceptDocs,
termContexts);
relationSpans = (RelationSpans) firstSpans;
- hasMoreSecondSpans = secondSpans.next();
- hasMoreSpans = element.next() && relationSpans.next() && hasMoreSecondSpans;
- candidateRelations = new ArrayList<CandidateRelationSpan>();
+ matcheeSpans = (SpansWithId) secondSpans;
+ // hack
+ matcheeSpans.hasSpanId = true;
+
+ hasMoreMatchees = matcheeSpans.next();
+ hasMoreSpans = element.next() && relationSpans.next() && hasMoreMatchees;
+ candidateRelations = new ArrayList<CandidateRelationSpan>();
matchRight = query.isMatchRight();
}
@@ -60,7 +72,10 @@
else{
matchStartPosition = relationSpan.getRightStart();
matchEndPosition = relationSpan.getRightEnd();
- }
+ }
+ setLeftId(relationSpans.getLeftId());
+ setRightId(relationSpan.getRightId());
+ setSpanId(relationSpan.getSpanId());
candidateRelations.remove(0);
return true;
}
@@ -70,7 +85,7 @@
}
private void setCandidateList() throws IOException {
- while (hasMoreSpans && findSameDoc(element, relationSpans, secondSpans) ){
+ while (hasMoreSpans && findSameDoc(element, relationSpans, matcheeSpans) ){
// if the relation is within a sentence
if (relationSpans.start() >= element.start() &&
relationSpans.end() <= element.end()){
@@ -106,50 +121,57 @@
// do the matching for each relation
int i=0;
CandidateRelationSpan r;
- while (hasMoreSecondSpans && i < temp.size()){
+ while (hasMoreMatchees && i < temp.size()){
r = temp.get(i);
if (matchRight){
- //System.out.println(i+" "+r.getStart()+","+r.getEnd()+" "+
- // r.getRightStart()+","+r.getRightEnd()+
- // " "+secondSpans.start()+","+secondSpans.end());
+ /*System.out.println(r.getStart()+","+r.getEnd()+" "+
+ r.getRightStart()+","+r.getRightEnd()+
+ " #"+r.getRightId()+
+ " "+matcheeSpans.start()+","+matcheeSpans.end()+
+ " #"+matcheeSpans.getSpanId()
+ );*/
i = matchRelation(i, r,r.getRightStart(), r.getRightEnd());
}
else{
- //System.out.println(i+" "+r.getStart()+","+r.getEnd()+" "+
- // r.getRightStart()+","+r.getRightEnd()+" "
- // +secondSpans.start()+","+secondSpans.end());
+ /*System.out.println(r.getStart()+","+r.getEnd()+" "+
+ r.getRightStart()+","+r.getRightEnd()+" "
+ +matcheeSpans.start()+","+matcheeSpans.end());*/
i = matchRelation(i, r,r.getStart(), r.getEnd());
}
}
- hasMoreSpans &= hasMoreSecondSpans;
+ hasMoreSpans &= hasMoreMatchees;
}
private int matchRelation(int i, CandidateRelationSpan r, int startPos, int endPos) throws IOException {
- if(startPos == secondSpans.start() ){
- if (endPos == secondSpans.end()){
- if (matchRight) r.sortRight = false;
- else r.sortRight = true;
-
- candidateRelations.add(r);
+ if(startPos == matcheeSpans.start() ){
+ if (endPos == matcheeSpans.end()){
+ if (matchRight && r.getRightId() == matcheeSpans.getSpanId()){
+ r.sortRight = false;
+ candidateRelations.add(r);
+ }
+ else if (!matchRight && r.getLeftId() == matcheeSpans.getSpanId()) {
+ r.sortRight = true;
+ candidateRelations.add(r);
+ }
i++;
}
- else if (endPos <= secondSpans.end()){
+ else if (endPos <= matcheeSpans.end()){
i++;
}
- else { hasMoreSecondSpans = secondSpans.next(); }
+ else { hasMoreMatchees = matcheeSpans.next(); }
}
- else if (startPos < secondSpans.start()){
+ else if (startPos < matcheeSpans.start()){
i++;
}
- else { hasMoreSecondSpans = secondSpans.next(); }
+ else { hasMoreMatchees = matcheeSpans.next(); }
return i;
}
@Override
public boolean skipTo(int target) throws IOException {
- if (hasMoreSpans && (firstSpans.doc() < target)){
- if (!firstSpans.skipTo(target)){
+ if (hasMoreSpans && (relationSpans.doc() < target)){
+ if (!relationSpans.skipTo(target)){
candidateRelations.clear();
return false;
}
@@ -166,16 +188,36 @@
return 0;
}
+ public short getLeftId() {
+ return leftId;
+ }
+
+ public void setLeftId(short leftId) {
+ this.leftId = leftId;
+ }
+
+ public short getRightId() {
+ return rightId;
+ }
+
+ public void setRightId(short rightId) {
+ this.rightId = rightId;
+ }
+
class CandidateRelationSpan extends CandidateSpan implements Comparable<CandidateSpan>{
private int rightStart, rightEnd;
+ private short leftId, rightId;
private boolean sortRight;
+
public CandidateRelationSpan(RelationSpans span, boolean sortRight)
throws IOException {
super(span);
this.rightStart = span.getRightStart();
this.rightEnd = span.getRightEnd();
this.sortRight = sortRight;
+ this.leftId = span.getLeftId();
+ this.rightId = span.getRightId();
}
@Override
@@ -218,6 +260,22 @@
public void setRightEnd(int rightEnd) {
this.rightEnd = rightEnd;
+ }
+
+ public short getLeftId() {
+ return leftId;
+ }
+
+ public void setLeftId(short leftId) {
+ this.leftId = leftId;
+ }
+
+ public short getRightId() {
+ return rightId;
+ }
+
+ public void setRightId(short rightId) {
+ this.rightId = rightId;
}
}
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithId.java b/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithId.java
index a2aa59a..465d11f 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithId.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SpansWithId.java
@@ -10,7 +10,11 @@
import de.ids_mannheim.korap.query.SpanElementQuery;
import de.ids_mannheim.korap.query.SpanRelationQuery;
+import de.ids_mannheim.korap.query.SpanTermWithIdQuery;
+/** Base class for span enumeration with spanid property.
+ * @author margaretha
+ * */
public abstract class SpansWithId extends SimpleSpans{
protected short spanId;
@@ -27,6 +31,12 @@
Map<Term, TermContext> termContexts) throws IOException {
super(spanRelationQuery, context, acceptDocs, termContexts);
}
+
+ public SpansWithId(SpanTermWithIdQuery spanTermWithIdQuery,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ super(spanTermWithIdQuery, context, acceptDocs, termContexts);
+ }
public short getSpanId() {
return spanId;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java b/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java
new file mode 100644
index 0000000..2cf0c5b
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/TermSpansWithId.java
@@ -0,0 +1,76 @@
+package de.ids_mannheim.korap.query.spans;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.TermSpans;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+import de.ids_mannheim.korap.query.SpanTermWithIdQuery;
+
+/** Enumeration of termSpans whose an id. This class just wraps the usual Lucene TermSpans,
+ * and adds spanid property. It reads the term-id from a term span payload. The term-id
+ * is encoded in a short, starting from (offset) 0 in the payload.
+ *
+ * @author margaretha
+ * */
+public class TermSpansWithId extends SpansWithId{
+
+ private TermSpans termSpans;
+
+ public TermSpansWithId(SpanTermWithIdQuery spanTermWithIdQuery,
+ AtomicReaderContext context, Bits acceptDocs,
+ Map<Term, TermContext> termContexts) throws IOException {
+ super(spanTermWithIdQuery, context, acceptDocs, termContexts);
+ termSpans = (TermSpans) firstSpans;
+ hasMoreSpans = termSpans.next();
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ isStartEnumeration=false;
+ return advance();
+ }
+
+ private boolean advance() throws IOException{
+ while (hasMoreSpans){
+ readPayload();
+ matchDocNumber = firstSpans.doc();
+ matchStartPosition = firstSpans.start();
+ matchEndPosition = firstSpans.end();
+ hasMoreSpans = firstSpans.next();
+ return true;
+ }
+ return false;
+ }
+
+ private void readPayload() throws IOException{
+ List<byte[]> payload = (List<byte[]>) firstSpans.getPayload();
+ BytesRef payloadBytesRef = new BytesRef(payload.get(0));
+ setSpanId(PayloadReader.readShort(payloadBytesRef, 0)); //term id
+ }
+
+
+ @Override
+ public boolean skipTo(int target) throws IOException {
+ if (hasMoreSpans && (firstSpans.doc() < target)){
+ if (!firstSpans.skipTo(target)){
+ return false;
+ }
+ }
+ matchPayload.clear();
+ isStartEnumeration=false;
+ return advance();
+ }
+
+ @Override
+ public long cost() {
+ return firstSpans.cost(); // plus cost from reading payload
+ }
+
+}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
index 4388fde..23f9467 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
@@ -16,6 +16,7 @@
import de.ids_mannheim.korap.query.SpanRelationQuery;
import de.ids_mannheim.korap.query.SpanRelationWithVariableQuery;
import de.ids_mannheim.korap.query.SpanSegmentQuery;
+import de.ids_mannheim.korap.query.SpanTermWithIdQuery;
import de.ids_mannheim.korap.query.SpanWithAttributeQuery;
/*
@@ -70,16 +71,16 @@
fd.addString("ID", "doc-0");
fd.addTV("base",
"text",
- "[(0-1)s:c|_1#0-1|>:xip/syntax-dep_rel$<i>6<s>1]" +
- "[(1-2)s:e|_2#1-2|<:xip/syntax-dep_rel$<i>9<s>1|>:xip/syntax-dep_rel$<i>4<s>1]" +
+ "[(0-1)s:c$<s>1|_1#0-1|>:xip/syntax-dep_rel$<i>6<s>1<s>1<s>1]" +
+ "[(1-2)s:e$<s>1|_2#1-2|<:xip/syntax-dep_rel$<i>9<s>1<s>1<s>1|>:xip/syntax-dep_rel$<i>4<s>1<s>1<s>1]" +
"[(2-3)s:c|_3#2-3]" +
- "[(3-4)s:c|s:b|_4#3-4|<:xip/syntax-dep_rel$<i>9<s>1]" +
- "[(4-5)s:e|s:d|_5#4-5|<:xip/syntax-dep_rel$<i>1<s>1]" +
+ "[(3-4)s:c$<s>1|s:b$<s>2|_4#3-4|<:xip/syntax-dep_rel$<i>9<s>1<s>1<s>1]" +
+ "[(4-5)s:e$<s>1|s:d$<s>2|_5#4-5|<:xip/syntax-dep_rel$<i>1<s>1<s>1<s>1]" +
"[(5-6)s:c|_6#5-6]" +
- "[(6-7)s:d|_7#6-7|<:xip/syntax-dep_rel$<i>1<s>1]" +
+ "[(6-7)s:d$<s>1|_7#6-7|<:xip/syntax-dep_rel$<i>1<s>1<s>1<s>1]" +
"[(7-8)s:e|_8#7-8]" +
"[(8-9)s:e|s:b|_9#8-9]" +
- "[(9-10)s:d|_10#9-10|>:xip/syntax-dep_rel$<i>1<s>2|>:xip/syntax-dep_rel$<i>3<s>1]");
+ "[(9-10)s:d$<s>1|_10#9-10|>:xip/syntax-dep_rel$<i>1<s>2<s>1<s>1|>:xip/syntax-dep_rel$<i>3<s>1<s>1<s>1]");
return fd;
}
@@ -88,19 +89,23 @@
fd.addString("ID", "doc-1");
fd.addTV("base",
"text",
- "[(0-1)s:c|_1#0-1|>:xip/syntax-dep_rel$<i>3<i>6<i>9<s>2|>:xip/syntax-dep_rel$<i>6<i>9<s>1]" +
+ "[(0-1)s:c$<s>2|<>:p$#0-3$<i>3<s>1|_1#0-1|" +
+ ">:xip/syntax-dep_rel$<i>3<i>6<i>9<s>2<s>1<s>1|" +
+ ">:xip/syntax-dep_rel$<i>6<i>9<s>1<s>2<s>1|" +
"r@:func=subj$<s>2]" +
- "[(1-2)s:e|_2#1-2]" +
+ "[(1-2)s:e|_2#1-2|<>:p#1-3$<i>3<s>1]" +
"[(2-3)s:c|_3#2-3]" +
"[(3-4)s:c|s:b|_4#3-4]" +
"[(4-5)s:e|s:d|_5#4-5]" +
"[(5-6)s:c|_6#5-6]" +
- "[(6-7)s:d|_7#6-7|<:xip/syntax-dep_rel$<i>9<b>0<i>0<s>1|>:xip/syntax-dep_rel$<i>9<b>0<i>9<s>3|" +
- "<:xip/syntax-dep_rel$<i>9<i>1<i>3<s>2|" +
+ "[(6-7)s:d$<s>2|<>:p$#6-9$<i>9<s>1|_7#6-7|" +
+ "<:xip/syntax-dep_rel$<i>9<b>0<i>1<s>1<s>1<s>2|" +
+ ">:xip/syntax-dep_rel$<i>9<b>0<i>9<s>3<s>1<s>1|" +
+ "<:xip/syntax-dep_rel$<i>9<i>1<i>3<s>2<s>1<s>1|" +
"r@:func=obj$<s>2]" +
"[(7-8)s:e|_8#7-8]" +
"[(8-9)s:e|s:b|_9#8-9]" +
- "[(9-10)s:d|_10#9-10|<:xip/syntax-dep_rel$<i>6<i>9<s>2]");
+ "[(9-10)s:d$<s>1|_10#9-10|<:xip/syntax-dep_rel$<i>6<i>9<s>2<s>1<s>1]");
return fd;
}
@@ -109,42 +114,62 @@
fd.addString("ID", "doc-2");
fd.addTV("base",
"Ich kaufe die Blümen für meine Mutter.",
- "[(0-3)s:Ich|_0#0-3|pos:NN|<>:s#0-38$<i>7<s>-1|<>:np#0-3$<i>1<s>-1|" +
- ">:child-of$<i>0<i>7<s>1|>:child-of$<i>0<i>1<s>6|" +
- "<:child-of$<i>1<b>0<i>1<s>3|<:child-of$<i>7<i>0<i>1<s>4|<:child-of$<i>7<i>1<i>7<s>5|" +
- "<:dep$<i>1<s>2|" +
+ "[(0-3)s:Ich|_1#0-3|pos:NN$<s>1|<>:s#0-38$<i>7<s>2|<>:np#0-3$<i>1<s>3|" +
+ ">:child-of$<i>0<i>7<s>1<s>3<s>2|" +
+ ">:child-of$<i>0<i>1<s>6<s>1<s>3|" +
+ "<:child-of$<i>1<b>0<i>1<s>3<s>3<s>1|" +
+ "<:child-of$<i>7<i>0<i>1<s>4<s>2<s>3|" +
+ "<:child-of$<i>7<i>1<i>7<s>5<s>2<s>2|" +
+ "<:dep$<i>2<s>2<s>1<s>1|" +
"r@:func=sbj$<s>1]" +
- "[(1-2)s:kaufe|_1#4-9|pos:V|<>:vp#4-38$<i>7<s>-1|" +
- ">:child-of$<i>7<i>0<i>7<s>1|>:child-of$<i>1<i>7<s>2|" +
- "<:child-of$<i>7<b>0<i>1<s>5|<:child-of$<i>7<i>3<i>7<s>6|" +
- ">:dep$<i>0<s>3|>:dep$<i>3<s>4]" +
+ "[(1-2)s:kaufe|_2#4-9|pos:V$<s>1|<>:vp#4-38$<i>7<s>2|" +
+ ">:child-of$<i>7<i>0<i>7<s>1<s>2<s>2|" +
+ ">:child-of$<i>1<i>7<s>2<s>1<s>2|" +
+ "<:child-of$<i>7<b>0<i>2<s>5<s>2<s>1|" +
+ "<:child-of$<i>7<i>2<i>7<s>6<s>2<s>4|" +
+ ">:dep$<i>1<s>3<s>1<s>1|" +
+ ">:dep$<i>4<s>4<s>1<s>1]" +
- "[(2-3)s:die|_2#10-13|pos:ART|tt:DET|<>:np#10-20$<i>4<s>-1|<>:np#10-38$<i>7<s>-1|" +
- ">:child-of$<i>4<i>2<i>7<s>1|>:child-of$<i>2<i>4<s>2|>:child-of$<i>7<i>1<i>7<s>2|" +
- "<:child-of$<i>4<b>0<i>3<s>3|<:child-of$<i>4<b>0<i>4<s>4|<:child-of$<i>7<i>2<i>4<s>5|<:child-of$<i>7<i>4<i>7<s>6|" +
- "<:dep$<i>3<s>3|r@:func=obj$<s>1" +
+ "[(2-3)s:die|_3#10-13|pos:ART$<s>1|tt:DET$<s>2|<>:np#10-20$<i>4<s>3|<>:np#10-38$<i>7<s>4|" +
+ ">:child-of$<i>4<i>2<i>7<s>1<s>3<s>4|" +
+ ">:child-of$<i>2<i>4<s>2<s>1<s>3|" +
+ ">:child-of$<i>7<i>1<i>7<s>2<s>4<s>2|" +
+ "<:child-of$<i>4<b>0<i>3<s>3<s>3<s>1|" +
+ "<:child-of$<i>4<b>0<i>4<s>4<s>3<s>1|" +
+ "<:child-of$<i>7<i>2<i>4<s>5<s>4<s>3|" +
+ "<:child-of$<i>7<i>4<i>7<s>6<s>4<s>2|" +
+ "<:dep$<i>4<s>3<s>1<s>1|" +
+ "r@:func=obj$<s>1" +
"]" +
- "[(3-4)s:Blümen|_3#14-20|pos:NN|" +
- ">:child-of$<i>2<i>4<s>1|" +
- "<:dep$<i>1<s>2|>:dep$<i>2<s>3|>:dep$<i>4<s>4|" +
+ "[(3-4)s:Blümen|_4#14-20|pos:NN$<s>1|" +
+ ">:child-of$<i>2<i>4<s>1<s>1<s>3|" +
+ "<:dep$<i>2<s>2<s>1<s>2|" +
+ ">:dep$<i>3<s>3<s>1<s>1|" +
+ ">:dep$<i>5<s>4<s>1<s>1|" +
"r@:func=head$<s>2]" +
- "[(4-5)s:für|_4#21-24|pos:PREP|<>:pp#21-38$<i>7<s>-1|" +
- ">:child-of$<i>4<i>7<s>1|>:child-of$<i>7<i>2<i>7<s>2|" +
- "<:child-of$<i>7<b>0<i>5<s>4|<:child-of$<i>7<i>5<i>7<s>5|" +
- "<:dep$<i>3<s>1|>:dep$<i>5<s>3" +
+ "[(4-5)s:für|_5#21-24|pos:PREP$<s>1|<>:pp#21-38$<i>7<s>2|" +
+ ">:child-of$<i>4<i>7<s>1<s>1<s>2|" +
+ ">:child-of$<i>7<i>2<i>7<s>2<s>2<s>4|" +
+ "<:child-of$<i>7<b>0<i>5<s>4<s>2<s>1|" +
+ "<:child-of$<i>7<i>5<i>7<s>5<s>2<s>2|" +
+ "<:dep$<i>4<s>1<s>1<s>1|" +
+ ">:dep$<i>7<s>3<s>1<s>1" +
"]" +
- "[(5-6)s:meine|_5#25-30|pos:ART|<>:np#25-38$<i>7<s>-1|" +
- ">:child-of$<i>5<i>7<s>1|>:child-of$<i>7<i>4<i>7<s>2|" +
- "<:child-of$<i>7<b>0<i>6<s>4|<:child-of$<i>7<b>0<i>7<s>5|" +
- "<:dep$<i>7<s>3" +
+ "[(5-6)s:meine|_6#25-30|pos:ART$<s>1|<>:np#25-38$<i>7<s>2|" +
+ ">:child-of$<i>5<i>7<s>1<s>1<s>2|" +
+ ">:child-of$<i>7<i>4<i>7<s>2<s>2<s>2|" +
+ "<:child-of$<i>7<b>0<i>6<s>4<s>2<s>1|" +
+ "<:child-of$<i>7<b>0<i>7<s>5<s>2<s>1|" +
+ "<:dep$<i>7<s>3<s>1<s>1" +
"]" +
- "[(6-7)s:Mutter.|_6#31-38|pos:NN|" +
- ">:child-of$<i>5<i>7<s>1|" +
- ">:dep$<i>5<s>2|<:dep$<i>4<s>3|" +
+ "[(6-7)s:Mutter.|_7#31-38|pos:NN$<s>1|" +
+ ">:child-of$<i>5<i>7<s>1<s>1<s>2|" +
+ ">:dep$<i>6<s>2<s>1<s>1|" +
+ "<:dep$<i>5<s>3<s>1<s>1|" +
"r@:func=head$<s>3]");
return fd;
@@ -234,10 +259,10 @@
// child-of relations
SpanRelationQuery srq = new SpanRelationQuery(
new SpanTermQuery(new Term("base",">:child-of")),true);
- kr = ki.search(srq,(short) 20);
+ /*kr = ki.search(srq,(short) 20);
assertEquals(13, kr.getTotalResults());
-
+ */
// child-of with attr func=sbj
SpanWithAttributeQuery wq =
new SpanWithAttributeQuery(srq,
@@ -421,7 +446,7 @@
assertEquals(7,kr.getMatch(2).getEndPos());
assertEquals(4,kr.getMatch(3).getStartPos());
assertEquals(7,kr.getMatch(3).getEndPos());
- // id problem same like testcase5
+ // id problem same like testcase7 (solved)
}
/** Match left, return right
@@ -453,7 +478,7 @@
assertEquals(7,kr.getMatch(2).getEndPos());
assertEquals(4,kr.getMatch(3).getStartPos());
assertEquals(7,kr.getMatch(3).getEndPos());
- // id problem
+ // id problem (solved)
// return all children of relation targets/ right side
SpanRelationWithVariableQuery rv3 = new SpanRelationWithVariableQuery(
@@ -470,4 +495,88 @@
}*/
}
+
+ /** Relations whose sources/targets do not embed
+ * their counterparts.
+ * */
+ @Test
+ public void testCase8() throws IOException {
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
+
+ // match right
+
+ //return source of dep relations to pos:NN
+ SpanRelationWithVariableQuery rv =new SpanRelationWithVariableQuery(
+ new SpanRelationQuery(
+ new SpanTermQuery(new Term("base",">:dep")),true
+ ),
+ new SpanTermWithIdQuery(new Term("base","pos:NN"), true),
+ true, true);
+ kr = ki.search(rv,(short) 10);
+ assertEquals(3, kr.getTotalResults());
+ assertEquals(1,kr.getMatch(0).getStartPos());
+ assertEquals(2,kr.getMatch(0).getEndPos());
+ assertEquals(1,kr.getMatch(1).getStartPos());
+ assertEquals(2,kr.getMatch(1).getEndPos());
+ assertEquals(4,kr.getMatch(2).getStartPos());
+ assertEquals(5,kr.getMatch(2).getEndPos());
+
+ //return target of dep relations from pos:NN
+ rv =new SpanRelationWithVariableQuery(
+ new SpanRelationQuery(
+ new SpanTermQuery(new Term("base","<:dep")),true
+ ),
+ new SpanTermWithIdQuery(new Term("base","pos:NN"),true),
+ true, true);
+ kr = ki.search(rv,(short) 10);
+ assertEquals(3, kr.getTotalResults());
+ assertEquals(2,kr.getMatch(0).getStartPos());
+ assertEquals(3,kr.getMatch(0).getEndPos());
+ assertEquals(4,kr.getMatch(1).getStartPos());
+ assertEquals(5,kr.getMatch(1).getEndPos());
+ assertEquals(5,kr.getMatch(2).getStartPos());
+ assertEquals(6,kr.getMatch(2).getEndPos());
+
+ // matchleft
+
+ //return target of dep relations from pos:NN
+ rv =new SpanRelationWithVariableQuery(
+ new SpanRelationQuery(
+ new SpanTermQuery(new Term("base",">:dep")),true
+ ),
+ new SpanTermWithIdQuery(new Term("base","pos:NN"),true),
+ false, true);
+ kr = ki.search(rv,(short) 10);
+
+ assertEquals(3, kr.getTotalResults());
+ assertEquals(2,kr.getMatch(0).getStartPos());
+ assertEquals(3,kr.getMatch(0).getEndPos());
+ assertEquals(4,kr.getMatch(1).getStartPos());
+ assertEquals(5,kr.getMatch(1).getEndPos());
+ assertEquals(5,kr.getMatch(2).getStartPos());
+ assertEquals(6,kr.getMatch(2).getEndPos());
+
+ //return source of dep relations to pos:NN
+ rv =new SpanRelationWithVariableQuery(
+ new SpanRelationQuery(
+ new SpanTermQuery(new Term("base","<:dep")),true
+ ),
+ new SpanTermWithIdQuery(new Term("base","pos:NN"),true),
+ false, true);
+ kr = ki.search(rv,(short) 10);
+
+ /*for (KorapMatch km : kr.getMatches()){
+ System.out.println(km.getStartPos() +","+km.getEndPos()+" "
+ +km.getSnippetBrackets());
+ }*/
+
+ assertEquals(3, kr.getTotalResults());
+ assertEquals(1,kr.getMatch(0).getStartPos());
+ assertEquals(2,kr.getMatch(0).getEndPos());
+ assertEquals(1,kr.getMatch(1).getStartPos());
+ assertEquals(2,kr.getMatch(1).getEndPos());
+ assertEquals(4,kr.getMatch(2).getStartPos());
+ assertEquals(5,kr.getMatch(2).getEndPos());
+ }
}