Added SpanRelationQuery and RelationSpans - initial implementation
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
new file mode 100644
index 0000000..5bee05f
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanRelationQuery.java
@@ -0,0 +1,46 @@
+package de.ids_mannheim.korap.query;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ToStringUtils;
+
+import de.ids_mannheim.korap.query.spans.RelationSpans;
+
+/** Span query over relation terms; matching is delegated to {@link RelationSpans}. */
+public class SpanRelationQuery extends SimpleSpanQuery {
+
+    /** Wraps the clause that selects the relation terms. */
+    public SpanRelationQuery(SpanQuery firstClause, boolean collectPayloads) {
+        super(firstClause, collectPayloads);
+    }
+
+    /** Returns a new query built from a clone of the first clause. */
+    @Override
+    public SimpleSpanQuery clone() {
+        SpanQuery clauseCopy = (SpanQuery) this.firstClause.clone();
+        return new SpanRelationQuery(clauseCopy, this.collectPayloads);
+    }
+
+    /** Creates the relation spans enumeration for the given reader context. */
+    @Override
+    public Spans getSpans(AtomicReaderContext context, Bits acceptDocs,
+            Map<Term, TermContext> termContexts) throws IOException {
+        return new RelationSpans(this, context, acceptDocs, termContexts);
+    }
+
+    /** Renders the query as {@code spanRelation(<clause>)} followed by the boost suffix. */
+    @Override
+    public String toString(String field) {
+        String clauseText = firstClause.toString(field);
+        String boostText = ToStringUtils.boost(getBoost());
+        return "spanRelation(" + clauseText + ")" + boostText;
+    }
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
index 5879946..31bc8a9 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ElementSpans.java
@@ -118,12 +118,12 @@
// Copy some payloads like start character and end character
//payloadBuffer.put(payload.bytes, payload.offset, 8);
- cs.setEnd(readEndPostion(payload));
+ cs.setEnd(PayloadReader.readInteger(payload,8));
if (isElementRef ){
// Copy rest of payloads after the end position and elementref
//payloadBuffer.put(payload.bytes, payload.offset + 14, payload.length - 14);
- cs.setElementRef(readElementRef(payload));
+ cs.setElementRef(PayloadReader.readShort(payload,12));
}
else{
// Copy rest of payloads after the end position
@@ -134,7 +134,7 @@
//byte[] offsetCharacters = new byte[8];
//System.arraycopy(payloadBuffer.array(), 0, offsetCharacters, 0, 8);
- cs.setPayloads(Collections.singletonList(readOffset(payload)));
+ cs.setPayloads(Collections.singletonList(PayloadReader.readOffset(payload)));
}
else {
cs.setEnd(cs.getStart());
@@ -143,31 +143,6 @@
}
}
-
- /** Get the offset bytes from the payload.
- * */
- private byte[] readOffset(BytesRef payload){
- byte[] b = new byte[8];
- System.arraycopy(payload.bytes, payload.offset, b, 0, 8);
- return b;
- }
-
- /** Get the end position bytes from the payload and cast it to int.
- * */
- private int readEndPostion(BytesRef payload) {
- byte[] b = new byte[4];
- System.arraycopy(payload.bytes, payload.offset + 8, b, 0, 4);
- return ByteBuffer.wrap(b).getInt();
- }
-
- /** Get the elementRef bytes from the payload and cast it into short.
- * */
- private short readElementRef(BytesRef payload) {
- byte[] b = new byte[2];
- System.arraycopy(payload.bytes, payload.offset + 12, b, 0, 2);
- return ByteBuffer.wrap(b).getShort();
- }
-
@Override
public boolean skipTo(int target) throws IOException {
if (hasMoreSpans && (firstSpans.doc() < target)){
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/PayloadReader.java b/src/main/java/de/ids_mannheim/korap/query/spans/PayloadReader.java
new file mode 100644
index 0000000..a69a6d5
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/PayloadReader.java
@@ -0,0 +1,32 @@
+package de.ids_mannheim.korap.query.spans;
+
+import java.nio.ByteBuffer;
+
+import org.apache.lucene.util.BytesRef;
+
+/** Static helpers that decode fixed-width values from a payload {@link BytesRef};
+ *  multi-byte values are read with ByteBuffer's default (big-endian) byte order. */
+public class PayloadReader {
+
+    /** Returns a copy of the first 8 payload bytes (the offset bytes). */
+    public static byte[] readOffset(BytesRef payload){
+        return copyBytes(payload, 0, 8);
+    }
+
+    /** Decodes the 4 bytes at {@code start} (relative to the payload offset) as an int. */
+    public static int readInteger(BytesRef payload, int start) {
+        return ByteBuffer.wrap(copyBytes(payload, start, 4)).getInt();
+    }
+
+    /** Decodes the 2 bytes at {@code start} (relative to the payload offset) as a short. */
+    public static short readShort(BytesRef payload, int start) {
+        return ByteBuffer.wrap(copyBytes(payload, start, 2)).getShort();
+    }
+
+    /** Copies {@code length} bytes beginning {@code from} bytes past the payload offset. */
+    private static byte[] copyBytes(BytesRef payload, int from, int length) {
+        byte[] copy = new byte[length];
+        System.arraycopy(payload.bytes, payload.offset + from, copy, 0, length);
+        return copy;
+    }
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
new file mode 100644
index 0000000..24bbd82
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/RelationSpans.java
@@ -0,0 +1,236 @@
+package de.ids_mannheim.korap.query.spans;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.search.spans.TermSpans;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import de.ids_mannheim.korap.query.SpanRelationQuery;
+import de.ids_mannheim.korap.query.spans.ElementSpans.CandidateElementSpans;
+
+/** Enumeration of spans denoting relations between two tokens/elements. The start and end of
+ * a RelationSpan always denote the start and end of the source token/element.
+ *
+ * There are 4 types of relations, which are differentiated by the payload length in bytes.
+ * 1. Token to token relation (1 int & 1 short, length: 6)
+ * 2. Token to span (2 int & 1 short, length: 10)
+ * 3. Span to token (int, byte, int, short, length: 11)
+ * 4. Span to Span (3 int & 1 short, length: 14)
+ *
+ * Every integer value denotes the start/end position of the source/target of a relation,
+ * in this format: (sourceEndPos?, startTargetPos, endTargetPos?). The end position of a token is
+ * identical to its start position, and is therefore not saved in a payload.
+ *
+ * A short value denotes the relation id, used for matching relation-attributes.
+ * The byte in relation #3 is just a dummy to create a different length from the relation #2.
+ *
+ * NOTE: Sorting of the candidate spans can alternatively be done in indexing, instead of here.
+ *
+ * @author margaretha
+ * */
+public class RelationSpans extends SimpleSpans{
+
+    // Relation id and target positions of the current match; set by next() or the setters.
+    short relationId;
+    int targetStart, targetEnd;
+    int currentDoc, currentPosition; // document and start position currently being buffered
+
+    private TermSpans relationTermSpan;
+
+    protected Logger logger = LoggerFactory.getLogger(RelationSpans.class);
+    private List<CandidateRelationSpan> candidateList;
+
+    /** Opens the relation term spans and advances them to their first entry. */
+    public RelationSpans(SpanRelationQuery relationSpanQuery,
+            AtomicReaderContext context, Bits acceptDocs,
+            Map<Term, TermContext> termContexts) throws IOException {
+        super(relationSpanQuery, context, acceptDocs, termContexts);
+        candidateList = new ArrayList<>();
+        relationTermSpan = (TermSpans) firstSpans;
+        hasMoreSpans = relationTermSpan.next();
+    }
+
+    /** Advances to the next relation; relations sharing a source position come out sorted. */
+    @Override
+    public boolean next() throws IOException {
+        while (hasMoreSpans || !candidateList.isEmpty()) {
+            if (!candidateList.isEmpty()) {
+                CandidateRelationSpan cs = candidateList.remove(0);
+                this.matchDocNumber = cs.getDoc();
+                this.matchStartPosition = cs.getStart();
+                this.matchEndPosition = cs.getEnd();
+                this.matchPayload = cs.getPayloads();
+                // Publish target and id so the public getters describe the current match.
+                this.targetStart = cs.getTargetStart();
+                this.targetEnd = cs.getTargetEnd();
+                this.relationId = (short) cs.getRelationId();
+                return true;
+            }
+            setCandidateList();
+            currentDoc = relationTermSpan.doc();
+            currentPosition = relationTermSpan.start();
+        }
+        return false;
+    }
+
+    /** Buffers every relation at the current document and position, then sorts the buffer. */
+    private void setCandidateList() throws IOException {
+        while (hasMoreSpans && relationTermSpan.doc() == currentDoc
+                && relationTermSpan.start() == currentPosition) {
+            CandidateRelationSpan cs = new CandidateRelationSpan(relationTermSpan);
+            readPayload(cs);
+            candidateList.add(cs);
+            hasMoreSpans = relationTermSpan.next();
+        }
+        Collections.sort(candidateList);
+    }
+
+    /** Decodes the candidate's payload into source end, target positions and relation id. */
+    private void readPayload(CandidateRelationSpan cs) {
+        List<byte[]> payload = (List<byte[]>) cs.getPayloads();
+        int length = payload.get(0).length;
+        BytesRef payloadBytesRef = new BytesRef(payload.get(0));
+
+        int i;
+
+        switch (length) {
+            case 6: // Token to token
+                i = PayloadReader.readInteger(payloadBytesRef,0);
+                cs.setTargetStart(i);
+                cs.setTargetEnd(i);
+                break;
+
+            case 10: // Token to span
+                cs.setTargetStart(PayloadReader.readInteger(payloadBytesRef,0));
+                cs.setTargetEnd(PayloadReader.readInteger(payloadBytesRef,4));
+                break;
+
+            case 11: // Span to token (byte 4 is the dummy byte)
+                cs.setEnd(PayloadReader.readInteger(payloadBytesRef,0));
+                i = PayloadReader.readInteger(payloadBytesRef,5);
+                cs.setTargetStart(i);
+                cs.setTargetEnd(i);
+                break;
+
+            case 14: // Span to span: 3 ints + 1 short (13 bytes cannot hold them)
+                cs.setEnd(PayloadReader.readInteger(payloadBytesRef,0));
+                cs.setTargetStart(PayloadReader.readInteger(payloadBytesRef,4));
+                cs.setTargetEnd(PayloadReader.readInteger(payloadBytesRef,8));
+                break;
+        }
+
+        // The relation id is read from the last two bytes, whatever the payload length.
+        cs.setRelationId(PayloadReader.readShort(payloadBytesRef, length-2));
+    }
+
+    /** Skips to the first relation beyond the current one whose document number is at least
+     *  {@code target}; returns false when no such relation exists. */
+    @Override
+    public boolean skipTo(int target) throws IOException {
+        if (hasMoreSpans && relationTermSpan.doc() < target) {
+            // Buffered candidates never lie beyond the term spans, so they precede target too.
+            candidateList.clear();
+            hasMoreSpans = relationTermSpan.skipTo(target);
+        }
+        else if (!candidateList.isEmpty() && candidateList.get(0).getDoc() < target) {
+            candidateList.clear();
+        }
+        return next();
+    }
+
+    /** Reports the cost of the underlying relation term spans. */
+    @Override
+    public long cost() {
+        return relationTermSpan.cost();
+    }
+
+    /** Returns the relation id of the current match. */
+    public short getRelationId() {
+        return relationId;
+    }
+
+    public void setRelationId(short relationId) {
+        this.relationId = relationId;
+    }
+
+    /** Returns the start position of the current match's target. */
+    public int getTargetStart() {
+        return targetStart;
+    }
+
+    public void setTargetStart(int targetStart) {
+        this.targetStart = targetStart;
+    }
+
+    /** Returns the end position of the current match's target. */
+    public int getTargetEnd() {
+        return targetEnd;
+    }
+
+    public void setTargetEnd(int targetEnd) {
+        this.targetEnd = targetEnd;
+    }
+
+    /** Candidate relation match: the source span plus its target positions and relation id. */
+    class CandidateRelationSpan extends CandidateSpan implements Comparable<CandidateSpan>{
+
+        private int targetStart, targetEnd, relationId;
+
+        public CandidateRelationSpan(Spans span) throws IOException{
+            super(span);
+        }
+
+        /** Orders by the inherited span comparison, then by target start, then by target end. */
+        @Override
+        public int compareTo(CandidateSpan o) {
+            int sourcePositionComparison = super.compareTo(o);
+            // Cast up front, as before: a non-relation candidate fails with ClassCastException.
+            CandidateRelationSpan cs = (CandidateRelationSpan) o;
+            if (sourcePositionComparison != 0) {
+                return sourcePositionComparison;
+            }
+            if (this.getTargetStart() != cs.getTargetStart()) {
+                return Integer.compare(this.getTargetStart(), cs.getTargetStart());
+            }
+            return Integer.compare(this.getTargetEnd(), cs.getTargetEnd());
+        }
+
+        public int getTargetEnd() {
+            return targetEnd;
+        }
+
+        public void setTargetEnd(int targetEnd) {
+            this.targetEnd = targetEnd;
+        }
+
+        public int getTargetStart() {
+            return targetStart;
+        }
+
+        public void setTargetStart(int targetStart) {
+            this.targetStart = targetStart;
+        }
+
+        public int getRelationId() {
+            return relationId;
+        }
+
+        public void setRelationId(int relationId) {
+            this.relationId = relationId;
+        }
+    }
+
+}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
index b88a10b..4bbec84 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
@@ -1,3 +1,15 @@
+package de.ids_mannheim.korap.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.junit.Test;
+
+import de.ids_mannheim.korap.KorapIndex;
+import de.ids_mannheim.korap.KorapResult;
+import de.ids_mannheim.korap.query.SpanRelationQuery;
+
/*
within(x,y)
@@ -36,3 +48,42 @@
*/
+
+/** Index-level test for {@link SpanRelationQuery}; currently it runs a search without assertions. */
+public class TestRelationIndex {
+    private KorapIndex ki;
+    private KorapResult kr;
+
+    public TestRelationIndex() throws IOException {
+        ki = new KorapIndex();
+    }
+
+    /** Builds document "doc-0": ten tokens annotated with xip/syntax-dep_rel relation terms. */
+    private FieldDocument createFieldDoc0(){
+        String annotation =
+            "[(0-1)s:c|_1#0-1|>:xip/syntax-dep_rel$<i>7<s>1]" +
+            "[(1-2)s:e|_2#1-2|<:xip/syntax-dep_rel$<i>10<s>1|>:xip/syntax-dep_rel$<i>5<s>1]" +
+            "[(2-3)s:c|_3#2-3]" +
+            "[(3-4)s:c|s:b|_4#3-4|<:xip/syntax-dep_rel$<i>10<s>1]" +
+            "[(4-5)s:e|s:d|_5#4-5|<:xip/syntax-dep_rel$<i>2<s>1]" +
+            "[(5-6)s:c|_6#5-6]" +
+            "[(6-7)s:d|_7#6-7|<:xip/syntax-dep_rel$<i>1<s>1]" +
+            "[(7-8)s:e|_8#7-8]" +
+            "[(8-9)s:e|s:b|_9#8-9]" +
+            "[(9-10)s:d|_10#9-10|>:xip/syntax-dep_rel$<i>2<s>2|>:xip/syntax-dep_rel$<i>1<s>1]";
+        FieldDocument fd = new FieldDocument();
+        fd.addString("ID", "doc-0");
+        fd.addTV("base", "text", annotation);
+        return fd;
+    }
+
+    /** Test token-token relation: indexes doc-0 and searches the ">:xip/syntax-dep_rel" term. */
+    @Test
+    public void testCase1() throws IOException {
+        ki.addDoc(createFieldDoc0());
+        ki.commit();
+        Term relationTerm = new Term("base", ">:xip/syntax-dep_rel");
+        SpanRelationQuery sq = new SpanRelationQuery(new SpanTermQuery(relationTerm), true);
+        ki.search(sq,(short) 10);
+    }
+}