Added SpanReferenceQuery.
Change-Id: Ica0fc48c4e96b3871dc3d05839d1e7ba46f941da
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanReferenceQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanReferenceQuery.java
new file mode 100644
index 0000000..9c2484d
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanReferenceQuery.java
@@ -0,0 +1,99 @@
+package de.ids_mannheim.korap.query;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Bits;
+
+import de.ids_mannheim.korap.query.spans.ReferenceSpans;
+
+/**
+ * Span query that wraps a single child span query together with a class
+ * number. The spans it produces are {@link ReferenceSpans}, which keep only
+ * those child spans whose payloads carry the given class number more than
+ * once, always with the same start and end positions.
+ */
+public class SpanReferenceQuery extends SimpleSpanQuery {
+
+    private byte classNum;
+
+
+    /**
+     * Constructs a new SpanReferenceQuery.
+     *
+     * @param firstClause
+     *            the child span query whose spans are filtered
+     * @param classNum
+     *            the class number whose occurrences must coincide
+     * @param collectPayloads
+     *            handed to the superclass constructor unchanged
+     */
+    public SpanReferenceQuery (SpanQuery firstClause, byte classNum,
+                               boolean collectPayloads) {
+        super(firstClause, collectPayloads);
+        this.classNum = classNum;
+    }
+
+
+    /**
+     * Creates a copy of this query with a cloned child query, the same
+     * class number and the same boost.
+     *
+     * @return the copy; never {@code null}
+     */
+    @Override
+    public SimpleSpanQuery clone () {
+        // NOTE(review): relies on SimpleSpanQuery exposing the
+        // collectPayloads flag to subclasses - confirm.
+        SpanReferenceQuery copy = new SpanReferenceQuery(
+                (SpanQuery) firstClause.clone(), classNum, collectPayloads);
+        copy.setBoost(getBoost());
+        return copy;
+    }
+
+
+    /**
+     * Creates the {@link ReferenceSpans} that enumerate the matches of
+     * this query for the given reader context.
+     */
+    @Override
+    public Spans getSpans (AtomicReaderContext context, Bits acceptDocs,
+            Map<Term, TermContext> termContexts) throws IOException {
+        return new ReferenceSpans(this, context, acceptDocs, termContexts);
+    }
+
+
+    /**
+     * Renders the query as
+     * {@code spanReference(<child query>, <class number>)}. The
+     * {@code field} argument is not used.
+     */
+    @Override
+    public String toString (String field) {
+        return "spanReference(" + firstClause.toString() + ", " + classNum
+                + ")";
+    }
+
+
+    /**
+     * @return the class number checked by this query
+     */
+    public byte getClassNum () {
+        return classNum;
+    }
+
+
+    /**
+     * @param classNum
+     *            the class number to be checked by this query
+     */
+    public void setClassNum (byte classNum) {
+        this.classNum = classNum;
+    }
+
+}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/ReferenceSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/ReferenceSpans.java
new file mode 100644
index 0000000..a860afd
--- /dev/null
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/ReferenceSpans.java
@@ -0,0 +1,168 @@
+package de.ids_mannheim.korap.query.spans;
+
+import static de.ids_mannheim.korap.util.KrillByte.byte2int;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.util.Bits;
+
+import de.ids_mannheim.korap.query.SpanReferenceQuery;
+
+/**
+ * Spans that pass on only those child spans in which the referenced class
+ * occurs more than once among the payloads and every occurrence has the
+ * same start and end positions.
+ */
+public class ReferenceSpans extends SimpleSpans {
+
+    // Payloads of this length are treated as class payloads: two ints read
+    // with byte2int at offsets 0 and 4, followed by the class number byte.
+    private static final int CLASS_PAYLOAD_LENGTH = 9;
+    private static final int START_OFFSET = 0;
+    private static final int END_OFFSET = 4;
+    private static final int CLASS_NUMBER_INDEX = 8;
+
+    private final byte classNum;
+
+
+    /**
+     * Constructs ReferenceSpans for the given query and positions the
+     * child spans on their first span.
+     *
+     * @param query
+     *            the query supplying the child spans and the class number
+     * @param context
+     *            passed on to the superclass
+     * @param acceptDocs
+     *            passed on to the superclass
+     * @param termContexts
+     *            passed on to the superclass
+     * @throws IOException
+     *             if the superclass or the child spans fail to read
+     */
+    public ReferenceSpans (SpanReferenceQuery query,
+                           AtomicReaderContext context, Bits acceptDocs,
+                           Map<Term, TermContext> termContexts)
+            throws IOException {
+        super(query, context, acceptDocs, termContexts);
+        this.classNum = query.getClassNum();
+        hasMoreSpans = firstSpans.next();
+    }
+
+
+    /**
+     * Advances to the next child span that passes the class position
+     * check and exposes its document, start and end as the current match.
+     *
+     * @return {@code true} if such a span was found
+     */
+    @Override
+    public boolean next () throws IOException {
+        while (hasMoreSpans) {
+            if (hasSameClassPosition()) {
+                matchDocNumber = firstSpans.doc();
+                matchStartPosition = firstSpans.start();
+                matchEndPosition = firstSpans.end();
+                // Move the child on now so the following call starts
+                // with a fresh candidate.
+                hasMoreSpans = firstSpans.next();
+                return true;
+            }
+            hasMoreSpans = firstSpans.next();
+        }
+        return false;
+    }
+
+
+    /**
+     * Inspects the payloads of the current child span and rebuilds
+     * {@code matchPayload} from them. The first payload of the referenced
+     * class fixes the expected positions; further payloads of that class
+     * must repeat exactly these positions and are not stored again.
+     *
+     * @return {@code true} if the referenced class occurs at least twice
+     *         and all examined occurrences share the same positions
+     */
+    private boolean hasSameClassPosition () throws IOException {
+        matchPayload.clear();
+
+        // Without payloads there is no class information to compare, and
+        // getPayload() must not be relied upon in that state.
+        if (!firstSpans.isPayloadAvailable()) {
+            return false;
+        }
+
+        int start = 0, end = 0;
+        boolean isFound = false;
+        boolean match = false;
+
+        for (byte[] payload : firstSpans.getPayload()) {
+            if (!isReferencedClass(payload)) {
+                matchPayload.add(payload);
+                continue;
+            }
+
+            int payloadStart = byte2int(payload, START_OFFSET);
+            int payloadEnd = byte2int(payload, END_OFFSET);
+
+            if (!isFound) {
+                start = payloadStart;
+                end = payloadEnd;
+                isFound = true;
+                matchPayload.add(payload);
+            }
+            else if (start == payloadStart && end == payloadEnd) {
+                match = true;
+            }
+            else {
+                // A diverging occurrence rules the span out for good.
+                match = false;
+                break;
+            }
+        }
+        return match;
+    }
+
+
+    /**
+     * Tells whether the payload is a class payload of the referenced class.
+     */
+    private boolean isReferencedClass (byte[] payload) {
+        return payload.length == CLASS_PAYLOAD_LENGTH
+                && payload[CLASS_NUMBER_INDEX] == classNum;
+    }
+
+
+    /**
+     * Moves the child spans to the target document if they are positioned
+     * before it, then continues with {@link #next()}.
+     *
+     * @param target
+     *            the document number to skip to
+     * @return {@code true} if a matching span was found
+     */
+    @Override
+    public boolean skipTo (int target) throws IOException {
+        if (hasMoreSpans && (firstSpans.doc() < target)) {
+            if (!firstSpans.skipTo(target)) {
+                hasMoreSpans = false;
+                return false;
+            }
+        }
+        return next();
+    }
+
+
+    /**
+     * @return the cost reported by the child spans
+     */
+    @Override
+    public long cost () {
+        return firstSpans.cost();
+    }
+
+}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestReferenceIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestReferenceIndex.java
new file mode 100644
index 0000000..2a167a0
--- /dev/null
+++ b/src/test/java/de/ids_mannheim/korap/index/TestReferenceIndex.java
@@ -0,0 +1,318 @@
+package de.ids_mannheim.korap.index;
+
+import static de.ids_mannheim.korap.TestSimple.getJSONQuery;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.junit.Test;
+
+import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.query.DistanceConstraint;
+import de.ids_mannheim.korap.query.SpanClassQuery;
+import de.ids_mannheim.korap.query.SpanDistanceQuery;
+import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.query.SpanFocusQuery;
+import de.ids_mannheim.korap.query.SpanNextQuery;
+import de.ids_mannheim.korap.query.SpanReferenceQuery;
+import de.ids_mannheim.korap.query.SpanRelationMatchQuery;
+import de.ids_mannheim.korap.query.SpanRelationQuery;
+import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.util.QueryException;
+
+/**
+ * Tests for {@link SpanReferenceQuery}, covering queries that are built
+ * programmatically as well as queries deserialized from JSON-LD resources.
+ */
+public class TestReferenceIndex {
+    private KrillIndex ki;
+    private Result kr;
+
+    /**
+     * Builds a reference query on class 3 by hand and checks its string
+     * representation and the positions of the first match.
+     */
+    @Test
+    public void testCase1 () throws IOException {
+        ki = new KrillIndex();
+        ki.addDoc(TestRelationIndex.createFieldDoc2());
+        ki.commit();
+
+        SpanTermQuery seq1 = new SpanTermQuery(new Term("base", "pos:V"));
+        SpanElementQuery seq2 = new SpanElementQuery("base", "np");
+        SpanClassQuery scq1 = new SpanClassQuery(seq1, (byte) 1);
+        SpanClassQuery scq2 = new SpanClassQuery(seq2, (byte) 2);
+        SpanNextQuery snq1 = new SpanNextQuery(scq1, scq2);
+
+        SpanFocusQuery sfq1 = new SpanFocusQuery(snq1, (byte) 2);
+
+        SpanRelationQuery srq = new SpanRelationQuery(new SpanTermQuery(
+                new Term("base", "<:child-of")), true);
+
+        SpanElementQuery seq3 = new SpanElementQuery("base", "pp");
+        SpanClassQuery scq3 = new SpanClassQuery(seq3, (byte) 3);
+
+        SpanRelationMatchQuery rq = new SpanRelationMatchQuery(srq, sfq1,
+                scq3, true);
+        SpanFocusQuery sfq3 = new SpanFocusQuery(rq, (byte) 1);
+
+        DistanceConstraint constraint = new DistanceConstraint(3, 3, true,
+                false);
+        SpanDistanceQuery sdq = new SpanDistanceQuery(sfq3, scq3, constraint,
+                true);
+
+        SpanReferenceQuery ref = new SpanReferenceQuery(sdq, (byte) 3, true);
+
+        kr = ki.search(ref, (short) 10);
+
+        assertEquals(
+                "spanReference(spanDistance(focus(1: focus(#[1,2]spanSegment("
+                        + "focus(#1: spanSegment(spanRelation(base:<:child-of), focus(2: spanNext("
+                        + "{1: base:pos:V}, {2: <base:np />})))), {3: <base:pp />}))), "
+                        + "{3: <base:pp />}, [(w[3:3], ordered, notExcluded)]), 3)",
+                ref.toString());
+
+        assertEquals(1, kr.getMatch(0).getStartPos());
+        assertEquals(7, kr.getMatch(0).getEndPos());
+    }
+
+    /**
+     * Checks that the JSON-LD query and the hand-built query have the same
+     * string representation, verifies the matches of the hand-built query,
+     * and then nests it in a second reference query.
+     */
+    @Test
+    public void testCase2 () throws IOException, QueryException {
+
+        String filepath = getClass().getResource(
+                "/queries/reference/distance-reference.jsonld").getFile();
+        SpanQueryWrapper sqwi = getJSONQuery(filepath);
+        SpanQuery sq = sqwi.toQuery();
+
+        // cat="vb" & cat="prp" & cat="nn" & #1 .notordered #2 & #1
+        // .{0,2} #3 & #3 -> #2
+
+        assertEquals(
+                "spanReference(focus(#[1,2]spanSegment(focus(#2: "
+                        + "spanSegment(spanRelation(tokens:>:stanford/d:tag), "
+                        + "focus(3: spanDistance(focus(1: spanDistance({1: <tokens:vb />}, "
+                        + "{2: <tokens:prp />}, [(w[0:1], notOrdered, notExcluded)])), "
+                        + "{3: <tokens:nn />}, [(w[0:2], notOrdered, notExcluded)])))), "
+                        + "{2: <tokens:prp />})), 2)",
+                sq.toString());
+
+        SpanElementQuery seq1 = new SpanElementQuery("tokens", "vb");
+        SpanElementQuery seq2 = new SpanElementQuery("tokens", "prp");
+        SpanElementQuery seq3 = new SpanElementQuery("tokens", "nn");
+        SpanClassQuery scq1 = new SpanClassQuery(seq1, (byte) 1);
+        SpanClassQuery scq2 = new SpanClassQuery(seq2, (byte) 2);
+        SpanClassQuery scq3 = new SpanClassQuery(seq3, (byte) 3);
+
+        // vb .{0,1} prp
+        SpanDistanceQuery sdq1 = new SpanDistanceQuery(scq1, scq2,
+                new DistanceConstraint(0, 1, false, false), true);
+        SpanFocusQuery sfq1 = new SpanFocusQuery(sdq1, (byte) 1);
+
+        // vb .{0,2} nn
+        SpanDistanceQuery sdq2 = new SpanDistanceQuery(sfq1, scq3,
+                new DistanceConstraint(0, 2, false, false), true);
+        SpanFocusQuery sfq2 = new SpanFocusQuery(sdq2, (byte) 3);
+
+        // nn -> prp
+        SpanRelationQuery srq = new SpanRelationQuery(new SpanTermQuery(
+                new Term("tokens", ">:stanford/d:tag")), true);
+        SpanRelationMatchQuery rq = new SpanRelationMatchQuery(srq, sfq2, scq2,
+                true);
+
+        SpanReferenceQuery ref = new SpanReferenceQuery(rq, (byte) 2, true);
+
+        ki = new KrillIndex();
+        ki.addDoc(createFieldDoc0());
+        ki.commit();
+
+        kr = ki.search(ref, (short) 10);
+
+        assertEquals(sq.toString(), ref.toString());
+        assertEquals(2, kr.getMatch(0).getStartPos());
+        assertEquals(4, kr.getMatch(0).getEndPos());
+        assertEquals(5, kr.getMatch(1).getStartPos());
+        assertEquals(9, kr.getMatch(1).getEndPos());
+        assertEquals(11, kr.getMatch(2).getStartPos());
+        assertEquals(13, kr.getMatch(2).getEndPos());
+
+        // multiple references
+
+        SpanFocusQuery sfq3 = new SpanFocusQuery(ref, (byte) 1);
+        // vb -> nn
+        SpanRelationMatchQuery rq2 = new SpanRelationMatchQuery(srq, sfq3,
+                scq3, true);
+
+        SpanReferenceQuery ref2 = new SpanReferenceQuery(rq2, (byte) 3, true);
+
+        kr = ki.search(ref2, (short) 10);
+        assertEquals(1, kr.getMatch(0).getStartPos());
+        assertEquals(4, kr.getMatch(0).getEndPos());
+        assertEquals(10, kr.getMatch(1).getStartPos());
+        assertEquals(13, kr.getMatch(1).getEndPos());
+    }
+
+    /**
+     * Runs the JSON-LD query with two nested references and checks its
+     * string representation and the positions of the first two matches.
+     */
+    @Test
+    public void testCase3 () throws IOException, QueryException {
+        ki = new KrillIndex();
+        ki.addDoc(createFieldDoc0());
+        ki.commit();
+
+        String filepath = getClass().getResource(
+                "/queries/reference/distance-multiple-references.jsonld")
+                .getFile();
+        SpanQueryWrapper sqwi = getJSONQuery(filepath);
+        SpanQuery sq = sqwi.toQuery();
+
+        // 'cat="vb" & cat="prp" & cat="nn" & #1 .{0,1} #2 & #1 .{0,2}
+        // #3 & #3 ->stanford/d #2 & #1 ->stanford #3' annis
+        // without layer=c and + relation key
+        assertEquals(
+                "spanReference(focus(#[1,2]spanSegment(focus(#2: spanSegment(spanRelation(tokens:>:stanford/d:tag), "
+                        + "focus(1: spanReference(focus(#[1,2]spanSegment(focus(#2: spanSegment(spanRelation(tokens:>:stanford/d:tag), "
+                        + "focus(3: spanDistance(focus(1: spanDistance({1: <tokens:vb />}, {2: <tokens:prp />}, "
+                        + "[(w[0:1], notOrdered, notExcluded)])), {3: <tokens:nn />}, [(w[0:2], notOrdered, notExcluded)])))), "
+                        + "{2: <tokens:prp />})), 2)))), {3: <tokens:nn />})), 3)",
+                sq.toString());
+        kr = ki.search(sq, (short) 10);
+
+        assertEquals(1, kr.getMatch(0).getStartPos());
+        assertEquals(4, kr.getMatch(0).getEndPos());
+        assertEquals(10, kr.getMatch(1).getStartPos());
+        assertEquals(13, kr.getMatch(1).getEndPos());
+    }
+
+    /**
+     * Runs the single-reference JSON-LD query over two documents and checks
+     * the total result count and the fourth match, expected in "doc-1".
+     */
+    @Test
+    public void testCase4 () throws Exception {
+        ki = new KrillIndex();
+        ki.addDoc(createFieldDoc0());
+        ki.addDoc(createFieldDoc1());
+        ki.commit();
+
+        String filepath = getClass().getResource(
+                "/queries/reference/distance-reference.jsonld")
+                .getFile();
+        SpanQueryWrapper sqwi = getJSONQuery(filepath);
+        SpanQuery sq = sqwi.toQuery();
+
+        kr = ki.search(sq, (short) 10);
+
+        assertEquals(4, kr.getTotalResults());
+        assertEquals("doc-1", kr.getMatch(3).getDocID());
+        assertEquals(2, kr.getMatch(3).getStartPos());
+        assertEquals(4, kr.getMatch(3).getEndPos());
+    }
+
+    /**
+     * Creates the document "doc-1"; only tokens 0 to 4 are annotated.
+     */
+    public static FieldDocument createFieldDoc1 () {
+        FieldDocument fd = new FieldDocument();
+        fd.addString("ID", "doc-1");
+        fd.addTV(
+                "tokens",
+                "Frankenstein, treat my daughter well. She is the one that saved your master who you hold so dear.",
+
+                "[(0-12)s:Frankenstein|_0#0-12|<>:nn#0-12$<i>1<s>18|<>:s#0-37$<i>4<s>1|"
+                        + "<>:np#0-13$<i>1<s>2|"
+                        + "<:stanford/d:tag$<i>1<s>1<s>18<s>19]"
+
+                        + "[(14-19)s:treat|_1#14-19|<>:vb#14-19$<i>2<s>19|<>:vp#14-36$<i>4<s>3|"
+                        + ">:stanford/d:tag$<i>0<s>2<s>19<s>18|"
+                        + ">:stanford/d:tag$<i>3<s>3<s>19<s>21|"
+                        + ">:stanford/d:tag$<i>4<s>4<s>19<s>22]"
+
+                        + "[(20-22)s:my|_2#20-22|<>:prp#20-22$<i>3<s>20|<>:np#20-31$<i>3<s>4]"
+
+                        + "[(23-31)s:daughter|_3#23-31|<>:nn#23-31$<i>4<s>21|"
+                        + ">:stanford/d:tag$<i>2<s>5<s>21<s>20]"
+
+                        + "[(32-36)s:well|_4#32-36|<>:rb#32-36$<i>5<s>22|<>:advp#32-36$<i>4<s>5]");
+        return fd;
+    }
+
+    /**
+     * Creates the document "doc-0" with annotations for tokens 0 to 17.
+     */
+    public static FieldDocument createFieldDoc0 () {
+        FieldDocument fd = new FieldDocument();
+        fd.addString("ID", "doc-0");
+        fd.addTV(
+                "tokens",
+                "Frankenstein, treat my daughter well. She is the one that saved your master who you hold so dear.",
+
+                "[(0-12)s:Frankenstein|_0#0-12|<>:nn#0-12$<i>1<s>18|<>:s#0-37$<i>4<s>1|"
+                        + "<>:np#0-13$<i>1<s>2|"
+                        + "<:stanford/d:tag$<i>1<s>1<s>18<s>19]"
+
+                        + "[(14-19)s:treat|_1#14-19|<>:vb#14-19$<i>2<s>19|<>:vp#14-36$<i>4<s>3|"
+                        + ">:stanford/d:tag$<i>0<s>2<s>19<s>18|"
+                        + ">:stanford/d:tag$<i>3<s>3<s>19<s>21|"
+                        + ">:stanford/d:tag$<i>4<s>4<s>19<s>22]"
+
+                        + "[(20-22)s:my|_2#20-22|<>:prp#20-22$<i>3<s>20|<>:np#20-31$<i>3<s>4]"
+
+                        + "[(23-31)s:daughter|_3#23-31|<>:nn#23-31$<i>4<s>21|"
+                        + ">:stanford/d:tag$<i>2<s>5<s>21<s>20]"
+
+                        + "[(32-36)s:well|_4#32-36|<>:rb#32-36$<i>5<s>22|<>:advp#32-36$<i>4<s>5]"
+
+                        + "[(38-41)s:She|_5#38-41|<>:prp#38-41$<i>6<s>23|<>:s#38-97$<i>17<s>6]"
+
+                        + "[(42-44)s:is|_6#42-44|<>:vb#42-44$<i>7<s>24|<>:vp#42-96$<i>17<s>7]"
+
+                        + "[(45-48)s:the|_7#45-48|<>:dt#45-48$<i>8<s>25|<>:np#45-52$<i>8<s>8|<>:np#45-96$<i>17<s>9]"
+
+                        + "[(49-52)s:one|_8#49-52|<>:nn#49-52$<i>9<s>26|"
+                        + ">:stanford/d:tag$<i>5<s>6<s>26<s>23|"
+                        + ">:stanford/d:tag$<i>6<s>7<s>26<s>24|"
+                        + ">:stanford/d:tag$<i>7<s>8<s>26<s>25|"
+                        + ">:stanford/d:tag$<i>10<s>9<s>26<s>28]"
+
+                        + "[(53-57)s:that|_9#53-57|<>:rp#53-57$<i>10<s>27|<>:sb#53-96$<i>17<s>10]"
+
+                        + "[(58-63)s:saved|_10#58-63|<>:vb#58-63$<i>11<s>28|<>:s#58-96$<i>17<s>11|"
+                        + "<>:vp#58-96$<i>17<s>12|"
+                        + ">:stanford/d:tag$<i>9<s>10<s>28<s>27|"
+                        + ">:stanford/d:tag$<i>12<s>11<s>28<s>30|"
+                        + ">:stanford/d:tag$<i>15<s>12<s>28<s>33]"
+
+                        + "[(64-68)s:your|_11#64-68|<>:prp#64-68$<i>12<s>29|<>:np#64-75$<i>12<s>13]"
+
+                        + "[(69-75)s:master|_12#69-75|<>:nn#69-75$<i>13<s>30|"
+                        + ">:stanford/d:tag$<i>11<s>13<s>30<s>29]"
+
+                        + "[(76-79)s:who|_13#76-79|<>:rp#76-79$<i>14<s>31|<>:sb#76-96$<i>17<s>14]"
+
+                        + "[(80-83)s:you|_14#80-83|<>:prp#80-83$<i>15<s>32|<>:s#80-96$<i>17<s>15]"
+
+                        + "[(84-88)s:hold|_15#84-88|<>:vb#84-88$<i>16<s>33|<>:vp#84-96$<i>17<s>16|"
+                        + ">:stanford/d:tag$<i>13<s>14<s>33<s>31|"
+                        + ">:stanford/d:tag$<i>14<s>15<s>33<s>32|"
+                        + ">:stanford/d:tag$<i>17<s>16<s>33<s>35]"
+
+                        + "[(89-91)s:so|_16#89-91|<>:rb#89-91$<i>17<s>34|<>:adjp#89-96$<i>17<s>17]"
+
+                        + "[(92-96)s:dear|_17#92-96|<>:jj#92-96$<i>18<s>35|"
+                        + ">:stanford/d:tag$<i>16<s>17<s>35<s>34]"
+                );
+
+        return fd;
+    }
+}