Added skipTo test for DistanceExclusion,
added elementEnd payload for AttributeSpans
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
index 6a32009..74aafc7 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/AttributeSpans.java
@@ -35,12 +35,10 @@
private int currentDoc, currentPosition;
private short spanId;
private boolean isFinish;
+ private int elementEnd;
protected Logger logger = LoggerFactory.getLogger(AttributeSpans.class);
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
-
public AttributeSpans(SpanAttributeQuery simpleSpanQuery,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts) throws IOException {
@@ -73,12 +71,12 @@
this.matchStartPosition = cs.getStart();
this.matchEndPosition = cs.getEnd();
this.setSpanId(cs.getSpanId());
+ this.setElementEnd(cs.getElementEnd());
candidateList.remove(0);
return true;
}
else{
- if (DEBUG)
- logger.info("Setting candidate list");
+ //logger.info("Setting candidate list");
setCandidateList();
// for (CandidateAttributeSpan cs: candidateList){
// logger.info("cs ref "+cs.getElementRef());
@@ -99,9 +97,7 @@
while (hasMoreSpans && firstSpans.doc() == currentDoc &&
firstSpans.start() == currentPosition){
- short spanId = retrieveSpanId(firstSpans);
- //logger.info("ElementRef: "+elementRef);
- candidateList.add(new CandidateAttributeSpan(firstSpans,spanId));
+ candidateList.add(createCandidateSpan(firstSpans));
hasMoreSpans = firstSpans.next();
}
@@ -109,17 +105,16 @@
Collections.reverse(candidateList);
}
- /** Get the elementRef from payload
- * */
- private short retrieveSpanId(Spans firstSpans) throws IOException {
+ private CandidateAttributeSpan createCandidateSpan(Spans firstSpans) throws IOException {
List<byte[]> payload = (List<byte[]>) firstSpans.getPayload();
- long s = System.nanoTime();
ByteBuffer wrapper = ByteBuffer.wrap(payload.get(0));
- short num = wrapper.getShort();
- long e = System.nanoTime();
- if (DEBUG)
- logger.info("Bytebuffer runtime "+ (e-s));
- return num;
+
+ short spanId = wrapper.getShort(0);
+ int elementEnd = -1;
+ if (payload.get(0).length == 6){
+ elementEnd = wrapper.getInt(2);
+ }
+ return new CandidateAttributeSpan(firstSpans,spanId, elementEnd);
}
public short getSpanId(){
@@ -130,6 +125,14 @@
this.spanId = spanId;
}
+ public int getElementEnd() {
+ return elementEnd;
+ }
+
+ public void setElementEnd(int elementEnd) {
+ this.elementEnd = elementEnd;
+ }
+
public boolean isFinish() {
return isFinish;
}
@@ -163,11 +166,13 @@
implements Comparable<CandidateSpans>{
private short spanId;
+ private int elementEnd;
- public CandidateAttributeSpan(Spans span, short spanId)
+ public CandidateAttributeSpan(Spans span, short spanId, int elementEnd)
throws IOException {
super(span);
setSpanId(spanId);
+ setElementEnd(elementEnd);
}
public void setSpanId(short spanId) {
@@ -177,6 +182,14 @@
return spanId;
}
+ public int getElementEnd() {
+ return elementEnd;
+ }
+
+ public void setElementEnd(int elementEnd) {
+ this.elementEnd = elementEnd;
+ }
+
@Override
public int compareTo(CandidateSpans o) {
CandidateAttributeSpan cs = (CandidateAttributeSpan) o;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpans.java
index deac119..3a029ce 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpans.java
@@ -14,7 +14,7 @@
private long cost;
private Collection<byte[]> payloads = new ArrayList<>();
private int position;
- private CandidateSpans childSpan; // used for multiple distance with unordered constraint
+ private CandidateSpans childSpan; // used for example for multiple distance with unordered constraint
protected short spanId;
@@ -25,15 +25,6 @@
this.cost = span.cost();
if (span.isPayloadAvailable())
setPayloads(span.getPayload());
-
- /*if (span instanceof ElementSpans ){
- ElementSpans s = (ElementSpans) span;
- this.elementRef = s.getElementRef();
- }
- else if (span instanceof AttributeSpans){
- AttributeSpans s = (AttributeSpans) span;
- this.elementRef = s.getElementRef();
- } */
}
@Override
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpans.java
index 5e3fc1d..cff3100 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/DistanceExclusionSpans.java
@@ -21,9 +21,6 @@
private boolean isOrdered;
private boolean hasMoreSecondSpans;
- // This advices the java compiler to ignore all loggings
- public static final boolean DEBUG = false;
-
public DistanceExclusionSpans(SpanDistanceQuery query,
AtomicReaderContext context, Bits acceptDocs,
Map<Term, TermContext> termContexts, boolean isOrdered)
@@ -59,7 +56,7 @@
// skip the secondSpan to the right side of the firstspan
while (hasMoreSecondSpans && secondSpans.doc() == firstSpans.doc() &&
- firstSpans.start() >= secondSpans.end()){
+ firstSpans.start() >= secondSpans.end()){
// the firstspan is within maxDistance
if (!isOrdered && calculateActualDistance() <= maxDistance){
@@ -70,6 +67,11 @@
}
}
+ /** Calculate the distance / difference between the positions of a
+ * firstspan and a secondspan.
+ *
+ * @return distance
+ * */
private int calculateActualDistance(){
// right secondSpan
if (firstSpans.end() <= secondSpans.start())
@@ -78,6 +80,13 @@
return firstSpans.start() - secondSpans.end() +1;
}
+ /** Check the distance between the current first span and second span
+ * against the min and max distance constraints.
+ *
+ * @return true if the distance between the first and the second spans
+ * is smaller than the minimum distance or greater than the maximum
+ * distance.
+ * */
private boolean findMatch() throws IOException {
if (!hasMoreSecondSpans || secondSpans.doc() > firstSpans.doc()){
setMatchProperties();
@@ -98,6 +107,9 @@
return false;
}
+ /** Set the current firstspan as the match
+ *
+ * */
private void setMatchProperties() throws IOException{
matchDocNumber = firstSpans.doc();
matchStartPosition = firstSpans.start();
@@ -107,13 +119,11 @@
matchPayload.addAll(firstSpans.getPayload());
setMatchFirstSpan(new CandidateSpans(firstSpans));
- //setMatchSecondSpan(new CandidateSpan(secondSpans));
- if (DEBUG)
- log.trace("doc# {}, start {}, end {}",
- matchDocNumber,
- matchStartPosition,
- matchEndPosition);
+// log.trace("doc# {}, start {}, end {}",
+// matchDocNumber,
+// matchStartPosition,
+// matchEndPosition);
}
@Override
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
index 88e5c3f..cdf5be1 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestAttributeIndex.java
@@ -33,13 +33,13 @@
fd.addString("ID", "doc-0");
fd.addTV("base",
"bcbabd",
- "[(0-1)s:a|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|<>:div#0-2$<i>2<s>2|@:class=header$<s>1|@:class=header$<s>2]" +
- "[(1-2)s:e|_2#1-2|<>:a#1-2$<i>2<s>1|@:class=header$<s>1]" +
- "[(2-3)s:e|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=time$<s>1]" +
- "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=header$<s>1]" +
- "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|<>:a#4-5$<i>5<s>2|@:class=header$<s>2]" +
- "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>1|<>:div#5-6$<i>6<s>-1|@:class=header$<s>1]"+
- "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1|@:class=header$<s>2]");
+ "[(0-1)s:a|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|<>:div#0-2$<i>2<s>2|@:class=header$<s>1<i>3|@:class=header$<s>2<i>2]" +
+ "[(1-2)s:e|_2#1-2|<>:a#1-2$<i>2<s>1|@:class=header$<s>1<i>2]" +
+ "[(2-3)s:e|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=time$<s>1<i>5]" +
+ "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=header$<s>1<i>5]" +
+ "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|<>:a#4-5$<i>5<s>2|@:class=header$<s>2<i>5]" +
+ "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>1|<>:div#5-6$<i>6<s>-1|@:class=header$<s>1<i>6]"+
+ "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1<i>7|@:class=header$<s>2<i>7]");
return fd;
}
@@ -49,13 +49,13 @@
fd.addString("ID", "doc-1");
fd.addTV("base",
"bcbabd",
- "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|@:class=header$<s>1|@:class=title$<s>1|@:class=book$<s>1]" +
- "[(1-2)s:c|_2#1-2|<>:div#1-2$<i>2<s>1|@:class=header$<s>1|@:class=title$<s>1]" +
- "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=book$<s>1]" +
- "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<s>1]" +
- "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<s>1|@:class=book$<s>1|@:class=title$<s>1]" +
- "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<s>1]"+
- "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1|@:class=title$<s>1]");
+ "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>-1|<>:div#0-3$<i>3<s>1|@:class=header$<s>1<i>3|@:class=title$<s>1<i>3|@:class=book$<s>1<i>3]" +
+ "[(1-2)s:c|_2#1-2|<>:div#1-2$<i>2<s>1|@:class=header$<s>1<i>2|@:class=title$<s>1<i>2]" +
+ "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|@:class=book$<s>1<i>5]" +
+ "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<s>1<i>5]" +
+ "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<s>1<i>5|@:class=book$<s>1<i>5|@:class=title$<s>1<i>5]" +
+ "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<s>1<i>6]"+
+ "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1<i>7|@:class=title$<s>1<i>7]");
return fd;
}
@@ -65,13 +65,13 @@
fd.addString("ID", "doc-1");
fd.addTV("base",
"bcbabd",
- "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>1|<>:div#0-3$<i>3<s>2|@:class=header$<s>2|@:class=book$<s>1|@:class=book$<s>2]" +
- "[(1-2)s:e|_2#1-2|<>:div#1-2$<i>2<s>1|<>:a#1-2$<i>2<s>2|@:class=book$<s>2|@:class=header$<s>1]" +
- "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|<>:a#1-2$<i>2<s>2|@:class=header$<s>2|@:class=book$<s>1]" +
- "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<s>1]" +
- "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<s>1|@:class=book$<s>1|@:class=book$<s>1]" +
- "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<s>1]"+
- "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1|@:class=book$<s>2]");
+ "[(0-1)s:b|_1#0-1|<>:s#0-5$<i>5<s>1|<>:div#0-3$<i>3<s>2|@:class=header$<s>2<i>3|@:class=book$<s>1<i>5|@:class=book$<s>2<i>3]" +
+ "[(1-2)s:e|_2#1-2|<>:div#1-2$<i>2<s>1|<>:a#1-2$<i>2<s>2|@:class=book$<s>2<i>2|@:class=header$<s>1<i>2]" +
+ "[(2-3)s:b|_3#2-3|<>:div#2-3$<i>5<s>1|<>:a#1-2$<i>2<s>2|@:class=header$<s>2<i>2|@:class=book$<s>1<i>5]" +
+ "[(3-4)s:a|_4#3-4|<>:div#3-5$<i>5<s>1|@:class=title$<s>1<i>5]" +
+ "[(4-5)s:b|_5#4-5|<>:div#4-5$<i>5<s>1|@:class=header$<s>1<i>5|@:class=book$<s>1<i>5|@:class=book$<s>1<i>5]" +
+ "[(5-6)s:d|_6#5-6|<>:s#5-6$<i>6<s>-1|<>:div#5-6$<i>6<s>1|@:class=header$<s>1<i>6]"+
+ "[(6-7)s:d|_7#6-7|<>:s#6-7$<i>7<s>2|<>:div#6-7$<i>7<s>1|@:class=header$<s>1<i>7|@:class=book$<s>2<i>7]");
return fd;
}
@@ -92,6 +92,7 @@
List<SpanQuery> sql = new ArrayList<>();
sql.add(saq);
+ // div with @class=header
SpanQuery sq = new SpanWithAttributeQuery(
new SpanElementQuery("base", "div"),
sql, true);
@@ -116,7 +117,7 @@
public void testCase2() throws IOException{
ki.addDoc(createFieldDoc1());
ki.commit();
-
+ // header and title
List<SpanQuery> sql = new ArrayList<>();
sql.add(new SpanAttributeQuery(
new SpanTermQuery(new Term("base","@:class=header")),true)
@@ -130,7 +131,7 @@
sql, true);
kr = ki.search(sq, (short) 10);
-
+
assertEquals(4, kr.getTotalResults());
assertEquals(0,kr.getMatch(0).getStartPos());
assertEquals(3,kr.getMatch(0).getEndPos());
@@ -141,7 +142,8 @@
assertEquals(6,kr.getMatch(3).getStartPos());
assertEquals(7,kr.getMatch(3).getEndPos());
- // Add not Attribute
+ // Add not Attribute
+ // header and title, not book
sql.add(new SpanAttributeQuery(
new SpanTermQuery(new Term("base","@:class=book")),true,true)
);
@@ -159,6 +161,7 @@
assertEquals(7,kr.getMatch(1).getEndPos());
// Test multiple negations
+ // header, not title, not book
sql.remove(1);
sql.add(new SpanAttributeQuery(
new SpanTermQuery(new Term("base","@:class=title")),true,true)
@@ -203,7 +206,7 @@
assertEquals(7,kr.getMatch(2).getEndPos());
}
- /** Test SkipTo Doc */
+ /** Test skipTo doc for spanWithAttribute */
@Test
public void testCase4() throws IOException{
ki.addDoc(createFieldDoc1());
@@ -222,6 +225,9 @@
new SpanElementQuery("base", "div"),
sql, true);
+ kr = ki.search(sq, (short) 10);
+ assertEquals(6,kr.getTotalResults());
+
SpanNextQuery snq = new SpanNextQuery(
new SpanTermQuery(new Term("base", "s:e"))
,sq);
@@ -233,4 +239,29 @@
assertEquals(1,kr.getMatch(0).getStartPos());
assertEquals(5,kr.getMatch(0).getEndPos());
}
+
+ /** Arbitrary elements with a specific attribute.
+ * This is just a SpanAttributeQuery; to get the elementEnd,
+ * you have to use getElementEnd(). Alternatives (unimplemented):
+ * 1) store in payload?
+ * 2) wrap as a span
+ * */
+ @Test
+ public void testCase5() throws IOException{
+ ki.addDoc(createFieldDoc1());
+ ki.commit();
+ SpanAttributeQuery saq = new SpanAttributeQuery(
+ new SpanTermQuery(new Term("base","@:class=book")),
+ true);
+ kr = ki.search(saq, (short) 10);
+ assertEquals(3, kr.getTotalResults());
+
+ /*for (int i=0; i< kr.getTotalResults(); i++){
+ System.out.println(
+ kr.match(i).getLocalDocID()+" "+
+ kr.match(i).startPos + " " +
+ kr.match(i).endPos
+ );
+ }*/
+ }
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java
index 8505452..8d480c0 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestDistanceExclusionIndex.java
@@ -14,6 +14,7 @@
import de.ids_mannheim.korap.query.DistanceConstraint;
import de.ids_mannheim.korap.query.SpanDistanceQuery;
import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.query.SpanNextQuery;
public class TestDistanceExclusionIndex {
@@ -28,7 +29,7 @@
ki.addDoc(createFieldDoc0());
ki.commit();
SpanQuery sq;
- // ---- Distance 0 to 1
+ //ordered distance 0 to 1
sq = createQuery("s:c","s:e",0,1,true);
kr = ki.search(sq, (short) 10);
assertEquals(3, kr.getTotalResults());
@@ -116,8 +117,35 @@
assertEquals(1, kr.getTotalResults());
assertEquals(9, kr.match(0).getStartPos());
assertEquals(10, kr.match(0).getEndPos());
+ }
+
+ // Add skipTo test
+ @Test
+ public void testCase6() throws IOException{
+ ki = new KorapIndex();
+ ki.addDoc(createFieldDoc1());
+ ki.addDoc(createFieldDoc2());
+ ki.commit();
-/* System.out.print(kr.getTotalResults()+"\n");
+ SpanQuery sq;
+ //ordered distance 0 to 1
+ sq = createQuery("s:d","s:b",0,1,true);
+ kr = ki.search(sq, (short) 10);
+ assertEquals(4, kr.getTotalResults());
+
+ SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:c"));
+ kr = ki.search(stq, (short) 10);
+ assertEquals(6, kr.getTotalResults());
+
+ SpanNextQuery snq = new SpanNextQuery(stq,sq);
+ kr = ki.search(snq, (short) 10);
+ assertEquals(2, kr.getTotalResults());
+ assertEquals(3, kr.match(0).getStartPos());
+ assertEquals(5, kr.match(0).getEndPos());
+ assertEquals(8, kr.match(1).getStartPos());
+ assertEquals(10, kr.match(1).getEndPos());
+
+ /*System.out.print(kr.getTotalResults()+"\n");
for (int i=0; i< kr.getTotalResults(); i++){
System.out.println(
kr.match(i).getLocalDocID()+" "+
@@ -125,7 +153,7 @@
kr.match(i).endPos
);
}*/
- }
+ }
private SpanQuery createQuery(String x, String y, int min, int max, boolean isOrdered){
SpanDistanceQuery sq = new SpanDistanceQuery(
diff --git a/src/test/java/de/ids_mannheim/korap/response/TestMessage.java b/src/test/java/de/ids_mannheim/korap/response/TestMessage.java
index c8876ad..6ecdf73 100644
--- a/src/test/java/de/ids_mannheim/korap/response/TestMessage.java
+++ b/src/test/java/de/ids_mannheim/korap/response/TestMessage.java
@@ -1,4 +1,5 @@
-package de.ids_mannheim.korap.util;
+package de.ids_mannheim.korap.response;
+
import de.ids_mannheim.korap.response.Messages;
import static org.junit.Assert.*;
diff --git a/src/test/java/de/ids_mannheim/korap/response/TestNotifications.java b/src/test/java/de/ids_mannheim/korap/response/TestNotifications.java
index e2982e1..f92af6e 100644
--- a/src/test/java/de/ids_mannheim/korap/response/TestNotifications.java
+++ b/src/test/java/de/ids_mannheim/korap/response/TestNotifications.java
@@ -1,4 +1,4 @@
-package de.ids_mannheim.korap.util;
+package de.ids_mannheim.korap.response;
import java.io.*;
diff --git a/src/test/java/de/ids_mannheim/korap/response/TestResponse.java b/src/test/java/de/ids_mannheim/korap/response/TestResponse.java
index d7862ae..57babbb 100644
--- a/src/test/java/de/ids_mannheim/korap/response/TestResponse.java
+++ b/src/test/java/de/ids_mannheim/korap/response/TestResponse.java
@@ -1,4 +1,4 @@
-package de.ids_mannheim.korap.util;
+package de.ids_mannheim.korap.response;
import java.io.*;