Fixed serialization of t-distance
Change-Id: I411054200c13fa930e1cee795fb8e10e29f1a20e
diff --git a/Changes b/Changes
index 3f45c13..ef06563 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.55.6 2016-06-03
+ - [bugfix] distance with key "t" uses default foundry (diewald)
+
0.55.5 2016-05-02
- [performance] Changed to a dynamic window for sorting in FocusSpans (margaretha)
- [bugfix] store skipped spans in Repetitionspans as candidates
diff --git a/pom.xml b/pom.xml
index 60dd27b..4b2b844 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
<groupId>de.ids_mannheim.korap</groupId>
<artifactId>Krill</artifactId>
- <version>0.55.5</version>
+ <version>0.55.6</version>
<packaging>jar</packaging>
<name>Krill</name>
diff --git a/src/main/java/de/ids_mannheim/korap/KrillQuery.java b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
index af1db1d..b5ea08d 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillQuery.java
@@ -914,7 +914,8 @@
}
// Use default foundry and layer - currently only base is supported!
- else if (unit.equals("s") || unit.equals("p")) {
+ else if (unit.equals("s") || unit.equals("p")
+ || unit.equals("t")) {
StringBuilder value = new StringBuilder();
unit = value.append("base/s:").append(unit).toString();
};
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
index 5f18ff1..db51707 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanFocusQuery.java
@@ -39,6 +39,7 @@
private boolean removeTemporaryClasses = false;
private int windowSize = 10;
+
/**
* Construct a new SpanFocusQuery.
*
@@ -60,6 +61,7 @@
this.classNumbers = classNumbers;
};
+
/**
* Construct a new SpanFocusQuery. The class to focus on defaults
* to
@@ -209,12 +211,12 @@
}
- public int getWindowSize() {
+ public int getWindowSize () {
return windowSize;
}
- public void setWindowSize(int windowSize) {
+ public void setWindowSize (int windowSize) {
this.windowSize = windowSize;
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java b/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
index c547edc..9ec9632 100644
--- a/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
+++ b/src/main/java/de/ids_mannheim/korap/query/SpanSubspanQuery.java
@@ -140,11 +140,12 @@
}
- public int getWindowSize() {
+ public int getWindowSize () {
return windowSize;
}
- public void setWindowSize(int windowSize) {
+
+ public void setWindowSize (int windowSize) {
this.windowSize = windowSize;
}
}
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpanComparator.java b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpanComparator.java
index 0ba3c29..c351891 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpanComparator.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/CandidateSpanComparator.java
@@ -5,10 +5,11 @@
public class CandidateSpanComparator implements Comparator<CandidateSpan> {
@Override
- public int compare(CandidateSpan o1, CandidateSpan o2) {
+ public int compare (CandidateSpan o1, CandidateSpan o2) {
if (o1.doc == o2.doc) {
if (o1.getStart() == o2.getStart()) {
- if (o1.getEnd() == o2.getEnd()) return 0;
+ if (o1.getEnd() == o2.getEnd())
+ return 0;
if (o1.getEnd() > o2.getEnd())
return 1;
else
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
index 202d3d4..e566637 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/FocusSpans.java
@@ -61,6 +61,7 @@
private PriorityQueue<CandidateSpan> candidates;
private CandidateSpanComparator comparator;
+
/**
* Construct a FocusSpan for the given {@link SpanQuery}.
*
@@ -139,8 +140,8 @@
if (firstSpans.isPayloadAvailable()
&& updateSpanPositions(cs = new CandidateSpan(firstSpans))) {
if (cs.getDoc() == prevDoc && cs.getStart() < prevStart) {
- log.warn("Span (" + cs.getStart() + ", "
- + cs.getEnd() + ") is out of order and skipped.");
+ log.warn("Span (" + cs.getStart() + ", " + cs.getEnd()
+ + ") is out of order and skipped.");
}
else {
candidates.add(cs);
@@ -150,6 +151,7 @@
}
}
+
private void setMatch (CandidateSpan cs) {
matchStartPosition = cs.getStart();
prevStart = matchStartPosition;
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java
index 52b657e..57f8362 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/SubSpans.java
@@ -43,6 +43,7 @@
private PriorityQueue<CandidateSpan> candidates;
private CandidateSpanComparator comparator;
+
/**
* Constructs SubSpans for the given {@link SpanSubspanQuery}
* specifiying the start offset and the length of the subspans.
@@ -65,7 +66,8 @@
candidates = new PriorityQueue<>(windowSize, comparator);
if (DEBUG) {
- log.trace("Init SubSpan at {} with length {}", this.startOffset, this.length);
+ log.trace("Init SubSpan at {} with length {}", this.startOffset,
+ this.length);
};
hasMoreSpans = firstSpans.next();
}
@@ -109,7 +111,8 @@
return false;
}
- private void collectCandidates() throws IOException {
+
+ private void collectCandidates () throws IOException {
while (hasMoreSpans && candidates.size() < windowSize
&& firstSpans.doc() == currentDoc) {
@@ -127,12 +130,13 @@
}
}
+
/**
* Sets the properties of the current match/subspan.
*
* @throws IOException
*/
- public boolean findMatch(CandidateSpan cs) throws IOException {
+ public boolean findMatch (CandidateSpan cs) throws IOException {
// Check at span ending
if (this.startOffset < 0) {
@@ -174,16 +178,16 @@
cs.setDoc(firstSpans.doc());
if (DEBUG) {
- log.trace("Start at absolute position {} " +
- "and end at absolute position {}",
- cs.getStart(),
+ log.trace("Start at absolute position {} "
+ + "and end at absolute position {}", cs.getStart(),
cs.getEnd());
};
return true;
}
- private void setMatch(CandidateSpan cs) {
+
+ private void setMatch (CandidateSpan cs) {
matchStartPosition = cs.getStart();
prevStart = matchStartPosition;
matchEndPosition = cs.getEnd();
@@ -193,6 +197,7 @@
matchPayload.addAll(cs.getPayloads());
}
+
@Override
public boolean skipTo (int target) throws IOException {
if (candidates.size() > 0) {
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
index 31a0e12..9234716 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/WithinSpans.java
@@ -238,7 +238,8 @@
if (this.embeddedDoc != this.wrapDoc) {
if (DEBUG) {
- log.trace("(A) Embedded span is in a new document {}",
+ log.trace(
+ "(A) Embedded span is in a new document {}",
_currentEmbedded().toString());
log.trace("Reset current embedded doc");
};
@@ -268,7 +269,8 @@
if (this.embeddedDoc != this.wrapDoc) {
if (DEBUG) {
- log.trace("(B) Embedded span is in a new document {}",
+ log.trace(
+ "(B) Embedded span is in a new document {}",
_currentEmbedded().toString());
log.trace("Reset current embedded doc");
};
@@ -706,7 +708,7 @@
this.storeEmbedded();
this.nextSpanA();
-
+
if (DEBUG)
_logCurrentCase((byte) 15);
return false;
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
index 81fcad7..37d63db 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestElementDistanceIndex.java
@@ -3,10 +3,8 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
+import java.util.*;
+import java.io.*;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanQuery;
@@ -36,7 +34,7 @@
private FieldDocument createFieldDoc0 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-0");
- fd.addTV("base", "text",
+ fd.addTV("tokens", "text",
"[(0-1)s:b|s:c|_1$<i>0<i>1|<>:s$<b>64<i>0<i>1<i>1<b>0]"
+ "[(1-2)s:b|_2$<i>1<i>2]"
+ "[(2-3)s:c|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>3<b>0]"
@@ -51,9 +49,9 @@
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-1");
fd.addTV(
- "base",
- "text",
- "[(0-1)s:e|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>1<b>0]"
+ "tokens",
+ "ecebdc",
+ "[(0-1)s:e|_1$<i>0<i>1|<>:base/s:t$<b>64<i>0<i>6<i>0<i>6<b>0|<>:s$<b>64<i>0<i>2<i>1<b>0]"
+ "[(1-2)s:c|s:b|_2$<i>1<i>2|<>:s$<b>64<i>1<i>2<i>2<b>0]"
+ "[(2-3)s:e|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>3<b>0]"
+ "[(3-4)s:b|_4$<i>3<i>4|<>:s$<b>64<i>3<i>4<i>4<b>0]"
@@ -66,7 +64,7 @@
private FieldDocument createFieldDoc2 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-2");
- fd.addTV("base", "text",
+ fd.addTV("tokens", "text",
"[(0-1)s:b|_1$<i>0<i>1|<>:p$<b>64<i>0<i>2<i>1<b>0]"
+ "[(1-2)s:b|_2$<i>1<i>2]"
+ "[(2-3)s:b|_3$<i>2<i>3|<>:p$<b>64<i>2<i>3<i>3<b>0]"
@@ -80,7 +78,7 @@
private FieldDocument createFieldDoc3 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-3");
- fd.addTV("base", "text",
+ fd.addTV("tokens", "text",
"[(0-1)s:b|_1$<i>0<i>1|<>:s$<b>64<i>0<i>2<i>1<b>0]"
+ "[(1-2)s:d|_2$<i>1<i>2]"
+ "[(2-3)s:b|_3$<i>2<i>3|<>:s$<b>64<i>2<i>3<i>3<b>0]"
@@ -94,10 +92,10 @@
public SpanQuery createQuery (String elementType, String x, String y,
int min, int max, boolean isOrdered) {
- SpanElementQuery e = new SpanElementQuery("base", elementType);
- return new SpanDistanceQuery(new SpanTermQuery(new Term("base", x)),
- new SpanTermQuery(new Term("base", y)), new DistanceConstraint(
- e, min, max, isOrdered, false), true);
+ SpanElementQuery e = new SpanElementQuery("tokens", elementType);
+ return new SpanDistanceQuery(new SpanTermQuery(new Term("tokens", x)),
+ new SpanTermQuery(new Term("tokens", y)),
+ new DistanceConstraint(e, min, max, isOrdered, false), true);
}
@@ -165,7 +163,8 @@
SpanQuery sq, edq;
edq = createQuery("s", "s:b", "s:c", 1, 1, true);
- sq = new SpanNextQuery(edq, new SpanTermQuery(new Term("base", "s:d")));
+ sq = new SpanNextQuery(edq,
+ new SpanTermQuery(new Term("tokens", "s:d")));
kr = ki.search(sq, (short) 10);
@@ -206,43 +205,76 @@
ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
ki.commit();
- InputStream is = getClass()
- .getResourceAsStream("/queries/cosmas1.json");
- BufferedReader bufferedReader = new BufferedReader(
- new InputStreamReader(is, "UTF-8"));
- StringBuilder sb = new StringBuilder();
- String json;
- while ((json = bufferedReader.readLine()) != null) {
- sb.append(json);
- }
- json = sb.toString();
-
- SpanQueryWrapper sqwi;
- try {
- sqwi = new KrillQuery("tokens").fromJson(json);
- }
- catch (QueryException e) {
- fail(e.getMessage());
- sqwi = new QueryBuilder("tokens").seg("???");
- };
-
-
- SpanQuery sq;
- sq = sqwi.toQuery();
-
- kr = ki.search(sq, (short) 10);
+ SpanQueryWrapper sqwi = jsonQuery(getClass().getResource(
+ "/queries/cosmas1.json").getFile());
+ kr = ki.search(sqwi.toQuery(), (short) 10);
assertEquals((long) 3, kr.getTotalResults());
assertEquals(14, kr.getMatch(0).startPos);
assertEquals(19, kr.getMatch(0).endPos);
assertEquals(30, kr.getMatch(1).startPos);
assertEquals(33, kr.getMatch(1).endPos);
-
- /* for (Match km : kr.getMatches()){
- System.out.println(km.getStartPos() +","+km.getEndPos()+" "
- +km.getSnippetBrackets());
- }*/
}
-}
+    /** Checks that base/s:t spans are searchable and that a JSON distance with key "t" resolves to base/s:t. */
+    @Test
+    public void testCase6 () throws Exception {
+        ki = new KrillIndex();
+        ki.addDoc(createFieldDoc0());
+        ki.addDoc(createFieldDoc1());
+        ki.addDoc(createFieldDoc2());
+        ki.commit();
+
+        SpanQueryWrapper sqwi;
+        sqwi = new QueryBuilder("tokens").tag("base/s:t");
+
+        kr = ki.search(sqwi.toQuery(), (short) 10);
+        assertEquals(1, kr.getTotalResults());
+        assertEquals("[ecebdc]", kr.getMatch(0).getSnippetBrackets());
+
+        sqwi = jsonQuery(getClass().getResource(
+                "/queries/distances/in-same-t.jsonld").getFile());
+
+        assertEquals(
+                "spanElementDistance(tokens:s:c, tokens:s:e, [(base/s:t[0:0], ordered, notExcluded)])",
+                sqwi.toQuery().toString());
+
+        /*
+        kr = ki.search(sqwi.toQuery(), (short) 10);
+        assertEquals(1, kr.getTotalResults()); // Is 1 correct or should it not be ordered?
+        */
+    };
+
+
+    /** Returns the UTF-8 content of the file at path with line breaks removed; fails the test on I/O errors. */
+    public static String getString (String path) {
+        StringBuilder contentBuilder = new StringBuilder();
+        try (BufferedReader in = new BufferedReader(new InputStreamReader(
+                new FileInputStream(path), "UTF-8"))) {
+            String str;
+            while ((str = in.readLine()) != null) {
+                contentBuilder.append(str);
+            };
+        }
+        catch (IOException e) {
+            fail(e.getMessage());
+        }
+        return contentBuilder.toString();
+    };
+
+
+    /** Parses the KoralQuery JSON file at jsonFile into a query on the
+     *  "tokens" field; fails the calling test on a QueryException. */
+    public static SpanQueryWrapper jsonQuery (String jsonFile) {
+        try {
+            String json = getString(jsonFile);
+            return new KrillQuery("tokens").fromJson(json);
+        }
+        catch (QueryException e) {
+            // fail() never returns normally, so throw directly: no dummy
+            // fallback query is needed and the cause stays in the report.
+            throw new AssertionError(e.getMessage(), e);
+        }
+    };
+};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFocusIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestFocusIndex.java
index c08dcae..b835df1 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFocusIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFocusIndex.java
@@ -20,15 +20,17 @@
private KrillIndex ki;
private Result kr;
+
public TestFocusIndex () throws IOException {
ki = new KrillIndex();
}
+
/**
* Check Skipto focus spans
* */
@Test
- public void testCase12() throws IOException {
+ public void testCase12 () throws IOException {
ki.addDoc(TestRelationIndex.createFieldDoc0());
ki.addDoc(TestRelationIndex.createFieldDoc1());
ki.commit();
@@ -43,12 +45,12 @@
kr = ki.search(snq, (short) 20);
- assertEquals(0, kr.getMatch(0).getStartPos());
- assertEquals(2, kr.getMatch(0).getEndPos());
- assertEquals(5, kr.getMatch(1).getStartPos());
- assertEquals(9, kr.getMatch(1).getEndPos());
- // for (Match m : kr.getMatches()) {
- // System.out.println(m.getStartPos() + " " + m.getEndPos());
- // }
+ assertEquals(0, kr.getMatch(0).getStartPos());
+ assertEquals(2, kr.getMatch(0).getEndPos());
+ assertEquals(5, kr.getMatch(1).getStartPos());
+ assertEquals(9, kr.getMatch(1).getEndPos());
+ // for (Match m : kr.getMatches()) {
+ // System.out.println(m.getStartPos() + " " + m.getEndPos());
+ // }
}
}
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
index 8bc7a9f..a1d615d 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRelationIndex.java
@@ -72,7 +72,7 @@
}
- public static FieldDocument createFieldDoc0() {
+ public static FieldDocument createFieldDoc0 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-0");
fd.addTV(
@@ -97,7 +97,7 @@
}
- public static FieldDocument createFieldDoc1() {
+ public static FieldDocument createFieldDoc1 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-1");
fd.addTV(
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
index d0af112..c415ca1 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSubSpanIndex.java
@@ -27,12 +27,14 @@
Result kr;
KrillIndex ki;
+
public TestSubSpanIndex () throws IOException {
ki = new KrillIndex();
ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
ki.commit();
}
+
@Test
public void testCase1 () throws IOException {
SpanDistanceQuery sdq = new SpanDistanceQuery(new SpanTermQuery(
@@ -124,22 +126,23 @@
// }
}
+
// Negative SubSpanQuery
@Test
public void testCaseNegativeSubSpan () throws IOException {
KrillIndex ki = new KrillIndex();
FieldDocument fd = new FieldDocument();
- fd.addTV(
- "base",
+ fd.addTV("base",
// <x>a <x>b </x>c </x>
"a b c ",
- "[(0-1)s:a|i:a|_0$<i>0<i>2|<>:x$<b>64<i>0<i>6<i>3<b>0]" +
- "[(1-2)s:b|i:b|_1$<i>2<i>4|<>:x$<b>64<i>2<i>4<i>2<b>1]" +
- "[(3-4)s:c|i:c|_2$<i>4<i>6]");
+ "[(0-1)s:a|i:a|_0$<i>0<i>2|<>:x$<b>64<i>0<i>6<i>3<b>0]"
+ + "[(1-2)s:b|i:b|_1$<i>2<i>4|<>:x$<b>64<i>2<i>4<i>2<b>1]"
+ + "[(3-4)s:c|i:c|_2$<i>4<i>6]");
ki.addDoc(fd);
ki.commit();
- SpanSubspanQuery ssq = new SpanSubspanQuery(new SpanElementQuery("base", "x"), -1, 1, true);
+ SpanSubspanQuery ssq = new SpanSubspanQuery(new SpanElementQuery(
+ "base", "x"), -1, 1, true);
kr = ki.search(ssq, (short) 10);
/*
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java
index cba0c3a..8ac224f 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestWithinIndex.java
@@ -1097,8 +1097,8 @@
assertEquals(1, kr.getTotalResults());
*/
sqw = qb.startswith(qb.tag("e"), qb.seg("i:h"));
- assertEquals("spanStartsWith(<base:e />, base:i:h)",
- sqw.toQuery().toString());
+ assertEquals("spanStartsWith(<base:e />, base:i:h)", sqw.toQuery()
+ .toString());
kr = ki.search(sqw.toQuery(), (short) 10);
assertEquals(2, kr.getTotalResults());
};
diff --git a/src/test/resources/queries/distances/in-same-t.jsonld b/src/test/resources/queries/distances/in-same-t.jsonld
new file mode 100644
index 0000000..439d543
--- /dev/null
+++ b/src/test/resources/queries/distances/in-same-t.jsonld
@@ -0,0 +1,40 @@
+{
+ "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
+ "query": {
+ "operands": [
+ {
+ "wrap": {
+ "layer": "orth",
+ "match": "match:eq",
+ "key": "c",
+ "@type": "koral:term"
+ },
+ "@type": "koral:token"
+ },
+ {
+ "wrap": {
+ "layer": "orth",
+ "match": "match:eq",
+ "key": "e",
+ "@type": "koral:term"
+ },
+ "@type": "koral:token"
+ }
+ ],
+ "operation": "operation:sequence",
+ "distances": [
+ {
+ "min": 0,
+ "boundary": {
+ "min": 0,
+ "max": 0,
+ "@type": "koral:boundary"
+ },
+ "max": 0,
+ "key": "t",
+ "@type": "cosmas:distance"
+ }
+ ],
+ "@type": "koral:group"
+ }
+}