Fixed the candidate list in NextSpans: a non-matching candidate is now only removed when it ends before the current first span ends.
Change-Id: Ia2a8a65b580071527d53409310a374b6e78f6721
diff --git a/Changes b/Changes
index e6c2849..61d9b4c 100644
--- a/Changes
+++ b/Changes
@@ -8,7 +8,9 @@
- [bugfix] Remove entries from matchList that are not in the same
document in NextSpans (diewald)
- [bugfix] Fixed skipTo in NextSpans, see de.ids_mannheim.korap.index.
- TestRepetitionIndex.testRepetitionSnippetBug3() (margaretha)
+ TestRepetitionIndex.testRepetitionSnippetBug3() (margaretha)
+ - [bugfix] Fixed the candidate list in NextSpans, see de.ids_mannheim.
+ korap.index.TestNextIndex.testNextExpansion() (margaretha)
0.58.0 2018-09-03
- [feature] Implemented referencing cached collection (margaretha)
diff --git a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
index 2ea1377..fb5b254 100644
--- a/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
+++ b/src/main/java/de/ids_mannheim/korap/query/spans/NextSpans.java
@@ -178,7 +178,7 @@
if (cs.getStart() == firstSpans.end()) {
addMatch(cs);
}
- else {
+ else if (cs.getEnd() < firstSpans.end()){
i.remove();
}
}
diff --git a/src/test/java/de/ids_mannheim/korap/TestSimple.java b/src/test/java/de/ids_mannheim/korap/TestSimple.java
index 1d7cb99..64ce39f 100644
--- a/src/test/java/de/ids_mannheim/korap/TestSimple.java
+++ b/src/test/java/de/ids_mannheim/korap/TestSimple.java
@@ -1,28 +1,43 @@
package de.ids_mannheim.korap;
-import java.util.*;
-import java.io.*;
+import static de.ids_mannheim.korap.util.KrillByte.byte2int;
+import static org.junit.Assert.fail;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
import java.net.URLDecoder;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
-import static org.junit.Assert.*;
-
-import de.ids_mannheim.korap.KrillQuery;
-import de.ids_mannheim.korap.query.QueryBuilder;
-import de.ids_mannheim.korap.index.*;
-import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
-import de.ids_mannheim.korap.util.QueryException;
-import de.ids_mannheim.korap.util.CorpusDataException;
-
-import static de.ids_mannheim.korap.util.KrillByte.*;
-
-import org.apache.lucene.index.*;
-import org.apache.lucene.document.*;
-import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import de.ids_mannheim.korap.index.FieldDocument;
+import de.ids_mannheim.korap.index.MultiTermToken;
+import de.ids_mannheim.korap.index.MultiTermTokenStream;
+import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
+import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.util.CorpusDataException;
+import de.ids_mannheim.korap.util.QueryException;
+
/**
* Helper class for testing the KrillIndex framework (Simple).
*
@@ -60,9 +75,13 @@
w.addDocument(doc);
};
- // Add document
public static FieldDocument simpleFieldDoc (String s) {
- String[] characters = s.split("");
+ return simpleFieldDoc(s, "");
+ }
+
+ // Add document
+ public static FieldDocument simpleFieldDoc (String s, String delimiter) {
+ String[] characters = s.split(delimiter);
FieldDocument fd = new FieldDocument();
String surface = "";
@@ -80,11 +99,12 @@
return fd;
};
+ // Create a new FieldDocument with random data
public static FieldDocument simpleFuzzyFieldDoc (List<String> chars, int minLength, int maxLength) {
String surface = "";
for (int i = 0; i < (int)(Math.random() * (maxLength - minLength)) + minLength; i++) {
- String randomChar = chars.get((int)(Math.random() * 6));
+ String randomChar = chars.get((int)(Math.random() * chars.size()));
surface += randomChar;
};
return simpleFieldDoc(surface);
@@ -189,4 +209,56 @@
};
return spanArray;
};
+
+
+    /**
+     * Simple fuzzing test using the default of 100000 runs.
+     *
+     * Delegates to the seven-argument variant below; see there for the
+     * details of corpus creation and result checking.
+     *
+     * @param chars
+     *            alphabet the random documents are drawn from
+     * @param resultPattern
+     *            regular expression used to calculate the expected
+     *            number of matches on the surface string
+     * @param sq
+     *            the query under test
+     * @param minTextLength
+     *            lower length bound passed to simpleFuzzyFieldDoc
+     * @param maxTextLength
+     *            upper length bound passed to simpleFuzzyFieldDoc
+     * @param maxDocs
+     *            exclusive upper bound for the number of documents per run
+     * @throws IOException
+     * @throws QueryException
+     */
+    public static void fuzzingTest (List<String> chars, Pattern resultPattern,
+            SpanQuery sq, int minTextLength, int maxTextLength, int maxDocs)
+            throws IOException, QueryException {
+        fuzzingTest(chars, resultPattern, sq, minTextLength, maxTextLength,
+                maxDocs, 100000);
+    }
+
+
+    /**
+     * Simple fuzzing test with a configurable number of runs.
+     *
+     * Each run builds a fresh index of random documents, counts the
+     * matches of resultPattern in the "base" surface strings and
+     * compares that count with the total results of the query. A
+     * mismatch is not a test failure: the configuration is written to
+     * System.err whenever it is the first one or shorter than the last
+     * reported one, and the corpus parameters are reduced afterwards to
+     * look for an even smaller failing configuration.
+     *
+     * @param chars
+     *            alphabet the random documents are drawn from
+     * @param resultPattern
+     *            regular expression used to calculate the expected
+     *            number of matches on the surface string
+     * @param sq
+     *            the query under test
+     * @param minTextLength
+     *            lower length bound passed to simpleFuzzyFieldDoc
+     * @param maxTextLength
+     *            upper length bound passed to simpleFuzzyFieldDoc
+     * @param maxDocs
+     *            exclusive upper bound for the number of documents per run
+     * @param attempts
+     *            number of runs (corpus creation plus query check)
+     * @throws IOException
+     * @throws QueryException
+     */
+    public static void fuzzingTest (List<String> chars, Pattern resultPattern,
+            SpanQuery sq, int minTextLength, int maxTextLength, int maxDocs,
+            int attempts) throws IOException, QueryException {
+
+        Krill ks = new Krill(sq);
+        String lastFailureConf = "";
+
+        // Shrink local copies instead of reassigning the parameters
+        int minLength = minTextLength;
+        int docLimit = maxDocs;
+
+        // Multiple runs of corpus creation and query checks
+        for (int run = 0; run < attempts; run++) {
+            KrillIndex ki = new KrillIndex();
+            List<String> corpus = new ArrayList<>();
+            int expected = 0;
+
+            // Draw the corpus size once per run. Re-rolling the bound in
+            // the loop condition on every iteration would skew the
+            // corpora towards very few documents.
+            int docCount = (int) (Math.random() * docLimit);
+            for (int i = 0; i < docCount; i++) {
+                FieldDocument testDoc =
+                        simpleFuzzyFieldDoc(chars, minLength, maxTextLength);
+                String testString =
+                        testDoc.doc.getField("base").stringValue();
+                corpus.add(testString);
+                expected += countOverlappingMatches(resultPattern, testString);
+                ki.addDoc(testDoc);
+            }
+
+            ki.commit();
+            Result kr = ks.apply(ki);
+
+            // Regex-calculated and actual number of matches agree
+            if (expected == kr.getTotalResults()) {
+                continue;
+            }
+
+            // Otherwise spit out the corpus configuration
+            String failureConf = "expected:" + expected + ", actual:"
+                    + kr.getTotalResults() + ", docs:" + corpus;
+
+            // Try to keep the failing configuration small
+            if (lastFailureConf.length() == 0
+                    || failureConf.length() < lastFailureConf.length()) {
+                System.err.println(failureConf);
+                lastFailureConf = failureConf;
+
+                // Do not shrink below values that still create documents:
+                // with docLimit < 2 the drawn corpus size is always 0 and
+                // every further run would pass vacuously.
+                if (minLength > 1) {
+                    minLength--;
+                }
+                if (docLimit > 2) {
+                    docLimit--;
+                }
+            }
+        }
+    }
+
+
+    // Count all matches of the pattern in the text, including overlapping
+    // ones, by restarting the search one character after each match start.
+    private static int countOverlappingMatches (Pattern pattern, String text) {
+        Matcher m = pattern.matcher(text);
+        int count = 0;
+        int offset = 0;
+
+        // find(int) throws for offsets beyond the text length, which can
+        // happen after an empty match at the very end of the text
+        while (offset <= text.length() && m.find(offset)) {
+            count++;
+            offset = m.start() + 1;
+        }
+        return count;
+    }
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java
index 7407ed3..93ee0ab 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestNextIndex.java
@@ -1,5 +1,6 @@
package de.ids_mannheim.korap.index;
+import static de.ids_mannheim.korap.TestSimple.simpleFieldDoc;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
@@ -15,10 +16,10 @@
import de.ids_mannheim.korap.KrillIndex;
import de.ids_mannheim.korap.query.SpanClassQuery;
import de.ids_mannheim.korap.query.SpanElementQuery;
+import de.ids_mannheim.korap.query.SpanExpansionQuery;
import de.ids_mannheim.korap.query.SpanFocusQuery;
import de.ids_mannheim.korap.query.SpanNextQuery;
import de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper;
-import de.ids_mannheim.korap.response.Match;
import de.ids_mannheim.korap.response.Result;
@RunWith(JUnit4.class)
@@ -27,6 +28,45 @@
// Todo: primary data as a non-indexed field separated.
@Test
+    public void testNextExpansion () throws IOException {
+        // One document, one token per character: c c e c c
+        KrillIndex index = new KrillIndex();
+        index.addDoc(simpleFieldDoc("ccecc"));
+        index.commit();
+
+        SpanTermQuery cTerm = new SpanTermQuery(new Term("base", "s:c"));
+
+        // c []{0,2}
+        SpanExpansionQuery expandedC =
+                new SpanExpansionQuery(cTerm, 0, 2, 0, true);
+        Result result = index.search(expandedC, (short) 20);
+        // An earlier expectation of 8 results is disabled:
+        // assertEquals(8, result.getTotalResults());
+        assertEquals(12, result.getTotalResults());
+
+        // c []{0,2} c
+        result = index.search(new SpanNextQuery(expandedC, cTerm),
+                (short) 10);
+
+        // Expected matches with their spans:
+        // cc  ccec cec cecc cc
+        // 1-3 1-5  2-5 2-6  4-6
+        assertEquals(5, result.getTotalResults());
+    }
+
+    @Test
+    public void testNextExpansion2 () throws IOException {
+        // One document, one token per character: c c c c
+        KrillIndex index = new KrillIndex();
+        index.addDoc(simpleFieldDoc("cccc"));
+        index.commit();
+
+        SpanTermQuery cTerm = new SpanTermQuery(new Term("base", "s:c"));
+        SpanExpansionQuery expandedC =
+                new SpanExpansionQuery(cTerm, 0, 2, 0, true);
+        // Disabled check of the expansion on its own:
+        // Result result = index.search(expandedC, (short) 20);
+        // assertEquals(12, result.getTotalResults());
+
+        // c []{0,2} c
+        SpanNextQuery sequence = new SpanNextQuery(expandedC, cTerm);
+        Result result = index.search(sequence, (short) 10);
+
+        // Expected matches with their spans:
+        // cc  ccc cccc cc  ccc cc
+        // 1-3 1-4 1-5  2-4 2-5 3-5
+        assertEquals(6, result.getTotalResults());
+    }
+
+ @Test
public void indexExample1 () throws IOException {
KrillIndex ki = new KrillIndex();
@@ -71,7 +111,6 @@
assertEquals(1, ki.numberOf("base", "documents"));
assertEquals(10, ki.numberOf("base", "t"));
-
sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")),
new SpanNextQuery(new SpanTermQuery(new Term("base", "s:b")),
new SpanTermQuery(new Term("base", "s:c"))));
@@ -89,7 +128,6 @@
};
-
@Test
public void indexExample2 () throws IOException {
KrillIndex ki = new KrillIndex();
@@ -118,7 +156,6 @@
};
-
@Test
public void indexExample3 () throws IOException {
KrillIndex ki = new KrillIndex();
@@ -146,7 +183,6 @@
assertEquals("abc[[abcab]]ac", kr.getMatch(0).getSnippetBrackets());
};
-
@Test
public void indexExample4 () throws IOException {
KrillIndex ki = new KrillIndex();
@@ -203,7 +239,6 @@
assertEquals("xb[[zxbzx]]bxz", kr.getMatch(0).getSnippetBrackets());
};
-
/**
* Multiple atomic indices
* Skip to a greater doc#
@@ -217,9 +252,9 @@
ki.addDoc(createFieldDoc3());
ki.commit();
- SpanQuery sq = new SpanNextQuery(
- new SpanTermQuery(new Term("base", "s:d")),
- new SpanTermQuery(new Term("base", "s:b")));
+ SpanQuery sq =
+ new SpanNextQuery(new SpanTermQuery(new Term("base", "s:d")),
+ new SpanTermQuery(new Term("base", "s:b")));
Result kr = ki.search(sq, (short) 10);
assertEquals("totalResults", kr.getTotalResults(), 2);
@@ -242,7 +277,6 @@
assertEquals("EndPos", 4, kr.getMatch(0).endPos);
}
-
/** Skip to NextSpan */
@Test
public void indexExample6 () throws IOException {
@@ -252,10 +286,11 @@
ki.addDoc(createFieldDoc3());
ki.commit();
- SpanQuery sq = new SpanNextQuery(
- new SpanTermQuery(new Term("base", "s:c")),
- new SpanNextQuery(new SpanTermQuery(new Term("base", "s:d")),
- new SpanTermQuery(new Term("base", "s:b"))));
+ SpanQuery sq =
+ new SpanNextQuery(new SpanTermQuery(new Term("base", "s:c")),
+ new SpanNextQuery(
+ new SpanTermQuery(new Term("base", "s:d")),
+ new SpanTermQuery(new Term("base", "s:b"))));
Result kr = ki.search(sq, (short) 10);
assertEquals("totalResults", kr.getTotalResults(), 1);
@@ -286,7 +321,6 @@
// }
}
-
@Test
public void indexExample7Distances () throws Exception {
KrillIndex ki = new KrillIndex();
@@ -307,7 +341,6 @@
assertEquals("doc-number", "match-doc-3-p2-5", kr.getMatch(2).getID());
};
-
@Test
public void indexExample8Distances () throws Exception {
KrillIndex ki = new KrillIndex();
@@ -328,7 +361,6 @@
assertEquals("doc-number", "match-doc-3-p3-6", kr.getMatch(2).getID());
};
-
@Test
public void indexExample9 () throws IOException {
KrillIndex ki = new KrillIndex();
@@ -348,14 +380,13 @@
assertEquals(5, kr.getMatch(1).getEndPos());
};
-
- @Test
- public void sequenceSkipBug () throws IOException {
- KrillIndex ki = new KrillIndex();
+ @Test
+ public void sequenceSkipBug () throws IOException {
+ KrillIndex ki = new KrillIndex();
ki.addDoc(createFieldDoc1());
- ki.addDoc(createFieldDoc3());
- ki.addDoc(createFieldDoc4());
+ ki.addDoc(createFieldDoc3());
+ ki.addDoc(createFieldDoc4());
ki.addDoc(createFieldDoc5()); // match for 2
ki.addDoc(createFieldDoc1());
ki.addDoc(createFieldDoc3());
@@ -365,11 +396,11 @@
ki.addDoc(createFieldDoc3());
ki.addDoc(createFieldDoc1());
ki.commit();
-
- ki.addDoc(createFieldDoc5()); // match for 2
+
+ ki.addDoc(createFieldDoc5()); // match for 2
ki.addDoc(createFieldDoc1());
ki.addDoc(createFieldDoc2()); // match for 1 and 2
- ki.addDoc(createFieldDoc1());
+ ki.addDoc(createFieldDoc1());
ki.addDoc(createFieldDoc3());
ki.addDoc(createFieldDoc4());
ki.addDoc(createFieldDoc1());
@@ -377,15 +408,11 @@
ki.commit();
- // "cab" is in 2
- SpanQuery sq =
- new SpanNextQuery(
- new SpanNextQuery(
- new SpanTermQuery(new Term("base", "s:c")),
- new SpanTermQuery(new Term("base", "s:a"))
- ),
- new SpanTermQuery(new Term("base", "s:b"))
- );
+ // "cab" is in 2
+ SpanQuery sq = new SpanNextQuery(
+ new SpanNextQuery(new SpanTermQuery(new Term("base", "s:c")),
+ new SpanTermQuery(new Term("base", "s:a"))),
+ new SpanTermQuery(new Term("base", "s:b")));
Result kr = ki.search(sq, (short) 10);
@@ -393,19 +420,15 @@
assertEquals(3, kr.getMatch(0).getEndPos());
assertEquals("totalResults", kr.getTotalResults(), 1);
- // "aba" is in 2 and 5
- sq = new SpanNextQuery(
- new SpanNextQuery(
- new SpanTermQuery(new Term("base", "s:a")),
- new SpanTermQuery(new Term("base", "s:b"))
- ),
- new SpanTermQuery(new Term("base", "s:a"))
- );
+ // "aba" is in 2 and 5
+ sq = new SpanNextQuery(
+ new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")),
+ new SpanTermQuery(new Term("base", "s:b"))),
+ new SpanTermQuery(new Term("base", "s:a")));
kr = ki.search(sq, (short) 10);
assertEquals("totalResults", kr.getTotalResults(), 3);
- };
-
+ };
private FieldDocument createFieldDoc1 () {
FieldDocument fd = new FieldDocument();
@@ -419,7 +442,6 @@
return fd;
}
-
private FieldDocument createFieldDoc2 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-1");
@@ -431,11 +453,10 @@
return fd;
}
-
private FieldDocument createFieldDoc3 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-2");
- fd.addTV("base", "cdbd", // c[ba]d
+ fd.addTV("base", "cdbd", // c[ba]d
"[(0-1)s:c|i:c|_0$<i>0<i>1]" + "[(1-2)s:d|i:d|_1$<i>1<i>2]"
+ "[(2-3)s:b|i:b|s:a|_2$<i>2<i>3]"
+ "[(3-4)s:d|i:d|_3$<i>3<i>4]");
@@ -443,7 +464,6 @@
return fd;
}
-
private FieldDocument createFieldDoc4 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-3");
@@ -460,14 +480,11 @@
private FieldDocument createFieldDoc5 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-4");
- fd.addTV("base", "dabaca",
- "[(0-1)s:d|i:d|_0$<i>0<i>1]"
- + "[(1-2)s:a|i:a|_1$<i>1<i>2|<>:e$<b>64<i>1<i>3<i>3<b>0]"
- + "[(2-3)s:b|i:b|_2$<i>2<i>3]"
- + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
- + "[(4-5)s:c|i:c|_4$<i>4<i>5]"
- + "[(5-6)s:a|i:a|_5$<i>5<i>6]");
+ fd.addTV("base", "dabaca", "[(0-1)s:d|i:d|_0$<i>0<i>1]"
+ + "[(1-2)s:a|i:a|_1$<i>1<i>2|<>:e$<b>64<i>1<i>3<i>3<b>0]"
+ + "[(2-3)s:b|i:b|_2$<i>2<i>3]" + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
+ + "[(4-5)s:c|i:c|_4$<i>4<i>5]" + "[(5-6)s:a|i:a|_5$<i>5<i>6]");
return fd;
- }
+ }
};
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
index 488067d..8a78b62 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestSpanExpansionIndex.java
@@ -1,9 +1,13 @@
package de.ids_mannheim.korap.index;
import static de.ids_mannheim.korap.TestSimple.getJsonString;
+import static de.ids_mannheim.korap.TestSimple.simpleFieldDoc;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.RegexpQuery;
@@ -16,9 +20,11 @@
import de.ids_mannheim.korap.Krill;
import de.ids_mannheim.korap.KrillIndex;
import de.ids_mannheim.korap.KrillQuery;
+import de.ids_mannheim.korap.TestSimple;
import de.ids_mannheim.korap.query.QueryBuilder;
import de.ids_mannheim.korap.query.SpanElementQuery;
import de.ids_mannheim.korap.query.SpanExpansionQuery;
+import de.ids_mannheim.korap.query.SpanNextQuery;
import de.ids_mannheim.korap.query.SpanRepetitionQuery;
import de.ids_mannheim.korap.query.wrap.SpanQueryWrapper;
import de.ids_mannheim.korap.response.Result;
@@ -29,26 +35,58 @@
Result kr;
KrillIndex ki;
-
public TestSpanExpansionIndex () throws IOException {
ki = new KrillIndex();
ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
ki.commit();
}
+    /**
+     * Helper for finding bugs (not annotated as a test): runs the
+     * fuzzing helper of TestSimple with the sequence "c []{0,2} a" and
+     * a regular expression describing the same sequence.
+     *
+     * Since the Java matcher cannot find multiple matches starting at
+     * the same offset, the expected results are sometimes lower than
+     * the actual results.
+     *
+     * @throws IOException
+     * @throws QueryException
+     */
+    public void fuzzyTest () throws IOException, QueryException {
+        List<String> alphabet = Arrays.asList("a", "b", "c", "d", "e");
+        Pattern expectedPattern = Pattern.compile("c[a-e]{0,2}a");
+
+        // c []{0,2} a
+        SpanTermQuery cTerm = new SpanTermQuery(new Term("base", "s:c"));
+        SpanTermQuery aTerm = new SpanTermQuery(new Term("base", "s:a"));
+        SpanExpansionQuery expandedC =
+                new SpanExpansionQuery(cTerm, 0, 2, 0, true);
+        SpanNextQuery sequence = new SpanNextQuery(expandedC, aTerm);
+
+        // NOTE(review): seven arguments are passed here - confirm that
+        // this matches the signature of TestSimple.fuzzingTest.
+        TestSimple.fuzzingTest(alphabet, expectedPattern, sequence,
+                6, 20, 8, 1);
+    }
+
+    /**
+     * Expansion with a range of {0,0}: searching two adjacent "c"
+     * tokens is expected to return two results.
+     */
+    @Test
+    public void testNoExpansion () throws IOException {
+        // Local index with a single two-token document: c c
+        KrillIndex index = new KrillIndex();
+        index.addDoc(simpleFieldDoc("cc"));
+        index.commit();
+
+        SpanTermQuery cTerm = new SpanTermQuery(new Term("base", "s:c"));
+        SpanExpansionQuery unexpanded =
+                new SpanExpansionQuery(cTerm, 0, 0, 0, true);
+
+        Result result = index.search(unexpanded, (short) 10);
+        assertEquals(2, result.getTotalResults());
+    }
/**
* Left and right expansions
*/
@Test
- public void testCase1 () throws IOException {
+ public void testLeftRightExpansions () throws IOException {
SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "s:des"));
// left
SpanExpansionQuery seq = new SpanExpansionQuery(stq, 0, 2, -1, true);
kr = ki.search(seq, (short) 10);
- //assertEquals(69,kr.getTotalResults());
+ // assertEquals(69,kr.getTotalResults());
assertEquals(5, kr.getMatch(0).getStartPos());
assertEquals(8, kr.getMatch(0).getEndPos());
assertEquals(6, kr.getMatch(1).getStartPos());
@@ -57,9 +95,9 @@
assertEquals(8, kr.getMatch(2).getEndPos());
/*
- * for (Match km : kr.getMatches()) {
- * System.out.println(km.getStartPos() + "," + km.getEndPos() + " " +
- * km.getSnippetBrackets()); }
+ for (Match km : kr.getMatches()) {
+ System.out.println(km.getStartPos() + "," + km.getEndPos() + " " +
+ km.getSnippetBrackets()); }
*/
// right
@@ -76,13 +114,12 @@
assertEquals(161, kr.getMatch(3).getEndPos());
}
-
/**
* Classnumber
* Check the expansion offsets
*/
@Test
- public void testCase2 () {
+ public void testExpansionWithClassNumber () {
byte classNumber = 1;
SpanExpansionQuery sq;
// create new payload for the expansion offsets
@@ -135,12 +172,11 @@
*/
}
-
/**
* Right expansion with exclusion
*/
@Test
- public void testCase3 () throws IOException {
+ public void testRightExpansionWithExclusion () throws IOException {
byte classNumber = 1;
SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "tt/p:NN"));
SpanTermQuery notQuery =
@@ -170,13 +206,12 @@
*/
}
-
/**
* Left expansion with exclusion
* No expansion
*/
@Test
- public void testCase4 () throws IOException {
+ public void testLeftExpansionWithExclusion () throws IOException {
byte classNumber = 1;
SpanTermQuery stq = new SpanTermQuery(new Term("tokens", "tt/p:NN"));
SpanTermQuery notQuery =
@@ -212,7 +247,6 @@
}
-
/**
* Expansion over start and end documents start => cut to 0
* TODO: end => to be handled in rendering process
@@ -220,7 +254,7 @@
* @throws IOException
*/
@Test
- public void testCase5 () throws IOException {
+ public void testExpansionOverStart () throws IOException {
KrillIndex ki = new KrillIndex();
ki.addDoc(createFieldDoc0());
ki.commit();
@@ -234,7 +268,7 @@
assertEquals(0, kr.getMatch(0).getStartPos());
assertEquals(2, kr.getMatch(0).getEndPos());
- //right expansion exceeds end position
+ // right expansion exceeds end position
seq = new SpanExpansionQuery(stq, 3, 3, 0, true);
kr = ki.search(seq, (short) 10);
@@ -251,14 +285,13 @@
*/
}
-
/**
* Expansion exclusion : multiple documents
*
* @throws IOException
*/
@Test
- public void testCase6 () throws IOException {
+ public void testExclusionWithMultipleDocs () throws IOException {
KrillIndex ki = new KrillIndex();
ki.addDoc(createFieldDoc0()); // same doc
ki.addDoc(createFieldDoc1()); // only not clause
@@ -283,22 +316,22 @@
assertEquals(4, kr.getMatch(4).getEndPos());
}
-
/**
* Skip to
*/
@Test
- public void testCase7 () throws IOException, QueryException {
+ public void testExpansionWithSkipTo () throws IOException, QueryException {
KrillIndex ki = new KrillIndex();
ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
ki.addDoc(getClass().getResourceAsStream("/wiki/00002.json.gz"), true);
ki.commit();
- String jsonPath = getClass().getResource("/queries/poly3.json").getFile();
+ String jsonPath =
+ getClass().getResource("/queries/poly3.json").getFile();
String jsonQuery = getJsonString(jsonPath);
SpanQueryWrapper sqwi = new KrillQuery("tokens").fromKoral(jsonQuery);
SpanQuery sq = sqwi.toQuery();
- //System.out.println(sq.toString());
+ // System.out.println(sq.toString());
kr = ki.search(sq, (short) 20);
assertEquals(205, kr.getMatch(0).getStartPos());
@@ -311,7 +344,6 @@
*/
}
-
/**
* Query rewrite bug
*
@@ -378,8 +410,6 @@
assertEquals((long) 2, kr.getTotalResults());
}
-
-
/**
* Query rewrite bug
*
@@ -416,7 +446,6 @@
assertEquals(2, kr.getTotalResults());
}
-
@Test
public void indexRegexSequence () throws Exception {
KrillIndex ki = new KrillIndex();
@@ -428,7 +457,6 @@
SpanQueryWrapper sq = kq.seq(kq.or("s:baumgarten", "s:steingarten"))
.append(kq.seg().without(kq.or("s:franz", "s:hans")));
-
// Expected to find [baumgarten steingarten]
Krill ks = _newKrill(sq);
Result kr = ki.search(ks);
@@ -459,7 +487,7 @@
ki.commit();
SpanTermQuery stq = new SpanTermQuery(new Term("base", "s:a"));
-
+
RegexpQuery requery =
new RegexpQuery(new Term("base", "s:[bc]"), RegExp.ALL);
SpanMultiTermQueryWrapper<RegexpQuery> notQuery =
@@ -472,10 +500,9 @@
kr = ki.search(seq, (short) 20);
- assertEquals(9,kr.getMatches().size());
-
- }
+ assertEquals(9, kr.getMatches().size());
+ }
@Test
public void indexExpansionWithNegationDifferentFragments () throws Exception {
@@ -540,7 +567,6 @@
assertEquals((long) 1, kr.getTotalResults());
};
-
private FieldDocument createFieldDoc6 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-6");
@@ -580,7 +606,6 @@
return fd;
}
-
private FieldDocument createFieldDoc2 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-2");
@@ -592,7 +617,6 @@
return fd;
}
-
private FieldDocument createFieldDoc3 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-3");
@@ -602,7 +626,6 @@
return fd;
}
-
private FieldDocument createFieldDoc4 () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-4");
diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
index 1d993b8..7094199 100644
--- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
+++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java
@@ -895,7 +895,7 @@
+ "(tokens:/tt/p:A.*/){1,3}), "
+ "SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/))])");
- assertEquals(kr.getTotalResults(), 58);
+ assertEquals(68,kr.getTotalResults());
assertEquals(0, kr.getStartIndex());
assertEquals(kr.getMatch(0).getSnippetBrackets(),
@@ -908,10 +908,9 @@
assertEquals(kr.getMatch(3).getSnippetBrackets(),
"Saragat-Partei zerfällt Rom ([[ADN]]) "
+ "die von dem Rechtssozialisten Saragat geführte ...");
- assertEquals(kr.getMatch(23).getSnippetBrackets(),
- "... dem Namen \"Einheitsbewegung der sozialistischen "
- + "Initiative\" [[eine neue politische Gruppierung]] "
- + "ins Leben gerufen hatten. Pressemeldungen zufolge ...");
+ assertEquals("... auseinander, nachdem vor einiger Zeit mehrere "
+ + "[[prominente Mitglieder]] ihren Austritt erklärt "
+ + "und unter dem ...", kr.getMatch(23).getSnippetBrackets());
};