Add failing fuzzy test for skipped results in focus sequences (test #78)
Change-Id: I75c3825759f69bb83ad3c148d2bf02f692180176
diff --git a/Changes b/Changes
index dfc10cd..ebc3112 100644
--- a/Changes
+++ b/Changes
@@ -2,7 +2,7 @@
- [bugfix] Fixed skipping of focus spans (fixed #78; margaretha,
diewald)
-0.59.5 2021-09-30
+0.59.5 2021-10-26
- [bugfix] Fixed candidate settings in token distance spans
(margaretha, diewald)
- [bugfix] Fixed setting candidates in element distance spans.
@@ -10,6 +10,7 @@
- [cleanup] Change contains' frame to be only "contains", for
compatibility with future changes; isAround is deprecated
for now (diewald)
+ - [feature] Added fuzzing test for equivalent queries (diewald)
0.59.4 2021-07-27
- [cleanup] Upgrade dependencies (diewald)
diff --git a/src/test/java/de/ids_mannheim/korap/TestSimple.java b/src/test/java/de/ids_mannheim/korap/TestSimple.java
index b0554ca..0243250 100644
--- a/src/test/java/de/ids_mannheim/korap/TestSimple.java
+++ b/src/test/java/de/ids_mannheim/korap/TestSimple.java
@@ -99,6 +99,7 @@
fd.addTV("base",surface, annotation);
return fd;
};
+
// Create a new FieldDocument with random data
public static FieldDocument simpleFuzzyFieldDoc (List<String> chars, int minLength, int maxLength) {
@@ -112,6 +113,35 @@
};
+ // Create a new FieldDocument with a random surface string and random annotations per token
+ public static FieldDocument annotatedFuzzyFieldDoc (List<String> chars, int minLength, int maxLength) {
+ FieldDocument fd = new FieldDocument();
+ String annotation = "";
+ String surface = "";
+
+ int l = (int)(Math.random() * (maxLength - minLength)) + minLength; // random length: minLength <= l < maxLength (l == minLength if both are equal)
+
+ for (int i = 0; i < l; i++) {
+ String fixChar = chars.get((int)(Math.random() * chars.size())); // random surface character for token i
+ surface += fixChar;
+ annotation += "[("+i+"-"+(i+1)+")s:"+fixChar;
+ if (i == 0) // only the first token gets the <>:base/s:t entry, parameterized with l (presumably the text span - confirm)
+ annotation += "|<>:base/s:t$<b>64<i>0<i>" + l + "<i>" + l + "<b>0";
+
+ for (int j = 0; j < (int)(Math.random() * 3); j++) { // the random bound is re-rolled on every check; this yields 0 to 2 extra a: annotations
+ fixChar = chars.get((int)(Math.random() * chars.size()));
+ annotation += "|a:" + fixChar;
+ };
+
+ annotation += "|_"+i+"$<i>"+i+"<i>"+(i+1)+"]"; // append the _i entry and close the token bracket
+ };
+
+
+ fd.addTV("base",surface, annotation);
+ fd.addString("copy", annotation); // also store the raw annotation string in a "copy" field
+ return fd;
+ };
+
// Get Term Vector
public static MultiTermTokenStream getTermVector (String stream) {
MultiTermTokenStream ts = new MultiTermTokenStream();
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestFocusIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestFocusIndex.java
index 99e2a5a..408332f 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestFocusIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestFocusIndex.java
@@ -1,6 +1,7 @@
package de.ids_mannheim.korap.index;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
import java.io.IOException;
@@ -10,7 +11,14 @@
import org.apache.lucene.search.spans.SpanTermQuery;
import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.List;
+
+import de.ids_mannheim.korap.Krill;
import de.ids_mannheim.korap.KrillIndex;
+import de.ids_mannheim.korap.TestSimple;
import de.ids_mannheim.korap.constants.RelationDirection;
import de.ids_mannheim.korap.query.QueryBuilder;
import de.ids_mannheim.korap.query.SpanClassQuery;
@@ -134,9 +142,6 @@
focus.setWindowSize(2);
kr = ki.search(focus, (short) 10);
-// for (Match m: kr.getMatches()){
-// System.out.println(m.getDocID() + " "+m.getSnippetBrackets());
-// }
assertEquals("a[[{1:b}]]cd", kr.getMatch(0).getSnippetBrackets());
assertEquals("a[[{1:b}]]cd", kr.getMatch(1).getSnippetBrackets());
assertEquals("ab[[{1:c}]]d", kr.getMatch(2).getSnippetBrackets());
@@ -200,16 +205,61 @@
assertEquals("spanNext(base:s:b, focus(1: spanNext(spanNext(base:s:a, base:s:b), {1: base:s:c})))", focus.toQuery().toString());
kr = ki.search(focus.toQuery(), (short) 10);
- /*
- assertEquals("a[[b{1:c}]]d", kr.getMatch(0).getSnippetBrackets());
- assertEquals("a[[b{1:c}]]dcabcd", kr.getMatch(1).getSnippetBrackets());
- assertEquals("abcdca[[b{1:c}]]d", kr.getMatch(2).getSnippetBrackets());
- assertEquals("a[[b{1:c}]]d", kr.getMatch(3).getSnippetBrackets());
- */
assertEquals(5, kr.getTotalResults());
+ };
+
+
+ @Test
+ public void testFocusInNextWithAnnotations () throws QueryException, IOException {
+ ki = new KrillIndex();
+ // Index three small documents carrying surface (s:) and annotation (a:) terms
+ FieldDocument fd = new FieldDocument();
+ fd.addString("ID", "doc-1");
+ fd.addTV("base", "acc",
+ "[(0-1)s:a|<>:base/s:t$<b>64<i>0<i>3<i>3<b>0|a:b|_0$<i>0<i>1]"+
+ "[(1-2)s:c|a:b|_1$<i>1<i>2]"+
+ "[(2-3)s:c|a:a|_2$<i>2<i>3]"
+ );
+ ki.addDoc(fd);
+
+ fd = new FieldDocument();
+ fd.addString("ID", "doc-2");
+ fd.addTV("base", "bea",
+ "[(0-1)s:b|<>:base/s:t$<b>64<i>0<i>3<i>3<b>0|a:c|_0$<i>0<i>1]"+
+ "[(1-2)s:e|_1$<i>1<i>2]"+
+ "[(2-3)s:a|_2$<i>2<i>3]"
+ );
+ ki.addDoc(fd);
+
+ fd = new FieldDocument();
+ fd.addString("ID", "doc-3");
+ fd.addTV("base", "babc",
+ "[(0-1)s:b|<>:base/s:t$<b>64<i>0<i>4<i>4<b>0|a:c|_0$<i>0<i>1]"+
+ "[(1-2)s:a|a:b|_1$<i>1<i>2]"+
+ "[(2-3)s:b|a:b|_2$<i>2<i>3]"+
+ "[(3-4)s:c|a:a|_3$<i>3<i>4]"
+ );
+ ki.addDoc(fd);
+ ki.commit();
+
+ QueryBuilder kq = new QueryBuilder("base");
+
+ // Compare two formulations that are expected to yield the same total:
+ // [a:b] focus(a,b,{c})
+ // b focus(a,[a:b],{c})
+
+ SpanQueryWrapper focus = kq.seq(kq.seg("a:b"),kq.focus(kq.seq(kq.seg("s:a"),kq.seg("s:b"),kq.nr(1, kq.seg("s:c")))));
+ kr = ki.search(focus.toQuery(), (short) 10);
+
+ long total = kr.getTotalResults();
+ assertTrue(total >= 1); // the first formulation has to match at least once
+
+ focus = kq.seq(kq.seg("s:b"),kq.focus(kq.seq(kq.seg("s:a"),kq.seg("a:b"),kq.nr(1, kq.seg("s:c")))));
+ kr = ki.search(focus.toQuery(), (short) 10);
+
+ assertEquals(total, kr.getTotalResults()); // the second formulation must find the same number of results
}
-
@Test
public void testFocusInNextBug () throws QueryException, IOException {
ki = new KrillIndex();
@@ -250,6 +300,21 @@
}
+ // @Test
+ public void testFocusInNextWithAnnotationsFuzzy () throws QueryException, IOException {
+ // The @Test annotation above is commented out, so JUnit does not pick up this fuzzing check.
+ QueryBuilder kq = new QueryBuilder("base");
+ // focus1 and focus2 are meant to be equivalent queries; only their total result counts are compared
+ SpanQueryWrapper focus1 = kq.seq(kq.seg("a:b"),kq.focus(kq.seq(kq.seg("s:a"),kq.seg("s:b"),kq.nr(1, kq.seg("s:c")))));
+
+ SpanQueryWrapper focus2 = kq.seq(kq.seg("s:b"),kq.focus(kq.seq(kq.seg("s:a"),kq.seg("a:b"),kq.nr(1, kq.seg("s:c")))));
+
+ List<String> chars = Arrays.asList("a", "b", "c", "d", "e"); // alphabet for the random documents
+
+ fuzzingTestCompareTotal(chars, focus1.toQuery(), focus2.toQuery(), 5, 20, 20); // minTextLength=5, maxTextLength=20, maxDocs=20
+ };
+
+
public static FieldDocument createFieldDoc () {
FieldDocument fd = new FieldDocument();
fd.addString("ID", "doc-0");
@@ -271,14 +336,59 @@
fd.addString("ID", "doc-1");
fd.addTV("tokens", "abcd",
- "[(0-1)s:a|_0$<i>0<i>1|"
- + "<>:x$<b>64<i>0<i>4<i>4<b>0]"
- + "[(1-2)s:b|_1$<i>1<i>2]"
- + "[(2-3)s:c|_2$<i>2<i>3|"
- + "<>:x$<b>64<i>2<i>4<i>4<b>0]"
- + "[(3-4)s:d|_3$<i>3<i>4]"
+ "[(0-1)s:a|_0$<i>0<i>1|"
+ + "<>:x$<b>64<i>0<i>4<i>4<b>0]"
+ + "[(1-2)s:b|_1$<i>1<i>2]"
+ + "[(2-3)s:c|_2$<i>2<i>3|"
+ + "<>:x$<b>64<i>2<i>4<i>4<b>0]"
+ + "[(3-4)s:d|_3$<i>3<i>4]"
);
return fd;
}
-}
+
+ // Annotated fuzzing test: runs both queries on random corpora and reports corpora where their totals differ
+ public static void fuzzingTestCompareTotal (List<String> chars, SpanQuery sq1, SpanQuery sq2, int minTextLength, int maxTextLength, int maxDocs)
+ throws IOException, QueryException {
+ // NOTE(review): mismatches are only printed to System.err; nothing in this method asserts or throws on them.
+ Krill ks1 = new Krill(sq1);
+ Krill ks2 = new Krill(sq2);
+ String lastFailureConf = ""; // shortest failing configuration reported so far ("" = none yet)
+
+ // Multiple runs of corpus creation and query checks
+ for (int x = 0; x < 100000; x++) {
+ KrillIndex ki = new KrillIndex();
+ ArrayList<String> list = new ArrayList<String>(); // raw annotation strings of the docs in this corpus
+ int c = 0; // NOTE(review): c is never read in this method
+
+ // Create a corpus of <= maxDocs fuzzy docs (the random bound is re-rolled on every loop check)
+ for (int i = 0; i < (int) (Math.random() * maxDocs); i++) {
+ FieldDocument testDoc = TestSimple.annotatedFuzzyFieldDoc(chars,
+ minTextLength, maxTextLength);
+
+ ki.addDoc(testDoc);
+ String testString = testDoc.doc.getField("copy").stringValue(); // the generator stores the raw annotation in the "copy" field
+ list.add(testString);
+ };
+
+ ki.commit();
+
+ // Search and compare both queries
+ Result kr1 = ks1.apply(ki);
+ Result kr2 = ks2.apply(ki);
+
+ if (kr1.getTotalResults() != kr2.getTotalResults()) {
+ String failureConf = "Failure:" + list.toString();
+
+ // Try to keep the failing configuration small: only report a failure shorter than the last reported one
+ if (lastFailureConf.length() == 0
+ || failureConf.length() < lastFailureConf.length()) {
+ System.err.println(failureConf);
+ lastFailureConf = failureConf;
+ minTextLength--; // shrink the generator parameters for the following runs
+ maxDocs--; // NOTE(review): neither counter has a lower bound here
+ };
+ };
+ };
+ };
+};