Added failing test for repetition queries

Change-Id: I6342e047f3b30a6a935ed7187dc9fb4de2374dea
diff --git a/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java b/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java
index 3cbc34f..7c553e9 100644
--- a/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java
+++ b/src/test/java/de/ids_mannheim/korap/index/TestRepetitionIndex.java
@@ -3,24 +3,33 @@
 import static org.junit.Assert.assertEquals;
 
 import java.io.IOException;
+import java.util.*;
+import java.util.regex.*;
 
+import static de.ids_mannheim.korap.TestSimple.getJsonString;
+import static de.ids_mannheim.korap.TestSimple.simpleFieldDoc;
+import static de.ids_mannheim.korap.TestSimple.simpleFuzzyFieldDoc;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.junit.Test;
+import org.junit.Ignore;
 
+import de.ids_mannheim.korap.query.QueryBuilder;
+import de.ids_mannheim.korap.Krill;
 import de.ids_mannheim.korap.KrillIndex;
 import de.ids_mannheim.korap.query.SpanNextQuery;
 import de.ids_mannheim.korap.query.SpanRepetitionQuery;
 import de.ids_mannheim.korap.response.Match;
 import de.ids_mannheim.korap.response.Result;
+import de.ids_mannheim.korap.util.QueryException;
 
 public class TestRepetitionIndex {
 
     private KrillIndex ki;
     private Result kr;
-
+    private FieldDocument fd;
 
     private FieldDocument createFieldDoc0 () {
         FieldDocument fd = new FieldDocument();
@@ -251,15 +260,6 @@
         kr = ki.search(sq, (short) 10);
         // 2-4, 2-5, 3-5, 3-6, 4-6 
         assertEquals((long) 5, kr.getTotalResults());
-
-        //        System.out.print(kr.getTotalResults()+"\n");
-        //		for (int i=0; i< kr.getTotalResults(); i++){
-        //			System.out.println(
-        //				kr.match(i).getLocalDocID()+" "+
-        //				kr.match(i).startPos + " " +
-        //				kr.match(i).endPos
-        //			);
-        //		}
     }
 
 
@@ -296,4 +296,139 @@
         	System.out.println(km.getStartPos() +","+km.getEndPos());
         }*/
     };
+
+    @Test
+    public void testRepetitionSnippetBug () throws IOException, QueryException {
+        // Construct index
+        Pattern p = Pattern.compile("bccc?d");
+        
+        // Der [corenlp/p=ADJA]{2,3} Baum
+
+        QueryBuilder qb = new QueryBuilder("base");
+
+        // b c{2,3} d
+        SpanQuery sq = qb.seq(
+            qb.seg("s:b")
+            ).append(
+                qb.repeat(qb.seg("s:c"),2,3)
+                ).append(
+                    qb.seg("s:d")
+                    ).toQuery();
+        
+        Krill ks = new Krill(sq);
+
+        assertEquals(ks.getSpanQuery().toString(),
+                     "spanNext(spanNext(base:s:b, spanRepetition(base:s:c{2,3})), base:s:d)");
+
+        // simpleDocTest
+        KrillIndex ki = new KrillIndex();
+        ki.addDoc(simpleFieldDoc("abccde"));
+        ki.commit();
+        Result kr = ks.apply(ki);
+        assertEquals(1,kr.getTotalResults());
+
+        // fuzzingRepetitionBug();
+
+        // First fuzzed failure (0 vs 1)
+        ki = new KrillIndex();
+        ki.addDoc(simpleFieldDoc("cccd"));
+        ki.addDoc(simpleFieldDoc("bccccccaeae"));
+        ki.addDoc(simpleFieldDoc("cbcedb"));
+
+        ki.commit();
+        kr = ks.apply(ki);
+        assertEquals(0,kr.getTotalResults());
+
+        // Second fuzzed failure (1 vs 0)
+        ki = new KrillIndex();
+        ki.addDoc(simpleFieldDoc("cdddbc"));
+        ki.addDoc(simpleFieldDoc("bccc"));
+        ki.addDoc(simpleFieldDoc("cbcccd"));
+
+        ki.commit();
+        kr = ks.apply(ki);
+        assertEquals(1,kr.getTotalResults());
+            
+        // Third fuzzed failure (1 vs 2)
+        ki = new KrillIndex();
+        ki.addDoc(simpleFieldDoc("bccdcb"));
+        ki.addDoc(simpleFieldDoc("ebccce"));
+        ki.addDoc(simpleFieldDoc("adbdcd"));
+        
+        ki.commit();
+        kr = ks.apply(ki);
+        assertEquals(1,kr.getTotalResults());
+    };
+
+
+    /**
+     * This method creates a corpus using fuzzing to
+     * check for unexpected, failing constellations
+     * regarding repetition queries.
+     * By shrinking the accepted result length, it tries
+     * to minimize the complexity of the constellations.
+     */
+    public void fuzzingRepetitionBug () throws IOException, QueryException {
+
+        List<String> chars = Arrays.asList("a", "b", "c", "c", "d", "e");
+
+        // Construct index
+        Pattern p = Pattern.compile("bccc?d");
+        QueryBuilder qb = new QueryBuilder("base");
+
+        // b c{2,3} d
+        SpanQuery sq = qb.seq(
+            qb.seg("s:b")
+            ).append(
+                qb.repeat(qb.seg("s:c"),2,3)
+                ).append(
+                    qb.seg("s:d")
+                    ).toQuery();
+        
+        Krill ks = new Krill(sq);
+
+        assertEquals(ks.getSpanQuery().toString(),
+                     "spanNext(spanNext(base:s:b, spanRepetition(base:s:c{2,3})), base:s:d)");
+
+        String lastFailureConf = "";
+
+        int minLength = 6;
+        int maxLength = 22;
+        int maxDocs = 8;
+
+        // Create fuzzy corpora (1000 trials)
+        for (int x = 0; x < 100000; x++) {
+            KrillIndex ki = new KrillIndex();
+            ArrayList<String> list = new ArrayList<String>();
+            int c = 0;
+
+            // Create a corpus of 8 fuzzy docs
+            for (int i = 0; i < (int)(Math.random() * maxDocs); i++) {
+                FieldDocument testDoc = simpleFuzzyFieldDoc(chars, minLength, maxLength);
+                String testString = testDoc.doc.getField("base").stringValue();
+                Matcher m = p.matcher(testString);
+                list.add(testString);
+                while (m.find())
+                    c++;
+                ki.addDoc(testDoc);
+            };
+
+            ki.commit();
+
+            Result kr = ks.apply(ki);
+
+            // Check if the regex-calculated matches are correct, otherwise
+            // spit out the corpus configurations
+            if (c != kr.getTotalResults()) {
+                String failureConf = c + ":" + kr.getTotalResults() + " " + list.toString();
+                if (lastFailureConf.length() == 0 ||
+                    failureConf.length() < lastFailureConf.length()) {
+                    System.err.println(failureConf);
+                    lastFailureConf = failureConf;
+                    minLength--;
+                    maxDocs--;
+                };
+            };
+        };
+    };
 }