Improve fuzzing by separating test and output samples

Change-Id: I3e0513b5b92afb3316bd40113cce9f87d8c31e46

commit: 87b3d33222a44743ef07b5f54e249b57897c28a8 [log] [tgz]
author: Akron <nils@diewald-online.de> Thu Aug 11 20:03:45 2022 +0200
committer: Akron <nils@diewald-online.de> Fri Aug 12 11:25:13 2022 +0200
tree: fe44ed161b69a9652716057ab6c01168da79e195
parent: 02e2a5f5973cc8a4acfc530be003ea5bac932160 [diff]
diff --git a/src/test/java/de/ids_mannheim/korap/TestSimple.java b/src/test/java/de/ids_mannheim/korap/TestSimple.java
index 8ab88e9..0a0bf98 100644
--- a/src/test/java/de/ids_mannheim/korap/TestSimple.java
+++ b/src/test/java/de/ids_mannheim/korap/TestSimple.java

@@ -112,6 +112,7 @@
         };
         FieldDocument fd = simpleFieldDoc(surface);
         fd.addStored("copy", surface);
+        fd.addStored("plain", surface);
         return fd;
     };
 
@@ -141,6 +142,7 @@
         
         fd.addTV("base",surface, annotation);
         fd.addStored("copy", surface);
+        fd.addStored("plain", annotation);
         return fd;
     };
 
@@ -154,6 +156,7 @@
         int l = (int)(Math.random() * (maxLength - minLength)) + minLength;
 
         boolean sentences[] = new boolean[l+1];
+
         Arrays.fill(sentences, false);
         sentences[0] = true;
 
@@ -177,8 +180,8 @@
             };
 
             if (sentences[i]) {
-                int sl = 0;
-                if (i != l - 1) {
+                int sl = -1;
+                if (i < l) {
                     for (int x = i+1; x < l; x++) {
                         if (sentences[x]) {
                             sl = x - 1;
@@ -186,11 +189,11 @@
                         };
                     }
                 };
-                if (sl == 0)
+                if (sl == -1)
                     sl = l;
 
-                annotation += "|<>:base/s:s$<b>64<i>" + sl + "<i>" + sl + "<b>1";
-                surface2 += "|";
+                annotation += "|<>:base/s:s$<b>64<i>" + i + "<i>" + sl + "<i>" + sl + "<b>1";
+                surface2 += "~";
             };
             surface2 += fixChar;
 
@@ -199,6 +202,7 @@
         
         fd.addTV("base",surface, annotation);
         fd.addStored("copy", surface2);
+        fd.addStored("plain", annotation);
         return fd;
     };
 
@@ -311,15 +315,24 @@
         Krill ks = new Krill(sq);
         String lastFailureConf = "";
 
+        ArrayList<String> list = new ArrayList<String>();
+        ArrayList<String> annoList = new ArrayList<String>();
+        
         // Multiple runs of corpus creation and query checks
         for (int x = 0; x < 100000; x++) {
             KrillIndex ki = new KrillIndex();
-            ArrayList<String> list = new ArrayList<String>();
             int c = 0;
 
+            list.clear();
+            annoList.clear();
+
+            if (minTextLength == 0)
+                minTextLength = 1;
+            
             // Create a corpus of <= maxDocs fuzzy docs
             for (int i = 0; i < (int) (Math.random() * maxDocs); i++) {
                 FieldDocument testDoc;
+                
                 if (docType == 1) {
                     testDoc = annotatedFuzzyFieldDoc(
                         chars,
@@ -334,17 +347,46 @@
                         minTextLength, maxTextLength);
                 };
                 String testString = testDoc.getFieldValue("copy");
+                String annoString = testDoc.getFieldValue("plain");
                 Matcher m = resultPattern.matcher(testString);
                 list.add(testString);
+                annoList.add(annoString);
                 int offset = 0;
                 while (m.find(offset)) {
                     c++;
                     offset = Math.max(0, m.start() + 1);
                 }
                 ki.addDoc(testDoc);
+
+
+                // Randomly create new index fragments
+                if (Math.random() > 0.7) {
+                    ki.commit();
+                };
             };
 
             ki.commit();
+
+            /*
+            try {
+                ks.apply(ki);
+            } catch (Exception e) {
+                String failureConf = "Fatal in docs:" + annoList.toString();
+
+                // Try to keep the failing configuration small
+                if (lastFailureConf.length() == 0
+                    || failureConf.length() < lastFailureConf.length()) {
+                    System.err.println(failureConf);
+                    lastFailureConf = failureConf;
+                    minTextLength--;
+                    maxDocs--;
+                    System.err.println("???????????????????");
+                    return;
+                };
+            };
+            */
+            
+            
             Result kr = ks.apply(ki);
             
             // Check if the regex-calculated matches are correct,
@@ -352,7 +394,7 @@
             // spit out the corpus configurations
             if (c != kr.getTotalResults()) {
                 String failureConf = "expected:" + c + ", actual:"
-                        + kr.getTotalResults() + ", docs:" + list.toString();
+                        + kr.getTotalResults() + ", docs:" + annoList.toString();
 
                 // Try to keep the failing configuration small
                 if (lastFailureConf.length() == 0
commit	87b3d33222a44743ef07b5f54e249b57897c28a8	[log] [tgz]
author	Akron <nils@diewald-online.de>	Thu Aug 11 20:03:45 2022 +0200
committer	Akron <nils@diewald-online.de>	Fri Aug 12 11:25:13 2022 +0200
tree	fe44ed161b69a9652716057ab6c01168da79e195
parent	02e2a5f5973cc8a4acfc530be003ea5bac932160 [diff]