Use cryptographic Blake2b hash as deterministic fold random source

The original standard hash code function was in effect partially
dependent on the order of the texts.
diff --git a/pom.xml b/pom.xml
index 54b31bf..beda604 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
 
     <groupId>groupId</groupId>
     <artifactId>nGrammFoldCount</artifactId>
-    <version>1.3</version>
+    <version>1.5</version>
 
     <properties>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -162,5 +162,10 @@
             <version>RELEASE</version>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>org.bouncycastle</groupId>
+            <artifactId>bcprov-jdk15on</artifactId>
+            <version>1.66</version>
+        </dependency>
     </dependencies>
 </project>
\ No newline at end of file
diff --git a/src/main/java/org/ids_mannheim/DeterministicRandomProvider.java b/src/main/java/org/ids_mannheim/DeterministicRandomProvider.java
new file mode 100644
index 0000000..2f51f2c
--- /dev/null
+++ b/src/main/java/org/ids_mannheim/DeterministicRandomProvider.java
@@ -0,0 +1,43 @@
+package org.ids_mannheim;
+
+import org.bouncycastle.crypto.digests.Blake2bDigest;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Maps text IDs to fold indices by hashing the ID with Blake2b, so that the
+ * fold of a text depends only on its ID and the configured number of values.
+ *
+ * <p>Each instance reuses one digest object and one output buffer, so a single
+ * instance should not be shared between threads without external synchronization.
+ */
+public class DeterministicRandomProvider {
+    // Digest length argument is 1 (bytes, per the Bouncy Castle API): only one hash byte is used per ID.
+    private final Blake2bDigest b2bd = new Blake2bDigest(null, 1, null, null);
+    private final byte[] out_bytes = new byte[4];
+    private final int max_values;
+
+    /**
+     * @param max_values modulus applied to the unsigned hash byte when computing a fold
+     */
+    public DeterministicRandomProvider(int max_values) {
+        this.max_values = max_values;
+    }
+
+    /**
+     * Returns the fold index for the given text ID.
+     *
+     * @param id text ID to hash
+     * @return the first digest byte, read as unsigned, modulo {@code max_values}
+     */
+    public int getFoldFromTextID(String id) {
+        // Encode explicitly as UTF-8 and hash the full byte array: the platform default
+        // charset would make the result machine-dependent, and id.length() counts chars,
+        // not bytes, so it is the wrong length for any non-ASCII ID.
+        byte[] idBytes = id.getBytes(StandardCharsets.UTF_8);
+        b2bd.update(idBytes, 0, idBytes.length);
+        b2bd.doFinal(out_bytes, 0);
+        return Byte.toUnsignedInt(out_bytes[0]) % max_values;
+    }
+}
+
diff --git a/src/main/java/org/ids_mannheim/Utils.java b/src/main/java/org/ids_mannheim/Utils.java
index 6af95f0..966236a 100644
--- a/src/main/java/org/ids_mannheim/Utils.java
+++ b/src/main/java/org/ids_mannheim/Utils.java
@@ -2,7 +2,6 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.nio.channels.FileChannel;
 import java.nio.file.AccessDeniedException;
 import java.nio.file.FileAlreadyExistsException;
 import java.nio.file.Files;
@@ -38,9 +37,5 @@
         }
         return f;
     }
-
-    public static int getFoldFromTextID(String id, int max_fold) {
-        return Math.abs(id.hashCode() % max_fold);
-    }
 }
 
diff --git a/src/main/java/org/ids_mannheim/Worker.java b/src/main/java/org/ids_mannheim/Worker.java
index 238e004..18a14bd 100644
--- a/src/main/java/org/ids_mannheim/Worker.java
+++ b/src/main/java/org/ids_mannheim/Worker.java
@@ -20,13 +20,13 @@
     private final ArrayList<String> fnames;
     private final BlockingQueue<Integer> queue;
     private final ConcurrentHashMap<String, AtomicInteger> map;
-    private final int folds;
     private final Progressbar etaPrinter;
     private final int ngram_size;
     private final int target_fold;
     private final Logger logger;
     private final WorkerNodePool pool;
     private final boolean with_lemma_and_pos;
+    private final DeterministicRandomProvider deterministicRandomProvider;
 
     public Worker(BlockingQueue<Integer> queue, ArrayList<String> fnames, int ngram_size, int target_fold, int folds,
                   ConcurrentHashMap<String, AtomicInteger> map,
@@ -36,12 +36,12 @@
         this.fnames = fnames;
         this.map = map;
         this.ngram_size = ngram_size;
-        this.folds = folds;
         this.target_fold = target_fold;
         this.with_lemma_and_pos = with_lemma_and_pos;
         this.pool = pool;
         this.etaPrinter = etaPrinter;
         this.logger = logger;
+        this.deterministicRandomProvider = new DeterministicRandomProvider(folds);
     }
 
     @Override
@@ -81,9 +81,9 @@
                     if (line.startsWith("#")) {
                         Matcher matcher = new_text_pattern.matcher(line);
                         if (matcher.find()) {
-                            fold = Utils.getFoldFromTextID(matcher.group(1), folds + 1);
+                            fold = deterministicRandomProvider.getFoldFromTextID(matcher.group(1)) + 1;
                             texts++;
-                            if(fold == target_fold) {
+                            if (fold == target_fold) {
                                 slidingWindowQueue.reset(fold);
                             }
                         }
diff --git a/src/test/java/org/ids_mannheim/UtilsTest.java b/src/test/java/org/ids_mannheim/UtilsTest.java
index 02b9c04..a93db3c 100644
--- a/src/test/java/org/ids_mannheim/UtilsTest.java
+++ b/src/test/java/org/ids_mannheim/UtilsTest.java
@@ -12,11 +12,12 @@
 
     @Test
     void randomFoldIsDeterministic() {
-        assertEquals(404783, Utils.getFoldFromTextID("RPO05_JAN.00001", 1000000));
-        assertEquals(404782, Utils.getFoldFromTextID("RPO05_JAN.00002", 1000000));
-        assertEquals(404781, Utils.getFoldFromTextID("RPO05_JAN.00003", 1000000));
-        assertEquals(404753, Utils.getFoldFromTextID("RPO05_JAN.00010", 1000000));
-        assertEquals(936451, Utils.getFoldFromTextID("RPO05_JUN.00001", 1000000));
-        assertEquals(936450, Utils.getFoldFromTextID("RPO05_JUN.00002", 1000000));
+        DeterministicRandomProvider drp = new DeterministicRandomProvider(100);
+        assertEquals(89, drp.getFoldFromTextID("RPO05_JAN.00001"));
+        assertEquals(47, drp.getFoldFromTextID("RPO05_JAN.00002"));
+        assertEquals(0, drp.getFoldFromTextID("RPO05_JAN.00003"));
+        assertEquals(91, drp.getFoldFromTextID("RPO05_JAN.00010"));
+        assertEquals(53, drp.getFoldFromTextID("RPO05_JUN.00001"));
+        assertEquals(94, drp.getFoldFromTextID("RPO05_JUN.00002"));
     }
 }
\ No newline at end of file