Try Ehcache
diff --git a/pom.xml b/pom.xml
index 6466062..93f05ba 100644
--- a/pom.xml
+++ b/pom.xml
@@ -159,10 +159,9 @@
</dependency>
<!-- https://mvnrepository.com/artifact/net.openhft/chronicle-map -->
<dependency>
- <scope>compile</scope>
- <groupId>net.openhft</groupId>
- <artifactId>chronicle-map</artifactId>
- <version>3.17.8</version>
+ <groupId>org.ehcache</groupId>
+ <artifactId>ehcache</artifactId>
+ <version>3.8.1</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
diff --git a/src/main/java/org/ids_mannheim/TotalNGram.java b/src/main/java/org/ids_mannheim/TotalNGram.java
index 5c405eb..2f691f2 100644
--- a/src/main/java/org/ids_mannheim/TotalNGram.java
+++ b/src/main/java/org/ids_mannheim/TotalNGram.java
@@ -19,8 +19,6 @@
import java.util.stream.Collectors;
import java.util.stream.IntStream;
-import net.openhft.chronicle.map.*;
-import shaded.org.ops4j.io.FileUtils;
@CommandLine.Command(mixinStandardHelpOptions = true,
name = "totalngram", description = "add ngram counts from KorAP-XML or CoNLL-U files")
@@ -131,13 +129,7 @@
Utils.newExtension(tmpdir + "/" + new File(output_fillename).getName(),
"cm"), true);
logger.info("Creating ChronicleMap with "+estimated_entries+ " entries");
- map = ChronicleMap
- .of(String.class, FoldedEntry.class)
- .name("ngrams-map")
- .averageKey("Amsterdam".repeat(FOLDS))
- .averageValue(new FoldedEntry())
- .entries(estimated_entries)
- .recoverPersistedTo(persisted_map, false);
+ map = new org.ehcache.impl.internal.concurrent.ConcurrentHashMap();
} else {
map = new ConcurrentHashMap();
}
@@ -164,16 +156,15 @@
etaPrinter.finish();
logger.info("Sorting and writing frequency table.");
System.err.println("Sorting and writing frequency table.");
- map.entrySet().stream()
- .sorted(new FreqListEntryComparator<>())
- .forEach(entry -> output_stream.println(entry.getKey() + entry.getValue().toString()));
+ map.entrySet().parallelStream()
+ .forEachOrdered(entry -> output_stream.println(entry.getKey() + entry.getValue().toString()));
logger.info("Calculating column sums.");
- if (map instanceof ConcurrentHashMap) {
+ if (map instanceof ConcurrentHashMap || map instanceof org.ehcache.impl.internal.concurrent.ConcurrentHashMap) {
logger.info("Calculating column sums.");
System.err.println("Calculating column sums.");
- IntStream.rangeClosed(1, FOLDS)
- .forEach(i -> output_stream.print("\t" + Long.toUnsignedString(map.values()
- .stream().mapToLong(e -> Integer.toUnsignedLong(e.count.get(i))).sum())));
+ IntStream.rangeClosed(1, FOLDS).parallel()
+ .forEachOrdered(i -> output_stream.print("\t" + Long.toUnsignedString(map.values()
+ .parallelStream().mapToLong(e -> Integer.toUnsignedLong(e.count.get(i))).sum())));
output_stream.println("\t" + Long.toUnsignedString(map.values().stream().mapToLong(e -> Integer.toUnsignedLong(e.count.get(0))).sum()));
} else {
logger.info("Skip column sums calculation which is too slow with persisted hash maps.");
diff --git a/src/main/java/org/ids_mannheim/Worker.java b/src/main/java/org/ids_mannheim/Worker.java
index 6b123f2..49f1944 100644
--- a/src/main/java/org/ids_mannheim/Worker.java
+++ b/src/main/java/org/ids_mannheim/Worker.java
@@ -1,7 +1,5 @@
package org.ids_mannheim;
-import net.openhft.chronicle.map.ChronicleMap;
-
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;