Revert "Try Ehcache"
This reverts commit b7f4bb7ee472eb5202fbf17c415b8ddd2d68257f.
diff --git a/pom.xml b/pom.xml
index 93f05ba..6466062 100644
--- a/pom.xml
+++ b/pom.xml
@@ -159,9 +159,10 @@
</dependency>
<!-- https://mvnrepository.com/artifact/net.openhft/chronicle-map -->
<dependency>
- <groupId>org.ehcache</groupId>
- <artifactId>ehcache</artifactId>
- <version>3.8.1</version>
+ <scope>compile</scope>
+ <groupId>net.openhft</groupId>
+ <artifactId>chronicle-map</artifactId>
+ <version>3.17.8</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
diff --git a/src/main/java/org/ids_mannheim/TotalNGram.java b/src/main/java/org/ids_mannheim/TotalNGram.java
index 2f691f2..5c405eb 100644
--- a/src/main/java/org/ids_mannheim/TotalNGram.java
+++ b/src/main/java/org/ids_mannheim/TotalNGram.java
@@ -19,6 +19,8 @@
import java.util.stream.Collectors;
import java.util.stream.IntStream;
+import net.openhft.chronicle.map.*;
+import shaded.org.ops4j.io.FileUtils;
@CommandLine.Command(mixinStandardHelpOptions = true,
name = "totalngram", description = "add ngram counts from KorAP-XML or CoNLL-U files")
@@ -129,7 +131,13 @@
Utils.newExtension(tmpdir + "/" + new File(output_fillename).getName(),
"cm"), true);
logger.info("Creating ChronicleMap with "+estimated_entries+ " entries");
- map = new org.ehcache.impl.internal.concurrent.ConcurrentHashMap();
+ map = ChronicleMap
+ .of(String.class, FoldedEntry.class)
+ .name("ngrams-map")
+ .averageKey("Amsterdam".repeat(FOLDS))
+ .averageValue(new FoldedEntry())
+ .entries(estimated_entries)
+ .recoverPersistedTo(persisted_map, false);
} else {
map = new ConcurrentHashMap();
}
@@ -156,15 +164,16 @@
etaPrinter.finish();
logger.info("Sorting and writing frequency table.");
System.err.println("Sorting and writing frequency table.");
- map.entrySet().parallelStream()
- .forEachOrdered(entry -> output_stream.println(entry.getKey() + entry.getValue().toString()));
+ map.entrySet().stream()
+ .sorted(new FreqListEntryComparator<>())
+ .forEach(entry -> output_stream.println(entry.getKey() + entry.getValue().toString()));
logger.info("Calculating column sums.");
- if (map instanceof ConcurrentHashMap || map instanceof org.ehcache.impl.internal.concurrent.ConcurrentHashMap) {
+ if (map instanceof ConcurrentHashMap) {
logger.info("Calculating column sums.");
System.err.println("Calculating column sums.");
- IntStream.rangeClosed(1, FOLDS).parallel()
- .forEachOrdered(i -> output_stream.print("\t" + Long.toUnsignedString(map.values()
- .parallelStream().mapToLong(e -> Integer.toUnsignedLong(e.count.get(i))).sum())));
+ IntStream.rangeClosed(1, FOLDS)
+ .forEach(i -> output_stream.print("\t" + Long.toUnsignedString(map.values()
+ .stream().mapToLong(e -> Integer.toUnsignedLong(e.count.get(i))).sum())));
output_stream.println("\t" + Long.toUnsignedString(map.values().stream().mapToLong(e -> Integer.toUnsignedLong(e.count.get(0))).sum()));
} else {
logger.info("Skip column sums calculation which is too slow with persisted hash maps.");
diff --git a/src/main/java/org/ids_mannheim/Worker.java b/src/main/java/org/ids_mannheim/Worker.java
index 49f1944..6b123f2 100644
--- a/src/main/java/org/ids_mannheim/Worker.java
+++ b/src/main/java/org/ids_mannheim/Worker.java
@@ -1,6 +1,7 @@
package org.ids_mannheim;
-import java.io.BufferedReader;
+import net.openhft.chronicle.map.ChronicleMap;
+
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;