totalngrams: parallelize frequency table sorting and fix alphabetical secondary sort order
diff --git a/src/main/java/org/ids_mannheim/ValueThenKeyComparator.java b/src/main/java/org/ids_mannheim/FreqListEntryComparator.java
similarity index 74%
rename from src/main/java/org/ids_mannheim/ValueThenKeyComparator.java
rename to src/main/java/org/ids_mannheim/FreqListEntryComparator.java
index 1623462..fd5600d 100644
--- a/src/main/java/org/ids_mannheim/ValueThenKeyComparator.java
+++ b/src/main/java/org/ids_mannheim/FreqListEntryComparator.java
@@ -3,12 +3,12 @@
import java.util.Comparator;
import java.util.Map;
-public class ValueThenKeyComparator<K extends Comparable<? super K>,
+public class FreqListEntryComparator<K extends Comparable<? super K>,
V extends Comparable<? super V>>
implements Comparator<Map.Entry<K, V>> {
public int compare(Map.Entry<K, V> a, Map.Entry<K, V> b) {
- int cmp1 = a.getValue().compareTo(b.getValue());
+ int cmp1 = b.getValue().compareTo(a.getValue());
if (cmp1 != 0) {
return cmp1;
} else {
diff --git a/src/main/java/org/ids_mannheim/TotalNGram.java b/src/main/java/org/ids_mannheim/TotalNGram.java
index 2d19a1f..3d4149c 100644
--- a/src/main/java/org/ids_mannheim/TotalNGram.java
+++ b/src/main/java/org/ids_mannheim/TotalNGram.java
@@ -130,10 +130,12 @@
es.shutdown();
boolean finished = es.awaitTermination(120, TimeUnit.HOURS);
etaPrinter.finish();
+ logger.info("Sorting and writing frequency table.");
System.err.println("Sorting and writing frequency table.");
- map.entrySet().stream()
- .sorted(Collections.reverseOrder(new ValueThenKeyComparator<>()))
- .forEach(entry -> output_stream.println(entry.getKey() + entry.getValue().toString()));
+ map.entrySet().parallelStream()
+ .sorted(new FreqListEntryComparator<>())
+ .forEachOrdered(entry -> output_stream.println(entry.getKey() + entry.getValue().toString()));
+ logger.info("Calculating column sums.");
System.err.println("Calculating column sums.");
IntStream.rangeClosed(1, FOLDS)
.forEach(i -> output_stream.print("\t" + map.values()