totalngrams: add -S/--sort option to turn off output sorting
Turning sorting off can be required for n-grams with n >= 3.
diff --git a/src/main/java/org/ids_mannheim/TotalNGrams.java b/src/main/java/org/ids_mannheim/TotalNGrams.java
index ba1a40d..236791e 100644
--- a/src/main/java/org/ids_mannheim/TotalNGrams.java
+++ b/src/main/java/org/ids_mannheim/TotalNGrams.java
@@ -76,6 +76,11 @@
"--fold"}, description = "current folds (default: ${DEFAULT-VALUE})")
int fold = 1;
+ @SuppressWarnings("CanBeFinal")
+ @CommandLine.Option(names = {"-S",
+ "--sort"}, description = "Toggle output sorting (default: ${DEFAULT-VALUE})")
+ boolean sort = true;
+
private Progressbar etaPrinter;
public TotalNGrams() {
@@ -158,18 +163,23 @@
es.shutdown();
boolean finished = es.awaitTermination(120, TimeUnit.HOURS);
etaPrinter.finish();
- logger.info("Sorting and writing frequency table.");
- System.err.println("Sorting and writing frequency table.");
- map.entrySet().parallelStream()
- .sorted((a, b) -> {
- int cmp1 = Integer.compareUnsigned(b.getValue().get(), a.getValue().get());
- if (cmp1 != 0) {
- return cmp1;
- } else {
- return a.getKey().compareTo(b.getKey());
- }
- })
- .forEachOrdered(entry -> output_stream.println(entry.getKey() + "\t" + entry.getValue().toString()));
+ if (!sort) {
+ map.entrySet()
+ .forEach(entry -> output_stream.println(entry.getKey() + "\t" + entry.getValue().toString()));
+ } else {
+ logger.info("Sorting and writing frequency table.");
+ System.err.println("Sorting and writing frequency table.");
+ map.entrySet().parallelStream()
+ .sorted((a, b) -> {
+ int cmp1 = Integer.compareUnsigned(b.getValue().get(), a.getValue().get());
+ if (cmp1 != 0) {
+ return cmp1;
+ } else {
+ return a.getKey().compareTo(b.getKey());
+ }
+ })
+ .forEachOrdered(entry -> output_stream.println(entry.getKey() + "\t" + entry.getValue().toString()));
+ }
logger.info("Finished.");
output_stream.close();
return null;