totalngrams: sort input files by size (largest first)

Also log an error and exit when an input file does not exist or is empty.
diff --git a/src/main/java/org/ids_mannheim/TotalNGram.java b/src/main/java/org/ids_mannheim/TotalNGram.java
index ceec820..82a8f56 100644
--- a/src/main/java/org/ids_mannheim/TotalNGram.java
+++ b/src/main/java/org/ids_mannheim/TotalNGram.java
@@ -8,6 +8,7 @@
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
+import java.util.List;
import java.util.Locale;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicInteger;
@@ -35,7 +36,7 @@
}
@CommandLine.Parameters(arity = "1..*", description = "input files")
- private final ArrayList<String> inputFiles = new ArrayList<>();
+ private ArrayList<String> inputFiles = new ArrayList<>();
@CommandLine.Option(names = {"-L",
"--log-file"}, defaultValue = DEFAULT_LOGFILE, description = "log file name (default: ${DEFAULT-VALUE})")
String logFileName;
@@ -122,7 +123,19 @@
}
ConcurrentHashMap<String, AtomicInteger> map = new ConcurrentHashMap<>();
- long totalFilesSizes = inputFiles.parallelStream().mapToLong(fname -> new File(fname).length()).sum();
+
+ long totalFilesSizes = inputFiles.parallelStream().mapToLong(fname -> {
+ long l = new File(fname).length();
+ if (l == 0) {
+ logger.severe("File "+fname+" does not exist or is empty.");
+ System.exit(-1);
+ }
+ return l;
+ }).sum();
+ inputFiles = (ArrayList<String>) inputFiles.parallelStream().sorted((a, b) ->
+ Long.compareUnsigned(new File(b).length(), new File(a).length())
+ ).collect(Collectors.toList());
+
etaPrinter = new Progressbar(totalFilesSizes);
BlockingQueue<Integer> queue = new LinkedBlockingQueue<>(inputFiles.size());
ExecutorService es = Executors.newCachedThreadPool();