totalngram: Use our own ETA printer
We need to know approximately when the process will be finished to make
sure it's not in 10 years.
diff --git a/pom.xml b/pom.xml
index 97ca101..ffb1e74 100644
--- a/pom.xml
+++ b/pom.xml
@@ -139,16 +139,10 @@
</plugins>
</build>
<dependencies>
- <!-- https://mvnrepository.com/artifact/gnu.getopt/java-getopt -->
<dependency>
<groupId>info.picocli</groupId>
<artifactId>picocli</artifactId>
<version>4.2.0</version>
</dependency>
- <dependency>
- <groupId>com.vdurmont</groupId>
- <artifactId>etaprinter</artifactId>
- <version>2.0.0</version>
- </dependency>
</dependencies>
</project>
\ No newline at end of file
diff --git a/src/main/java/org/ids_mannheim/Progressbar.java b/src/main/java/org/ids_mannheim/Progressbar.java
new file mode 100644
index 0000000..9bddc6a
--- /dev/null
+++ b/src/main/java/org/ids_mannheim/Progressbar.java
@@ -0,0 +1,163 @@
+package org.ids_mannheim;
+
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Text progress bar with throughput and ETA display, written to {@code System.err}.
+ *
+ * <p>Progress is counted in bytes against a fixed maximum. {@link #update(long)} and
+ * {@link #setVal(long)} may be called from several worker threads at once: the counter is
+ * atomic, redraws are throttled, and only one thread draws at a time.
+ */
+public class Progressbar {
+    /** Number of character cells between the brackets of the bar. */
+    private static final int WIDTH = 40;
+    /** Minimum time between two redraws triggered by progress updates. */
+    private static final long REFRESH_MILLIS = 100;
+    /** No ETA is shown before this much time has elapsed; early estimates are too noisy. */
+    private static final long ETA_WARMUP_MILLIS = 2000;
+    private static final DateTimeFormatter formatter = DateTimeFormatter.ofPattern("ccc dd. LLL HH:mm:ss");
+
+    private final long max;
+    private final long start;
+    private final AtomicLong current = new AtomicLong();
+    private final AtomicLong lastUpdate = new AtomicLong();
+
+    /**
+     * Prints the heading line and an initial, empty bar.
+     *
+     * @param max  value of the progress counter that corresponds to 100%
+     * @param name heading printed (followed by a colon) above the bar
+     */
+    public Progressbar(long max, String name) {
+        this.start = System.currentTimeMillis();
+        this.max = max;
+        System.err.println(name + ":");
+        this.printBar(false);
+    }
+
+    /**
+     * Sets the progress counter to an absolute value and redraws if a redraw is due.
+     *
+     * @param i new absolute value of the counter
+     */
+    public void setVal(long i) {
+        this.current.set(i);
+        this.redrawIfDue();
+    }
+
+    /**
+     * Advances the progress counter and redraws if a redraw is due.
+     *
+     * @param i amount to add to the counter
+     */
+    public void update(long i) {
+        this.current.addAndGet(i);
+        this.redrawIfDue();
+    }
+
+    /** Sets the counter to its maximum, draws the final bar and ends the line. */
+    public void finish() {
+        this.current.set(this.max);
+        this.printBar(true);
+    }
+
+    /**
+     * Formats an epoch timestamp in the system default time zone.
+     *
+     * @param millis milliseconds since the epoch
+     * @return the timestamp as weekday, day, month and time of day
+     */
+    public String shortDateTimeFromMillis(long millis) {
+        LocalDateTime date = LocalDateTime.ofInstant(Instant.ofEpochMilli(millis), ZoneId.systemDefault());
+        return date.format(formatter);
+    }
+
+    /**
+     * Formats a duration as {@code HH:mm:ss}; hours are not wrapped at 24.
+     *
+     * @param millis duration in milliseconds
+     * @return the formatted duration
+     */
+    public String hmsFromMillis(long millis) {
+        // Stay in long arithmetic: narrowing the second count to int overflows for very long
+        // durations (about 68 years) and would print negative fields.
+        long totalSeconds = millis / 1000;
+        long seconds = totalSeconds % 60;
+        long minutes = totalSeconds % 3600 / 60;
+        long hours = totalSeconds / 3600;
+        return String.format("%02d:%02d:%02d", hours, minutes, seconds);
+    }
+
+    /** Redraws the bar unless another redraw happened within the last {@code REFRESH_MILLIS}. */
+    private void redrawIfDue() {
+        long now = System.currentTimeMillis();
+        long last = this.lastUpdate.get();
+        // The compare-and-set lets exactly one of several racing threads claim this redraw.
+        if (now - last > REFRESH_MILLIS && this.lastUpdate.compareAndSet(last, now)) {
+            this.printBar(false);
+        }
+    }
+
+    /**
+     * Draws the bar, throughput and either the ETA or the final summary on the current line.
+     *
+     * @param finished whether to print the final summary and terminate the line
+     */
+    private synchronized void printBar(boolean finished) {
+        long now = System.currentTimeMillis();
+        long elapsed = now - this.start;
+        long done = this.current.get();
+
+        // Clamp so that an empty job (max == 0) or an overshooting counter cannot break the layout.
+        double fraction;
+        if (this.max > 0) {
+            fraction = Math.min(1.0, Math.max(0.0, (double) done / (double) this.max));
+        } else {
+            fraction = finished ? 1.0 : 0.0;
+        }
+        String percent = String.format("%3d%%", (int) (100 * fraction));
+
+        // Always exactly WIDTH cells: '=' for completed cells, '>' as the head, blanks for the rest.
+        int filled = (int) Math.floor(WIDTH * fraction);
+        StringBuilder strbar = new StringBuilder(WIDTH);
+        for (int i = 0; i < WIDTH; i++) {
+            if (i < filled - 1 || (i == filled - 1 && filled == WIDTH)) {
+                strbar.append('=');
+            } else if (i == filled - 1) {
+                strbar.append('>');
+            } else {
+                strbar.append(' ');
+            }
+        }
+
+        String strETA;
+        if (elapsed < ETA_WARMUP_MILLIS || done <= 0 || done > this.max) {
+            // Too early, or nothing sensible to extrapolate from.
+            strETA = "--:--:--";
+        } else {
+            // Remaining time at the average rate so far; keep the ratio in floating point.
+            long remaining = (long) ((double) elapsed * (double) (this.max - done) / (double) done);
+            strETA = hmsFromMillis(remaining) + " -> " + shortDateTimeFromMillis(now + remaining);
+        }
+
+        double mbPerSecond = elapsed > 0 ? 1000.0 * done / (1024 * 1024) / elapsed : 0.0;
+        String speed = String.format("@%.1fMB/s", mbPerSecond);
+        String strend;
+        if (finished) {
+            strend = "Finished: " + speed + " " + hmsFromMillis(elapsed) + " = " + shortDateTimeFromMillis(now);
+        } else {
+            strend = speed + " ETA: " + strETA + "\u001B[0K";
+        }
+        // "\u001B[0K" erases to the end of the line so leftovers of a longer previous line vanish.
+        System.err.print("\r" + percent + " [" + strbar + "] " + strend + "\u001B[0K");
+        if (finished) {
+            System.err.print("\n");
+        }
+        System.err.flush();
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/ids_mannheim/TotalNGram.java b/src/main/java/org/ids_mannheim/TotalNGram.java
index a56580f..949d7ab 100644
--- a/src/main/java/org/ids_mannheim/TotalNGram.java
+++ b/src/main/java/org/ids_mannheim/TotalNGram.java
@@ -9,7 +9,6 @@
import java.util.stream.Collectors;
import java.util.stream.IntStream;
-import com.vdurmont.etaprinter.ETAPrinter;
import picocli.CommandLine;
@CommandLine.Command(mixinStandardHelpOptions = true,
@@ -17,7 +16,7 @@
public class TotalNGram implements Callable<Integer> {
private static final int MAX_THREADS = Runtime.getRuntime().availableProcessors() * 2 / 3;
- private ETAPrinter etaPrinter;
+ private Progressbar etaPrinter;
@CommandLine.Option(names = {"-L",
"--log-file"}, defaultValue = "sum.log", description = "log file name (default: ${DEFAULT-VALUE})")
@@ -56,7 +55,7 @@
FoldedEntry.setFolds(FOLDS);
ConcurrentHashMap<String, FoldedEntry> map = new ConcurrentHashMap<>();
long totalFilesSizes = inputFiles.parallelStream().mapToLong(fname -> new File(fname).length()).sum();
- etaPrinter = ETAPrinter.init("sum", totalFilesSizes, System.err, false);
+ etaPrinter = new Progressbar(totalFilesSizes, "MB");
BlockingQueue<Integer> queue = new LinkedBlockingQueue<>(inputFiles.size());
ExecutorService es = Executors.newCachedThreadPool();
int threads = Math.min(max_threads, inputFiles.size());
@@ -71,9 +70,12 @@
});
es.shutdown();
boolean finished = es.awaitTermination(120, TimeUnit.HOURS);
+ etaPrinter.finish();
+ System.err.println("Sorting and writing frequency table.");
map.entrySet().stream()
.sorted(Collections.reverseOrder(new ValueThenKeyComparator<>()))
.forEach(entry -> output_stream.println(entry.getKey() + entry.getValue().toString()));
+ System.err.println("Calculating column sums.");
IntStream.rangeClosed(1, FOLDS)
.forEach(i -> output_stream.print("\t" + map.values()
.parallelStream().mapToLong(e -> e.count.get(i)).sum()));
diff --git a/src/main/java/org/ids_mannheim/Worker.java b/src/main/java/org/ids_mannheim/Worker.java
index 5bf951b..8ce5288 100644
--- a/src/main/java/org/ids_mannheim/Worker.java
+++ b/src/main/java/org/ids_mannheim/Worker.java
@@ -1,7 +1,5 @@
package org.ids_mannheim;
-import com.vdurmont.etaprinter.ETAPrinter;
-
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
@@ -18,7 +16,7 @@
private final BlockingQueue<Integer> queue;
private final ConcurrentHashMap<String, FoldedEntry> map;
private final int folds;
- private final ETAPrinter etaPrinter;
+ private final Progressbar etaPrinter;
private final int ngram_size;
- public Worker(BlockingQueue<Integer> queue, ArrayList<String> fnames, int ngram_size, int folds, ConcurrentHashMap<String, FoldedEntry> map, ETAPrinter etaPrinter) {
+ public Worker(BlockingQueue<Integer> queue, ArrayList<String> fnames, int ngram_size, int folds, ConcurrentHashMap<String, FoldedEntry> map, Progressbar etaPrinter) {