totalngrams: log warnings and retry on errors
diff --git a/src/main/java/org/ids_mannheim/TotalNGram.java b/src/main/java/org/ids_mannheim/TotalNGram.java
index 752307c..2d19a1f 100644
--- a/src/main/java/org/ids_mannheim/TotalNGram.java
+++ b/src/main/java/org/ids_mannheim/TotalNGram.java
@@ -7,22 +7,36 @@
 import java.nio.file.AccessDeniedException;
 import java.nio.file.FileAlreadyExistsException;
 import java.nio.file.Files;
-import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Locale;
 import java.util.concurrent.*;
+import java.util.logging.FileHandler;
+import java.util.logging.Logger;
+import java.util.logging.SimpleFormatter;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 
+
 @CommandLine.Command(mixinStandardHelpOptions = true,
         name = "totalngram", description = "add ngram counts from KorAP-XML or CoNLL-U files")
 
 public class TotalNGram implements Callable<Integer> {
     private static final int MAX_THREADS = Runtime.getRuntime().availableProcessors() * 2 / 3;
-    private Progressbar etaPrinter;
+    static private final Logger logger;
 
+    static {
+        // Load the bundled config as a stream: a resource URL's getFile() is not a usable path inside a jar.
+        try (java.io.InputStream config = TotalNGram.class.getClassLoader().getResourceAsStream("logging.properties")) {
+            if (config != null) { java.util.logging.LogManager.getLogManager().readConfiguration(config); }
+        } catch (java.io.IOException e) { System.err.println("Cannot load logging.properties: " + e); }
+        logger = Logger.getLogger(TotalNGram.class.getSimpleName());
+    }
+
+    @CommandLine.Parameters(arity = "1..*", description = "input files")
+    private final ArrayList<String> inputFiles = new ArrayList<>();
     @CommandLine.Option(names = {"-L",
-            "--log-file"}, defaultValue = "sum.log", description = "log file name (default: ${DEFAULT-VALUE})")
+            "--log-file"}, defaultValue = "totalngram.log", description = "log file name (default: ${DEFAULT-VALUE})")
     String logFileName;
 
     @SuppressWarnings("CanBeFinal")
@@ -53,15 +67,27 @@
     @CommandLine.Option(names = {"-f",
             "--folds"}, description = "number of folds (default: ${DEFAULT-VALUE})")
     int FOLDS = 10;
-
-    @CommandLine.Parameters(arity = "1..*", description = "input files")
-    private final ArrayList<String> inputFiles = new ArrayList<>();
+    private Progressbar etaPrinter;
 
     public TotalNGram() {
     }
 
+    public static void main(String[] args) { // CLI entry point; exits with the status execute(args) returns
+        System.exit(new CommandLine(new TotalNGram()).execute(args));
+    }
+
     @Override
     public Integer call() throws Exception {
+        try {
+            // Additionally write log records to the file named by -L/--log-file.
+            FileHandler fileHandler = new FileHandler(logFileName);
+            fileHandler.setFormatter(new SimpleFormatter());
+            java.util.Locale.setDefault(Locale.Category.FORMAT, Locale.ROOT);
+            logger.addHandler(fileHandler);
+        } catch (Exception e) {
+            System.err.println("Cannot log to " + logFileName + ": " + e); // best effort: the run continues
+        }
+
         PrintStream output_stream;
         if ((output_fillename == null) || output_fillename.equals("-")) {
             output_stream = System.out;
@@ -92,7 +118,7 @@
             max_threads = workerNodePool.size;
         }
         int threads = Math.min(max_threads, inputFiles.size());
-        IntStream.range(0, threads).forEach(unused -> es.execute(new Worker(queue, inputFiles, ngram_size, FOLDS, map, workerNodePool, etaPrinter)));
+        IntStream.range(0, threads).forEach(unused -> es.execute(new Worker(queue, inputFiles, ngram_size, FOLDS, map, workerNodePool, etaPrinter, logger)));
         queue.addAll(IntStream.range(0, inputFiles.size()).boxed().collect(Collectors.toList()));
         IntStream.range(0, threads).forEach(unused -> {
             try {
@@ -115,8 +141,4 @@
         output_stream.println("\t" + map.values().parallelStream().mapToLong(e -> e.count.get(0)).sum());
         return null;
     }
-
-    public static void main(String[] args) {
-        System.exit(new CommandLine(new TotalNGram()).execute(args));
-    }
 }
diff --git a/src/main/java/org/ids_mannheim/Worker.java b/src/main/java/org/ids_mannheim/Worker.java
index a82b0c7..0a62f68 100644
--- a/src/main/java/org/ids_mannheim/Worker.java
+++ b/src/main/java/org/ids_mannheim/Worker.java
@@ -7,23 +7,28 @@
 import java.util.ArrayList;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ThreadLocalRandom;
+import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import static java.lang.Thread.sleep;
+
 public class Worker implements Runnable {
     private static final Pattern new_text_pattern = Pattern.compile("^#\\s+text_id\\s*=\\s*(.+)");
+    private static final int MAX_RETRIES = 10;
     private final ArrayList<String> fnames;
     private final BlockingQueue<Integer> queue;
     private final ConcurrentHashMap<String, FoldedEntry> map;
     private final int folds;
     private final Progressbar etaPrinter;
     private final int ngram_size;
-    private WorkerNodePool pool;
+    private final Logger logger;
+    private final WorkerNodePool pool;
+
     public Worker(BlockingQueue<Integer> queue, ArrayList<String> fnames, int ngram_size, int folds,
                   ConcurrentHashMap<String, FoldedEntry> map,
                   WorkerNodePool pool,
-                  Progressbar etaPrinter) {
+                  Progressbar etaPrinter, Logger logger) {
         this.queue = queue;
         this.fnames = fnames;
         this.map = map;
@@ -31,17 +36,20 @@
         this.folds = folds;
         this.pool = pool;
         this.etaPrinter = etaPrinter;
+        this.logger = logger;
     }
 
     @Override
     public void run() {
         try {
             int index = queue.take();
+            int retries = MAX_RETRIES;
             while (index >= 0) {
                 String fname = fnames.get(index);
-                long file_size = new File(fname).length();
+                File current_file = new File(fname);
+                long file_size = current_file.length();
                 int poolIndex = index % pool.size;
-                System.err.println(String.format("%4d/%4d %-10s %-10s", index, fnames.size(), pool.getHost(poolIndex), fname));
+                logger.info(String.format("%5d/%5d %-10s %-10s", index, fnames.size(), pool.getHost(poolIndex), current_file.getName()));
                 String[] cmd = {
                         "/bin/sh",
                         "-c",
@@ -52,26 +60,40 @@
                 String line;
                 int fold = -1;
                 SlidingWindowQueue slidingWindowQueue = null;
+                int texts=0;
                 while ((line = in.readLine()) != null) {
                     if (line.startsWith("#")) {
                         Matcher matcher = new_text_pattern.matcher(line);
                         if (matcher.find()) {
-                            fold = Math.abs(matcher.group(1).hashCode()) % folds +1;
+                            fold = Math.abs(matcher.group(1).hashCode()) % folds + 1;
                         }
                         int finalFold = fold;
-                        slidingWindowQueue = new SlidingWindowQueue(ngram_size, s -> {
-                            FoldedEntry.incr(map, s, finalFold);
-                        });
+                        slidingWindowQueue = new SlidingWindowQueue(ngram_size, s -> FoldedEntry.incr(map, s, finalFold));
+                        texts++;
                         continue;
                     }
                     String[] strings = line.split("\\s+");
                     if (strings.length < 4) {
                         continue;
                     }
+                    //noinspection ConstantConditions
                     slidingWindowQueue.add(strings[1]);
                 }
-                etaPrinter.update(file_size);
-                index = queue.take();
+                if (texts > 0) { // at least one '#' line was read: treat the file as processed
+                    logger.info(pool.getHost(poolIndex)+" finished " + fname + " with "+texts+ " texts");
+                    etaPrinter.update(file_size);
+                    retries = MAX_RETRIES;
+                    index = queue.take();
+                } else {
+                    if (--retries > 0) {
+                        logger.warning("Retrying " + fname);
+                    } else {
+                        logger.severe ("Giving up " + fname);
+                        retries = MAX_RETRIES; // otherwise every later failing file is dropped on its first failure
+                        index = queue.take();
+                    }
+                    sleep(1000); // pause before the next attempt, whether it is a retry or a new file
+                }
             }
         } catch (InterruptedException | IOException e) {
             Thread.currentThread().interrupt();
diff --git a/src/main/resources/logging.properties b/src/main/resources/logging.properties
new file mode 100644
index 0000000..c793dcc
--- /dev/null
+++ b/src/main/resources/logging.properties
@@ -0,0 +1,9 @@
+handlers= java.util.logging.FileHandler
+.level= INFO
+java.util.logging.FileHandler.level = INFO
+java.util.logging.FileHandler.pattern = totalngrams.log
+java.util.logging.FileHandler.limit = 50000
+java.util.logging.FileHandler.count = 1
+java.util.logging.FileHandler.formatter = java.util.logging.SimpleFormatter
+# Keep the format unquoted: quotes would be printed literally. Short alternative: %4$s: %5$s [%1$tc]%n
+java.util.logging.SimpleFormatter.format=%1$tb %1$td, %1$tY %1$tH:%1$tM:%1$tS %1$Tp %2$s%n%4$s: %5$s%n