Fixed the argument handler in the Indexer.

Change-Id: Iafd413a0b581f1cfb150144aa359e568e647a9e5
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 06713b2..e4a6594 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -556,7 +556,7 @@
 
         // Fail to add json object
         catch (IOException e) {
-            log.error("File {} not found", json);
+            log.error("File {} not found", json, e);
         };
         return (FieldDocument) null;
     };
diff --git a/src/main/java/de/ids_mannheim/korap/index/Indexer.java b/src/main/java/de/ids_mannheim/korap/index/Indexer.java
index b5dfd20..96054cc 100644
--- a/src/main/java/de/ids_mannheim/korap/index/Indexer.java
+++ b/src/main/java/de/ids_mannheim/korap/index/Indexer.java
@@ -1,7 +1,19 @@
 package de.ids_mannheim.korap.index;
 
 import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import java.io.*;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.MissingOptionException;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.lang.StringUtils;
 import org.apache.lucene.store.MMapDirectory;
 import de.ids_mannheim.korap.KrillIndex;
 import static de.ids_mannheim.korap.util.KrillProperties.*;
@@ -18,6 +30,9 @@
  * 
  * Usage: java -jar Krill-Indexer.jar [--config propfile]
  * [directories]*
+ * 
+ * @author diewald, margaretha
+ *
  */
 public class Indexer {
     KrillIndex index;
@@ -26,9 +41,10 @@
 
     // private static String propFile = "krill.properties";
     private static String path = null;
+    private static Pattern jsonFilePattern;
 
     // Init logger
-    private final static Logger log = LoggerFactory.getLogger(KrillIndex.class);
+    private final static Logger log = LoggerFactory.getLogger(Indexer.class);
 
 
     /**
@@ -41,9 +57,9 @@
     public Indexer (Properties prop) throws IOException {
         if (this.path == null) {
             this.path = prop.getProperty("krill.indexDir");
-        };
+        }
 
-        System.out.println("Index to " + this.path);
+        log.info("Output directory: " + this.path);
 
         // Default to 1000 documents till the next commit
         String commitCount = prop.getProperty("krill.index.commit.count",
@@ -53,7 +69,9 @@
         this.index = new KrillIndex(new MMapDirectory(Paths.get(this.path)));
         this.count = 0;
         this.commitCount = Integer.parseInt(commitCount);
-    };
+
+        jsonFilePattern = Pattern.compile(".*\\.json\\.gz$");
+    }
 
 
     /**
@@ -64,47 +82,51 @@
      *            documents to index.
      */
     public void parse (File dir) {
+        Matcher matcher;
         for (String file : dir.list()) {
-            if (file.matches("^[^\\.].+?\\.json\\.gz$")) {
-                String found = dir.getPath() + '/' + file;
-                System.out.print("  Index " + found + " ... ");
+            //log.info("Json file: "+file);
+            matcher = jsonFilePattern.matcher(file);
+            if (matcher.find()) {
+                file = dir.getPath() + '/' + file;
+                log.info("Adding " + file + " to the index. ");
 
                 // Add file to the index
                 try {
-                    if (this.index.addDoc(new FileInputStream(found),
+                    if (this.index.addDoc(new FileInputStream(file),
                             true) == null) {
-                        System.out.println("fail.");
+                        log.warn("fail.");
                         continue;
-                    };
-                    System.out.println("done (" + count + ").");
+                    }
                     this.count++;
+                    log.debug("Finished adding files. (" + count + ").");
 
                     // Commit in case the commit count is reached
                     if ((this.count % this.commitCount) == 0)
                         this.commit();
                 }
                 catch (FileNotFoundException e) {
-                    System.out.println("not found!");
-                };
-            };
-        };
-    };
+                    log.error("File " + file + " is not found!");
+                }
+            }
+            else {
+                log.warn(file + " does not have json.gz format.");
+            }
+        }
+    }
 
 
     /**
      * Commit changes to the index.
      */
     public void commit () {
-        System.out.println("-----");
-        System.out.print("  Commit ... ");
+        log.info("Committing index ... ");
         try {
             this.index.commit();
         }
         catch (IOException e) {
-            System.err.println("Unable to commit to index " + this.path);
-        };
-        System.out.println("done.");
-    };
+            log.error("Unable to commit to index " + this.path);
+        }
+    }
 
 
     /**
@@ -118,75 +140,71 @@
      */
     public static void main (String[] argv) throws IOException {
 
-        if (argv.length == 0) {
-            String jar = new File(Indexer.class.getProtectionDomain()
-                    .getCodeSource().getLocation().getPath()).getName();
+        Options options = new Options();
+        options.addOption(Option.builder("c").longOpt("config")
+                .desc("configuration file (defaults to "
+                        + de.ids_mannheim.korap.util.KrillProperties.propStr
+                        + ").")
+                .hasArg().argName("properties file").required().build());
+        options.addOption(Option.builder("i").longOpt("inputDir")
+                .desc("input directories separated by semicolons. The input files "
+                        + "have to be in <filename>.json.gz format. ")
+                .hasArgs().argName("input directories").required()
+                .valueSeparator(new Character(';')).build());
+        options.addOption(Option.builder("o").longOpt("outputDir")
+                .desc("index output directory (defaults to "
+                        + "krill.indexDir in the configuration.")
+                .hasArg().argName("output directory").build());
 
-            System.out.println(
-                    "Add documents from a directory to the Krill index.");
-            System.out.println("Usage: java -jar " + jar
-                    + " [--config propfile] [directories]*");
-            System.out.println();
-            System.err.println("  --config|-c    Configuration file");
-            System.err.println("                 (defaults to "
-                    + de.ids_mannheim.korap.util.KrillProperties.propStr + ")");
-            System.err.println("  --indexDir|-d  Index directory");
-            System.err.println("                 (defaults to krill.indexDir"
-                    + " in configuration)");
-            System.err.println();
-            return;
-        };
+        CommandLineParser parser = new DefaultParser();
 
-        int i = 0;
-        boolean last = false;
         String propFile = null;
+        String[] inputDirectories = null;
+        try {
+            CommandLine cmd = parser.parse(options, argv);
 
-        for (i = 0; i < argv.length; i += 2) {
-            switch (argv[i]) {
-                case "--config":
-                case "-cfg":
-                case "-c":
-                    propFile = argv[i + 1];
-                    break;
-                case "--indexDir":
-                case "-d":
-                    path = argv[i + 1];
-                    break;
-                default:
-                    last = true;
-                    break;
-            };
+            log.info("Configuration file: " + cmd.getOptionValue("c"));
+            propFile = cmd.getOptionValue("c");
+            log.info("Input directories: "
+                    + StringUtils.join(cmd.getOptionValues("i"), ";"));
+            inputDirectories = cmd.getOptionValues("i");
 
-            if (last)
-                break;
-        };
+            if (cmd.hasOption("o")) {
+                log.info("Output directory: " + cmd.getOptionValue("o"));
+                path = cmd.getOptionValue("o");
+            }
+        }
+        catch (MissingOptionException e) {
+            HelpFormatter formatter = new HelpFormatter();
+            formatter.printHelp(
+                    "Krill indexer\n java -jar -c <properties file> -i <input directories> "
+                            + "[-o <output directory>]",
+                    options);
+            System.exit(0);
+        }
+        catch (ParseException e) {
+            log.error("Unexpected error: " + e);
+            e.printStackTrace();
+        }
 
         // Load properties
-        /*
-          InputStream fr = new FileInputStream(argv[0]);
-          prop.load(fr);
-        */
         Properties prop = loadProperties(propFile);
 
         // Get indexer object
         Indexer ki = new Indexer(prop);
 
-        // Empty line
-        System.out.println();
-
         // Iterate over list of directories
-        for (String arg : Arrays.copyOfRange(argv, i, argv.length)) {
+        for (String arg : inputDirectories) {
+            log.info("Indexing files in"+arg);
             File f = new File(arg);
             if (f.isDirectory())
                 ki.parse(f);
-        };
+        }
 
         // Final commit
         ki.commit();
-
+        log.info("Finished indexing.");
         // Finish indexing
-        System.out.println("-----");
-        System.out.println("  Indexed " + ki.count + " files.");
-        System.out.println();
-    };
-};
+        System.out.println("Indexed " + ki.count + " files.");
+    }
+}
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index b46d82c..1554002 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,4 +1,4 @@
-log4j.rootLogger = ERROR, stdout
+log4j.rootLogger = DEBUG, stdout
 
 # Queries:
 # log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
@@ -42,6 +42,7 @@
 # Server
 # log4j.category.org.glassfish.jersey = TRACE, stdout
 
+log4j.appender.stdout.Target=System.out
 log4j.appender.stdout = org.apache.log4j.ConsoleAppender
 log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
 log4j.appender.stdout.layout.ConversionPattern = %5p (%F:%L) -> %m%n