Merge "Fixed the argument handler in the Indexer."
diff --git a/pom.xml b/pom.xml
index 31efb78..7035a00 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,341 +1,344 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
- <!--
- ** Server
- # Start the server with
- $ mvn compile exec:java
-
- # Or after packaging
- $ mvn clean package
- # with
- $ java -jar target/Krill-Server.jar
-
-
- ** Formatter
- # Format the code with
- $ mvn java-formatter:format
-
- ** Indexer after packaging (see above)
- $ java -jar target/Krill-Indexer.jar
- -cfg src/main/resources/krill.properties
- /data/hdd/lucene-new/WPD/
- -->
-
- <groupId>de.ids_mannheim.korap</groupId>
- <artifactId>Krill</artifactId>
- <version>0.55.7</version>
- <packaging>jar</packaging>
-
- <name>Krill</name>
- <url>http://www.ids-mannheim.de/</url>
-
- <organization>
- <name>IDS Mannheim</name>
- <url>http://www.ids-mannheim.de/</url>
- </organization>
-
- <developers>
- <developer>
- <name>Nils Diewald</name>
- <email>diewald@ids-mannheim.de</email>
- <url>http://nils-diewald.de</url>
- </developer>
- <developer>
- <name>Eliza Margaretha</name>
- <email>margaretha@ids-mannheim.de</email>
- </developer>
- </developers>
-
- <properties>
- <jersey.version>2.15</jersey.version>
- <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- </properties>
-
- <dependencyManagement>
- <dependencies>
- <dependency>
- <groupId>org.glassfish.jersey</groupId>
- <artifactId>jersey-bom</artifactId>
- <version>${jersey.version}</version>
- <type>pom</type>
- <scope>import</scope>
- </dependency>
- </dependencies>
- </dependencyManagement>
-
- <dependencies>
- <!-- junit dependency -->
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>4.11</version>
- <scope>test</scope>
- </dependency>
-
- <!-- log4j dependency -->
- <dependency>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- <version>1.2.17</version>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- <version>1.7.5</version>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>jul-to-slf4j</artifactId>
- <version>1.7.5</version>
- </dependency>
-
- <!-- SQLite for database connection tests -->
- <dependency>
- <groupId>org.xerial</groupId>
- <artifactId>sqlite-jdbc</artifactId>
- <version>3.7.2</version>
- </dependency>
-
- <!-- Database Connection Pool Manager -->
- <dependency>
- <groupId>c3p0</groupId>
- <artifactId>c3p0</artifactId>
- <version>0.9.1.2</version>
- </dependency>
+ <!-- ** Server
+
+ # Start the server with
+
+ $ mvn compile exec:java
+
+ # Or after packaging
+
+ $ mvn clean package
- <!-- Lucene core dependency -->
- <dependency>
- <artifactId>lucene-core</artifactId>
- <groupId>org.apache.lucene</groupId>
- <type>jar</type>
- <version>5.0.0</version>
- </dependency>
+ # with
+
+ $ java -jar target/Krill-Server.jar
+
+ ** Formatter
+
+ # Format the code with $ mvn java-formatter:format
+
+ ** Indexer
+ # after packaging (see above)
+
+ $ java -jar target/Krill-Indexer.jar -c [configuration file]
+ -i [input directories] -o [output directory]
+
+ for example:
+
+ $ java -jar target/Krill-Indexer.jar -c src/test/resources/krill.properties
+ -i src/test/resources/bzk -o index/
+
+ -->
- <!-- Lucene queryparser dependency -->
- <dependency>
- <artifactId>lucene-queryparser</artifactId>
- <groupId>org.apache.lucene</groupId>
- <type>jar</type>
- <version>5.0.0</version>
- </dependency>
+ <groupId>de.ids_mannheim.korap</groupId>
+ <artifactId>Krill</artifactId>
+ <version>0.55.7</version>
+ <packaging>jar</packaging>
- <!-- Lucene analyzers dependency -->
- <dependency>
- <artifactId>lucene-analyzers-common</artifactId>
- <groupId>org.apache.lucene</groupId>
- <type>jar</type>
- <version>5.0.0</version>
- </dependency>
+ <name>Krill</name>
+ <url>http://www.ids-mannheim.de/</url>
- <dependency>
- <groupId>org.hamcrest</groupId>
- <artifactId>hamcrest-core</artifactId>
- <version>1.3</version>
- </dependency>
+ <organization>
+ <name>IDS Mannheim</name>
+ <url>http://www.ids-mannheim.de/</url>
+ </organization>
- <!-- JCache -->
- <dependency>
- <groupId>net.sf.jsr107cache</groupId>
- <artifactId>jsr107cache</artifactId>
- <version>1.0</version>
- </dependency>
+ <developers>
+ <developer>
+ <name>Nils Diewald</name>
+ <email>diewald@ids-mannheim.de</email>
+ <url>http://nils-diewald.de</url>
+ </developer>
+ <developer>
+ <name>Eliza Margaretha</name>
+ <email>margaretha@ids-mannheim.de</email>
+ </developer>
+ </developers>
- <!-- Jersey -->
- <dependency>
- <groupId>org.glassfish.jersey.containers</groupId>
- <artifactId>jersey-container-grizzly2-http</artifactId>
- </dependency>
+ <properties>
+ <jersey.version>2.15</jersey.version>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ </properties>
- <!-- JSON support in Jersey -->
- <dependency>
- <groupId>com.fasterxml.jackson.jaxrs</groupId>
- <artifactId>jackson-jaxrs-json-provider</artifactId>
- <version>2.4.4</version>
- </dependency>
+ <dependencyManagement>
+ <dependencies>
+ <dependency>
+ <groupId>org.glassfish.jersey</groupId>
+ <artifactId>jersey-bom</artifactId>
+ <version>${jersey.version}</version>
+ <type>pom</type>
+ <scope>import</scope>
+ </dependency>
+ </dependencies>
+ </dependencyManagement>
- <!-- JSON support using Jackson -->
- <!-- see https://github.com/FasterXML/jackson-core -->
- <!-- https://github.com/FasterXML/jackson-databind -->
- <dependency>
- <groupId>com.fasterxml.jackson.core</groupId>
- <artifactId>jackson-databind</artifactId>
- <version>2.4.4</version>
- </dependency>
- <dependency>
- <groupId>com.fasterxml.jackson.core</groupId>
- <artifactId>jackson-annotations</artifactId>
- <version>2.4.4</version>
- </dependency>
- <dependency>
- <groupId>com.fasterxml.jackson.core</groupId>
- <artifactId>jackson-core</artifactId>
- <version>2.4.4</version>
- </dependency>
+ <dependencies>
+ <!-- junit dependency -->
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.11</version>
+ <scope>test</scope>
+ </dependency>
- <!-- JSON-LD support -->
- <!--
- <dependency>
- <groupId>com.github.jsonld-java</groupId>
- <artifactId>jsonld-java</artifactId>
- <version>0.5.2-SNAPSHOT</version>
- </dependency>
+ <!-- log4j dependency -->
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <version>1.2.17</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ <version>1.7.5</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jul-to-slf4j</artifactId>
+ <version>1.7.5</version>
+ </dependency>
- Temporarily disable @Experimental annotation
- <dependency>
- <groupId>KorapAnnotationProcessor</groupId>
- <artifactId>KorapAnnotationProcessor</artifactId>
- <version>0.0.1-SNAPSHOT</version>
- <scope>compile</scope>
- </dependency>
- -->
+ <!-- SQLite for database connection tests -->
+ <dependency>
+ <groupId>org.xerial</groupId>
+ <artifactId>sqlite-jdbc</artifactId>
+ <version>3.7.2</version>
+ </dependency>
- <!-- Some language extensions like StringUtil -->
- <dependency>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- <version>2.3</version>
- </dependency>
- </dependencies>
+ <!-- Database Connection Pool Manager -->
+ <dependency>
+ <groupId>c3p0</groupId>
+ <artifactId>c3p0</artifactId>
+ <version>0.9.1.2</version>
+ </dependency>
- <build>
- <sourceDirectory>${basedir}/src/main/java</sourceDirectory>
- <outputDirectory>${basedir}/bin</outputDirectory>
- <plugins>
- <plugin>
- <artifactId>maven-compiler-plugin</artifactId>
- <version>2.5.1</version>
- <inherited>true</inherited>
- <configuration>
- <showWarnings>true</showWarnings>
- <source>1.7</source>
- <target>1.7</target>
- </configuration>
- </plugin>
+ <!-- Lucene core dependency -->
+ <dependency>
+ <artifactId>lucene-core</artifactId>
+ <groupId>org.apache.lucene</groupId>
+ <type>jar</type>
+ <version>5.0.0</version>
+ </dependency>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-shade-plugin</artifactId>
- <version>2.4.1</version>
- <executions>
- <execution>
- <id>indexer</id>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <transformers>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
- <mainClass>de.ids_mannheim.korap.index.Indexer</mainClass>
- </transformer>
- </transformers>
- <finalName>${project.artifactId}-Indexer</finalName>
- </configuration>
- </execution>
- <execution>
- <id>server</id>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <transformers>
- <transformer
- implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
- <mainClass>de.ids_mannheim.korap.server.Node</mainClass>
- </transformer>
- <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
- <resource>krill.properties</resource>
- </transformer>
- </transformers>
- <finalName>${project.name}-Server</finalName>
- </configuration>
- </execution>
- </executions>
- </plugin>
+ <!-- Lucene queryparser dependency -->
+ <dependency>
+ <artifactId>lucene-queryparser</artifactId>
+ <groupId>org.apache.lucene</groupId>
+ <type>jar</type>
+ <version>5.0.0</version>
+ </dependency>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <version>2.16</version>
- <configuration>
- <excludes>
- <exclude>**/benchmark/*</exclude>
- <exclude>**/TestWPDIndex.java</exclude>
- <exclude>**/TestRealIndex.java</exclude>
- </excludes>
- </configuration>
- </plugin>
+ <!-- Lucene analyzers dependency -->
+ <dependency>
+ <artifactId>lucene-analyzers-common</artifactId>
+ <groupId>org.apache.lucene</groupId>
+ <type>jar</type>
+ <version>5.0.0</version>
+ </dependency>
- <!--
- Formatter plugin for Eclipse based coding conventions
- http://maven-java-formatter-plugin.googlecode.com/svn/site/0.4/usage.html
- -->
- <plugin>
- <groupId>com.googlecode.maven-java-formatter-plugin</groupId>
- <artifactId>maven-java-formatter-plugin</artifactId>
- <version>0.4</version>
- <configuration>
- <configFile>${project.basedir}/Format.xml</configFile>
- <overrideConfigCompilerVersion>true</overrideConfigCompilerVersion>
- <compilerSource>1.7</compilerSource>
- <compilerCompliance>1.7</compilerCompliance>
- <compilerTargetPlatform>1.7</compilerTargetPlatform>
- </configuration>
- </plugin>
+ <dependency>
+ <groupId>org.hamcrest</groupId>
+ <artifactId>hamcrest-core</artifactId>
+ <version>1.3</version>
+ </dependency>
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>exec-maven-plugin</artifactId>
- <version>1.2.1</version>
- <executions>
- <execution>
- <goals>
- <goal>java</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <mainClass>de.ids_mannheim.korap.server.Node</mainClass>
- </configuration>
- </plugin>
- </plugins>
+ <!-- JCache -->
+ <dependency>
+ <groupId>net.sf.jsr107cache</groupId>
+ <artifactId>jsr107cache</artifactId>
+ <version>1.0</version>
+ </dependency>
- <resources>
- <resource>
- <directory>src/main/resources</directory>
- <filtering>true</filtering>
- <includes>
- <include>krill.info</include>
- </includes>
- </resource>
- <resource>
- <directory>src/main/resources</directory>
- <filtering>false</filtering>
- <excludes>
- <exclude>krill.info</exclude>
- </excludes>
- </resource>
- </resources>
+ <!-- Jersey -->
+ <dependency>
+ <groupId>org.glassfish.jersey.containers</groupId>
+ <artifactId>jersey-container-grizzly2-http</artifactId>
+ </dependency>
- <testResources>
- <testResource>
- <directory>src/test/resources</directory>
- <filtering>true</filtering>
- <includes>
- <include>krill.properties</include>
- </includes>
- </testResource>
- <testResource>
- <directory>src/test/resources</directory>
- <filtering>false</filtering>
- <excludes>
- <exclude>krill.properties</exclude>
- </excludes>
- </testResource>
- </testResources>
- </build>
+ <!-- JSON support in Jersey -->
+ <dependency>
+ <groupId>com.fasterxml.jackson.jaxrs</groupId>
+ <artifactId>jackson-jaxrs-json-provider</artifactId>
+ <version>2.4.4</version>
+ </dependency>
+
+ <!-- JSON support using Jackson -->
+ <!-- see https://github.com/FasterXML/jackson-core -->
+ <!-- https://github.com/FasterXML/jackson-databind -->
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ <version>2.4.4</version>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ <version>2.4.4</version>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ <version>2.4.4</version>
+ </dependency>
+
+ <!-- JSON-LD support -->
+ <!-- <dependency> <groupId>com.github.jsonld-java</groupId> <artifactId>jsonld-java</artifactId>
+ <version>0.5.2-SNAPSHOT</version> </dependency> Temporarily disable @Experimental
+ annotation <dependency> <groupId>KorapAnnotationProcessor</groupId> <artifactId>KorapAnnotationProcessor</artifactId>
+ <version>0.0.1-SNAPSHOT</version> <scope>compile</scope> </dependency> -->
+
+ <!-- Some language extensions like StringUtil -->
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ <version>2.3</version>
+ </dependency>
+
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ <version>1.3.1</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <sourceDirectory>${basedir}/src/main/java</sourceDirectory>
+ <outputDirectory>${basedir}/bin</outputDirectory>
+ <plugins>
+ <plugin>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.5.1</version>
+ <inherited>true</inherited>
+ <configuration>
+ <showWarnings>true</showWarnings>
+ <source>1.7</source>
+ <target>1.7</target>
+ </configuration>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>2.4.1</version>
+ <executions>
+ <execution>
+ <id>indexer</id>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <transformers>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+ <mainClass>de.ids_mannheim.korap.index.Indexer</mainClass>
+ </transformer>
+ </transformers>
+ <finalName>${project.artifactId}-Indexer</finalName>
+ </configuration>
+ </execution>
+ <execution>
+ <id>server</id>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <transformers>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+ <mainClass>de.ids_mannheim.korap.server.Node</mainClass>
+ </transformer>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+ <resource>krill.properties</resource>
+ </transformer>
+ </transformers>
+ <finalName>${project.name}-Server</finalName>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>2.16</version>
+ <configuration>
+ <excludes>
+ <exclude>**/benchmark/*</exclude>
+ <exclude>**/TestWPDIndex.java</exclude>
+ <exclude>**/TestRealIndex.java</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+
+ <!-- Formatter plugin for Eclipse based coding conventions http://maven-java-formatter-plugin.googlecode.com/svn/site/0.4/usage.html -->
+ <plugin>
+ <groupId>com.googlecode.maven-java-formatter-plugin</groupId>
+ <artifactId>maven-java-formatter-plugin</artifactId>
+ <version>0.4</version>
+ <configuration>
+ <configFile>${project.basedir}/Format.xml</configFile>
+ <overrideConfigCompilerVersion>true</overrideConfigCompilerVersion>
+ <compilerSource>1.7</compilerSource>
+ <compilerCompliance>1.7</compilerCompliance>
+ <compilerTargetPlatform>1.7</compilerTargetPlatform>
+ </configuration>
+ </plugin>
+
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>exec-maven-plugin</artifactId>
+ <version>1.2.1</version>
+ <executions>
+ <execution>
+ <goals>
+ <goal>java</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <mainClass>de.ids_mannheim.korap.server.Node</mainClass>
+ </configuration>
+ </plugin>
+ </plugins>
+
+ <resources>
+ <resource>
+ <directory>src/main/resources</directory>
+ <filtering>true</filtering>
+ <includes>
+ <include>krill.info</include>
+ </includes>
+ </resource>
+ <resource>
+ <directory>src/main/resources</directory>
+ <filtering>false</filtering>
+ <excludes>
+ <exclude>krill.info</exclude>
+ </excludes>
+ </resource>
+ </resources>
+
+ <testResources>
+ <testResource>
+ <directory>src/test/resources</directory>
+ <filtering>true</filtering>
+ <includes>
+ <include>krill.properties</include>
+ </includes>
+ </testResource>
+ <testResource>
+ <directory>src/test/resources</directory>
+ <filtering>false</filtering>
+ <excludes>
+ <exclude>krill.properties</exclude>
+ </excludes>
+ </testResource>
+ </testResources>
+ </build>
</project>
diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
index 06713b2..e4a6594 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java
@@ -556,7 +556,7 @@
// Fail to add json object
catch (IOException e) {
- log.error("File {} not found", json);
+ log.error("File {} not found", json, e);
};
return (FieldDocument) null;
};
diff --git a/src/main/java/de/ids_mannheim/korap/index/Indexer.java b/src/main/java/de/ids_mannheim/korap/index/Indexer.java
index b5dfd20..96054cc 100644
--- a/src/main/java/de/ids_mannheim/korap/index/Indexer.java
+++ b/src/main/java/de/ids_mannheim/korap/index/Indexer.java
@@ -1,7 +1,19 @@
package de.ids_mannheim.korap.index;
import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import java.io.*;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.MissingOptionException;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.lang.StringUtils;
import org.apache.lucene.store.MMapDirectory;
import de.ids_mannheim.korap.KrillIndex;
import static de.ids_mannheim.korap.util.KrillProperties.*;
@@ -18,6 +30,9 @@
*
* Usage: java -jar Krill-Indexer.jar [--config propfile]
* [directories]*
+ *
+ * @author diewald, margaretha
+ *
*/
public class Indexer {
KrillIndex index;
@@ -26,9 +41,10 @@
// private static String propFile = "krill.properties";
private static String path = null;
+ private static Pattern jsonFilePattern;
// Init logger
- private final static Logger log = LoggerFactory.getLogger(KrillIndex.class);
+ private final static Logger log = LoggerFactory.getLogger(Indexer.class);
/**
@@ -41,9 +57,9 @@
public Indexer (Properties prop) throws IOException {
if (this.path == null) {
this.path = prop.getProperty("krill.indexDir");
- };
+ }
- System.out.println("Index to " + this.path);
+ log.info("Output directory: " + this.path);
// Default to 1000 documents till the next commit
String commitCount = prop.getProperty("krill.index.commit.count",
@@ -53,7 +69,9 @@
this.index = new KrillIndex(new MMapDirectory(Paths.get(this.path)));
this.count = 0;
this.commitCount = Integer.parseInt(commitCount);
- };
+
+ jsonFilePattern = Pattern.compile(".*\\.json\\.gz$");
+ }
/**
@@ -64,47 +82,51 @@
* documents to index.
*/
public void parse (File dir) {
+ Matcher matcher;
for (String file : dir.list()) {
- if (file.matches("^[^\\.].+?\\.json\\.gz$")) {
- String found = dir.getPath() + '/' + file;
- System.out.print(" Index " + found + " ... ");
+ // Only entries whose name ends in .json.gz are indexed
+ matcher = jsonFilePattern.matcher(file);
+ if (matcher.find()) {
+ file = dir.getPath() + '/' + file;
+ log.info("Adding " + file + " to the index. ");
// Add file to the index
try {
- if (this.index.addDoc(new FileInputStream(found),
+ if (this.index.addDoc(new FileInputStream(file),
true) == null) {
- System.out.println("fail.");
+ log.warn("Failed to add " + file + " to the index.");
continue;
- };
- System.out.println("done (" + count + ").");
+ }
this.count++;
+ log.debug("Finished adding " + file + " (" + count + ").");
// Commit in case the commit count is reached
if ((this.count % this.commitCount) == 0)
this.commit();
}
catch (FileNotFoundException e) {
- System.out.println("not found!");
- };
- };
- };
- };
+ log.error("File " + file + " is not found!", e);
+ }
+ }
+ else {
+ log.warn(file + " does not have json.gz format.");
+ }
+ }
+ }
/**
* Commit changes to the index.
*/
public void commit () {
- System.out.println("-----");
- System.out.print(" Commit ... ");
+ log.info("Committing index ... ");
try {
this.index.commit();
}
catch (IOException e) {
- System.err.println("Unable to commit to index " + this.path);
- };
- System.out.println("done.");
- };
+ log.error("Unable to commit to index " + this.path, e);
+ }
+ }
/**
@@ -118,75 +140,71 @@
*/
public static void main (String[] argv) throws IOException {
- if (argv.length == 0) {
- String jar = new File(Indexer.class.getProtectionDomain()
- .getCodeSource().getLocation().getPath()).getName();
+ Options options = new Options();
+ options.addOption(Option.builder("c").longOpt("config")
+ .desc("configuration file (defaults to "
+ + de.ids_mannheim.korap.util.KrillProperties.propStr
+ + ").")
+ .hasArg().argName("properties file").required().build());
+ options.addOption(Option.builder("i").longOpt("inputDir")
+ .desc("input directories separated by semicolons. The input files "
+ + "have to be in <filename>.json.gz format. ")
+ .hasArgs().argName("input directories").required()
+ .valueSeparator(';').build());
+ options.addOption(Option.builder("o").longOpt("outputDir")
+ .desc("index output directory (defaults to "
+ + "krill.indexDir in the configuration).")
+ .hasArg().argName("output directory").build());
- System.out.println(
- "Add documents from a directory to the Krill index.");
- System.out.println("Usage: java -jar " + jar
- + " [--config propfile] [directories]*");
- System.out.println();
- System.err.println(" --config|-c Configuration file");
- System.err.println(" (defaults to "
- + de.ids_mannheim.korap.util.KrillProperties.propStr + ")");
- System.err.println(" --indexDir|-d Index directory");
- System.err.println(" (defaults to krill.indexDir"
- + " in configuration)");
- System.err.println();
- return;
- };
+ CommandLineParser parser = new DefaultParser();
- int i = 0;
- boolean last = false;
String propFile = null;
+ String[] inputDirectories = null;
+ try {
+ CommandLine cmd = parser.parse(options, argv);
- for (i = 0; i < argv.length; i += 2) {
- switch (argv[i]) {
- case "--config":
- case "-cfg":
- case "-c":
- propFile = argv[i + 1];
- break;
- case "--indexDir":
- case "-d":
- path = argv[i + 1];
- break;
- default:
- last = true;
- break;
- };
+ log.info("Configuration file: " + cmd.getOptionValue("c"));
+ propFile = cmd.getOptionValue("c");
+ log.info("Input directories: "
+ + StringUtils.join(cmd.getOptionValues("i"), ";"));
+ inputDirectories = cmd.getOptionValues("i");
- if (last)
- break;
- };
+ if (cmd.hasOption("o")) {
+ log.info("Output directory: " + cmd.getOptionValue("o"));
+ path = cmd.getOptionValue("o");
+ }
+ }
+ catch (MissingOptionException e) {
+ HelpFormatter formatter = new HelpFormatter();
+ formatter.printHelp(
+ "Krill indexer\n java -jar Krill-Indexer.jar -c <properties file> -i <input directories> "
+ + "[-o <output directory>]",
+ options);
+ System.exit(0);
+ }
+ catch (ParseException e) {
+ log.error("Unexpected error: " + e, e);
+ System.exit(1);
+ }
// Load properties
- /*
- InputStream fr = new FileInputStream(argv[0]);
- prop.load(fr);
- */
Properties prop = loadProperties(propFile);
// Get indexer object
Indexer ki = new Indexer(prop);
- // Empty line
- System.out.println();
-
// Iterate over list of directories
- for (String arg : Arrays.copyOfRange(argv, i, argv.length)) {
+ for (String arg : inputDirectories) {
+ log.info("Indexing files in " + arg);
File f = new File(arg);
if (f.isDirectory())
ki.parse(f);
- };
+ }
// Final commit
ki.commit();
-
+ log.info("Finished indexing.");
// Finish indexing
- System.out.println("-----");
- System.out.println(" Indexed " + ki.count + " files.");
- System.out.println();
- };
-};
+ System.out.println("Indexed " + ki.count + " files.");
+ }
+}
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
index b46d82c..1554002 100644
--- a/src/main/resources/log4j.properties
+++ b/src/main/resources/log4j.properties
@@ -1,4 +1,4 @@
-log4j.rootLogger = ERROR, stdout
+log4j.rootLogger = DEBUG, stdout
# Queries:
# log4j.logger.de.ids_mannheim.korap.query.SpanNextQuery = TRACE, stdout
@@ -42,6 +42,7 @@
# Server
# log4j.category.org.glassfish.jersey = TRACE, stdout
+log4j.appender.stdout.Target=System.out
log4j.appender.stdout = org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern = %5p (%F:%L) -> %m%n