totalngrams: support gzipped output
diff --git a/pom.xml b/pom.xml
index 94999ca..42a6461 100644
--- a/pom.xml
+++ b/pom.xml
@@ -150,5 +150,11 @@
<artifactId>picocli</artifactId>
<version>4.2.0</version>
</dependency>
+ <!-- Parallel gzip compression, used for ".gz" output files: https://mvnrepository.com/artifact/org.anarres/parallelgzip -->
+ <dependency>
+ <groupId>org.anarres</groupId>
+ <artifactId>parallelgzip</artifactId>
+ <version>1.0.3</version>
+ </dependency>
</dependencies>
</project>
\ No newline at end of file
diff --git a/src/main/java/org/ids_mannheim/TotalNGram.java b/src/main/java/org/ids_mannheim/TotalNGram.java
index 44bd6e1..6e0add0 100644
--- a/src/main/java/org/ids_mannheim/TotalNGram.java
+++ b/src/main/java/org/ids_mannheim/TotalNGram.java
@@ -1,8 +1,10 @@
package org.ids_mannheim;
+import org.anarres.parallelgzip.ParallelGZIPOutputStream;
import picocli.CommandLine;
import java.io.File;
+import java.io.FileOutputStream;
import java.io.PrintStream;
import java.nio.file.AccessDeniedException;
import java.nio.file.FileAlreadyExistsException;
@@ -89,6 +91,7 @@
}
PrintStream output_stream;
+ final PrintStream output_stream;
if ((output_fillename == null) || output_fillename.equals("-")) {
output_stream = System.out;
} else {
@@ -104,7 +107,12 @@
System.exit(-1);
}
}
- output_stream = new PrintStream(f);
+ if (output_fillename.endsWith(".gz")) {
+ output_stream = new PrintStream(new ParallelGZIPOutputStream(new FileOutputStream(f)));
+ } else {
+ output_stream = new PrintStream(f);
+ }
+
}
FoldedEntry.setFolds(FOLDS);