Add loading of named VCs (namedVC) from gzip-compressed (.jsonld.gz) files.
Change-Id: Iffa9587d50d71dfd3d5b57a54292d9bfcd3c33e6
diff --git a/Changes b/Changes
index 4ec8f1c..3949571 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.58.0 2018-08-15
+0.58.0 2018-08-21
- [feature] Implemented referencing cached collection (margaretha)
- [feature] Implemented deserialization of collection with array values
and cache option (margaretha)
@@ -12,6 +12,7 @@
- [feature] Implemented custom namedVC path (margaretha)
- [bugfix] Fix wrong behaviour of negative constraints in and-Groups
of VCs (#42; diewald)
+ - [feature] Implemented loading namedVC from gzip files (.jsonld.gz) (margaretha)
0.57 2018-04-05
- [feature] Support text queries in metadata
diff --git a/pom.xml b/pom.xml
index 4514523..d73ed2c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -213,11 +213,11 @@
<artifactId>commons-cli</artifactId>
<version>1.3.1</version>
</dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-io</artifactId>
- <version>1.3.2</version>
- </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>2.6</version>
+ </dependency>
</dependencies>
<build>
diff --git a/src/main/java/de/ids_mannheim/korap/KrillCollection.java b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
index 6e384a1..ddea3f0 100644
--- a/src/main/java/de/ids_mannheim/korap/KrillCollection.java
+++ b/src/main/java/de/ids_mannheim/korap/KrillCollection.java
@@ -8,8 +8,10 @@
import java.util.List;
import java.util.Map;
import java.util.Properties;
+import java.util.zip.GZIPInputStream;
import org.apache.commons.io.IOUtils;
+import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@@ -400,27 +402,46 @@
private String loadVCFile (String ref) {
Properties prop = KrillProperties.loadDefaultProperties();
- if (prop == null){
+ if (prop == null) {
this.addError(StatusCodes.MISSING_KRILL_PROPERTIES,
"krill.properties is not found.");
return null;
}
-
+
String namedVCPath = prop.getProperty("krill.namedVC");
- if (!namedVCPath.endsWith("/")){
+ if (!namedVCPath.endsWith("/")) {
namedVCPath += "/";
}
- File file = new File(namedVCPath+ref+".jsonld");
-
+ File file;
String json = null;
- try {
- FileInputStream fis = new FileInputStream(file);
- json = IOUtils.toString(fis);
+ if ((file= new File(namedVCPath + ref + ".jsonld")).exists()) {
+ try (FileInputStream fis = new FileInputStream(file)) {
+ json = IOUtils.toString(fis,"utf-8");
+ }
+ catch (IOException e) {
+ this.addError(StatusCodes.READING_COLLECTION_FAILED,
+ e.getMessage());
+ }
}
- catch (IOException e) {
+ // slower than reading plain text, but saves disk space
+ else if ((file = new File(namedVCPath + ref + ".jsonld.gz")).exists()){
+ try (GZIPInputStream gzipInputStream =
+ new GZIPInputStream(new FileInputStream(file));
+ ByteArrayOutputStream bos =
+ new ByteArrayOutputStream(512);) {
+ bos.write(gzipInputStream);
+ json = bos.toString("utf-8");
+ }
+ catch (IOException e) {
+ this.addError(StatusCodes.READING_COLLECTION_FAILED,
+ e.getMessage());
+ }
+ }
+ else{
this.addError(StatusCodes.MISSING_COLLECTION,
"Collection is not found.");
}
+
return json;
}
diff --git a/src/main/java/de/ids_mannheim/korap/util/StatusCodes.java b/src/main/java/de/ids_mannheim/korap/util/StatusCodes.java
index 0560af1..4663363 100644
--- a/src/main/java/de/ids_mannheim/korap/util/StatusCodes.java
+++ b/src/main/java/de/ids_mannheim/korap/util/StatusCodes.java
@@ -61,6 +61,7 @@
// 800 - 899 - Virtual Collection Messages
public static final int MISSING_COLLECTION = 800;
+ public static final int READING_COLLECTION_FAILED = 801;
public static final int UNSUPPORTED_MATCH_TYPE = 802;
public static final int UNKNOWN_VALUE_TYPE = 804;
public static final int INVALID_VALUE = 805;
diff --git a/src/test/java/de/ids_mannheim/korap/collection/TestVCCaching.java b/src/test/java/de/ids_mannheim/korap/collection/TestVCCaching.java
index 632a194..a333f70 100644
--- a/src/test/java/de/ids_mannheim/korap/collection/TestVCCaching.java
+++ b/src/test/java/de/ids_mannheim/korap/collection/TestVCCaching.java
@@ -45,7 +45,7 @@
InputStream is = getClass().getClassLoader()
.getResourceAsStream("collection/unknown-vc-ref.jsonld");
- String json = IOUtils.toString(is);
+ String json = IOUtils.toString(is,"utf-8");
KrillCollection kc = new KrillCollection(json);
List<Message> messages = kc.getErrors().getMessages();
@@ -80,7 +80,7 @@
private void testManualAddToCache (String filename, String vcName) throws IOException {
InputStream is = getClass().getClassLoader()
.getResourceAsStream(filename);
- String json = IOUtils.toString(is);
+ String json = IOUtils.toString(is,"utf-8");
is.close();
KrillCollection kc = new KrillCollection(json);
@@ -91,7 +91,7 @@
private void testSearchCachedVC () throws IOException {
InputStream is = getClass().getClassLoader()
.getResourceAsStream("collection/query-with-vc-ref.jsonld");
- String json = IOUtils.toString(is);
+ String json = IOUtils.toString(is, "utf-8");
String result = new Krill(json).apply(this.index).toJsonString();
System.out.println(json);