blob: 410a58a60787004899b8b7c99b09f51193c563dd [file] [log] [blame]
package de.ids_mannheim.korap.config;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.zip.GZIPInputStream;
import javax.management.RuntimeErrorException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import de.ids_mannheim.korap.cache.VirtualCorpusCache;
import de.ids_mannheim.korap.constant.QueryType;
import de.ids_mannheim.korap.constant.ResourceType;
import de.ids_mannheim.korap.exceptions.KustvaktException;
import de.ids_mannheim.korap.exceptions.StatusCodes;
import de.ids_mannheim.korap.service.QueryService;
import de.ids_mannheim.korap.util.QueryException;
import de.ids_mannheim.korap.web.SearchKrill;
/**
* <p>Loads predefined virtual corpora at server start up and caches
* them, if the VC have not been cached before. If there are changes
* in the index, the cache will be updated.
* </p>
*
* <p>
* All predefined VC are set as SYSTEM VC. The filenames are used as
* VC names. Acceptable file extensions are .jsonld.gz or .jsonld. The
* VC should be located at the folder indicated by <em>krill.namedVC</em>
* specified in kustvakt.conf.
* </p>
*
* @author margaretha
*
*/
@Component
public class NamedVCLoader implements Runnable{
@Autowired
private FullConfiguration config;
@Autowired
private SearchKrill searchKrill;
@Autowired
private QueryService vcService;
public static Logger jlog = LogManager.getLogger(NamedVCLoader.class);
public static boolean DEBUG = false;
@Override
public void run () {
try {
loadVCToCache();
}
catch (IOException | QueryException e) {
// e.printStackTrace();
throw new RuntimeErrorException(new Error(e.getMessage(), e.getCause()));
}
}
/** Used for testing
*
* @param filename
* @param filePath
* @throws IOException
* @throws QueryException
* @throws KustvaktException
*/
public void loadVCToCache (String filename, String filePath)
throws IOException, QueryException, KustvaktException {
InputStream is = NamedVCLoader.class.getResourceAsStream(filePath);
String json = IOUtils.toString(is, "utf-8");
if (json != null) {
cacheVC(filename,json);
vcService.storeQuery("system",filename, ResourceType.SYSTEM,
QueryType.VIRTUAL_CORPUS, json, null, null, null, true,
"system", null, null);
}
}
public void loadVCToCache ()
throws IOException, QueryException {
String dir = config.getNamedVCPath();
if (dir.isEmpty()) return;
File d = new File(dir);
if (!d.isDirectory()) {
throw new IOException("Directory " + dir + " is not valid");
}
jlog.info(Arrays.toString(d.list()));
for (File file : d.listFiles()) {
if (!file.exists()) {
throw new IOException("File " + file + " is not found.");
}
String filename = file.getName();
String[] strArr = readFile(file, filename);
filename = strArr[0];
String json = strArr[1];
if (json != null) {
cacheVC(filename,json);
storeVCinDB(filename, json);
}
}
}
private String[] readFile (File file, String filename)
throws IOException {
String json = null;
long start = System.currentTimeMillis();
if (filename.endsWith(".jsonld")) {
filename = filename.substring(0, filename.length() - 7);
json = FileUtils.readFileToString(file, "utf-8");
}
else if (filename.endsWith(".jsonld.gz")) {
filename = filename.substring(0, filename.length() - 10);
GZIPInputStream gzipInputStream =
new GZIPInputStream(new FileInputStream(file));
ByteArrayOutputStream bos = new ByteArrayOutputStream(512);
bos.write(gzipInputStream);
json = bos.toString("utf-8");
bos.close();
}
else {
System.err.println("File " + filename
+ " is not allowed. Filename must ends with .jsonld or .jsonld.gz");
}
long end = System.currentTimeMillis();
if (DEBUG) {
jlog.debug("READ " + filename + " duration: " + (end - start));
}
return new String[] { filename, json };
}
/**
* Caches the given VC if the VC is not found in cache and updates
* the VC if it exists and there are changes in the index.
*
* @param vcId
* vc-name
* @param koralQuery
* @throws IOException
* @throws QueryException
*/
private void cacheVC (String vcId, String koralQuery)
throws IOException, QueryException {
config.setVcInCaching(vcId);
if (VirtualCorpusCache.contains(vcId)) {
jlog.info("Checking {} in cache ", vcId);
}
else {
jlog.info("Storing {} in cache ", vcId);
}
long start, end;
start = System.currentTimeMillis();
VirtualCorpusCache.store(vcId, searchKrill.getIndex());
end = System.currentTimeMillis();
jlog.info("Duration : {}", (end - start));
config.setVcInCaching("");
}
/** Stores the VC if it doesn't exist in the database.
*
* @param vcId
* @param koralQuery
*/
private void storeVCinDB (String vcId, String koralQuery) {
try {
vcService.searchQueryByName("system", vcId, "system",
QueryType.VIRTUAL_CORPUS);
}
catch (KustvaktException e) {
if (e.getStatusCode() == StatusCodes.NO_RESOURCE_FOUND) {
try {
jlog.info("Storing {} in database ", vcId);
vcService.storeQuery("system", vcId, ResourceType.SYSTEM,
QueryType.VIRTUAL_CORPUS, koralQuery, null, null,
null, true, "system", null, null);
}
catch (KustvaktException e1) {
throw new RuntimeException(e1);
}
}
else {
throw new RuntimeException(e);
}
}
}
}