blob: 21216a8be0b31196ab1a885e490d68b59ea16949 [file] [log] [blame]
margaretha1b320452018-08-02 16:56:25 +02001package de.ids_mannheim.korap.config;
2
3import java.io.File;
margaretha6ad08b42018-08-22 18:33:54 +02004import java.io.FileInputStream;
margaretha1b320452018-08-02 16:56:25 +02005import java.io.IOException;
margaretha9e53bb22018-09-14 19:39:15 +02006import java.io.InputStream;
margaretha6ad08b42018-08-22 18:33:54 +02007import java.util.zip.GZIPInputStream;
margaretha1b320452018-08-02 16:56:25 +02008
9import org.apache.commons.io.FileUtils;
margaretha9e53bb22018-09-14 19:39:15 +020010import org.apache.commons.io.IOUtils;
margaretha6ad08b42018-08-22 18:33:54 +020011import org.apache.commons.io.output.ByteArrayOutputStream;
margaretha1b320452018-08-02 16:56:25 +020012import org.apache.logging.log4j.LogManager;
13import org.apache.logging.log4j.Logger;
14import org.springframework.beans.factory.annotation.Autowired;
15import org.springframework.stereotype.Component;
16
17import de.ids_mannheim.korap.KrillCollection;
margaretha563aabe2018-09-13 20:39:45 +020018import de.ids_mannheim.korap.constant.VirtualCorpusType;
margaretha563aabe2018-09-13 20:39:45 +020019import de.ids_mannheim.korap.exceptions.KustvaktException;
margaretha563aabe2018-09-13 20:39:45 +020020import de.ids_mannheim.korap.service.VirtualCorpusService;
margarethac7196d22018-08-27 14:20:03 +020021import de.ids_mannheim.korap.util.QueryException;
margaretha1b320452018-08-02 16:56:25 +020022import de.ids_mannheim.korap.web.SearchKrill;
23
24@Component
25public class NamedVCLoader {
26 @Autowired
27 private FullConfiguration config;
28 @Autowired
29 private SearchKrill searchKrill;
margaretha563aabe2018-09-13 20:39:45 +020030 @Autowired
margaretha563aabe2018-09-13 20:39:45 +020031 private VirtualCorpusService vcService;
margaretha9e53bb22018-09-14 19:39:15 +020032
margaretha1b320452018-08-02 16:56:25 +020033 private static Logger jlog = LogManager.getLogger(NamedVCLoader.class);
34
margaretha9e53bb22018-09-14 19:39:15 +020035 public void loadVCToCache (String filename, String filePath)
36 throws IOException, QueryException, KustvaktException {
37
38 InputStream is = NamedVCLoader.class.getResourceAsStream(filePath);
39 String json = IOUtils.toString(is, "utf-8");
40 if (json != null) {
41 cacheVC(json, filename);
margarethaf7abb362018-09-18 20:09:37 +020042 vcService.storeVC(filename, VirtualCorpusType.SYSTEM, json, null,
43 null, null, true, "system");
margaretha9e53bb22018-09-14 19:39:15 +020044 }
45 }
46
47 public void loadVCToCache ()
48 throws IOException, QueryException, KustvaktException {
margaretha1b320452018-08-02 16:56:25 +020049
50 String dir = config.getNamedVCPath();
51 File d = new File(dir);
52 if (!d.isDirectory()) {
53 throw new IOException("Directory " + dir + " is not valid");
54 }
55
56 for (File file : d.listFiles()) {
57 if (!file.exists()) {
58 throw new IOException("File " + file + " is not found.");
59 }
margaretha1b320452018-08-02 16:56:25 +020060
margaretha6ad08b42018-08-22 18:33:54 +020061 String filename = file.getName();
margaretha9e53bb22018-09-14 19:39:15 +020062 String json = readFile(file, filename);
63 if (json != null) {
64 cacheVC(json, filename);
margarethaf7abb362018-09-18 20:09:37 +020065 vcService.storeVC(filename, VirtualCorpusType.SYSTEM, json, null,
66 null, null, true, "system");
margaretha6ad08b42018-08-22 18:33:54 +020067 }
margaretha1b320452018-08-02 16:56:25 +020068 }
69 }
margaretha9e53bb22018-09-14 19:39:15 +020070
71 private String readFile (File file, String filename) throws IOException {
72 String json = null;
73 long start = System.currentTimeMillis();
74 if (filename.endsWith(".jsonld")) {
75 filename = filename.substring(0, filename.length() - 7);
76 json = FileUtils.readFileToString(file, "utf-8");
77 }
78 else if (filename.endsWith(".jsonld.gz")) {
79 filename = filename.substring(0, filename.length() - 10);
80 GZIPInputStream gzipInputStream =
81 new GZIPInputStream(new FileInputStream(file));
82 ByteArrayOutputStream bos = new ByteArrayOutputStream(512);
83 bos.write(gzipInputStream);
84 json = bos.toString("utf-8");
85 bos.close();
86 }
87 else {
88 System.err.println("File " + filename
89 + " is not allowed. Filename must ends with .jsonld or .jsonld.gz");
90 }
91 long end = System.currentTimeMillis();
92 jlog.debug("READ " + filename + " duration: " + (end - start));
93 return json;
94 }
95
96 private void cacheVC (String json, String filename)
97 throws IOException, QueryException {
margaretha563aabe2018-09-13 20:39:45 +020098 long start, end;
99 start = System.currentTimeMillis();
margaretha9e53bb22018-09-14 19:39:15 +0200100
margaretha563aabe2018-09-13 20:39:45 +0200101 KrillCollection collection = new KrillCollection(json);
102 collection.setIndex(searchKrill.getIndex());
103
104 if (collection != null) {
105 collection.storeInCache(filename);
106 }
107 end = System.currentTimeMillis();
108 jlog.info(filename + " caching duration: " + (end - start));
109 jlog.debug("memory cache: "
110 + KrillCollection.cache.calculateInMemorySize());
111 }
112
margaretha1b320452018-08-02 16:56:25 +0200113}