blob: eaa462e2e053da698cf08a54e54754f86bc788dd [file] [log] [blame]
margaretha1b320452018-08-02 16:56:25 +02001package de.ids_mannheim.korap.config;
2
3import java.io.File;
margaretha6ad08b42018-08-22 18:33:54 +02004import java.io.FileInputStream;
margaretha1b320452018-08-02 16:56:25 +02005import java.io.IOException;
margaretha9e53bb22018-09-14 19:39:15 +02006import java.io.InputStream;
margaretha6ad08b42018-08-22 18:33:54 +02007import java.util.zip.GZIPInputStream;
margaretha1b320452018-08-02 16:56:25 +02008
margaretha0b903912019-01-08 17:41:39 +01009import javax.management.RuntimeErrorException;
10
margaretha1b320452018-08-02 16:56:25 +020011import org.apache.commons.io.FileUtils;
margaretha9e53bb22018-09-14 19:39:15 +020012import org.apache.commons.io.IOUtils;
margaretha6ad08b42018-08-22 18:33:54 +020013import org.apache.commons.io.output.ByteArrayOutputStream;
margaretha1b320452018-08-02 16:56:25 +020014import org.apache.logging.log4j.LogManager;
15import org.apache.logging.log4j.Logger;
16import org.springframework.beans.factory.annotation.Autowired;
17import org.springframework.stereotype.Component;
18
19import de.ids_mannheim.korap.KrillCollection;
margaretha563aabe2018-09-13 20:39:45 +020020import de.ids_mannheim.korap.constant.VirtualCorpusType;
margarethae72355a2018-11-28 16:53:09 +010021import de.ids_mannheim.korap.entity.VirtualCorpus;
margaretha563aabe2018-09-13 20:39:45 +020022import de.ids_mannheim.korap.exceptions.KustvaktException;
margaretha563aabe2018-09-13 20:39:45 +020023import de.ids_mannheim.korap.service.VirtualCorpusService;
margarethac7196d22018-08-27 14:20:03 +020024import de.ids_mannheim.korap.util.QueryException;
margaretha1b320452018-08-02 16:56:25 +020025import de.ids_mannheim.korap.web.SearchKrill;
26
margaretha398f4722019-01-09 19:07:20 +010027/** Loads predefined virtual corpora at server start up and cache them.
28 *
29 * @author margaretha
30 *
31 */
margaretha1b320452018-08-02 16:56:25 +020032@Component
margaretha0b903912019-01-08 17:41:39 +010033public class NamedVCLoader implements Runnable{
margaretha1b320452018-08-02 16:56:25 +020034 @Autowired
35 private FullConfiguration config;
36 @Autowired
37 private SearchKrill searchKrill;
margaretha563aabe2018-09-13 20:39:45 +020038 @Autowired
margaretha563aabe2018-09-13 20:39:45 +020039 private VirtualCorpusService vcService;
margaretha9e53bb22018-09-14 19:39:15 +020040
margarethadda4ef72018-12-06 14:20:51 +010041 public static Logger jlog = LogManager.getLogger(NamedVCLoader.class);
42 public static boolean DEBUG = false;
margaretha1b320452018-08-02 16:56:25 +020043
margaretha0b903912019-01-08 17:41:39 +010044 @Override
45 public void run () {
46 try {
47 loadVCToCache();
48 }
49 catch (IOException | QueryException | KustvaktException e) {
50// e.printStackTrace();
51 throw new RuntimeErrorException(new Error(e.getMessage(), e.getCause()));
52 }
53 }
54
margaretha9e53bb22018-09-14 19:39:15 +020055 public void loadVCToCache (String filename, String filePath)
56 throws IOException, QueryException, KustvaktException {
57
58 InputStream is = NamedVCLoader.class.getResourceAsStream(filePath);
59 String json = IOUtils.toString(is, "utf-8");
60 if (json != null) {
61 cacheVC(json, filename);
margarethaf7abb362018-09-18 20:09:37 +020062 vcService.storeVC(filename, VirtualCorpusType.SYSTEM, json, null,
63 null, null, true, "system");
margaretha9e53bb22018-09-14 19:39:15 +020064 }
65 }
66
67 public void loadVCToCache ()
68 throws IOException, QueryException, KustvaktException {
margaretha1b320452018-08-02 16:56:25 +020069
70 String dir = config.getNamedVCPath();
margaretha51e5e3f2018-10-17 15:10:03 +020071 if (dir.isEmpty()) return;
margarethae72355a2018-11-28 16:53:09 +010072
margaretha1b320452018-08-02 16:56:25 +020073 File d = new File(dir);
74 if (!d.isDirectory()) {
75 throw new IOException("Directory " + dir + " is not valid");
76 }
77
78 for (File file : d.listFiles()) {
79 if (!file.exists()) {
80 throw new IOException("File " + file + " is not found.");
81 }
margaretha1b320452018-08-02 16:56:25 +020082
margaretha6ad08b42018-08-22 18:33:54 +020083 String filename = file.getName();
margaretha339fd2e2018-11-13 12:14:53 +010084 String[] strArr = readFile(file, filename);
85 filename = strArr[0];
86 String json = strArr[1];
margaretha9e53bb22018-09-14 19:39:15 +020087 if (json != null) {
88 cacheVC(json, filename);
margarethae72355a2018-11-28 16:53:09 +010089 try {
90 VirtualCorpus vc = vcService.searchVCByName("system",
91 filename, "system");
92 if (vc != null) {
margarethadda4ef72018-12-06 14:20:51 +010093 if (DEBUG) {
94 jlog.debug("Delete existing vc: " + filename);
95 }
margarethae72355a2018-11-28 16:53:09 +010096 vcService.deleteVC("system", vc.getId());
97 }
98 }
99 catch (KustvaktException e) {
100 // ignore
margaretha47a72a82019-07-03 16:00:54 +0200101 if (DEBUG) jlog.debug(e);
margarethae72355a2018-11-28 16:53:09 +0100102 }
103 vcService.storeVC(filename, VirtualCorpusType.SYSTEM, json,
104 null, null, null, true, "system");
margaretha6ad08b42018-08-22 18:33:54 +0200105 }
margaretha1b320452018-08-02 16:56:25 +0200106 }
107 }
margaretha9e53bb22018-09-14 19:39:15 +0200108
margarethae72355a2018-11-28 16:53:09 +0100109 private String[] readFile (File file, String filename)
110 throws IOException, KustvaktException {
margaretha9e53bb22018-09-14 19:39:15 +0200111 String json = null;
112 long start = System.currentTimeMillis();
113 if (filename.endsWith(".jsonld")) {
114 filename = filename.substring(0, filename.length() - 7);
115 json = FileUtils.readFileToString(file, "utf-8");
116 }
117 else if (filename.endsWith(".jsonld.gz")) {
118 filename = filename.substring(0, filename.length() - 10);
119 GZIPInputStream gzipInputStream =
120 new GZIPInputStream(new FileInputStream(file));
121 ByteArrayOutputStream bos = new ByteArrayOutputStream(512);
122 bos.write(gzipInputStream);
123 json = bos.toString("utf-8");
124 bos.close();
125 }
126 else {
127 System.err.println("File " + filename
128 + " is not allowed. Filename must ends with .jsonld or .jsonld.gz");
129 }
130 long end = System.currentTimeMillis();
margarethadda4ef72018-12-06 14:20:51 +0100131 if (DEBUG) {
132 jlog.debug("READ " + filename + " duration: " + (end - start));
133 }
margarethae72355a2018-11-28 16:53:09 +0100134
135 return new String[] { filename, json };
margaretha9e53bb22018-09-14 19:39:15 +0200136 }
137
138 private void cacheVC (String json, String filename)
139 throws IOException, QueryException {
margaretha52ee9e32019-12-11 16:36:14 +0100140 config.setVcInCaching(filename);
margaretha563aabe2018-09-13 20:39:45 +0200141 long start, end;
142 start = System.currentTimeMillis();
margaretha9e53bb22018-09-14 19:39:15 +0200143
margaretha563aabe2018-09-13 20:39:45 +0200144 KrillCollection collection = new KrillCollection(json);
145 collection.setIndex(searchKrill.getIndex());
margarethae72355a2018-11-28 16:53:09 +0100146
margaretha52ee9e32019-12-11 16:36:14 +0100147 jlog.info("Storing {} in cache ", filename);
margaretha563aabe2018-09-13 20:39:45 +0200148 if (collection != null) {
149 collection.storeInCache(filename);
150 }
151 end = System.currentTimeMillis();
margaretha0b903912019-01-08 17:41:39 +0100152 jlog.info("{} caching duration: {}", filename, (end - start));
margarethadda4ef72018-12-06 14:20:51 +0100153 if (DEBUG) {
154 jlog.debug("memory cache: "
155 + KrillCollection.cache.calculateInMemorySize());
156 }
margaretha52ee9e32019-12-11 16:36:14 +0100157 config.setVcInCaching("");
margaretha563aabe2018-09-13 20:39:45 +0200158 }
margaretha1b320452018-08-02 16:56:25 +0200159}