| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 1 | package de.ids_mannheim.korap.config; |
| 2 | |
| 3 | import java.io.File; |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 4 | import java.io.FileInputStream; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 5 | import java.io.IOException; |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 6 | import java.io.InputStream; |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 7 | import java.util.zip.GZIPInputStream; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 8 | |
| margaretha | 0b90391 | 2019-01-08 17:41:39 +0100 | [diff] [blame] | 9 | import javax.management.RuntimeErrorException; |
| 10 | |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 11 | import org.apache.commons.io.FileUtils; |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 12 | import org.apache.commons.io.IOUtils; |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 13 | import org.apache.commons.io.output.ByteArrayOutputStream; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 14 | import org.apache.logging.log4j.LogManager; |
| 15 | import org.apache.logging.log4j.Logger; |
| 16 | import org.springframework.beans.factory.annotation.Autowired; |
| 17 | import org.springframework.stereotype.Component; |
| 18 | |
| 19 | import de.ids_mannheim.korap.KrillCollection; |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 20 | import de.ids_mannheim.korap.constant.VirtualCorpusType; |
| margaretha | e72355a | 2018-11-28 16:53:09 +0100 | [diff] [blame] | 21 | import de.ids_mannheim.korap.entity.VirtualCorpus; |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 22 | import de.ids_mannheim.korap.exceptions.KustvaktException; |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 23 | import de.ids_mannheim.korap.service.VirtualCorpusService; |
| margaretha | c7196d2 | 2018-08-27 14:20:03 +0200 | [diff] [blame] | 24 | import de.ids_mannheim.korap.util.QueryException; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 25 | import de.ids_mannheim.korap.web.SearchKrill; |
| 26 | |
| margaretha | 398f472 | 2019-01-09 19:07:20 +0100 | [diff] [blame] | 27 | /** Loads predefined virtual corpora at server start up and cache them. |
| 28 | * |
| 29 | * @author margaretha |
| 30 | * |
| 31 | */ |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 32 | @Component |
| margaretha | 0b90391 | 2019-01-08 17:41:39 +0100 | [diff] [blame] | 33 | public class NamedVCLoader implements Runnable{ |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 34 | @Autowired |
| 35 | private FullConfiguration config; |
| 36 | @Autowired |
| 37 | private SearchKrill searchKrill; |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 38 | @Autowired |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 39 | private VirtualCorpusService vcService; |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 40 | |
| margaretha | dda4ef7 | 2018-12-06 14:20:51 +0100 | [diff] [blame] | 41 | public static Logger jlog = LogManager.getLogger(NamedVCLoader.class); |
| 42 | public static boolean DEBUG = false; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 43 | |
| margaretha | 0b90391 | 2019-01-08 17:41:39 +0100 | [diff] [blame] | 44 | @Override |
| 45 | public void run () { |
| 46 | try { |
| 47 | loadVCToCache(); |
| 48 | } |
| 49 | catch (IOException | QueryException | KustvaktException e) { |
| 50 | // e.printStackTrace(); |
| 51 | throw new RuntimeErrorException(new Error(e.getMessage(), e.getCause())); |
| 52 | } |
| 53 | } |
| 54 | |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 55 | public void loadVCToCache (String filename, String filePath) |
| 56 | throws IOException, QueryException, KustvaktException { |
| 57 | |
| 58 | InputStream is = NamedVCLoader.class.getResourceAsStream(filePath); |
| 59 | String json = IOUtils.toString(is, "utf-8"); |
| 60 | if (json != null) { |
| 61 | cacheVC(json, filename); |
| margaretha | f7abb36 | 2018-09-18 20:09:37 +0200 | [diff] [blame] | 62 | vcService.storeVC(filename, VirtualCorpusType.SYSTEM, json, null, |
| 63 | null, null, true, "system"); |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 64 | } |
| 65 | } |
| 66 | |
| 67 | public void loadVCToCache () |
| 68 | throws IOException, QueryException, KustvaktException { |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 69 | |
| 70 | String dir = config.getNamedVCPath(); |
| margaretha | 51e5e3f | 2018-10-17 15:10:03 +0200 | [diff] [blame] | 71 | if (dir.isEmpty()) return; |
| margaretha | e72355a | 2018-11-28 16:53:09 +0100 | [diff] [blame] | 72 | |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 73 | File d = new File(dir); |
| 74 | if (!d.isDirectory()) { |
| 75 | throw new IOException("Directory " + dir + " is not valid"); |
| 76 | } |
| 77 | |
| 78 | for (File file : d.listFiles()) { |
| 79 | if (!file.exists()) { |
| 80 | throw new IOException("File " + file + " is not found."); |
| 81 | } |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 82 | |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 83 | String filename = file.getName(); |
| margaretha | 339fd2e | 2018-11-13 12:14:53 +0100 | [diff] [blame] | 84 | String[] strArr = readFile(file, filename); |
| 85 | filename = strArr[0]; |
| 86 | String json = strArr[1]; |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 87 | if (json != null) { |
| 88 | cacheVC(json, filename); |
| margaretha | e72355a | 2018-11-28 16:53:09 +0100 | [diff] [blame] | 89 | try { |
| 90 | VirtualCorpus vc = vcService.searchVCByName("system", |
| 91 | filename, "system"); |
| 92 | if (vc != null) { |
| margaretha | dda4ef7 | 2018-12-06 14:20:51 +0100 | [diff] [blame] | 93 | if (DEBUG) { |
| 94 | jlog.debug("Delete existing vc: " + filename); |
| 95 | } |
| margaretha | e72355a | 2018-11-28 16:53:09 +0100 | [diff] [blame] | 96 | vcService.deleteVC("system", vc.getId()); |
| 97 | } |
| 98 | } |
| 99 | catch (KustvaktException e) { |
| 100 | // ignore |
| margaretha | 47a72a8 | 2019-07-03 16:00:54 +0200 | [diff] [blame] | 101 | if (DEBUG) jlog.debug(e); |
| margaretha | e72355a | 2018-11-28 16:53:09 +0100 | [diff] [blame] | 102 | } |
| 103 | vcService.storeVC(filename, VirtualCorpusType.SYSTEM, json, |
| 104 | null, null, null, true, "system"); |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 105 | } |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 106 | } |
| 107 | } |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 108 | |
| margaretha | e72355a | 2018-11-28 16:53:09 +0100 | [diff] [blame] | 109 | private String[] readFile (File file, String filename) |
| 110 | throws IOException, KustvaktException { |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 111 | String json = null; |
| 112 | long start = System.currentTimeMillis(); |
| 113 | if (filename.endsWith(".jsonld")) { |
| 114 | filename = filename.substring(0, filename.length() - 7); |
| 115 | json = FileUtils.readFileToString(file, "utf-8"); |
| 116 | } |
| 117 | else if (filename.endsWith(".jsonld.gz")) { |
| 118 | filename = filename.substring(0, filename.length() - 10); |
| 119 | GZIPInputStream gzipInputStream = |
| 120 | new GZIPInputStream(new FileInputStream(file)); |
| 121 | ByteArrayOutputStream bos = new ByteArrayOutputStream(512); |
| 122 | bos.write(gzipInputStream); |
| 123 | json = bos.toString("utf-8"); |
| 124 | bos.close(); |
| 125 | } |
| 126 | else { |
| 127 | System.err.println("File " + filename |
| 128 | + " is not allowed. Filename must ends with .jsonld or .jsonld.gz"); |
| 129 | } |
| 130 | long end = System.currentTimeMillis(); |
| margaretha | dda4ef7 | 2018-12-06 14:20:51 +0100 | [diff] [blame] | 131 | if (DEBUG) { |
| 132 | jlog.debug("READ " + filename + " duration: " + (end - start)); |
| 133 | } |
| margaretha | e72355a | 2018-11-28 16:53:09 +0100 | [diff] [blame] | 134 | |
| 135 | return new String[] { filename, json }; |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 136 | } |
| 137 | |
| 138 | private void cacheVC (String json, String filename) |
| 139 | throws IOException, QueryException { |
| margaretha | 52ee9e3 | 2019-12-11 16:36:14 +0100 | [diff] [blame] | 140 | config.setVcInCaching(filename); |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 141 | long start, end; |
| 142 | start = System.currentTimeMillis(); |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 143 | |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 144 | KrillCollection collection = new KrillCollection(json); |
| 145 | collection.setIndex(searchKrill.getIndex()); |
| margaretha | e72355a | 2018-11-28 16:53:09 +0100 | [diff] [blame] | 146 | |
| margaretha | 52ee9e3 | 2019-12-11 16:36:14 +0100 | [diff] [blame] | 147 | jlog.info("Storing {} in cache ", filename); |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 148 | if (collection != null) { |
| 149 | collection.storeInCache(filename); |
| 150 | } |
| 151 | end = System.currentTimeMillis(); |
| margaretha | 0b90391 | 2019-01-08 17:41:39 +0100 | [diff] [blame] | 152 | jlog.info("{} caching duration: {}", filename, (end - start)); |
| margaretha | dda4ef7 | 2018-12-06 14:20:51 +0100 | [diff] [blame] | 153 | if (DEBUG) { |
| 154 | jlog.debug("memory cache: " |
| 155 | + KrillCollection.cache.calculateInMemorySize()); |
| 156 | } |
| margaretha | 52ee9e3 | 2019-12-11 16:36:14 +0100 | [diff] [blame] | 157 | config.setVcInCaching(""); |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 158 | } |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 159 | } |