| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 1 | package de.ids_mannheim.korap.config; |
| 2 | |
| 3 | import java.io.File; |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 4 | import java.io.FileInputStream; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 5 | import java.io.IOException; |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 6 | import java.io.InputStream; |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 7 | import java.util.zip.GZIPInputStream; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 8 | |
| 9 | import org.apache.commons.io.FileUtils; |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 10 | import org.apache.commons.io.IOUtils; |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 11 | import org.apache.commons.io.output.ByteArrayOutputStream; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 12 | import org.apache.logging.log4j.LogManager; |
| 13 | import org.apache.logging.log4j.Logger; |
| 14 | import org.springframework.beans.factory.annotation.Autowired; |
| 15 | import org.springframework.stereotype.Component; |
| 16 | |
| 17 | import de.ids_mannheim.korap.KrillCollection; |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 18 | import de.ids_mannheim.korap.constant.VirtualCorpusType; |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 19 | import de.ids_mannheim.korap.exceptions.KustvaktException; |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 20 | import de.ids_mannheim.korap.service.VirtualCorpusService; |
| margaretha | c7196d2 | 2018-08-27 14:20:03 +0200 | [diff] [blame] | 21 | import de.ids_mannheim.korap.util.QueryException; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 22 | import de.ids_mannheim.korap.web.SearchKrill; |
| 23 | |
| 24 | @Component |
| 25 | public class NamedVCLoader { |
| 26 | @Autowired |
| 27 | private FullConfiguration config; |
| 28 | @Autowired |
| 29 | private SearchKrill searchKrill; |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 30 | @Autowired |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 31 | private VirtualCorpusService vcService; |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 32 | |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 33 | private static Logger jlog = LogManager.getLogger(NamedVCLoader.class); |
| 34 | |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 35 | public void loadVCToCache (String filename, String filePath) |
| 36 | throws IOException, QueryException, KustvaktException { |
| 37 | |
| 38 | InputStream is = NamedVCLoader.class.getResourceAsStream(filePath); |
| 39 | String json = IOUtils.toString(is, "utf-8"); |
| 40 | if (json != null) { |
| 41 | cacheVC(json, filename); |
| margaretha | f7abb36 | 2018-09-18 20:09:37 +0200 | [diff] [blame^] | 42 | vcService.storeVC(filename, VirtualCorpusType.SYSTEM, json, null, |
| 43 | null, null, true, "system"); |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 44 | } |
| 45 | } |
| 46 | |
| 47 | public void loadVCToCache () |
| 48 | throws IOException, QueryException, KustvaktException { |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 49 | |
| 50 | String dir = config.getNamedVCPath(); |
| 51 | File d = new File(dir); |
| 52 | if (!d.isDirectory()) { |
| 53 | throw new IOException("Directory " + dir + " is not valid"); |
| 54 | } |
| 55 | |
| 56 | for (File file : d.listFiles()) { |
| 57 | if (!file.exists()) { |
| 58 | throw new IOException("File " + file + " is not found."); |
| 59 | } |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 60 | |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 61 | String filename = file.getName(); |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 62 | String json = readFile(file, filename); |
| 63 | if (json != null) { |
| 64 | cacheVC(json, filename); |
| margaretha | f7abb36 | 2018-09-18 20:09:37 +0200 | [diff] [blame^] | 65 | vcService.storeVC(filename, VirtualCorpusType.SYSTEM, json, null, |
| 66 | null, null, true, "system"); |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 67 | } |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 68 | } |
| 69 | } |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 70 | |
| 71 | private String readFile (File file, String filename) throws IOException { |
| 72 | String json = null; |
| 73 | long start = System.currentTimeMillis(); |
| 74 | if (filename.endsWith(".jsonld")) { |
| 75 | filename = filename.substring(0, filename.length() - 7); |
| 76 | json = FileUtils.readFileToString(file, "utf-8"); |
| 77 | } |
| 78 | else if (filename.endsWith(".jsonld.gz")) { |
| 79 | filename = filename.substring(0, filename.length() - 10); |
| 80 | GZIPInputStream gzipInputStream = |
| 81 | new GZIPInputStream(new FileInputStream(file)); |
| 82 | ByteArrayOutputStream bos = new ByteArrayOutputStream(512); |
| 83 | bos.write(gzipInputStream); |
| 84 | json = bos.toString("utf-8"); |
| 85 | bos.close(); |
| 86 | } |
| 87 | else { |
| 88 | System.err.println("File " + filename |
| 89 | + " is not allowed. Filename must ends with .jsonld or .jsonld.gz"); |
| 90 | } |
| 91 | long end = System.currentTimeMillis(); |
| 92 | jlog.debug("READ " + filename + " duration: " + (end - start)); |
| 93 | return json; |
| 94 | } |
| 95 | |
| 96 | private void cacheVC (String json, String filename) |
| 97 | throws IOException, QueryException { |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 98 | long start, end; |
| 99 | start = System.currentTimeMillis(); |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 100 | |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 101 | KrillCollection collection = new KrillCollection(json); |
| 102 | collection.setIndex(searchKrill.getIndex()); |
| 103 | |
| 104 | if (collection != null) { |
| 105 | collection.storeInCache(filename); |
| 106 | } |
| 107 | end = System.currentTimeMillis(); |
| 108 | jlog.info(filename + " caching duration: " + (end - start)); |
| 109 | jlog.debug("memory cache: " |
| 110 | + KrillCollection.cache.calculateInMemorySize()); |
| 111 | } |
| 112 | |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 113 | } |