| margaretha | c858890 | 2024-06-13 09:36:53 +0200 | [diff] [blame] | 1 | package de.ids_mannheim.korap.init; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 2 | |
| 3 | import java.io.File; |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 4 | import java.io.FileInputStream; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 5 | import java.io.IOException; |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 6 | import java.io.InputStream; |
| margaretha | e1228ab | 2021-02-22 11:51:38 +0100 | [diff] [blame] | 7 | import java.util.Arrays; |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 8 | import java.util.zip.GZIPInputStream; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 9 | |
| 10 | import org.apache.commons.io.FileUtils; |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 11 | import org.apache.commons.io.IOUtils; |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 12 | import org.apache.commons.io.output.ByteArrayOutputStream; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 13 | import org.apache.logging.log4j.LogManager; |
| 14 | import org.apache.logging.log4j.Logger; |
| 15 | import org.springframework.beans.factory.annotation.Autowired; |
| 16 | import org.springframework.stereotype.Component; |
| 17 | |
| margaretha | 7c1f428 | 2021-11-29 17:27:53 +0100 | [diff] [blame] | 18 | import de.ids_mannheim.korap.cache.VirtualCorpusCache; |
| margaretha | c858890 | 2024-06-13 09:36:53 +0200 | [diff] [blame] | 19 | import de.ids_mannheim.korap.config.FullConfiguration; |
| Akron | da08015 | 2020-12-03 13:53:29 +0100 | [diff] [blame] | 20 | import de.ids_mannheim.korap.constant.QueryType; |
| 21 | import de.ids_mannheim.korap.constant.ResourceType; |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 22 | import de.ids_mannheim.korap.entity.QueryDO; |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 23 | import de.ids_mannheim.korap.exceptions.KustvaktException; |
| margaretha | 7c1f428 | 2021-11-29 17:27:53 +0100 | [diff] [blame] | 24 | import de.ids_mannheim.korap.exceptions.StatusCodes; |
| margaretha | b097fb0 | 2021-02-22 19:28:33 +0100 | [diff] [blame] | 25 | import de.ids_mannheim.korap.service.QueryService; |
| margaretha | c7196d2 | 2018-08-27 14:20:03 +0200 | [diff] [blame] | 26 | import de.ids_mannheim.korap.util.QueryException; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 27 | import de.ids_mannheim.korap.web.SearchKrill; |
| 28 | |
| margaretha | 7c1f428 | 2021-11-29 17:27:53 +0100 | [diff] [blame] | 29 | /** |
| 30 | * <p>Loads predefined virtual corpora at server start up and caches |
| 31 | * them, if the VC have not been cached before. If there are changes |
| 32 | * in the index, the cache will be updated. |
| 33 | * </p> |
| 34 | * |
| 35 | * <p> |
| 36 | * All predefined VC are set as SYSTEM VC. The filenames are used as |
| 37 | * VC names. Acceptable file extensions are .jsonld.gz or .jsonld. The |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 38 | * VC should be located at the folder indicated by |
| 39 | * <em>krill.namedVC</em> |
| margaretha | 7c1f428 | 2021-11-29 17:27:53 +0100 | [diff] [blame] | 40 | * specified in kustvakt.conf. |
| 41 | * </p> |
| margaretha | 398f472 | 2019-01-09 19:07:20 +0100 | [diff] [blame] | 42 | * |
| 43 | * @author margaretha |
| 44 | * |
| 45 | */ |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 46 | @Component |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 47 | public class NamedVCLoader implements Runnable { |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 48 | @Autowired |
| 49 | private FullConfiguration config; |
| 50 | @Autowired |
| 51 | private SearchKrill searchKrill; |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 52 | @Autowired |
| margaretha | b097fb0 | 2021-02-22 19:28:33 +0100 | [diff] [blame] | 53 | private QueryService vcService; |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 54 | |
| margaretha | dda4ef7 | 2018-12-06 14:20:51 +0100 | [diff] [blame] | 55 | public static Logger jlog = LogManager.getLogger(NamedVCLoader.class); |
| 56 | public static boolean DEBUG = false; |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 57 | |
| margaretha | 0b90391 | 2019-01-08 17:41:39 +0100 | [diff] [blame] | 58 | @Override |
| 59 | public void run () { |
| 60 | try { |
| 61 | loadVCToCache(); |
| 62 | } |
| margaretha | 7c1f428 | 2021-11-29 17:27:53 +0100 | [diff] [blame] | 63 | catch (IOException | QueryException e) { |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 64 | // e.printStackTrace(); |
| margaretha | 4b883ec | 2023-09-25 12:21:24 +0200 | [diff] [blame] | 65 | throw new RuntimeException(e.getMessage(), e.getCause()); |
| margaretha | 0b90391 | 2019-01-08 17:41:39 +0100 | [diff] [blame] | 66 | } |
| 67 | } |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 68 | |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 69 | public void loadVCToCache (String filename, String filePath) |
| 70 | throws IOException, QueryException, KustvaktException { |
| 71 | loadVCToCache(filename,filePath,null); |
| 72 | } |
| 73 | |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 74 | /** |
| 75 | * Used for testing |
| margaretha | 7c1f428 | 2021-11-29 17:27:53 +0100 | [diff] [blame] | 76 | * |
| 77 | * @param filename |
| 78 | * @param filePath |
| 79 | * @throws IOException |
| 80 | * @throws QueryException |
| 81 | * @throws KustvaktException |
| 82 | */ |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 83 | public void loadVCToCache (String filename, String filePath, String json) |
| 84 | throws IOException, QueryException { |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 85 | |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 86 | if (json==null || json.isEmpty()) { |
| 87 | InputStream is = NamedVCLoader.class.getResourceAsStream(filePath); |
| 88 | json = IOUtils.toString(is, "utf-8"); |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 89 | } |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 90 | processVC(filename, json); |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 91 | } |
| 92 | |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 93 | public void loadVCToCache () throws IOException, QueryException { |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 94 | |
| 95 | String dir = config.getNamedVCPath(); |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 96 | if (dir.isEmpty()) |
| 97 | return; |
| margaretha | e72355a | 2018-11-28 16:53:09 +0100 | [diff] [blame] | 98 | |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 99 | File d = new File(dir); |
| 100 | if (!d.isDirectory()) { |
| 101 | throw new IOException("Directory " + dir + " is not valid"); |
| 102 | } |
| 103 | |
| margaretha | e1228ab | 2021-02-22 11:51:38 +0100 | [diff] [blame] | 104 | jlog.info(Arrays.toString(d.list())); |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 105 | |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 106 | for (File file : d.listFiles()) { |
| 107 | if (!file.exists()) { |
| 108 | throw new IOException("File " + file + " is not found."); |
| 109 | } |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 110 | |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 111 | String filename = file.getName(); |
| margaretha | 339fd2e | 2018-11-13 12:14:53 +0100 | [diff] [blame] | 112 | String[] strArr = readFile(file, filename); |
| 113 | filename = strArr[0]; |
| 114 | String json = strArr[1]; |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 115 | if (json != null) { |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 116 | processVC(filename, json); |
| margaretha | 6ad08b4 | 2018-08-22 18:33:54 +0200 | [diff] [blame] | 117 | } |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 118 | } |
| 119 | } |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 120 | |
| 121 | /** |
| 122 | * Stores and caches VC if the given VC does not exist. |
| 123 | * Updates VC in the database and re-caches it, if the given VC exists. |
| 124 | * Updates VC if there is any change in the index. |
| 125 | * |
| 126 | * In this method, it will be checked if |
| 127 | * <ol> |
| 128 | * <li> VC exists in the database</li> |
| 129 | * <li> VC exists in the cache </li> |
| 130 | * <li> KoralQuery of the given VC differs from an existing VC with |
| 131 | * the same id. </li> |
| 132 | * <li> Index has been changed</li> |
| 133 | * </ol> |
| 134 | * |
| 135 | * Koral Query |
| 136 | * |
| 137 | * @param vcId |
| 138 | * @param json |
| 139 | * @throws IOException |
| 140 | * @throws QueryException |
| 141 | */ |
| 142 | private void processVC (String vcId, String json) |
| 143 | throws IOException, QueryException { |
| 144 | boolean updateCache = false; |
| 145 | try { |
| 146 | // if VC exists in the DB |
| 147 | QueryDO existingVC = vcService.searchQueryByName("system", vcId, "system", |
| 148 | QueryType.VIRTUAL_CORPUS); |
| 149 | |
| 150 | String koralQuery = existingVC.getKoralQuery(); |
| 151 | // if existing VC is different from input |
| 152 | if (json.hashCode() != koralQuery.hashCode()) { |
| 153 | updateCache = true; |
| 154 | // updateVCinDB |
| 155 | storeVCinDB(vcId, json, existingVC); |
| 156 | } |
| 157 | } |
| 158 | catch (KustvaktException e) { |
| 159 | // VC doesn't exist in the DB |
| 160 | if (e.getStatusCode() == StatusCodes.NO_RESOURCE_FOUND) { |
| 161 | storeVCinDB(vcId, json, null); |
| 162 | } |
| 163 | else { |
| 164 | throw new RuntimeException(e); |
| 165 | } |
| 166 | } |
| 167 | |
| 168 | cacheVC(vcId, json, updateCache); |
| 169 | } |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 170 | |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 171 | private String[] readFile (File file, String filename) throws IOException { |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 172 | String json = null; |
| 173 | long start = System.currentTimeMillis(); |
| 174 | if (filename.endsWith(".jsonld")) { |
| 175 | filename = filename.substring(0, filename.length() - 7); |
| 176 | json = FileUtils.readFileToString(file, "utf-8"); |
| 177 | } |
| 178 | else if (filename.endsWith(".jsonld.gz")) { |
| 179 | filename = filename.substring(0, filename.length() - 10); |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 180 | GZIPInputStream gzipInputStream = new GZIPInputStream( |
| 181 | new FileInputStream(file)); |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 182 | ByteArrayOutputStream bos = new ByteArrayOutputStream(512); |
| 183 | bos.write(gzipInputStream); |
| 184 | json = bos.toString("utf-8"); |
| 185 | bos.close(); |
| 186 | } |
| 187 | else { |
| 188 | System.err.println("File " + filename |
| 189 | + " is not allowed. Filename must ends with .jsonld or .jsonld.gz"); |
| 190 | } |
| 191 | long end = System.currentTimeMillis(); |
| margaretha | dda4ef7 | 2018-12-06 14:20:51 +0100 | [diff] [blame] | 192 | if (DEBUG) { |
| 193 | jlog.debug("READ " + filename + " duration: " + (end - start)); |
| 194 | } |
| margaretha | e72355a | 2018-11-28 16:53:09 +0100 | [diff] [blame] | 195 | |
| 196 | return new String[] { filename, json }; |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 197 | } |
| 198 | |
| margaretha | 7c1f428 | 2021-11-29 17:27:53 +0100 | [diff] [blame] | 199 | /** |
| 200 | * Caches the given VC if the VC is not found in cache and updates |
| 201 | * the VC if it exists and there are changes in the index. |
| 202 | * |
| 203 | * @param vcId |
| 204 | * vc-name |
| 205 | * @param koralQuery |
| 206 | * @throws IOException |
| 207 | * @throws QueryException |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 208 | * @throws KustvaktException |
| margaretha | 7c1f428 | 2021-11-29 17:27:53 +0100 | [diff] [blame] | 209 | */ |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 210 | private void cacheVC (String vcId, String koralQuery, boolean updateVC) |
| margaretha | 9e53bb2 | 2018-09-14 19:39:15 +0200 | [diff] [blame] | 211 | throws IOException, QueryException { |
| margaretha | 7c1f428 | 2021-11-29 17:27:53 +0100 | [diff] [blame] | 212 | config.setVcInCaching(vcId); |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 213 | if (updateVC) { |
| 214 | jlog.info("Updating {} in cache ", vcId); |
| 215 | VirtualCorpusCache.delete(vcId); |
| 216 | } |
| 217 | else if (VirtualCorpusCache.contains(vcId)) { |
| margaretha | 326520b | 2021-12-08 17:58:09 +0100 | [diff] [blame] | 218 | jlog.info("Checking {} in cache ", vcId); |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 219 | |
| margaretha | 326520b | 2021-12-08 17:58:09 +0100 | [diff] [blame] | 220 | } |
| 221 | else { |
| 222 | jlog.info("Storing {} in cache ", vcId); |
| 223 | } |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 224 | |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 225 | long start, end; |
| 226 | start = System.currentTimeMillis(); |
| margaretha | ae1ca62 | 2024-08-02 10:43:08 +0200 | [diff] [blame] | 227 | try { |
| 228 | VirtualCorpusCache.store(vcId, searchKrill.getIndex()); |
| 229 | } |
| 230 | catch (Exception e) { |
| 231 | jlog.error("Failed caching vc "+vcId, e); |
| 232 | } |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 233 | end = System.currentTimeMillis(); |
| margaretha | 326520b | 2021-12-08 17:58:09 +0100 | [diff] [blame] | 234 | jlog.info("Duration : {}", (end - start)); |
| margaretha | 52ee9e3 | 2019-12-11 16:36:14 +0100 | [diff] [blame] | 235 | config.setVcInCaching(""); |
| margaretha | 563aabe | 2018-09-13 20:39:45 +0200 | [diff] [blame] | 236 | } |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 237 | |
| 238 | /** |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 239 | * Stores new VC or updates existing VC |
| margaretha | 7c1f428 | 2021-11-29 17:27:53 +0100 | [diff] [blame] | 240 | * |
| 241 | * @param vcId |
| 242 | * @param koralQuery |
| 243 | */ |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 244 | private void storeVCinDB (String vcId, String koralQuery, QueryDO existingVC) { |
| margaretha | 7c1f428 | 2021-11-29 17:27:53 +0100 | [diff] [blame] | 245 | try { |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 246 | String info = (existingVC == null) ? "Storing" : "Updating"; |
| margaretha | ae1ca62 | 2024-08-02 10:43:08 +0200 | [diff] [blame] | 247 | jlog.info("{} {} in the database ", info, vcId); |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 248 | |
| 249 | vcService.storeQuery(existingVC, "system", vcId, ResourceType.SYSTEM, |
| 250 | QueryType.VIRTUAL_CORPUS, koralQuery, null, null, null, |
| 251 | true, "system", null, null); |
| margaretha | 7c1f428 | 2021-11-29 17:27:53 +0100 | [diff] [blame] | 252 | } |
| margaretha | ae1ca62 | 2024-08-02 10:43:08 +0200 | [diff] [blame] | 253 | catch (Exception e) { |
| 254 | jlog.error("Failed storing VC: "+vcId, e); |
| margaretha | 05818b4 | 2024-04-10 13:10:02 +0200 | [diff] [blame] | 255 | throw new RuntimeException(e); |
| margaretha | 7c1f428 | 2021-11-29 17:27:53 +0100 | [diff] [blame] | 256 | } |
| margaretha | 7c1f428 | 2021-11-29 17:27:53 +0100 | [diff] [blame] | 257 | } |
| margaretha | 1b32045 | 2018-08-02 16:56:25 +0200 | [diff] [blame] | 258 | } |