package de.ids_mannheim.korap.annotation;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Set;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import de.ids_mannheim.korap.core.entity.AnnotationLayer;
import de.ids_mannheim.korap.core.entity.Resource;
import de.ids_mannheim.korap.dao.AnnotationDao;
import de.ids_mannheim.korap.dao.ResourceDao;
import de.ids_mannheim.korap.exceptions.KustvaktException;

| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 24 | /** |
| 25 | * Parser for extracting data from free-resources.json containing |
| margaretha | 398f472 | 2019-01-09 19:07:20 +0100 | [diff] [blame] | 26 | * listing free (non-licensed) corpora. |
| 27 | * |
| 28 | * @author margaretha |
| 29 | * |
| 30 | */ |
| margaretha | 2c50c73 | 2018-10-17 18:48:52 +0200 | [diff] [blame] | 31 | @Component |
| 32 | public class FreeResourceParser { |
| 33 | private Logger log = LogManager.getLogger(FreeResourceParser.class); |
| 34 | |
| 35 | @Autowired |
| 36 | private ResourceDao resourceDao; |
| 37 | @Autowired |
| 38 | private AnnotationDao annotationDao; |
| 39 | |
| 40 | public static String FREE_RESOURCE_FILE = "free-resources.json"; |
| 41 | public static ObjectMapper mapper = new ObjectMapper(); |
| 42 | |
| 43 | public void run () throws IOException, KustvaktException { |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 44 | InputStream is = null; |
| margaretha | 387e468 | 2024-03-22 11:26:29 +0100 | [diff] [blame] | 45 | File f = new File("data/"+FREE_RESOURCE_FILE); |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 46 | if (f.exists()) { |
| margaretha | f0c53b8 | 2019-04-05 10:03:48 +0200 | [diff] [blame] | 47 | is = new FileInputStream(f); |
| 48 | } |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 49 | else { |
| margaretha | f0c53b8 | 2019-04-05 10:03:48 +0200 | [diff] [blame] | 50 | is = FreeResourceParser.class.getClassLoader() |
| margaretha | 387e468 | 2024-03-22 11:26:29 +0100 | [diff] [blame] | 51 | .getResourceAsStream("json/"+FREE_RESOURCE_FILE); |
| margaretha | f0c53b8 | 2019-04-05 10:03:48 +0200 | [diff] [blame] | 52 | } |
| 53 | |
| margaretha | 2c50c73 | 2018-10-17 18:48:52 +0200 | [diff] [blame] | 54 | JsonNode node = mapper.readTree(is); |
| 55 | for (JsonNode resource : node) { |
| 56 | String resourceId = resource.at("/id").asText(); |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 57 | // log.debug(resourceId); |
| margaretha | 2c50c73 | 2018-10-17 18:48:52 +0200 | [diff] [blame] | 58 | Set<AnnotationLayer> layers = parseLayers(resource.at("/layers")); |
| 59 | try { |
| 60 | Resource r = resourceDao.retrieveResource(resourceId); |
| 61 | if (r == null) { |
| 62 | resourceDao.createResource(resource.at("/id").asText(), |
| 63 | resource.at("/de_title").asText(), |
| 64 | resource.at("/en_title").asText(), |
| 65 | resource.at("/en_description").asText(), layers); |
| 66 | } |
| 67 | } |
| 68 | catch (Exception e) { |
| 69 | log.warn("Failed creating resource: " + e.getMessage()); |
| 70 | } |
| 71 | } |
| 72 | } |
| 73 | |
| 74 | private Set<AnnotationLayer> parseLayers (JsonNode layers) { |
| 75 | Set<AnnotationLayer> layerSet = new HashSet<>(layers.size()); |
| 76 | for (JsonNode layer : layers) { |
| 77 | String[] codes = layer.asText().split("/"); |
| margaretha | 35e1ca2 | 2023-11-16 22:00:01 +0100 | [diff] [blame] | 78 | AnnotationLayer annotationLayer = annotationDao |
| 79 | .retrieveAnnotationLayer(codes[0], codes[1]); |
| margaretha | 2c50c73 | 2018-10-17 18:48:52 +0200 | [diff] [blame] | 80 | layerSet.add(annotationLayer); |
| 81 | } |
| 82 | return layerSet; |
| 83 | } |
| 84 | } |