blob: 6a6ca88aa7e31570ab61662ecd0beba6daf620fd [file] [log] [blame]
margaretha2c50c732018-10-17 18:48:52 +02001package de.ids_mannheim.korap.annotation;
2
margarethaf0c53b82019-04-05 10:03:48 +02003import java.io.File;
4import java.io.FileInputStream;
margaretha2c50c732018-10-17 18:48:52 +02005import java.io.IOException;
6import java.io.InputStream;
7import java.util.HashSet;
8import java.util.Set;
9
10import org.apache.logging.log4j.LogManager;
11import org.apache.logging.log4j.Logger;
12import org.springframework.beans.factory.annotation.Autowired;
13import org.springframework.stereotype.Component;
14
15import com.fasterxml.jackson.databind.JsonNode;
16import com.fasterxml.jackson.databind.ObjectMapper;
17
margaretha5b708792023-05-12 16:55:29 +020018import de.ids_mannheim.korap.core.entity.AnnotationLayer;
19import de.ids_mannheim.korap.core.entity.Resource;
margaretha2c50c732018-10-17 18:48:52 +020020import de.ids_mannheim.korap.dao.AnnotationDao;
21import de.ids_mannheim.korap.dao.ResourceDao;
margaretha2c50c732018-10-17 18:48:52 +020022import de.ids_mannheim.korap.exceptions.KustvaktException;
23
margaretha35e1ca22023-11-16 22:00:01 +010024/**
25 * Parser for extracting data from free-resources.json containing
margaretha398f4722019-01-09 19:07:20 +010026 * listing free (non-licensed) corpora.
27 *
28 * @author margaretha
29 *
30 */
margaretha2c50c732018-10-17 18:48:52 +020031@Component
32public class FreeResourceParser {
33 private Logger log = LogManager.getLogger(FreeResourceParser.class);
34
35 @Autowired
36 private ResourceDao resourceDao;
37 @Autowired
38 private AnnotationDao annotationDao;
39
40 public static String FREE_RESOURCE_FILE = "free-resources.json";
41 public static ObjectMapper mapper = new ObjectMapper();
42
43 public void run () throws IOException, KustvaktException {
margaretha35e1ca22023-11-16 22:00:01 +010044 InputStream is = null;
margaretha387e4682024-03-22 11:26:29 +010045 File f = new File("data/"+FREE_RESOURCE_FILE);
margaretha35e1ca22023-11-16 22:00:01 +010046 if (f.exists()) {
margarethaf0c53b82019-04-05 10:03:48 +020047 is = new FileInputStream(f);
48 }
margaretha35e1ca22023-11-16 22:00:01 +010049 else {
margarethaf0c53b82019-04-05 10:03:48 +020050 is = FreeResourceParser.class.getClassLoader()
margaretha387e4682024-03-22 11:26:29 +010051 .getResourceAsStream("json/"+FREE_RESOURCE_FILE);
margarethaf0c53b82019-04-05 10:03:48 +020052 }
53
margaretha2c50c732018-10-17 18:48:52 +020054 JsonNode node = mapper.readTree(is);
55 for (JsonNode resource : node) {
56 String resourceId = resource.at("/id").asText();
margaretha35e1ca22023-11-16 22:00:01 +010057 // log.debug(resourceId);
margaretha2c50c732018-10-17 18:48:52 +020058 Set<AnnotationLayer> layers = parseLayers(resource.at("/layers"));
59 try {
60 Resource r = resourceDao.retrieveResource(resourceId);
61 if (r == null) {
62 resourceDao.createResource(resource.at("/id").asText(),
63 resource.at("/de_title").asText(),
64 resource.at("/en_title").asText(),
65 resource.at("/en_description").asText(), layers);
66 }
67 }
68 catch (Exception e) {
69 log.warn("Failed creating resource: " + e.getMessage());
70 }
71 }
72 }
73
74 private Set<AnnotationLayer> parseLayers (JsonNode layers) {
75 Set<AnnotationLayer> layerSet = new HashSet<>(layers.size());
76 for (JsonNode layer : layers) {
77 String[] codes = layer.asText().split("/");
margaretha35e1ca22023-11-16 22:00:01 +010078 AnnotationLayer annotationLayer = annotationDao
79 .retrieveAnnotationLayer(codes[0], codes[1]);
margaretha2c50c732018-10-17 18:48:52 +020080 layerSet.add(annotationLayer);
81 }
82 return layerSet;
83 }
84}