blob: 3f71e9f7b4c5072b5a1a00a810b1f42de78a33fb [file] [log] [blame]
margaretha51e5e3f2018-10-17 15:10:03 +02001package de.ids_mannheim.korap.annotation;
2
3import java.io.BufferedReader;
margaretha51e5e3f2018-10-17 15:10:03 +02004import java.io.IOException;
margaretha2df06602018-11-14 19:10:30 +01005import java.io.InputStream;
margaretha51e5e3f2018-10-17 15:10:03 +02006import java.io.InputStreamReader;
7import java.util.ArrayList;
8import java.util.HashSet;
9import java.util.Set;
10import java.util.regex.Matcher;
11import java.util.regex.Pattern;
12
13import org.apache.logging.log4j.LogManager;
14import org.apache.logging.log4j.Logger;
15import org.springframework.beans.factory.annotation.Autowired;
margaretha2df06602018-11-14 19:10:30 +010016import org.springframework.core.io.Resource;
17import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
margaretha51e5e3f2018-10-17 15:10:03 +020018import org.springframework.stereotype.Component;
19
20import de.ids_mannheim.korap.constant.AnnotationType;
margaretha5b708792023-05-12 16:55:29 +020021import de.ids_mannheim.korap.core.entity.Annotation;
22import de.ids_mannheim.korap.core.entity.AnnotationKey;
23import de.ids_mannheim.korap.core.entity.AnnotationLayer;
margaretha51e5e3f2018-10-17 15:10:03 +020024import de.ids_mannheim.korap.dao.AnnotationDao;
margaretha51e5e3f2018-10-17 15:10:03 +020025
margaretha35e1ca22023-11-16 22:00:01 +010026/**
27 * Parser for extracting annotation descriptions from Kalamar
28 * javascripts
margaretha398f4722019-01-09 19:07:20 +010029 *
30 * @author margaretha
31 *
32 */
margarethafc5fab22021-07-26 12:17:39 +020033@Deprecated
margaretha51e5e3f2018-10-17 15:10:03 +020034@Component
35public class AnnotationParser {
36
37 private Logger log = LogManager.getLogger(AnnotationDao.class);
38
39 public static final Pattern quotePattern = Pattern.compile("\"([^\"]*)\"");
40
41 @Autowired
42 private AnnotationDao annotationDao;
43
44 private Annotation foundry = null;
45 private AnnotationLayer layer = null;
46 private AnnotationKey key = null;
47
48 private Set<AnnotationKey> keys = new HashSet<>();
49 private Set<Annotation> values = new HashSet<>();
50
51 public void run () throws IOException {
margaretha35e1ca22023-11-16 22:00:01 +010052 PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver(
53 getClass().getClassLoader());
margaretha2df06602018-11-14 19:10:30 +010054 Resource[] resources = resolver
55 .getResources("classpath:annotation-scripts/foundries/*.js");
margaretha51e5e3f2018-10-17 15:10:03 +020056
margaretha35e1ca22023-11-16 22:00:01 +010057 if (resources.length < 1)
58 return;
margaretha51e5e3f2018-10-17 15:10:03 +020059
margaretha2df06602018-11-14 19:10:30 +010060 for (Resource r : resources) {
margaretha35e1ca22023-11-16 22:00:01 +010061 // log.debug(r.getFilename());
margaretha2df06602018-11-14 19:10:30 +010062 readFile(r.getInputStream());
margaretha51e5e3f2018-10-17 15:10:03 +020063 }
64 }
65
margaretha2df06602018-11-14 19:10:30 +010066 private void readFile (InputStream inputStream) throws IOException {
margaretha35e1ca22023-11-16 22:00:01 +010067 BufferedReader br = new BufferedReader(
68 new InputStreamReader(inputStream), 1024);
margaretha51e5e3f2018-10-17 15:10:03 +020069
70 foundry = null;
71
72 String line, annotationCode = "", annotationType = "";
73 Matcher m;
74 ArrayList<String> array;
75 while ((line = br.readLine()) != null) {
76 line = line.trim();
77 if (line.startsWith("ah")) {
78 m = quotePattern.matcher(line);
79 if (m.find()) {
80 annotationCode = m.group(1);
81 annotationType = computeAnnotationType(annotationCode);
82 }
83 m.reset();
84 }
85 else if (line.startsWith("];")) {
86 if (!keys.isEmpty()) {
87 layer.setKeys(keys);
88 annotationDao.updateAnnotationLayer(layer);
89 }
90 if (!values.isEmpty()) {
91 key.setValues(values);
92 annotationDao.updateAnnotationKey(key);
93 }
94 keys.clear();
95 values.clear();
96 layer = null;
97 key = null;
98 }
99 else if (line.startsWith("[")) {
100 array = computeValues(line);
101 parseArray(annotationCode, annotationType, array);
102 }
103
104 }
105 br.close();
106 }
107
108 public static ArrayList<String> computeValues (String line) {
109 ArrayList<String> values;
110 Matcher m = quotePattern.matcher(line);
111 values = new ArrayList<String>();
112 while (m.find()) {
113 values.add(m.group(1));
114 }
115 return values;
116 }
117
118 private void parseArray (String annotationCode, String annotationType,
119 ArrayList<String> array) {
120 if (annotationType.equals(AnnotationType.FOUNDRY)) {
121 String code = array.get(1).substring(0, array.get(1).length() - 1);
122 foundry = retrieveOrCreateAnnotation(code, AnnotationType.FOUNDRY,
123 null, array.get(0));
124 }
125 else if (annotationType.equals(AnnotationType.LAYER)) {
126 String code = array.get(1);
127 if (code.endsWith("=")) {
128 code = code.substring(0, code.length() - 1);
129 }
130 Annotation layer = retrieveOrCreateAnnotation(code, annotationType,
131 null, array.get(0));
132 try {
margaretha35e1ca22023-11-16 22:00:01 +0100133 AnnotationLayer annotationLayer = annotationDao
134 .retrieveAnnotationLayer(foundry.getCode(),
margaretha2c50c732018-10-17 18:48:52 +0200135 layer.getCode());
136 if (annotationLayer == null) {
137 annotationDao.createAnnotationLayer(foundry, layer);
138 }
margaretha51e5e3f2018-10-17 15:10:03 +0200139 }
140 catch (Exception e) {
141 log.debug("Duplicate annotation layer: " + foundry.getCode()
142 + "/" + layer.getCode());
143 }
144 }
margaretha2c50c732018-10-17 18:48:52 +0200145 else if (annotationType.equals(AnnotationType.KEY))
146
147 {
margaretha51e5e3f2018-10-17 15:10:03 +0200148 if (layer == null) {
149 computeLayer(annotationCode);
150 }
margaretha3da7cd32018-10-22 17:42:52 +0200151
margaretha51e5e3f2018-10-17 15:10:03 +0200152 Annotation annotation = null;
153 if (array.size() == 2) {
margaretha3da7cd32018-10-22 17:42:52 +0200154 String code = array.get(1);
155 if (code.endsWith("=") || code.endsWith(":")) {
156 code = code.substring(0, code.length() - 1);
157 }
margaretha51e5e3f2018-10-17 15:10:03 +0200158 annotation = retrieveOrCreateAnnotation(code, annotationType,
159 null, array.get(0));
160 }
161 else if (array.size() == 3) {
margaretha3da7cd32018-10-22 17:42:52 +0200162 annotation = retrieveOrCreateAnnotation(array.get(0),
163 annotationType, array.get(1), array.get(2));
margaretha51e5e3f2018-10-17 15:10:03 +0200164 }
165 if (annotation != null) {
margaretha35e1ca22023-11-16 22:00:01 +0100166 AnnotationKey annotationKey = annotationDao
167 .retrieveAnnotationKey(layer, annotation);
margaretha51e5e3f2018-10-17 15:10:03 +0200168 if (annotationKey == null) {
169 annotationDao.createAnnotationKey(layer, annotation);
170 }
171 this.keys.add(annotationKey);
172 }
173 }
174 else if (annotationType.equals(AnnotationType.VALUE)) {
175 if (this.key == null) {
176 computeKey(annotationCode);
177 }
margaretha3da7cd32018-10-22 17:42:52 +0200178 Annotation value = retrieveOrCreateAnnotation(array.get(0),
margaretha51e5e3f2018-10-17 15:10:03 +0200179 AnnotationType.VALUE, array.get(1), array.get(2));
180 if (value != null) {
181 values.add(value);
182 }
183 }
184 }
185
186 private void computeKey (String code) {
187 String[] codes = code.split("=");
188 if (codes.length > 1) {
189 computeLayer(codes[0]);
190 String keyCode = codes[1];
191 if (keyCode.endsWith(":") || keyCode.endsWith("-")) {
192 keyCode = keyCode.substring(0, keyCode.length() - 1);
193 }
194 Annotation key = annotationDao.retrieveAnnotation(keyCode,
195 AnnotationType.KEY);
196 this.key = annotationDao.retrieveAnnotationKey(layer, key);
197 }
198
199 }
200
201 private void computeLayer (String code) {
202 String[] codes = code.split("/");
203 if (codes.length > 1) {
204 String layerCode = codes[1];
205 if (layerCode.endsWith("=")) {
206 layerCode = layerCode.substring(0, layerCode.length() - 1);
207 }
margaretha35e1ca22023-11-16 22:00:01 +0100208 this.layer = annotationDao.retrieveAnnotationLayer(codes[0],
209 layerCode);
margaretha51e5e3f2018-10-17 15:10:03 +0200210 if (layer == null) {
211 log.warn("Layer is null for " + code);
212 }
213 }
214 }
215
216 private Annotation retrieveOrCreateAnnotation (String code, String type,
217 String text, String description) {
218 Annotation annotation = annotationDao.retrieveAnnotation(code, type);
219 if (annotation == null) {
220 annotation = annotationDao.createAnnotation(code, type, text,
221 description);
222 }
223 return annotation;
224 }
225
226 private String computeAnnotationType (String code) {
227 String[] codes = code.split("/");
228 if (codes.length == 1) {
229 if (codes[0].equals("-")) {
230 return AnnotationType.FOUNDRY;
231 }
232 return AnnotationType.LAYER;
233 }
234 else if (codes.length == 2) {
235 if (codes[1].endsWith(":") || codes[1].endsWith("-")) {
236 return AnnotationType.VALUE;
237 }
238 else {
239 return AnnotationType.KEY;
240 }
241 }
242
243 return "unknown";
244 }
245
246}