blob: ccb6d1a67b5ebbae03cba354436a58c0bea00a41 [file] [log] [blame]
margaretha51e5e3f2018-10-17 15:10:03 +02001package de.ids_mannheim.korap.annotation;
2
3import java.io.BufferedReader;
margaretha51e5e3f2018-10-17 15:10:03 +02004import java.io.IOException;
margaretha2df06602018-11-14 19:10:30 +01005import java.io.InputStream;
margaretha51e5e3f2018-10-17 15:10:03 +02006import java.io.InputStreamReader;
7import java.util.ArrayList;
8import java.util.HashSet;
9import java.util.Set;
10import java.util.regex.Matcher;
11import java.util.regex.Pattern;
12
13import org.apache.logging.log4j.LogManager;
14import org.apache.logging.log4j.Logger;
15import org.springframework.beans.factory.annotation.Autowired;
margaretha2df06602018-11-14 19:10:30 +010016import org.springframework.core.io.Resource;
17import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
margaretha51e5e3f2018-10-17 15:10:03 +020018import org.springframework.stereotype.Component;
19
20import de.ids_mannheim.korap.constant.AnnotationType;
margaretha5b708792023-05-12 16:55:29 +020021import de.ids_mannheim.korap.core.entity.Annotation;
22import de.ids_mannheim.korap.core.entity.AnnotationKey;
23import de.ids_mannheim.korap.core.entity.AnnotationLayer;
margaretha51e5e3f2018-10-17 15:10:03 +020024import de.ids_mannheim.korap.dao.AnnotationDao;
margaretha51e5e3f2018-10-17 15:10:03 +020025
margaretha398f4722019-01-09 19:07:20 +010026/** Parser for extracting annotation descriptions from Kalamar javascripts
27 *
28 * @author margaretha
29 *
30 */
margarethafc5fab22021-07-26 12:17:39 +020031@Deprecated
margaretha51e5e3f2018-10-17 15:10:03 +020032@Component
33public class AnnotationParser {
34
35 private Logger log = LogManager.getLogger(AnnotationDao.class);
36
37 public static final Pattern quotePattern = Pattern.compile("\"([^\"]*)\"");
38
39 @Autowired
40 private AnnotationDao annotationDao;
41
42 private Annotation foundry = null;
43 private AnnotationLayer layer = null;
44 private AnnotationKey key = null;
45
46 private Set<AnnotationKey> keys = new HashSet<>();
47 private Set<Annotation> values = new HashSet<>();
48
49 public void run () throws IOException {
margaretha2df06602018-11-14 19:10:30 +010050 PathMatchingResourcePatternResolver resolver =
51 new PathMatchingResourcePatternResolver(
52 getClass().getClassLoader());
53 Resource[] resources = resolver
54 .getResources("classpath:annotation-scripts/foundries/*.js");
margaretha51e5e3f2018-10-17 15:10:03 +020055
margaretha2df06602018-11-14 19:10:30 +010056 if (resources.length < 1) return;
margaretha51e5e3f2018-10-17 15:10:03 +020057
margaretha2df06602018-11-14 19:10:30 +010058 for (Resource r : resources) {
margarethae72355a2018-11-28 16:53:09 +010059// log.debug(r.getFilename());
margaretha2df06602018-11-14 19:10:30 +010060 readFile(r.getInputStream());
margaretha51e5e3f2018-10-17 15:10:03 +020061 }
62 }
63
margaretha2df06602018-11-14 19:10:30 +010064 private void readFile (InputStream inputStream) throws IOException {
65 BufferedReader br =
66 new BufferedReader(new InputStreamReader(inputStream), 1024);
margaretha51e5e3f2018-10-17 15:10:03 +020067
68 foundry = null;
69
70 String line, annotationCode = "", annotationType = "";
71 Matcher m;
72 ArrayList<String> array;
73 while ((line = br.readLine()) != null) {
74 line = line.trim();
75 if (line.startsWith("ah")) {
76 m = quotePattern.matcher(line);
77 if (m.find()) {
78 annotationCode = m.group(1);
79 annotationType = computeAnnotationType(annotationCode);
80 }
81 m.reset();
82 }
83 else if (line.startsWith("];")) {
84 if (!keys.isEmpty()) {
85 layer.setKeys(keys);
86 annotationDao.updateAnnotationLayer(layer);
87 }
88 if (!values.isEmpty()) {
89 key.setValues(values);
90 annotationDao.updateAnnotationKey(key);
91 }
92 keys.clear();
93 values.clear();
94 layer = null;
95 key = null;
96 }
97 else if (line.startsWith("[")) {
98 array = computeValues(line);
99 parseArray(annotationCode, annotationType, array);
100 }
101
102 }
103 br.close();
104 }
105
106 public static ArrayList<String> computeValues (String line) {
107 ArrayList<String> values;
108 Matcher m = quotePattern.matcher(line);
109 values = new ArrayList<String>();
110 while (m.find()) {
111 values.add(m.group(1));
112 }
113 return values;
114 }
115
116 private void parseArray (String annotationCode, String annotationType,
117 ArrayList<String> array) {
118 if (annotationType.equals(AnnotationType.FOUNDRY)) {
119 String code = array.get(1).substring(0, array.get(1).length() - 1);
120 foundry = retrieveOrCreateAnnotation(code, AnnotationType.FOUNDRY,
121 null, array.get(0));
122 }
123 else if (annotationType.equals(AnnotationType.LAYER)) {
124 String code = array.get(1);
125 if (code.endsWith("=")) {
126 code = code.substring(0, code.length() - 1);
127 }
128 Annotation layer = retrieveOrCreateAnnotation(code, annotationType,
129 null, array.get(0));
130 try {
margaretha2c50c732018-10-17 18:48:52 +0200131 AnnotationLayer annotationLayer =
132 annotationDao.retrieveAnnotationLayer(foundry.getCode(),
133 layer.getCode());
134 if (annotationLayer == null) {
135 annotationDao.createAnnotationLayer(foundry, layer);
136 }
margaretha51e5e3f2018-10-17 15:10:03 +0200137 }
138 catch (Exception e) {
139 log.debug("Duplicate annotation layer: " + foundry.getCode()
140 + "/" + layer.getCode());
141 }
142 }
margaretha2c50c732018-10-17 18:48:52 +0200143 else if (annotationType.equals(AnnotationType.KEY))
144
145 {
margaretha51e5e3f2018-10-17 15:10:03 +0200146 if (layer == null) {
147 computeLayer(annotationCode);
148 }
margaretha3da7cd32018-10-22 17:42:52 +0200149
margaretha51e5e3f2018-10-17 15:10:03 +0200150 Annotation annotation = null;
151 if (array.size() == 2) {
margaretha3da7cd32018-10-22 17:42:52 +0200152 String code = array.get(1);
153 if (code.endsWith("=") || code.endsWith(":")) {
154 code = code.substring(0, code.length() - 1);
155 }
margaretha51e5e3f2018-10-17 15:10:03 +0200156 annotation = retrieveOrCreateAnnotation(code, annotationType,
157 null, array.get(0));
158 }
159 else if (array.size() == 3) {
margaretha3da7cd32018-10-22 17:42:52 +0200160 annotation = retrieveOrCreateAnnotation(array.get(0),
161 annotationType, array.get(1), array.get(2));
margaretha51e5e3f2018-10-17 15:10:03 +0200162 }
163 if (annotation != null) {
164 AnnotationKey annotationKey =
165 annotationDao.retrieveAnnotationKey(layer, annotation);
166 if (annotationKey == null) {
167 annotationDao.createAnnotationKey(layer, annotation);
168 }
169 this.keys.add(annotationKey);
170 }
171 }
172 else if (annotationType.equals(AnnotationType.VALUE)) {
173 if (this.key == null) {
174 computeKey(annotationCode);
175 }
margaretha3da7cd32018-10-22 17:42:52 +0200176 Annotation value = retrieveOrCreateAnnotation(array.get(0),
margaretha51e5e3f2018-10-17 15:10:03 +0200177 AnnotationType.VALUE, array.get(1), array.get(2));
178 if (value != null) {
179 values.add(value);
180 }
181 }
182 }
183
184 private void computeKey (String code) {
185 String[] codes = code.split("=");
186 if (codes.length > 1) {
187 computeLayer(codes[0]);
188 String keyCode = codes[1];
189 if (keyCode.endsWith(":") || keyCode.endsWith("-")) {
190 keyCode = keyCode.substring(0, keyCode.length() - 1);
191 }
192 Annotation key = annotationDao.retrieveAnnotation(keyCode,
193 AnnotationType.KEY);
194 this.key = annotationDao.retrieveAnnotationKey(layer, key);
195 }
196
197 }
198
199 private void computeLayer (String code) {
200 String[] codes = code.split("/");
201 if (codes.length > 1) {
202 String layerCode = codes[1];
203 if (layerCode.endsWith("=")) {
204 layerCode = layerCode.substring(0, layerCode.length() - 1);
205 }
206 this.layer =
207 annotationDao.retrieveAnnotationLayer(codes[0], layerCode);
208 if (layer == null) {
209 log.warn("Layer is null for " + code);
210 }
211 }
212 }
213
214 private Annotation retrieveOrCreateAnnotation (String code, String type,
215 String text, String description) {
216 Annotation annotation = annotationDao.retrieveAnnotation(code, type);
217 if (annotation == null) {
218 annotation = annotationDao.createAnnotation(code, type, text,
219 description);
220 }
221 return annotation;
222 }
223
224 private String computeAnnotationType (String code) {
225 String[] codes = code.split("/");
226 if (codes.length == 1) {
227 if (codes[0].equals("-")) {
228 return AnnotationType.FOUNDRY;
229 }
230 return AnnotationType.LAYER;
231 }
232 else if (codes.length == 2) {
233 if (codes[1].endsWith(":") || codes[1].endsWith("-")) {
234 return AnnotationType.VALUE;
235 }
236 else {
237 return AnnotationType.KEY;
238 }
239 }
240
241 return "unknown";
242 }
243
244}