blob: 95fed85853d5f02945d6340f6887e8b195ae578c [file] [log] [blame]
margaretha4ec2cd32016-02-29 09:46:36 +00001package de.mannheim.ids.korap.sru;
2
3import java.io.ByteArrayInputStream;
4import java.io.IOException;
5import java.io.InputStream;
margarethad7fda432016-08-17 15:49:02 +02006import java.net.URISyntaxException;
margaretha4a5f1c22016-08-03 17:34:32 +02007import java.util.Arrays;
8import java.util.List;
9import java.util.Map;
10import java.util.Set;
margaretha4ec2cd32016-02-29 09:46:36 +000011
12import javax.xml.parsers.ParserConfigurationException;
13import javax.xml.parsers.SAXParser;
14import javax.xml.parsers.SAXParserFactory;
15import javax.xml.stream.XMLStreamException;
16import javax.xml.stream.XMLStreamWriter;
17
18import org.slf4j.Logger;
19import org.slf4j.LoggerFactory;
20import org.xml.sax.SAXException;
21
22import eu.clarin.sru.server.SRUConstants;
23import eu.clarin.sru.server.SRUDiagnosticList;
24import eu.clarin.sru.server.SRUException;
25import eu.clarin.sru.server.SRUSearchResultSet;
margaretha4a5f1c22016-08-03 17:34:32 +020026import eu.clarin.sru.server.fcs.AdvancedDataViewWriter;
27import eu.clarin.sru.server.fcs.Layer;
margaretha4ec2cd32016-02-29 09:46:36 +000028import eu.clarin.sru.server.fcs.XMLStreamWriterHelper;
29
margarethad7fda432016-08-17 15:49:02 +020030/**
31 * Prepares and creates a search result set for a search retrieve URL
32 * call.
33 *
34 * @author margaretha
35 *
36 */
margaretha4ec2cd32016-02-29 09:46:36 +000037public class KorapSRUSearchResultSet extends SRUSearchResultSet {
38
margaretha4a5f1c22016-08-03 17:34:32 +020039 private Logger logger = (Logger) LoggerFactory
40 .getLogger(KorapSRUSearchResultSet.class);
margaretha4ec2cd32016-02-29 09:46:36 +000041
margaretha4a5f1c22016-08-03 17:34:32 +020042 private int i = -1;
43 private KorapResult korapResult;
44 private List<String> dataviews;
margaretha4a5f1c22016-08-03 17:34:32 +020045 private SAXParser saxParser;
margarethad7fda432016-08-17 15:49:02 +020046 private Layer textLayer;
47 private AnnotationHandler annotationHandler;
margaretha43ea7312016-08-08 19:00:23 +020048
margarethad7fda432016-08-17 15:49:02 +020049 /**
50 * Constructs a KorapSRUSearchResultSet for the given KorapResult.
51 *
52 * @param diagnostics
53 * a list of SRU diagnostics
54 * @param korapResult
55 * the query result
56 * @param dataviews
57 * the required dataviews to generate
58 * @param textlayer
59 * the text layer
60 * @param annotationLayers
61 * the list of annotation layers
62 * @throws SRUException
63 */
margaretha4a5f1c22016-08-03 17:34:32 +020064 public KorapSRUSearchResultSet (SRUDiagnosticList diagnostics,
margarethad7fda432016-08-17 15:49:02 +020065 KorapResult korapResult, List<String> dataviews, Layer textlayer,
66 List<AnnotationLayer> annotationLayers) throws SRUException {
margaretha4a5f1c22016-08-03 17:34:32 +020067 super(diagnostics);
margaretha4ec2cd32016-02-29 09:46:36 +000068
margaretha4a5f1c22016-08-03 17:34:32 +020069 SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
70 try {
71 saxParser = saxParserFactory.newSAXParser();
72 }
73 catch (ParserConfigurationException | SAXException e) {
margaretha43ea7312016-08-08 19:00:23 +020074 throw new SRUException(SRUConstants.SRU_GENERAL_SYSTEM_ERROR, e);
margaretha4a5f1c22016-08-03 17:34:32 +020075 }
margaretha4ec2cd32016-02-29 09:46:36 +000076
margaretha4a5f1c22016-08-03 17:34:32 +020077 this.korapResult = korapResult;
78 this.dataviews = dataviews;
margarethad7fda432016-08-17 15:49:02 +020079 this.textLayer = textlayer;
80 annotationHandler = new AnnotationHandler(annotationLayers);
margaretha4a5f1c22016-08-03 17:34:32 +020081 }
margaretha4ec2cd32016-02-29 09:46:36 +000082
margaretha4a5f1c22016-08-03 17:34:32 +020083 @Override
84 public int getTotalRecordCount() {
85 return korapResult.getTotalResults();
86 }
margaretha4ec2cd32016-02-29 09:46:36 +000087
margaretha4a5f1c22016-08-03 17:34:32 +020088 @Override
89 public int getRecordCount() {
90 return korapResult.getMatchSize();
91 }
margaretha4ec2cd32016-02-29 09:46:36 +000092
margaretha4a5f1c22016-08-03 17:34:32 +020093 @Override
94 public String getRecordSchemaIdentifier() {
95 return KorapSRU.CLARIN_FCS_RECORD_SCHEMA;
96 }
margaretha4ec2cd32016-02-29 09:46:36 +000097
margaretha4a5f1c22016-08-03 17:34:32 +020098 @Override
99 public boolean nextRecord() throws SRUException {
100 return (++i < korapResult.getMatchSize() ? true : false);
101 }
102
103 @Override
104 public String getRecordIdentifier() {
margaretha43ea7312016-08-08 19:00:23 +0200105 return korapResult.getMatch(i).getMatchId();
margaretha4a5f1c22016-08-03 17:34:32 +0200106 }
107
108 @Override
109 public void writeRecord(XMLStreamWriter writer) throws XMLStreamException {
margarethad7fda432016-08-17 15:49:02 +0200110 KorapMatch match = korapResult.getMatch(i);
margaretha43ea7312016-08-08 19:00:23 +0200111 match.parseMatchId();
margaretha43ea7312016-08-08 19:00:23 +0200112 XMLStreamWriterHelper.writeStartResource(writer, match.getMatchId(),
113 null);
margaretha4a5f1c22016-08-03 17:34:32 +0200114 XMLStreamWriterHelper.writeStartResourceFragment(writer, null, null);
115
116 List<AnnotationLayer> annotationLayers;
margaretha43ea7312016-08-08 19:00:23 +0200117 annotationLayers = parseAnnotations(match);
margaretha4a5f1c22016-08-03 17:34:32 +0200118
119 writeAdvancedDataView(writer, annotationLayers);
120
121 XMLStreamWriterHelper.writeEndResourceFragment(writer);
122 XMLStreamWriterHelper.writeEndResource(writer);
123 }
124
margarethad7fda432016-08-17 15:49:02 +0200125 /**
126 * Parses the current match snippet from KorAP search API into
127 * keyword, left context and right context.
128 *
129 * @return a KorapMatch
130 * @throws XMLStreamException
131 */
132 @Deprecated
margaretha4a5f1c22016-08-03 17:34:32 +0200133 private KorapMatch parseMatch() throws XMLStreamException {
134 KorapMatch match = korapResult.getMatch(i);
135 String snippet = "<snippet>" + match.getSnippet() + "</snippet>";
136 InputStream is = new ByteArrayInputStream(snippet.getBytes());
137 try {
138 saxParser.parse(is, new KorapMatchHandler(match));
139 }
140 catch (SAXException | IOException e) {
141 throw new XMLStreamException(e);
142 }
143 return match;
144 }
145
margarethad7fda432016-08-17 15:49:02 +0200146 /**
147 * Retrieves and parses the annotations of a match from KorAP
148 * MatchInfo API.
149 *
150 * @param match
151 * a KorapMatch
152 * @return a list of annotation layers containing the match
153 * annotations.
154 * @throws XMLStreamException
155 */
margaretha4a5f1c22016-08-03 17:34:32 +0200156 private List<AnnotationLayer> parseAnnotations(KorapMatch match)
margaretha43ea7312016-08-08 19:00:23 +0200157 throws XMLStreamException {
margarethad7fda432016-08-17 15:49:02 +0200158 if (match == null) {
159 throw new NullPointerException("KorapMatch is null.");
160 }
161
margaretha4a5f1c22016-08-03 17:34:32 +0200162 try {
margarethad7fda432016-08-17 15:49:02 +0200163 String annotationSnippet = KorapClient.retrieveAnnotations(
Eliza Margarethae0e40a32016-11-09 19:16:08 +0100164 match.getCorpusId(), match.getDocId(), match.getTextId(),
margarethad7fda432016-08-17 15:49:02 +0200165 match.getPositionId(), "*");
166 InputStream is = new ByteArrayInputStream(
Eliza Margarethae0e40a32016-11-09 19:16:08 +0100167 annotationSnippet.getBytes("UTF-8"));
margaretha4a5f1c22016-08-03 17:34:32 +0200168 saxParser.parse(is, annotationHandler);
169 }
margarethad7fda432016-08-17 15:49:02 +0200170 catch (SAXException | IOException | URISyntaxException e) {
margaretha43ea7312016-08-08 19:00:23 +0200171 throw new XMLStreamException(e);
margaretha4a5f1c22016-08-03 17:34:32 +0200172 }
margaretha43ea7312016-08-08 19:00:23 +0200173
margaretha4a5f1c22016-08-03 17:34:32 +0200174 return annotationHandler.getAnnotationLayers();
175 }
176
margarethad7fda432016-08-17 15:49:02 +0200177 /**
178 * Writes advanced data views, namely segment annotations for each
179 * annotation layer.
180 *
181 * @param writer
182 * an XMLStreamWriter
183 * @param annotationLayers
184 * a list of annotation layers
185 * @throws XMLStreamException
186 */
margaretha4a5f1c22016-08-03 17:34:32 +0200187 private void writeAdvancedDataView(XMLStreamWriter writer,
margaretha43ea7312016-08-08 19:00:23 +0200188 List<AnnotationLayer> annotationLayers) throws XMLStreamException {
margaretha4a5f1c22016-08-03 17:34:32 +0200189
190 AdvancedDataViewWriter helper = new AdvancedDataViewWriter(
191 AdvancedDataViewWriter.Unit.ITEM);
192
193 addAnnotationsToWriter(helper, annotationLayers);
194
195 helper.writeHitsDataView(writer, textLayer.getResultId());
196
197 if (dataviews.contains("adv")) {
198 helper.writeAdvancedDataView(writer);
199 }
margaretha4a5f1c22016-08-03 17:34:32 +0200200 }
margaretha43ea7312016-08-08 19:00:23 +0200201
margarethad7fda432016-08-17 15:49:02 +0200202 /**
203 * Adds all annotations to the AdvancedDataViewWriter.
204 *
205 * @param helper
206 * an AdvancedDataViewWriter
207 * @param annotationLayers
208 * a list of annotation layers containing match
209 * annotations.
210 */
margaretha4a5f1c22016-08-03 17:34:32 +0200211 private void addAnnotationsToWriter(AdvancedDataViewWriter helper,
212 List<AnnotationLayer> annotationLayers) {
margaretha43ea7312016-08-08 19:00:23 +0200213
214 Map<Integer, List<Annotation>> map;
215 for (AnnotationLayer annotationLayer : annotationLayers) {
margaretha4a5f1c22016-08-03 17:34:32 +0200216 map = annotationLayer.getAnnotationMap();
217 Set<Integer> keyset = map.keySet();
218 Integer[] keyArray = keyset.toArray(new Integer[keyset.size()]);
219 Arrays.sort(keyArray);
220 for (int key : keyArray) {
221 List<Annotation> annotations = map.get(key);
222 if (annotations == null) {
223 continue;
224 }
225
226 // FCS advanced dataview does not allow multiple
227 // annotations on the same segment.
margaretha43ea7312016-08-08 19:00:23 +0200228 // for (Annotation annotation : annotations){
margaretha4a5f1c22016-08-03 17:34:32 +0200229 Annotation annotation = annotations.get(0);
margaretha43ea7312016-08-08 19:00:23 +0200230
margarethad7fda432016-08-17 15:49:02 +0200231 if (annotation.getHitLevel() > 0) {
margaretha43ea7312016-08-08 19:00:23 +0200232 helper.addSpan(annotationLayer.getLayerId(),
233 annotation.getStart(), annotation.getEnd(),
234 annotation.getValue(), annotation.getHitLevel());
margaretha4a5f1c22016-08-03 17:34:32 +0200235 }
236 else {
margaretha4a5f1c22016-08-03 17:34:32 +0200237 helper.addSpan(annotationLayer.getLayerId(),
238 annotation.getStart(), annotation.getEnd(),
239 annotation.getValue());
240 }
margaretha4a5f1c22016-08-03 17:34:32 +0200241 }
margaretha43ea7312016-08-08 19:00:23 +0200242 map.clear();
margaretha4a5f1c22016-08-03 17:34:32 +0200243 }
244 }
margaretha4ec2cd32016-02-29 09:46:36 +0000245}