Added advanced dataviews.
Change-Id: I941c73874c3dc79de2eef41dcc0b6eb0678bdd0a
diff --git a/src/main/java/de/mannheim/ids/korap/sru/Annotation.java b/src/main/java/de/mannheim/ids/korap/sru/Annotation.java
new file mode 100644
index 0000000..43081c3
--- /dev/null
+++ b/src/main/java/de/mannheim/ids/korap/sru/Annotation.java
@@ -0,0 +1,62 @@
+package de.mannheim.ids.korap.sru;
+
+/**
+ * A single annotation value together with the offsets of the span it
+ * covers. Instances are created by AnnotationHandler and stored in the
+ * annotation map of an AnnotationLayer.
+ *
+ * @author margaretha
+ *
+ */
+public class Annotation {
+
+ private int id; // key under which AnnotationHandler files this annotation in a layer's map
+ private long start; // start offset of the covered span, as supplied by the creator
+ private long end; // end offset of the covered span, as supplied by the creator
+ private String value; // annotation value (AnnotationHandler also stores plain text chunks here)
+ private boolean isKeyword; // AnnotationHandler sets this for text flushed while more than one mark element is open
+
+ public Annotation (int id, String value, long start, long end, boolean isKeyword) {
+ this.id = id;
+ this.value = value;
+ this.start = start;
+ this.end = end;
+ this.isKeyword = isKeyword;
+ }
+
+ public int getId() {
+ return id;
+ }
+
+ public void setId(int id) {
+ this.id = id;
+ }
+
+ public long getStart() {
+ return start;
+ }
+
+ public void setStart(long start) {
+ this.start = start;
+ }
+
+ public long getEnd() {
+ return end;
+ }
+
+ public void setEnd(long end) {
+ this.end = end;
+ }
+
+ public String getValue() {
+ return value;
+ }
+
+ public void setValue(String value) {
+ this.value = value;
+ }
+
+ public boolean isKeyword() {
+ return isKeyword;
+ }
+
+ public void setKeyword(boolean isKeyword) {
+ this.isKeyword = isKeyword;
+ }
+}
diff --git a/src/main/java/de/mannheim/ids/korap/sru/AnnotationHandler.java b/src/main/java/de/mannheim/ids/korap/sru/AnnotationHandler.java
new file mode 100644
index 0000000..33b6096
--- /dev/null
+++ b/src/main/java/de/mannheim/ids/korap/sru/AnnotationHandler.java
@@ -0,0 +1,156 @@
+package de.mannheim.ids.korap.sru;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.hamcrest.core.IsSame;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * SAX handler that converts a marked-up match snippet into annotations on a
+ * list of {@link AnnotationLayer}s.
+ * <p>
+ * Text delimited by {@code mark} elements is filed under the first layer of
+ * the list (presumably the text layer; confirm against the endpoint
+ * description). While at least one {@code mark} is open, every {@code span}
+ * whose first attribute is {@code title} contributes one
+ * {@code layerCode:value} string. The strings collected for a segment are
+ * parsed when the next non-{@code mark} element closes, and the values are
+ * filed under the layer with the matching layer code.
+ * <p>
+ * Offsets are running totals of the lengths passed to
+ * {@link #characters(char[], int, int)}.
+ */
+public class AnnotationHandler extends DefaultHandler {
+
+    // Currently not used by the handler itself.
+    private static final Logger logger = LoggerFactory
+            .getLogger(AnnotationHandler.class);
+
+    /** True while no annotated segment is open. */
+    private boolean startSegment = true;
+
+    /** Number of currently open mark elements. */
+    private int matchLevel = 0;
+
+    private List<AnnotationLayer> annotationLayers;
+    /** Title strings collected for the segment that is currently open. */
+    private List<String> annotationStrings;
+
+    private StringBuilder segmentBuilder = new StringBuilder();
+    private StringBuilder textBuilder = new StringBuilder();
+    private String text = "";
+
+    /** Map key used for every annotation added until the segment closes. */
+    private int id;
+    long segmentStartOffset = 0, segmentEndOffset = 0;
+    long textStartOffset = 0, textEndOffset = 0;
+
+    /**
+     * @param annotationLayers
+     *            layers that receive the annotations; the first entry
+     *            receives the plain text chunks
+     */
+    public AnnotationHandler (List<AnnotationLayer> annotationLayers) {
+        this.annotationLayers = annotationLayers;
+        annotationStrings = new ArrayList<String>();
+        id = 1;
+    }
+
+    @Override
+    public void startElement(String uri, String localName, String qName,
+            Attributes attributes) throws SAXException {
+
+        if (qName.equals("mark")) {
+            // Text in front of the mark is only recorded when non-empty.
+            flushText(false);
+            matchLevel++;
+        }
+        // getQName(0) is null for an element without attributes, so the
+        // constant goes first to avoid a NullPointerException.
+        if (matchLevel > 0 && qName.equals("span")
+                && "title".equals(attributes.getQName(0))) {
+            if (startSegment) {
+                segmentStartOffset = segmentEndOffset;
+                startSegment = false;
+            }
+            annotationStrings.add(attributes.getValue("title"));
+        }
+        super.startElement(uri, localName, qName, attributes);
+    }
+
+    @Override
+    public void endElement(String uri, String localName, String qName)
+            throws SAXException {
+
+        if (qName.equals("mark")) {
+            // NOTE(review): unlike startElement, the closing mark records the
+            // pending text even when it is empty; kept as in the original.
+            flushText(true);
+            matchLevel--;
+        }
+        else if (!startSegment) {
+            for (String annotationStr : annotationStrings) {
+                parseAnnotation(annotationStr);
+            }
+            id++;
+            startSegment = true;
+            annotationStrings.clear();
+        }
+    }
+
+    @Override
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        if (!startSegment) {
+            segmentBuilder.append(ch, start, length);
+        }
+        textBuilder.append(ch, start, length);
+        segmentEndOffset += length;
+        textEndOffset += length;
+    }
+
+    /**
+     * Moves the buffered text into the first layer and advances the text
+     * start offset.
+     *
+     * @param emitEmpty
+     *            whether an empty buffer still produces an annotation
+     */
+    private void flushText(boolean emitEmpty) {
+        text = textBuilder.toString();
+        textBuilder = new StringBuilder();
+        if (emitEmpty || !text.isEmpty()) {
+            addAnnotationToMap(text, annotationLayers.get(0),
+                    (matchLevel > 1), textStartOffset, textEndOffset);
+            textStartOffset = textEndOffset;
+        }
+    }
+
+    /**
+     * Splits a {@code layerCode:value} string at the first colon and files
+     * the value under the layer with that code. Strings without a colon or
+     * with an empty value are ignored.
+     */
+    private void parseAnnotation(String annotationStr) {
+        if (annotationStr == null || annotationStr.isEmpty()) return;
+
+        // Only the first colon separates code and value, so values that
+        // contain a colon (or consist of one) are kept intact.
+        int separator = annotationStr.indexOf(':');
+        if (separator < 0) return;
+
+        String layerCode = annotationStr.substring(0, separator);
+        String value = annotationStr.substring(separator + 1);
+        if (value.isEmpty()) return;
+
+        for (AnnotationLayer annotationLayer : annotationLayers) {
+            if (annotationLayer.getLayerCode().equals(
+                    AnnotationLayer.TYPE.TEXT.toString())) {
+                segmentBuilder = new StringBuilder();
+            }
+            else if (annotationLayer.getLayerCode().equals(layerCode)) {
+                addAnnotationToMap(value, annotationLayer, false);
+                break;
+            }
+        }
+    }
+
+    /** Files a value using the offsets of the current segment. */
+    private void addAnnotationToMap(String value,
+            AnnotationLayer annotationLayer, boolean isKeyword) {
+        addAnnotationToMap(value, annotationLayer, isKeyword,
+                segmentStartOffset, segmentEndOffset);
+    }
+
+    /** Appends a new annotation to the layer's list for the current id. */
+    private void addAnnotationToMap(String value,
+            AnnotationLayer annotationLayer, boolean isKeyword,
+            long startOffset, long endOffset) {
+
+        Map<Integer, List<Annotation>> map = annotationLayer.getAnnotationMap();
+        List<Annotation> annotations = map.get(id);
+        if (annotations == null) {
+            annotations = new ArrayList<Annotation>();
+            map.put(id, annotations);
+        }
+        annotations.add(new Annotation(id, value, startOffset, endOffset,
+                isKeyword));
+    }
+
+    public List<AnnotationLayer> getAnnotationLayers() {
+        return annotationLayers;
+    }
+
+    public void setAnnotationLayers(List<AnnotationLayer> annotationLayers) {
+        this.annotationLayers = annotationLayers;
+    }
+}
diff --git a/src/main/java/de/mannheim/ids/korap/sru/AnnotationLayer.java b/src/main/java/de/mannheim/ids/korap/sru/AnnotationLayer.java
new file mode 100644
index 0000000..f79ffd8
--- /dev/null
+++ b/src/main/java/de/mannheim/ids/korap/sru/AnnotationLayer.java
@@ -0,0 +1,47 @@
+package de.mannheim.ids.korap.sru;
+
+import java.net.URI;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * An annotation layer identified by a layer code and a layer URI, together
+ * with the annotations collected for it, keyed by annotation id.
+ */
+public class AnnotationLayer {
+
+    /** Layer types; {@link #toString()} yields the lower-case constant name. */
+    public static enum TYPE {
+        TEXT, POS, LEMMA;
+
+        @Override
+        public String toString() {
+            // Locale.ROOT keeps the result independent of the JVM's default
+            // locale, since the string is compared against layer type names.
+            return name().toLowerCase(java.util.Locale.ROOT);
+        }
+    }
+
+    private String layerCode;
+    private URI layerId;
+    private Map<Integer,List<Annotation>> annotationMap;
+
+    /**
+     * Creates a layer with an empty annotation map.
+     *
+     * @param layerCode
+     *            code the layer is looked up by
+     * @param layerId
+     *            URI identifying the layer
+     */
+    public AnnotationLayer (String layerCode, URI layerId) {
+        this.layerCode = layerCode;
+        this.layerId = layerId;
+        this.annotationMap = new HashMap<Integer,List<Annotation>>();
+    }
+
+    public String getLayerCode() {
+        return layerCode;
+    }
+
+    public void setLayerCode(String layerCode) {
+        this.layerCode = layerCode;
+    }
+
+    /** Returns the live map of annotation id to annotations (not a copy). */
+    public Map<Integer,List<Annotation>> getAnnotationMap() {
+        return annotationMap;
+    }
+
+    public void setAnnotationMap(Map<Integer,List<Annotation>> annotationMap) {
+        this.annotationMap = annotationMap;
+    }
+
+    public URI getLayerId() {
+        return layerId;
+    }
+
+    public void setLayerId(URI layerId) {
+        this.layerId = layerId;
+    }
+}
diff --git a/src/main/java/de/mannheim/ids/korap/sru/KorapClient.java b/src/main/java/de/mannheim/ids/korap/sru/KorapClient.java
index fff2e5d..69c3c0a 100644
--- a/src/main/java/de/mannheim/ids/korap/sru/KorapClient.java
+++ b/src/main/java/de/mannheim/ids/korap/sru/KorapClient.java
@@ -27,189 +27,189 @@
import com.fasterxml.jackson.databind.ObjectMapper;
public class KorapClient {
-
- private static final String SERVICE_URI = "http://10.0.10.13:7070/api/v0.1/";
- private String CONTEXT_TYPE = "sentence";
-
- private int defaultNumOfRecords = 10;
- private int defaultMaxRecords = 10;
-
- private ObjectMapper objectMapper;
- private Logger logger = (Logger) LoggerFactory.getLogger(KorapClient.class);
-
- public KorapClient(int numOfRecords, int maxRecords) {
- this.objectMapper = new ObjectMapper();
- this.defaultNumOfRecords = numOfRecords;
- this.defaultMaxRecords = maxRecords;
- }
-
- public JsonNode retrieveResources() throws URISyntaxException, IOException {
-
- URIBuilder builder = new URIBuilder(SERVICE_URI+"VirtualCollection");
- //builder.addParameter("type", "VirtualCollection");
- URI uri = builder.build();
- logger.info("Resource URI: "+ uri.toString());
- HttpGet httpRequest = new HttpGet(uri);
-
- CloseableHttpClient client = HttpClients.createDefault();
- CloseableHttpResponse response = null;
- JsonNode resources = null;
-
- try {
- response = client.execute(httpRequest);
-
- int statusCode = response.getStatusLine().getStatusCode();
- if (statusCode != HttpStatus.SC_OK){
- logger.warn("Error response code: "+statusCode);
- logger.warn("Error message: "+response.getStatusLine().getReasonPhrase());
- throw new HttpResponseException(statusCode,
- response.getStatusLine().getReasonPhrase()
- );
- }
-
- BufferedInputStream jsonStream = new BufferedInputStream(
- response.getEntity().getContent() );
- try {
- resources = objectMapper.readValue(jsonStream, JsonNode.class);
- } catch (JsonParseException | JsonMappingException e) {
- throw e;
- }
- finally{
- jsonStream.close();
- }
- }
- finally{
- response.close();
- }
-
- return resources;
- }
-
-
- public KorapResult query(String query, QueryLanguage queryLanguage,
- String version, int startRecord, int maximumRecords,
- String[] corpora) throws HttpResponseException, IOException {
-
- checkQuery(query, startRecord, maximumRecords);
-
- HttpUriRequest httpRequest = null;
-
- /*if (corpora != null){
- // create virtual collection
- logger.info("Select collection");
- CollectionQuery collectionQuery = new CollectionQuery()
- .addMetaFilter("corpusID", DEFAULT_COLLECTION);
-
- logger.info("create JsonLD");
- QuerySerializer ss = new QuerySerializer()
- .setQuery(query, QUERY_LANGUAGE,version)
- .setCollection(collectionQuery)
- .setMeta(CONTEXT_TYPE, CONTEXT_TYPE,
- CONTEXT_SIZE, CONTEXT_SIZE, 5, startRecord-1);
-
- String jsonld=ss.build();
- logger.info(jsonld);
-
- HttpPost post = new HttpPost(SERVICE_URI+"_raw");
- post.setEntity(new StringEntity(jsonld));
- httpRequest = post;
- }
- else {*/
-
- try {
- httpRequest = createRequest(query, queryLanguage, version,
- startRecord - 1,
- maximumRecords);
- } catch (URISyntaxException e) {
- throw new IOException("Failed creating http request.");
- }
- //}
-
- CloseableHttpClient client = HttpClients.createDefault();
- CloseableHttpResponse response = null;
- KorapResult result = null;
- try {
- response = client.execute(httpRequest);
-
- int statusCode = response.getStatusLine().getStatusCode();
- if (statusCode != HttpStatus.SC_OK){
- logger.warn("Error response code: "+statusCode);
- logger.warn("Error message: "+response.getStatusLine().getReasonPhrase());
- String[] errorMsg = parseError(response);
- logger.warn(errorMsg[0] +"#" +errorMsg[1]);
- throw new HttpResponseException(Integer.parseInt(errorMsg[0]),
- errorMsg[1]);
- }
-
- BufferedInputStream jsonStream = new BufferedInputStream(
- response.getEntity().getContent() );
- try {
- result = objectMapper.readValue(jsonStream, KorapResult.class);
- } catch (IOException e) {
- throw new IOException("Failed processing response.");
- }
- finally{
- jsonStream.close();
- }
- }
- finally{
- response.close();
- }
-
- return result;
- }
-
- private String[] parseError(CloseableHttpResponse response)
- throws IOException{
- InputStream is = response.getEntity().getContent();
- JsonNode node = objectMapper.readTree(is);
- String message = node.get("error").textValue();
- String[] errorItems;
- if (message.contains("SRU diagnostic")) {
- errorItems = message.split(":", 2);
- errorItems[0] = errorItems[0].replace("SRU diagnostic ", "");
- errorItems[1] = errorItems[1].trim();
- }
- else if (message.contains("not a supported query language")){
- errorItems = new String[]{"4",
- "KorAP does not support the query language."};
- }
- else {
- errorItems = new String[]{"1", message};
- }
-
- return errorItems;
- }
-
- private HttpGet createRequest(String query, QueryLanguage queryLanguage,
- String version, int startRecord, int maximumRecords)
- throws URISyntaxException {
- if (maximumRecords <= 0) {
- maximumRecords = defaultNumOfRecords;
- } else if (maximumRecords > defaultMaxRecords) {
- logger.info("limit truncated from {} to {}", maximumRecords,
- defaultMaxRecords);
+ // NOTE(review): the service address is hard-coded; consider reading it from configuration.
+ private static final String SERVICE_URI = "http://10.0.10.13:7070/api/v0.1/";
+ // Value sent as the "context" parameter of every search request.
+ private static final String CONTEXT_TYPE = "sentence";
+
+ private int defaultNumOfRecords = 10;
+ private int defaultMaxRecords = 10;
+
+ // Shared by the static helpers below; never reassigned, hence final.
+ private static final ObjectMapper objectMapper = new ObjectMapper();
+ private static final Logger logger = LoggerFactory.getLogger(KorapClient.class);
+
+ /**
+  * @param numOfRecords
+  *            number of records requested when the caller asks for none
+  *            (or a non-positive amount)
+  * @param maxRecords
+  *            upper bound applied to the number of requested records
+  */
+ public KorapClient (int numOfRecords, int maxRecords) {
+     this.defaultNumOfRecords = numOfRecords;
+     this.defaultMaxRecords = maxRecords;
+ }
+
+ /**
+  * Fetches the "VirtualCollection" resource of the KorAP service and returns
+  * the parsed JSON document.
+  *
+  * @return the root node of the JSON response
+  * @throws URISyntaxException
+  *             if the resource URI cannot be built
+  * @throws HttpResponseException
+  *             if the service answers with a status other than 200
+  * @throws IOException
+  *             if the request fails or the response cannot be parsed
+  */
+ public JsonNode retrieveResources() throws URISyntaxException, IOException {
+
+     URI uri = new URIBuilder(SERVICE_URI + "VirtualCollection").build();
+     logger.info("Resource URI: " + uri.toString());
+     HttpGet httpRequest = new HttpGet(uri);
+
+     // try-with-resources closes the client as well as the response, and no
+     // longer dereferences a null response when execute() itself fails.
+     try (CloseableHttpClient client = HttpClients.createDefault();
+             CloseableHttpResponse response = client.execute(httpRequest)) {
+
+         int statusCode = response.getStatusLine().getStatusCode();
+         if (statusCode != HttpStatus.SC_OK) {
+             logger.warn("Error response code: " + statusCode);
+             logger.warn("Error message: "
+                     + response.getStatusLine().getReasonPhrase());
+             throw new HttpResponseException(statusCode, response
+                     .getStatusLine().getReasonPhrase());
+         }
+
+         try (InputStream jsonStream = new BufferedInputStream(response
+                 .getEntity().getContent())) {
+             return objectMapper.readValue(jsonStream, JsonNode.class);
+         }
+     }
+ }
+
+ /**
+  * Sends a search request to the KorAP service and maps the JSON response
+  * to a {@link KorapResult}.
+  *
+  * @param query
+  *            the query string
+  * @param queryLanguage
+  *            the language the query is written in
+  * @param version
+  *            the query language version
+  * @param startRecord
+  *            one-based index of the first record; sent to the service as
+  *            the offset {@code startRecord - 1}
+  * @param maximumRecords
+  *            number of records to request
+  * @param corpora
+  *            currently not evaluated; restricting the search to a virtual
+  *            collection is not implemented yet
+  * @return the parsed search result
+  * @throws HttpResponseException
+  *             if the service answers with a status other than 200
+  * @throws IOException
+  *             if the request cannot be built or sent, or the response
+  *             cannot be parsed
+  */
+ public KorapResult query(String query, QueryLanguage queryLanguage,
+         String version, int startRecord, int maximumRecords,
+         String[] corpora) throws HttpResponseException, IOException {
+
+     checkQuery(query, startRecord, maximumRecords);
+
+     HttpUriRequest httpRequest;
+     try {
+         httpRequest = createSearchRequest(query, queryLanguage, version,
+                 startRecord - 1, maximumRecords);
+     }
+     catch (URISyntaxException e) {
+         // keep the cause so the offending URI part shows up in the trace
+         throw new IOException("Failed creating http request.", e);
+     }
+
+     // try-with-resources closes the client as well as the response, and no
+     // longer dereferences a null response when execute() itself fails.
+     try (CloseableHttpClient client = HttpClients.createDefault();
+             CloseableHttpResponse response = client.execute(httpRequest)) {
+
+         int statusCode = response.getStatusLine().getStatusCode();
+         if (statusCode != HttpStatus.SC_OK) {
+             logger.warn("Error response code: " + statusCode);
+             logger.warn("Error message: "
+                     + response.getStatusLine().getReasonPhrase());
+             String[] errorMsg = parseError(response);
+             logger.warn(errorMsg[0] + "#" + errorMsg[1]);
+             throw new HttpResponseException(Integer.parseInt(errorMsg[0]),
+                     errorMsg[1]);
+         }
+
+         try (InputStream jsonStream = new BufferedInputStream(response
+                 .getEntity().getContent())) {
+             return objectMapper.readValue(jsonStream, KorapResult.class);
+         }
+         catch (IOException e) {
+             throw new IOException("Failed processing response.", e);
+         }
+     }
+ }
+
+ /**
+  * Extracts a diagnostic code and message from the JSON body of an error
+  * response.
+  *
+  * @param response
+  *            the error response; it is not closed here
+  * @return a two-element array: a numeric diagnostic code (as a string) and
+  *         the message. Messages of the form
+  *         {@code SRU diagnostic <code>: <text>} yield their own code, an
+  *         unsupported query language yields "4", anything else "1".
+  * @throws IOException
+  *             if the body cannot be read or is not valid JSON
+  */
+ private static String[] parseError(CloseableHttpResponse response)
+         throws IOException {
+     String message = null;
+     // The entity and the "error" field may both be missing on error responses.
+     if (response.getEntity() != null) {
+         InputStream is = response.getEntity().getContent();
+         JsonNode node = objectMapper.readTree(is);
+         JsonNode errorNode = (node == null) ? null : node.get("error");
+         if (errorNode != null) {
+             message = errorNode.textValue();
+         }
+     }
+     if (message == null) {
+         return new String[] { "1",
+                 "KorAP returned an error response without an error message." };
+     }
+
+     if (message.contains("SRU diagnostic")) {
+         String[] errorItems = message.split(":", 2);
+         String code = errorItems[0].replace("SRU diagnostic ", "").trim();
+         // Callers pass the code to Integer.parseInt, so only accept a
+         // well-formed "<digits>: <text>" diagnostic here.
+         if (errorItems.length == 2 && code.matches("\\d+")) {
+             return new String[] { code, errorItems[1].trim() };
+         }
+     }
+     if (message.contains("not a supported query language")) {
+         return new String[] { "4",
+                 "KorAP does not support the query language." };
+     }
+     return new String[] { "1", message };
+ }
+
+ /**
+  * Builds the GET request for the "search" resource of the KorAP service.
+  * A non-positive maximumRecords is replaced by defaultNumOfRecords and a
+  * value above defaultMaxRecords is capped to defaultMaxRecords. The
+  * startRecord value is sent unchanged as the "offset" parameter.
+  *
+  * @throws URISyntaxException
+  *             if the request URI cannot be built
+  */
+ private HttpGet createSearchRequest(String query,
+ QueryLanguage queryLanguage, String version, int startRecord,
+ int maximumRecords) throws URISyntaxException {
+
+ if (maximumRecords <= 0) {
+ maximumRecords = defaultNumOfRecords;
+ }
+ else if (maximumRecords > defaultMaxRecords) {
+ logger.info("limit truncated from {} to {}", maximumRecords,
+ defaultMaxRecords);
maximumRecords = defaultMaxRecords;
- }
-
- List<NameValuePair> params = new ArrayList<NameValuePair>();
- params.add(new BasicNameValuePair("q", query));
- params.add(new BasicNameValuePair("ql", queryLanguage.toString()));
- params.add(new BasicNameValuePair("v", version));
- params.add(new BasicNameValuePair("context", CONTEXT_TYPE));
- params.add(new BasicNameValuePair("count", String.valueOf(maximumRecords)));
- params.add(new BasicNameValuePair("offset", String.valueOf(startRecord)));
-
- URIBuilder builder = new URIBuilder(SERVICE_URI + "search");
- builder.addParameters(params);
- URI uri = builder.build();
- logger.info("Query URI: "+ uri.toString());
- HttpGet request = new HttpGet(uri);
- return request;
- }
+ }
- private void checkQuery(String query, int startRecord, int maxRecord) {
+ List<NameValuePair> params = new ArrayList<NameValuePair>();
+ params.add(new BasicNameValuePair("q", query));
+ params.add(new BasicNameValuePair("ql", queryLanguage.toString()));
+ params.add(new BasicNameValuePair("v", version));
+ params.add(new BasicNameValuePair("context", CONTEXT_TYPE));
+ params.add(new BasicNameValuePair("count", String
+ .valueOf(maximumRecords)));
+ params.add(new BasicNameValuePair("offset", String.valueOf(startRecord)));
+
+ URIBuilder builder = new URIBuilder(SERVICE_URI + "search");
+ builder.addParameters(params);
+ URI uri = builder.build();
+ logger.info("Query URI: " + uri.toString());
+ HttpGet request = new HttpGet(uri);
+ return request;
+ }
+
+ private void checkQuery(String query, int startRecord, int maxRecord) {
if (query == null) {
throw new NullPointerException("Query == null.");
}
@@ -221,6 +221,74 @@
}
if (maxRecord < 1) {
throw new IllegalArgumentException("Maximum records is too low.");
- }
- }
+ }
+ }
+
+
+ /**
+  * Requests the match info of the given match (all foundries, spans
+  * disabled) and returns its snippet wrapped in a {@code <snippet>}
+  * element.
+  *
+  * @param match
+  *            the match whose corpus ID, document ID and position ID address
+  *            the match info resource
+  * @return {@code "<snippet>" + snippet + "</snippet>"}; the snippet is
+  *         empty when the response has no "snippet" field
+  * @throws HttpResponseException
+  *             if the service answers with a status other than 200
+  * @throws IOException
+  *             if the request cannot be built or sent, or the response
+  *             cannot be parsed
+  */
+ public static String retrieveAnnotations(KorapMatch match) throws IOException {
+     HttpUriRequest httpRequest;
+     try {
+         httpRequest = createMatchInfoRequest(match.getCorpusID(),
+                 match.getDocID(), match.getPositionID(), "*");
+     }
+     catch (URISyntaxException e) {
+         // keep the cause so the offending URI part shows up in the trace
+         throw new IOException(
+                 "Failed creating http request for retrieving annotations.", e);
+     }
+
+     // try-with-resources closes the client as well as the response, and no
+     // longer dereferences a null response when execute() itself fails.
+     try (CloseableHttpClient client = HttpClients.createDefault();
+             CloseableHttpResponse response = client.execute(httpRequest)) {
+
+         int statusCode = response.getStatusLine().getStatusCode();
+         if (statusCode != HttpStatus.SC_OK) {
+             logger.warn("Error response code: " + statusCode);
+             logger.warn("Error message: "
+                     + response.getStatusLine().getReasonPhrase());
+             String[] errorMsg = parseError(response);
+             logger.warn(errorMsg[0] + "#" + errorMsg[1]);
+             throw new HttpResponseException(Integer.parseInt(errorMsg[0]),
+                     errorMsg[1]);
+         }
+
+         try (InputStream jsonStream = new BufferedInputStream(response
+                 .getEntity().getContent())) {
+             JsonNode root = objectMapper.readTree(jsonStream);
+             return "<snippet>" + root.at("/snippet").asText() + "</snippet>";
+         }
+         catch (IOException e) {
+             throw new IOException(
+                     "Failed processing response from KorAP match info API.", e);
+         }
+     }
+ }
+
+ /**
+  * Builds the GET request for
+  * {@code corpus/<resourceId>/<documentId>/<matchId>/matchInfo} with the
+  * given foundry and {@code spans=false}. The arguments are inserted into
+  * the URI string as they are.
+  *
+  * @throws URISyntaxException
+  *             if the assembled string is not a valid URI
+  */
+ private static HttpGet createMatchInfoRequest(String resourceId,
+         String documentId, String matchId, String foundry) throws URISyntaxException {
+     String matchInfoPath = "corpus/" + resourceId + "/" + documentId + "/"
+             + matchId + "/matchInfo?foundry=" + foundry + "&spans=false";
+
+     URI uri = new URIBuilder(SERVICE_URI + matchInfoPath).build();
+     logger.info("Query URI: " + uri.toString());
+     return new HttpGet(uri);
+ }
}
diff --git a/src/main/java/de/mannheim/ids/korap/sru/KorapEndpointDescription.java b/src/main/java/de/mannheim/ids/korap/sru/KorapEndpointDescription.java
index 76a36f6..ab6bbe7 100644
--- a/src/main/java/de/mannheim/ids/korap/sru/KorapEndpointDescription.java
+++ b/src/main/java/de/mannheim/ids/korap/sru/KorapEndpointDescription.java
@@ -18,136 +18,173 @@
import eu.clarin.sru.server.SRUConstants;
import eu.clarin.sru.server.SRUException;
import eu.clarin.sru.server.fcs.DataView;
-import eu.clarin.sru.server.fcs.DataView.DeliveryPolicy;
import eu.clarin.sru.server.fcs.utils.SimpleEndpointDescriptionParser;
+import eu.clarin.sru.server.fcs.DataView.DeliveryPolicy;
import eu.clarin.sru.server.fcs.EndpointDescription;
import eu.clarin.sru.server.fcs.Layer;
import eu.clarin.sru.server.fcs.ResourceInfo;
public class KorapEndpointDescription implements EndpointDescription {
- private List<DataView> dataviews;
- private List<URI> capabilities;
- private List<String> languages;
-
- private String defaultDataview = "hits";
- private List<Layer> layers;
+ private List<DataView> dataviews;
+ private List<URI> capabilities;
+ private List<String> languages;
- public KorapEndpointDescription(ServletContext context)
- throws SRUConfigException {
- try {
- URL url = context.getResource("/WEB-INF/endpoint-description.xml");
- EndpointDescription simpleEndpointDescription = SimpleEndpointDescriptionParser
- .parse(url);
- if (simpleEndpointDescription != null) {
- setSupportedLayers(simpleEndpointDescription
- .getSupportedLayers());
- setSupportedDataViews(simpleEndpointDescription
- .getSupportedDataViews());
- setCapabilities(simpleEndpointDescription.getCapabilities());
- }
+ private List<String> defaultDataviews;
+ private List<Layer> layers;
+
+ private List<AnnotationLayer> annotationLayers;
- } catch (MalformedURLException e) {
- throw new SRUConfigException(
- "error initializing resource info inventory", e);
- }
- setLanguages();
- }
+ /**
+  * Reads /WEB-INF/endpoint-description.xml from the servlet context and
+  * copies its layers, data views and capabilities into this description.
+  * Annotation layers and the default data views are derived from the same
+  * parsed values. When the parser returns null these fields stay unset;
+  * the supported languages are initialised in either case.
+  *
+  * @throws SRUConfigException
+  *             if the resource URL is malformed
+  */
+ public KorapEndpointDescription (ServletContext context)
+ throws SRUConfigException {
+ try {
+ URL url = context.getResource("/WEB-INF/endpoint-description.xml");
+ EndpointDescription simpleEndpointDescription = SimpleEndpointDescriptionParser
+ .parse(url);
+ if (simpleEndpointDescription != null) {
+ setSupportedLayers(simpleEndpointDescription
+ .getSupportedLayers());
+ setAnnotationLayers(simpleEndpointDescription.getSupportedLayers());
+ setSupportedDataViews(simpleEndpointDescription
+ .getSupportedDataViews());
+ setDefaultDataViews(simpleEndpointDescription
+ .getSupportedDataViews());
+ setCapabilities(simpleEndpointDescription.getCapabilities());
+ }
- @Override
- public void destroy() {
- dataviews.clear();
- capabilities.clear();
- languages.clear();
- }
+ }
+ catch (MalformedURLException e) {
+ throw new SRUConfigException(
+ "error initializing resource info inventory", e);
+ }
+ setLanguages();
+ }
- public void setLanguages() {
- languages = new ArrayList<String>();
- languages.add("deu");
- }
+ /**
+  * Releases the lists held by this description. Fields that were never
+  * initialised (the constructor skips them when no description could be
+  * parsed) are ignored instead of raising a NullPointerException.
+  */
+ @Override
+ public void destroy() {
+     clearIfSet(dataviews);
+     clearIfSet(capabilities);
+     clearIfSet(languages);
+     // Lists allocated by this class for the advanced data views.
+     clearIfSet(defaultDataviews);
+     clearIfSet(annotationLayers);
+ }
+
+ /** Clears the list unless it is null. */
+ private static void clearIfSet(List<?> list) {
+     if (list != null) {
+         list.clear();
+     }
+ }
- @Override
- public List<URI> getCapabilities() {
- return capabilities;
- }
+ public void setLanguages() {
+ languages = new ArrayList<String>();
+ languages.add("deu");
+ }
- public void setCapabilities(List<URI> list) throws SRUConfigException {
- capabilities = list;
-// new ArrayList<URI>();
-// try {
-// capabilities.add(new URI(
-// "http://clarin.eu/fcs/capability/basic-search"));
-// } catch (URISyntaxException e) {
-// throw new SRUConfigException("Found an invalid capability URI.");
-// }
- }
+ @Override
+ public List<URI> getCapabilities() {
+ return capabilities;
+ }
- @Override
- public List<DataView> getSupportedDataViews() {
- return dataviews;
- }
+ public void setCapabilities(List<URI> list) throws SRUConfigException {
+ capabilities = list;
+ }
- public void setSupportedDataViews(List<DataView> list) {
- dataviews = list;
+ @Override
+ public List<DataView> getSupportedDataViews() {
+ return dataviews;
+ }
- // new ArrayList<DataView>();
- // dataviews.add(new DataView("hits",
- // "application/x-clarin-fcs-hits+xml",
- // DeliveryPolicy.SEND_BY_DEFAULT));
- // dataviews.add(new DataView("kwic",
- // "application/x-clarin-fcs-kwic+xml",
- // DeliveryPolicy.NEED_TO_REQUEST));
- }
+ public void setSupportedDataViews(List<DataView> list) {
+ dataviews = list;
+ }
- @Override
- public List<ResourceInfo> getResourceList(String pid) throws SRUException {
+ @Override
+ public List<ResourceInfo> getResourceList(String pid) throws SRUException {
- List<ResourceInfo> resourceList = new ArrayList<ResourceInfo>();
-
- Map<String,String> title;
- Map<String,String> description;
+ List<ResourceInfo> resourceList = new ArrayList<ResourceInfo>();
- JsonNode resources;
+ Map<String, String> title;
+ Map<String, String> description;
- try {
- resources = KorapSRU.korapClient.retrieveResources();
- } catch (URISyntaxException | IOException e) {
- throw new SRUException(SRUConstants.SRU_GENERAL_SYSTEM_ERROR,
- "Failed retrieving resources.");
- }
+ JsonNode resources;
- for (JsonNode r : resources) {
- title = new HashMap<String, String>();
- title.put("de", r.get("name").asText());
- title.put("en", r.get("name").asText());
+ try {
+ resources = KorapSRU.korapClient.retrieveResources();
+ }
+ catch (URISyntaxException | IOException e) {
+ throw new SRUException(SRUConstants.SRU_GENERAL_SYSTEM_ERROR,
+ "Failed retrieving resources.");
+ }
- description = new HashMap<String, String>();
- description.put("de", r.get("description").asText());
+ for (JsonNode r : resources) {
+ title = new HashMap<String, String>();
+ title.put("de", r.get("name").asText());
+ title.put("en", r.get("name").asText());
- ResourceInfo ri = new ResourceInfo(r.get("id").asText(), title,
- description, KorapSRU.KORAP_WEB_URL, languages, dataviews,
- this.getSupportedLayers(), null);
- resourceList.add(ri);
- }
-
- return resourceList;
- }
+ description = new HashMap<String, String>();
+ description.put("de", r.get("description").asText());
- public String getDefaultDataView() {
- return defaultDataview;
- }
+ ResourceInfo ri = new ResourceInfo(r.get("id").asText(), title,
+ description, KorapSRU.KORAP_WEB_URL, languages, dataviews,
+ this.getSupportedLayers(), null);
+ resourceList.add(ri);
+ }
- public void setDefaultDataView(String defaultDataview) {
- this.defaultDataview = defaultDataview;
- }
+ return resourceList;
+ }
- public void setSupportedLayers(List<Layer> list) {
- this.layers = list;
- }
+ /**
+  * Returns the identifiers of the data views that are sent by default.
+  * <p>
+  * A fresh, modifiable copy is returned on every call. Callers append
+  * per-request data views to the result, and handing out the internal list
+  * would let those additions accumulate in the endpoint-wide defaults.
+  *
+  * @return a new list, empty when no defaults have been configured
+  */
+ public List<String> getDefaultDataViews() {
+     if (defaultDataviews == null) {
+         return new ArrayList<String>();
+     }
+     return new ArrayList<String>(defaultDataviews);
+ }
- @Override
- public List<Layer> getSupportedLayers() {
- return layers;
- }
+ /**
+  * Rebuilds the list of default data views from the given supported views,
+  * keeping the identifier of every view whose delivery policy is
+  * SEND_BY_DEFAULT.
+  */
+ public void setDefaultDataViews(List<DataView> supportedDataViews) {
+     defaultDataviews = new ArrayList<String>();
+     for (DataView view : supportedDataViews) {
+         if (view.getDeliveryPolicy() != DeliveryPolicy.SEND_BY_DEFAULT) {
+             continue;
+         }
+         defaultDataviews.add(view.getIdentifier());
+     }
+ }
+ public void setSupportedLayers(List<Layer> layers) {
+ this.layers = layers;
+ }
+
+ @Override
+ public List<Layer> getSupportedLayers() {
+ return layers;
+ }
+
+ public List<AnnotationLayer> getAnnotationLayers() {
+ return annotationLayers;
+ }
+
+ /**
+  * Derives the annotation layers from the supported layers. A text layer
+  * uses its type as layer code; a part-of-speech or lemma layer with a
+  * qualifier gets the code {@code <qualifier>/p} or {@code <qualifier>/l}.
+  * Qualified layers of any other type are skipped.
+  */
+ public void setAnnotationLayers(List<Layer> layers) {
+     annotationLayers = new ArrayList<AnnotationLayer>(layers.size());
+
+     final String textType = AnnotationLayer.TYPE.TEXT.toString();
+     final String posType = AnnotationLayer.TYPE.POS.toString();
+     final String lemmaType = AnnotationLayer.TYPE.LEMMA.toString();
+
+     for (Layer layer : layers) {
+         String type = layer.getType();
+         String layerCode;
+
+         if (type.equals(textType)) {
+             layerCode = type;
+         }
+         else {
+             String qualifier = layer.getQualifier();
+             if (qualifier == null) {
+                 // NOTE(review): a non-text layer without qualifier is still
+                 // registered, with an empty layer code - confirm intended.
+                 layerCode = "";
+             }
+             else if (type.equals(posType)) {
+                 layerCode = qualifier + "/p";
+             }
+             else if (type.equals(lemmaType)) {
+                 layerCode = qualifier + "/l";
+             }
+             else {
+                 continue;
+             }
+         }
+
+         annotationLayers.add(new AnnotationLayer(layerCode,
+                 layer.getResultId()));
+     }
+ }
}
diff --git a/src/main/java/de/mannheim/ids/korap/sru/KorapMatch.java b/src/main/java/de/mannheim/ids/korap/sru/KorapMatch.java
index 5c960af..cf1db12 100644
--- a/src/main/java/de/mannheim/ids/korap/sru/KorapMatch.java
+++ b/src/main/java/de/mannheim/ids/korap/sru/KorapMatch.java
@@ -1,83 +1,108 @@
package de.mannheim.ids.korap.sru;
+import java.util.ArrayList;
+import java.util.List;
+
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
@JsonIgnoreProperties(ignoreUnknown = true)
public class KorapMatch {
-
- private String ID;
- private String docID;
- private String leftContext;
- private String keyword;
- private String rightContext;
- private String snippet;
- private String text;
-
- public KorapMatch() {}
-
- public KorapMatch(String source, String leftContext, String keyword,
- String rightContext) {
- this.docID = source;
- this.leftContext = leftContext;
- this.keyword = keyword;
- this.rightContext = rightContext;
- }
- @JsonProperty("ID")
- public String getID() {
- return ID;
- }
- public void setID(String id) {
- this.ID = id;
- }
+ private String ID;
+ private String positionID;
+ private String docID;
+ private String corpusID;
+ private String leftContext;
+ private String keyword;
+ private String rightContext;
+ private String snippet;
+ private String text;
- public String getDocID() {
- return docID;
- }
+ private List<AnnotationLayer> annotationLayers = new ArrayList<AnnotationLayer>();
- public void setDocID(String docID) {
- this.docID = docID;
- }
+ public KorapMatch () {}
- public String getLeftContext() {
- return leftContext;
- }
+ @JsonProperty("ID")
+ public String getID() {
+ return ID;
+ }
- public String getKeyword() {
- return keyword;
- }
+ public void setID(String id) {
+ this.ID = id;
+ }
- public String getRightContext() {
- return rightContext;
- }
+ /**
+  * Derives the position ID from the match ID: the ID is split at "-" and
+  * the third and fourth parts are joined with "-".
+  *
+  * @throws IllegalStateException
+  *             if the match ID has not been set or has fewer than four
+  *             "-"-separated parts
+  */
+ public void setPositionID() {
+     if (ID == null) {
+         throw new IllegalStateException(
+                 "The match ID must be set before the position ID is derived.");
+     }
+     String[] idParts = ID.split("-");
+     if (idParts.length < 4) {
+         throw new IllegalStateException(
+                 "Unexpected match ID format: " + ID);
+     }
+     this.positionID = idParts[2] + "-" + idParts[3];
+ }
- public void setLeftContext(String leftContext) {
- this.leftContext = leftContext;
- }
+ public String getPositionID() {
+ return positionID;
+ }
- public void setKeyword(String keyword) {
- this.keyword = keyword;
- }
+ public String getDocID() {
+ return docID;
+ }
- public void setRightContext(String rightContext) {
- this.rightContext = rightContext;
- }
+ /**
+  * Stores the document ID with every occurrence of
+  * {@code corpusID + "_"} removed.
+  * <p>
+  * NOTE(review): the stripping only happens when the corpus ID has been set
+  * before this setter is called - confirm the order in which the fields
+  * are populated.
+  *
+  * @param docID
+  *            the document ID as delivered; may be null
+  */
+ public void setDocID(String docID) {
+     if (docID == null || corpusID == null) {
+         // Without a corpus ID there is nothing to strip; the former code
+         // removed the literal "null_" in that case.
+         this.docID = docID;
+         return;
+     }
+     this.docID = docID.replace(corpusID + "_", "");
+ }
- public String getSnippet() {
- return snippet;
- }
+ public String getCorpusID() {
+ return corpusID;
+ }
- public void setSnippet(String snippet) {
- this.snippet = snippet;
- }
+ public void setCorpusID(String corpusID) {
+ this.corpusID = corpusID;
+ }
- public String getText() {
- return text;
- }
+ public String getLeftContext() {
+ return leftContext;
+ }
- public void setText(String text) {
- this.text = text;
- }
+ public String getKeyword() {
+ return keyword;
+ }
+
+ public String getRightContext() {
+ return rightContext;
+ }
+
+ public void setLeftContext(String leftContext) {
+ this.leftContext = leftContext;
+ }
+
+ public void setKeyword(String keyword) {
+ this.keyword = keyword;
+ }
+
+ public void setRightContext(String rightContext) {
+ this.rightContext = rightContext;
+ }
+
+ public String getSnippet() {
+ return snippet;
+ }
+
+ public void setSnippet(String snippet) {
+ this.snippet = snippet;
+ }
+
+ public String getText() {
+ return text;
+ }
+
+ public void setText(String text) {
+ this.text = text;
+ }
+
+ public List<AnnotationLayer> getAnnotationLayers() {
+ return annotationLayers;
+ }
+
+ public void setAnnotationLayers(List<AnnotationLayer> annotationLayers) {
+ this.annotationLayers = annotationLayers;
+ }
}
diff --git a/src/main/java/de/mannheim/ids/korap/sru/KorapSRU.java b/src/main/java/de/mannheim/ids/korap/sru/KorapSRU.java
index f57f6f9..4982161 100644
--- a/src/main/java/de/mannheim/ids/korap/sru/KorapSRU.java
+++ b/src/main/java/de/mannheim/ids/korap/sru/KorapSRU.java
@@ -1,6 +1,7 @@
package de.mannheim.ids.korap.sru;
import java.io.IOException;
+import java.util.List;
import java.util.Map;
import javax.servlet.ServletContext;
@@ -80,10 +81,12 @@
checkRequestRecordSchema(request);
- String dataview = korapEndpointDescription.getDefaultDataView();
+ List<String> dataviews = korapEndpointDescription.getDefaultDataViews();
if (request.getExtraRequestDataNames().contains("x-fcs-dataviews")) {
- dataview = getRequestDataView(
- request.getExtraRequestData("x-fcs-dataviews"), diagnostics);
+ String extraDataview = getRequestDataView(
+ request.getExtraRequestData("x-fcs-dataviews"), diagnostics);
+ if (extraDataview!=null)
+ dataviews.add(extraDataview);
}
String queryType = request.getQueryType();
@@ -161,7 +164,8 @@
e.getMessage());
}
- return new KorapSRUSearchResultSet(diagnostics, korapResult, dataview);
+ return new KorapSRUSearchResultSet(diagnostics, korapResult, dataviews,
+ korapEndpointDescription);
}
private String[] getCorporaList(SRURequest request) {
@@ -204,10 +208,10 @@
diagnostics.addDiagnostic(
Constants.FCS_DIAGNOSTIC_REQUESTED_DATA_VIEW_INVALID,
"The requested Data View " + requestDataview
- + " is not supported.", "The default Data View "
- + korapEndpointDescription.getDefaultDataView()
- + " is used.");
+ + " is not supported.", "Using the default Data View(s): "
+ + korapEndpointDescription.getDefaultDataViews()
+ + " .");
}
- return korapEndpointDescription.getDefaultDataView();
+ return null;
}
}
diff --git a/src/main/java/de/mannheim/ids/korap/sru/KorapSRUSearchResultSet.java b/src/main/java/de/mannheim/ids/korap/sru/KorapSRUSearchResultSet.java
index 78a2309..16b3a1b 100644
--- a/src/main/java/de/mannheim/ids/korap/sru/KorapSRUSearchResultSet.java
+++ b/src/main/java/de/mannheim/ids/korap/sru/KorapSRUSearchResultSet.java
@@ -3,6 +3,10 @@
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
@@ -18,81 +22,196 @@
import eu.clarin.sru.server.SRUDiagnosticList;
import eu.clarin.sru.server.SRUException;
import eu.clarin.sru.server.SRUSearchResultSet;
+import eu.clarin.sru.server.fcs.AdvancedDataViewWriter;
+import eu.clarin.sru.server.fcs.Layer;
import eu.clarin.sru.server.fcs.XMLStreamWriterHelper;
public class KorapSRUSearchResultSet extends SRUSearchResultSet {
- private Logger logger = (Logger) LoggerFactory
- .getLogger(KorapSRUSearchResultSet.class);
-
- private int i = -1;
- private KorapResult korapResult;
- private String dataview;
- private SAXParser saxParser;
+ private Logger logger = (Logger) LoggerFactory
+ .getLogger(KorapSRUSearchResultSet.class);
- public KorapSRUSearchResultSet(SRUDiagnosticList diagnostics,
- KorapResult korapResult, String dataview)
- throws SRUException {
- super(diagnostics);
+ private int i = -1;
+ private KorapResult korapResult;
+ private List<String> dataviews;
+ private KorapEndpointDescription endpointDescription;
+ private SAXParser saxParser;
+
+ Layer textLayer;
- SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
- try {
- saxParser = saxParserFactory.newSAXParser();
- } catch (ParserConfigurationException | SAXException e) {
- throw new SRUException(SRUConstants.SRU_GENERAL_SYSTEM_ERROR);
- }
+ /**
+ * Creates a result set over the given KorAP result.
+ *
+ * @param diagnostics the diagnostic list passed on to the superclass
+ * @param korapResult the KorAP result whose matches are written as
+ *        records
+ * @param dataviews the names of the data views to write; the list is
+ *        stored as is, not copied
+ * @param korapEndpointDescription the endpoint description providing
+ *        the supported layers and the annotation layers
+ * @throws SRUException with a general system error if no SAX parser
+ *         can be created
+ */
+ public KorapSRUSearchResultSet (SRUDiagnosticList diagnostics,
+ KorapResult korapResult, List<String> dataviews,
+ KorapEndpointDescription korapEndpointDescription)
+ throws SRUException {
+ super(diagnostics);
- this.korapResult = korapResult;
- this.dataview = dataview;
- }
+ // One parser instance is created here and reused for every record.
+ SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
+ try {
+ saxParser = saxParserFactory.newSAXParser();
+ }
+ catch (ParserConfigurationException | SAXException e) {
+ throw new SRUException(SRUConstants.SRU_GENERAL_SYSTEM_ERROR,e);
+ }
- @Override
- public int getTotalRecordCount() {
- return korapResult.getTotalResults();
- }
+ this.korapResult = korapResult;
+ this.dataviews = dataviews;
+ this.endpointDescription = korapEndpointDescription;
- @Override
- public int getRecordCount() {
- return korapResult.getMatchSize();
- }
+ // NOTE(review): assumes the first supported layer is the text layer
+ // and that the list is not empty - confirm against the endpoint
+ // description.
+ textLayer = endpointDescription.getSupportedLayers().get(0);
+ }
- @Override
- public String getRecordSchemaIdentifier() {
- return KorapSRU.CLARIN_FCS_RECORD_SCHEMA;
- }
+ /**
+ * Returns the total number of results reported by the KorAP result.
+ *
+ * @return the value of {@code korapResult.getTotalResults()}
+ */
+ @Override
+ public int getTotalRecordCount() {
+ return korapResult.getTotalResults();
+ }
- @Override
- public boolean nextRecord() throws SRUException {
- return (++i < korapResult.getMatchSize() ? true : false);
- }
+ /**
+ * Returns the number of matches held by the KorAP result.
+ *
+ * @return the value of {@code korapResult.getMatchSize()}
+ */
+ @Override
+ public int getRecordCount() {
+ return korapResult.getMatchSize();
+ }
- @Override
- public String getRecordIdentifier() {
- return korapResult.getMatch(i).getID();
- }
+ /**
+ * Returns the record schema identifier used for all records of this
+ * result set.
+ *
+ * @return {@code KorapSRU.CLARIN_FCS_RECORD_SCHEMA}
+ */
+ @Override
+ public String getRecordSchemaIdentifier() {
+ return KorapSRU.CLARIN_FCS_RECORD_SCHEMA;
+ }
- @Override
- public void writeRecord(XMLStreamWriter writer) throws XMLStreamException {
- KorapMatch match = korapResult.getMatch(i);
- String snippet = "<snippet>" + match.getSnippet() + "</snippet>";
- InputStream is = new ByteArrayInputStream(snippet.getBytes());
- try {
- saxParser.parse(is, new KorapMatchHandler(match));
- } catch (SAXException | IOException e) {
- // /throw e;
- }
-
- if (dataview.equals("kwic")) {
- XMLStreamWriterHelper.writeResourceWithKWICDataView(writer,
- match.getID(), KorapSRU.redirectBaseURI + match.getID(),
- match.getLeftContext(), match.getKeyword(),
- match.getRightContext());
-
- } else {
- XMLStreamWriterHelper.writeResourceWithHitsDataView(writer,
- match.getID(), KorapSRU.redirectBaseURI + match.getID(),
- match.getLeftContext(), match.getKeyword(),
- match.getRightContext());
- }
- }
+    /**
+     * Advances the cursor to the next match.
+     *
+     * @return {@code true} if the cursor now points at an existing match,
+     *         {@code false} if the matches are exhausted
+     */
+    @Override
+    public boolean nextRecord() throws SRUException {
+        i += 1;
+        return i < korapResult.getMatchSize();
+    }
+
+    /**
+     * Returns the identifier of the match the cursor currently points at.
+     *
+     * @return the ID of the current match
+     */
+    @Override
+    public String getRecordIdentifier() {
+        KorapMatch currentMatch = korapResult.getMatch(i);
+        return currentMatch.getID();
+    }
+
+    /**
+     * Writes the match the cursor currently points at as one resource
+     * containing one resource fragment with the data views.
+     * <p>
+     * The match snippet and its annotations are parsed before any XML is
+     * written, so that a retrieval or parsing failure does not leave a
+     * half-written resource element in the output.
+     *
+     * @param writer the stream writer the record is written to
+     * @throws XMLStreamException if the snippet or the annotations cannot
+     *         be retrieved or parsed, or if writing fails
+     */
+    @Override
+    public void writeRecord(XMLStreamWriter writer) throws XMLStreamException {
+        KorapMatch match = parseMatch();
+        match.setPositionID();
+
+        // Do all fallible preparation first; nothing has been written yet.
+        List<AnnotationLayer> annotationLayers;
+        try {
+            annotationLayers = parseAnnotations(match);
+        }
+        catch (SRUException e) {
+            throw new XMLStreamException(e);
+        }
+
+        XMLStreamWriterHelper.writeStartResource(writer, match.getID(), null);
+        XMLStreamWriterHelper.writeStartResourceFragment(writer, null, null);
+
+        writeAdvancedDataView(writer, annotationLayers);
+
+        XMLStreamWriterHelper.writeEndResourceFragment(writer);
+        XMLStreamWriterHelper.writeEndResource(writer);
+    }
+
+    /**
+     * Fetches the match the cursor currently points at, wraps its snippet
+     * in a {@code <snippet>} root element and parses it with a
+     * {@link KorapMatchHandler} created for that match.
+     *
+     * @return the current match, after its snippet has been parsed
+     * @throws XMLStreamException if the snippet is not well-formed or
+     *         cannot be read
+     */
+    private KorapMatch parseMatch() throws XMLStreamException {
+        KorapMatch match = korapResult.getMatch(i);
+        String snippet = "<snippet>" + match.getSnippet() + "</snippet>";
+        // The wrapped snippet carries no XML declaration, so the parser
+        // reads the bytes as UTF-8. Encode explicitly instead of relying on
+        // the platform default charset, which breaks non-ASCII text on
+        // platforms whose default is not UTF-8.
+        InputStream is = new ByteArrayInputStream(
+                snippet.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+        try {
+            saxParser.parse(is, new KorapMatchHandler(match));
+        }
+        catch (SAXException | IOException e) {
+            throw new XMLStreamException(e);
+        }
+        return match;
+    }
+
+    /**
+     * Retrieves the annotation snippet of the given match through
+     * {@code KorapClient.retrieveAnnotations} and parses it with an
+     * {@link AnnotationHandler} set up with the annotation layers of the
+     * endpoint description.
+     *
+     * @param match the match whose annotations are retrieved
+     * @return the annotation layers provided by the handler after parsing
+     * @throws SRUException with a general system error if the annotations
+     *         cannot be retrieved or parsed
+     */
+    private List<AnnotationLayer> parseAnnotations(KorapMatch match)
+            throws SRUException {
+        AnnotationHandler annotationHandler = new AnnotationHandler(
+                endpointDescription.getAnnotationLayers());
+
+        String annotationSnippet;
+        try {
+            annotationSnippet = KorapClient.retrieveAnnotations(match);
+        }
+        catch (IOException e) {
+            throw new SRUException(SRUConstants.SRU_GENERAL_SYSTEM_ERROR, e);
+        }
+
+        // Encode explicitly as UTF-8 rather than with the platform default
+        // charset, so that non-ASCII annotation values survive parsing.
+        // NOTE(review): assumes the snippet has no XML declaration naming
+        // a different encoding - confirm against KorapClient.
+        InputStream is = new ByteArrayInputStream(annotationSnippet
+                .getBytes(java.nio.charset.StandardCharsets.UTF_8));
+        try {
+            saxParser.parse(is, annotationHandler);
+        }
+        catch (SAXException | IOException e) {
+            throw new SRUException(SRUConstants.SRU_GENERAL_SYSTEM_ERROR, e);
+        }
+
+        return annotationHandler.getAnnotationLayers();
+    }
+
+    /**
+     * Writes the data views for the given annotation layers.
+     * <p>
+     * The hits data view is always written, using the result ID of the
+     * text layer. The advanced data view is written in addition only when
+     * the requested data views contain {@code "adv"}.
+     *
+     * @param writer the stream writer to write to
+     * @param annotationLayers the annotation layers of the current match
+     * @throws XMLStreamException if writing fails
+     */
+    private void writeAdvancedDataView(XMLStreamWriter writer,
+            List<AnnotationLayer> annotationLayers)
+            throws XMLStreamException {
+        AdvancedDataViewWriter spanWriter = new AdvancedDataViewWriter(
+                AdvancedDataViewWriter.Unit.ITEM);
+        addAnnotationsToWriter(spanWriter, annotationLayers);
+
+        spanWriter.writeHitsDataView(writer, textLayer.getResultId());
+
+        if (!dataviews.contains("adv")) {
+            return;
+        }
+        spanWriter.writeAdvancedDataView(writer);
+    }
+
+    /**
+     * Adds the annotations of all given layers as spans to the data view
+     * writer, visiting the annotations of each layer in ascending key
+     * order.
+     * <p>
+     * Only the first annotation stored under a key is used, because the
+     * FCS advanced data view does not allow multiple annotations on the
+     * same segment. On the text layer, consecutive keyword annotations are
+     * merged into one highlighted span covering the whole run.
+     * <p>
+     * The keyword state is kept per layer and a run still open at the end
+     * of the text layer is flushed, so a keyword at the very end of the
+     * snippet is not lost and values of other layers never leak into it.
+     *
+     * @param helper the writer collecting the spans
+     * @param annotationLayers the layers to add; {@code null} adds nothing
+     */
+    private void addAnnotationsToWriter(AdvancedDataViewWriter helper,
+            List<AnnotationLayer> annotationLayers) {
+        if (annotationLayers == null) {
+            return;
+        }
+
+        String textLayerCode = AnnotationLayer.TYPE.TEXT.toString();
+
+        for (AnnotationLayer annotationLayer : annotationLayers) {
+            // Constant on the left keeps the check safe for a null code.
+            boolean isTextLayer = textLayerCode
+                    .equals(annotationLayer.getLayerCode());
+
+            Map<Integer, List<Annotation>> map = annotationLayer
+                    .getAnnotationMap();
+            Integer[] keys = map.keySet().toArray(new Integer[0]);
+            Arrays.sort(keys);
+
+            // State of the keyword run currently being collected.
+            boolean keywordOpen = false;
+            StringBuilder keyword = new StringBuilder();
+            long start = 0;
+            long end = 0;
+
+            for (Integer key : keys) {
+                List<Annotation> annotations = map.get(key);
+                if (annotations == null || annotations.isEmpty()) {
+                    continue;
+                }
+
+                // FCS advanced dataview does not allow multiple
+                // annotations on the same segment.
+                Annotation annotation = annotations.get(0);
+
+                if (annotation.isKeyword()) {
+                    // NOTE(review): keyword annotations on non-text layers
+                    // are not emitted, exactly as before - confirm that
+                    // this is intended.
+                    if (!isTextLayer) {
+                        continue;
+                    }
+                    if (!keywordOpen) {
+                        keywordOpen = true;
+                        keyword.setLength(0);
+                        start = annotation.getStart();
+                    }
+                    end = annotation.getEnd();
+                    keyword.append(annotation.getValue());
+                    continue;
+                }
+
+                if (keywordOpen) {
+                    // The keyword run ended: emit it as a highlighted span.
+                    helper.addSpan(annotationLayer.getLayerId(), start, end,
+                            keyword.toString(), 1);
+                    keywordOpen = false;
+                }
+                helper.addSpan(annotationLayer.getLayerId(),
+                        annotation.getStart(), annotation.getEnd(),
+                        annotation.getValue());
+            }
+
+            if (keywordOpen) {
+                // The layer ended while a keyword run was still open.
+                helper.addSpan(annotationLayer.getLayerId(), start, end,
+                        keyword.toString(), 1);
+            }
+        }
+    }
}