Added comments; removed the Annotation id and split the KorapSRU search handling into helper methods.
Change-Id: I9222eb8da4eb018c48e3d2f29163195b343b6548
diff --git a/src/main/java/de/mannheim/ids/korap/sru/Annotation.java b/src/main/java/de/mannheim/ids/korap/sru/Annotation.java
index 65f6030..7580ef6 100644
--- a/src/main/java/de/mannheim/ids/korap/sru/Annotation.java
+++ b/src/main/java/de/mannheim/ids/korap/sru/Annotation.java
@@ -1,33 +1,24 @@
package de.mannheim.ids.korap.sru;
/**
+ *
* @author margaretha
*
*/
public class Annotation {
- private int id;
private long start;
private long end;
private String value;
private int hitLevel;
- public Annotation (int id, String value, long start, long end, int hitLevel) {
- this.id = id;
+ public Annotation (String value, long start, long end, int hitLevel) {
this.value = value;
this.start = start;
this.end = end;
this.hitLevel = hitLevel;
}
- public int getId() {
- return id;
- }
-
- public void setId(int id) {
- this.id = id;
- }
-
public long getStart() {
return start;
}
diff --git a/src/main/java/de/mannheim/ids/korap/sru/AnnotationHandler.java b/src/main/java/de/mannheim/ids/korap/sru/AnnotationHandler.java
index 4cc66b4..3d23eb0 100644
--- a/src/main/java/de/mannheim/ids/korap/sru/AnnotationHandler.java
+++ b/src/main/java/de/mannheim/ids/korap/sru/AnnotationHandler.java
@@ -10,6 +10,13 @@
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
+/**
+ * Handler class for parsing the match snippet and extracting the
+ * annotations returned by the KorAP MatchInfo service.
+ *
+ * @author margaretha
+ *
+ */
public class AnnotationHandler extends DefaultHandler {
private Logger logger = (Logger) LoggerFactory
@@ -18,12 +25,11 @@
private boolean startSegment = true;
private boolean startSentence = false;
- private int matchLevel = 0;
+ private int matchLevel;
private List<AnnotationLayer> annotationLayers;
private List<String> annotationStrings;
- private StringBuilder segmentBuilder = new StringBuilder();
private StringBuilder textBuilder = new StringBuilder();
private String text = "";
@@ -31,6 +37,12 @@
long segmentStartOffset = 0, segmentEndOffset = 0;
long textStartOffset = 0, textEndOffset = 0;
+ /**
+ * Constructs an AnnotationHandler for the given annotation layer
+ * list.
+ *
+ * @param annotationLayers
+ */
public AnnotationHandler (List<AnnotationLayer> annotationLayers) {
this.annotationLayers = annotationLayers;
annotationStrings = new ArrayList<String>();
@@ -41,35 +53,43 @@
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
- if (startSentence && attributes.getValue("title") != null && qName.equals("span")) {
+ // Collects the annotations within <span class="match">
+ if (startSentence && attributes.getValue("title") != null
+ && qName.equals("span")) {
if (startSegment) {
segmentStartOffset = segmentEndOffset;
startSegment = false;
}
annotationStrings.add(attributes.getValue("title"));
}
- else if (attributes.getValue("class") !=null && qName.equals("span")){
- if (attributes.getValue("class").equals("match")){
+ // start collecting annotations at <span class="match">; stop at any other span with a class
+ else if (attributes.getValue("class") != null && qName.equals("span")) {
+ if (attributes.getValue("class").equals("match")) {
startSentence = true;
}
else {
startSentence = false;
}
}
+ // add a text segment to the text layer
else if (qName.equals("mark")) {
- text = textBuilder.toString();
+ text = textBuilder.toString();
textBuilder = new StringBuilder();
if (!text.isEmpty()) {
- addAnnotationToMap(text, annotationLayers.get(0),
- matchLevel, textStartOffset, textEndOffset);
+ addAnnotationToMap(text, annotationLayers.get(0), matchLevel,
+ textStartOffset, textEndOffset);
textStartOffset = textEndOffset;
}
matchLevel++;
}
- super.startElement(uri, localName, qName, attributes);
-
}
+ /**
+ * Parses and extracts the layer code and its value from the given
+ * annotation string.
+ *
+ * @param annotationStr
+ */
private void parseAnnotation(String annotationStr) {
if (annotationStr == null || annotationStr.isEmpty()) return;
@@ -80,11 +100,9 @@
String value = strArr[1];
for (AnnotationLayer annotationLayer : annotationLayers) {
- if (annotationLayer.getLayerCode().equals(
- AnnotationLayer.TYPE.TEXT.toString())) {
- segmentBuilder = new StringBuilder();
- }
- else if (annotationLayer.getLayerCode().equals(layerCode)) {
+ if (annotationLayer.getLayerCode().equals(layerCode)
+ && !annotationLayer.getLayerCode().equals(
+ AnnotationLayer.TYPE.TEXT.toString())) {
addAnnotationToMap(value, annotationLayer, 0);
break;
}
@@ -93,14 +111,16 @@
private void addAnnotationToMap(String value,
AnnotationLayer annotationLayer, int hitLevel) {
- addAnnotationToMap(value, annotationLayer, hitLevel, segmentStartOffset, segmentEndOffset);
+ addAnnotationToMap(value, annotationLayer, hitLevel,
+ segmentStartOffset, segmentEndOffset);
}
-
- private void addAnnotationToMap(String value,
- AnnotationLayer annotationLayer, int hitLevel, long startOffset, long endOffset) {
- Annotation annotation = new Annotation(id, value, startOffset,
- endOffset, hitLevel);
+ private void addAnnotationToMap(String value,
+ AnnotationLayer annotationLayer, int hitLevel, long startOffset,
+ long endOffset) {
+
+ Annotation annotation = new Annotation(value, startOffset, endOffset,
+ hitLevel);
Map<Integer, List<Annotation>> map = annotationLayer.getAnnotationMap();
@@ -118,15 +138,16 @@
public void endElement(String uri, String localName, String qName)
throws SAXException {
+ // add a text segment to the text layer
if (qName.equals("mark")) {
-
text = textBuilder.toString();
textBuilder = new StringBuilder();
- addAnnotationToMap(text, annotationLayers.get(0),
- matchLevel, textStartOffset, textEndOffset);
+ addAnnotationToMap(text, annotationLayers.get(0), matchLevel,
+ textStartOffset, textEndOffset);
textStartOffset = textEndOffset;
matchLevel--;
}
+ // parses all the annotations for a span at one position.
else if (!startSegment && qName.equals("span")) {
for (String annotationStr : annotationStrings) {
parseAnnotation(annotationStr);
@@ -136,21 +157,20 @@
annotationStrings.clear();
}
}
-
+
@Override
public void endDocument() throws SAXException {
+ // add a text segment to the text layer
text = textBuilder.toString();
- addAnnotationToMap(text, annotationLayers.get(0),
- matchLevel, textStartOffset, textEndOffset);
+ textBuilder = new StringBuilder();
+ addAnnotationToMap(text, annotationLayers.get(0), matchLevel,
+ textStartOffset, textEndOffset);
}
@Override
public void characters(char[] ch, int start, int length)
throws SAXException {
- if (!startSegment) {
- segmentBuilder.append(ch, start, length);
- }
- textBuilder.append(ch, start, length);
+ textBuilder.append(ch, start, length);
segmentEndOffset += length;
textEndOffset += length;
}
diff --git a/src/main/java/de/mannheim/ids/korap/sru/AnnotationLayer.java b/src/main/java/de/mannheim/ids/korap/sru/AnnotationLayer.java
index f79ffd8..10cdf53 100644
--- a/src/main/java/de/mannheim/ids/korap/sru/AnnotationLayer.java
+++ b/src/main/java/de/mannheim/ids/korap/sru/AnnotationLayer.java
@@ -5,42 +5,62 @@
import java.util.List;
import java.util.Map;
+/**
+ * This class contains properties of an annotation layer used for
+ * extracting and collecting annotations, as well as generating the
+ * advanced dataview in the SRU response.
+ *
+ * Layer code is used to extract the annotations of the annotation
+ * layer from the match snippet in a KorAP MatchInfo response.
+ *
+ * Annotation map collects the extracted annotations. Since a token or
+ * a text segment may have more than one annotation per annotation
+ * layer, the annotations are structured as a map whose keys are ids
+ * referring to the token positions.
+ *
+ * @author margaretha
+ *
+ */
public class AnnotationLayer {
public static enum TYPE {
TEXT, POS, LEMMA;
-
+
public String toString() {
return super.toString().toLowerCase();
};
}
-
+
private String layerCode;
private URI layerId;
- private Map<Integer,List<Annotation>> annotationMap;
-
+ private Map<Integer, List<Annotation>> annotationMap;
+
public AnnotationLayer (String layerCode, URI layerId) {
this.layerCode = layerCode;
this.layerId = layerId;
- this.annotationMap = new HashMap<Integer,List<Annotation>>();
+ this.annotationMap = new HashMap<Integer, List<Annotation>>();
}
+
public String getLayerCode() {
return layerCode;
}
+
public void setLayerCode(String layerCode) {
this.layerCode = layerCode;
}
- public Map<Integer,List<Annotation>> getAnnotationMap() {
+ public Map<Integer, List<Annotation>> getAnnotationMap() {
return annotationMap;
}
- public void setAnnotationMap(Map<Integer,List<Annotation>> annotationMap) {
+
+ public void setAnnotationMap(Map<Integer, List<Annotation>> annotationMap) {
this.annotationMap = annotationMap;
}
-
+
public URI getLayerId() {
return layerId;
}
+
public void setLayerId(URI layerId) {
this.layerId = layerId;
}
diff --git a/src/main/java/de/mannheim/ids/korap/sru/KorapClient.java b/src/main/java/de/mannheim/ids/korap/sru/KorapClient.java
index be3ffb9..00f95f9 100644
--- a/src/main/java/de/mannheim/ids/korap/sru/KorapClient.java
+++ b/src/main/java/de/mannheim/ids/korap/sru/KorapClient.java
@@ -26,23 +26,50 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
+/**
+ * Client to KorAP public services supporting calls to the resource,
+ * search and matchInfo APIs.
+ *
+ * @author margaretha
+ *
+ */
public class KorapClient {
- private static final String SERVICE_URI = "http://localhost:8089/api/v0.1/";
- //"http://10.0.10.13:7070/api/v0.1/";
- private String CONTEXT_TYPE = "sentence";
+ private static final String SERVICE_URI = "http://localhost:8089/api/v0.1/";
+ // "http://10.0.10.13:7070/api/v0.1/";
+ private static final String DEFAULT_CONTEXT_TYPE = "sentence";
+ private static final String DEFAULT_FOUNDRY = "*";
private int defaultNumOfRecords = 10;
private int defaultMaxRecords = 10;
private static ObjectMapper objectMapper = new ObjectMapper();
- private static Logger logger = (Logger) LoggerFactory.getLogger(KorapClient.class);
+ private static Logger logger = (Logger) LoggerFactory
+ .getLogger(KorapClient.class);
+ /**
+ * Constructs a KorapClient with the given number of records per
+ * page and the maximum number of records.
+ *
+ * @param numOfRecords
+ * the number of records per page
+ * @param maxRecords
+ * the maximum number of records/matches to retrieve
+ */
public KorapClient (int numOfRecords, int maxRecords) {
this.defaultNumOfRecords = numOfRecords;
this.defaultMaxRecords = maxRecords;
}
+ /**
+ * Gets information about available resources to search through
+ * the KorAP public services.
+ *
+ * @return a JSON node containing information about the resources
+ *
+ * @throws URISyntaxException
+ * @throws IOException
+ */
public JsonNode retrieveResources() throws URISyntaxException, IOException {
URIBuilder builder = new URIBuilder(SERVICE_URI + "VirtualCollection");
@@ -86,33 +113,45 @@
return resources;
}
+ /**
+ * Sends the given query to KorAP search API and creates a
+ * KorapResult from the response.
+ *
+ * @param query
+ * a query string
+ * @param queryLanguage
+ * the query language
+ * @param version
+ * the query language version
+ * @param startRecord
+ * the starting record/match number to retrieve
+ * @param maximumRecords
+ * the maximum number of records/matches to retrieve
+ * @param corpora
+ * the corpora to search on
+ * @return a KorapResult
+ *
+ * @throws HttpResponseException
+ * @throws IOException
+ */
public KorapResult query(String query, QueryLanguage queryLanguage,
String version, int startRecord, int maximumRecords,
String[] corpora) throws HttpResponseException, IOException {
- checkQuery(query, startRecord, maximumRecords);
+ if (query == null) {
+ throw new NullPointerException("Query is null.");
+ }
+ if (query.isEmpty()) {
+ throw new IllegalArgumentException("Query is empty.");
+ }
+ if (startRecord < 1) {
+ throw new IllegalArgumentException("Start record begins from 1.");
+ }
+ if (maximumRecords < 1) {
+ throw new IllegalArgumentException("Maximum records is too low.");
+ }
HttpUriRequest httpRequest = null;
-
- /*
- * if (corpora != null){ // create virtual collection
- * logger.info("Select collection"); CollectionQuery
- * collectionQuery = new CollectionQuery()
- * .addMetaFilter("corpusID", DEFAULT_COLLECTION);
- *
- * logger.info("create JsonLD"); QuerySerializer ss = new
- * QuerySerializer() .setQuery(query, QUERY_LANGUAGE,version)
- * .setCollection(collectionQuery) .setMeta(CONTEXT_TYPE,
- * CONTEXT_TYPE, CONTEXT_SIZE, CONTEXT_SIZE, 5,
- * startRecord-1);
- *
- * String jsonld=ss.build(); logger.info(jsonld);
- *
- * HttpPost post = new HttpPost(SERVICE_URI+"_raw");
- * post.setEntity(new StringEntity(jsonld)); httpRequest =
- * post; } else {
- */
-
try {
httpRequest = createSearchRequest(query, queryLanguage, version,
startRecord - 1, maximumRecords);
@@ -120,7 +159,6 @@
catch (URISyntaxException e) {
throw new IOException("Failed creating http request.");
}
- // }
CloseableHttpClient client = HttpClients.createDefault();
CloseableHttpResponse response = null;
@@ -133,7 +171,7 @@
logger.warn("Error response code: " + statusCode);
parseError(response);
}
-
+
BufferedInputStream jsonStream = new BufferedInputStream(response
.getEntity().getContent());
try {
@@ -153,12 +191,19 @@
return result;
}
+ /**
+ * Parses the error message from Kustvakt (probably an old format).
+ *
+ * @param response
+ * a response from Kustvakt
+ * @throws IOException
+ */
private static void parseError(CloseableHttpResponse response)
throws IOException {
-
+
logger.warn("Error message: "
+ response.getStatusLine().getReasonPhrase());
-
+
InputStream is = response.getEntity().getContent();
JsonNode node = objectMapper.readTree(is);
String message = node.get("error").textValue();
@@ -175,11 +220,27 @@
else {
errorItems = new String[] { "1", message };
}
-
+
throw new HttpResponseException(Integer.parseInt(errorItems[0]),
errorItems[1]);
}
+ /**
+ * Builds a search retrieve GET request for the given parameters.
+ *
+ * @param query
+ * a query string
+ * @param queryLanguage
+ * the query language
+ * @param version
+ * the query language version
+ * @param startRecord
+ * the zero-based offset of the first record/match to return
+ * @param maximumRecords
+ * the maximum number of records to return
+ * @return a HttpGet request
+ * @throws URISyntaxException
+ */
private HttpGet createSearchRequest(String query,
QueryLanguage queryLanguage, String version, int startRecord,
int maximumRecords) throws URISyntaxException {
@@ -197,46 +258,77 @@
params.add(new BasicNameValuePair("q", query));
params.add(new BasicNameValuePair("ql", queryLanguage.toString()));
params.add(new BasicNameValuePair("v", version));
- params.add(new BasicNameValuePair("context", CONTEXT_TYPE));
+ params.add(new BasicNameValuePair("context", DEFAULT_CONTEXT_TYPE));
params.add(new BasicNameValuePair("count", String
.valueOf(maximumRecords)));
params.add(new BasicNameValuePair("offset", String.valueOf(startRecord)));
URIBuilder builder = new URIBuilder(SERVICE_URI + "search");
builder.addParameters(params);
+
URI uri = builder.build();
logger.info("Query URI: " + uri.toString());
HttpGet request = new HttpGet(uri);
return request;
}
- private void checkQuery(String query, int startRecord, int maxRecord) {
- if (query == null) {
- throw new NullPointerException("Query == null.");
- }
- if (query.isEmpty()) {
- throw new IllegalArgumentException("Query is empty.");
- }
- if (startRecord < 1) {
- throw new IllegalArgumentException("Start record begins from 1.");
- }
- if (maxRecord < 1) {
- throw new IllegalArgumentException("Maximum records is too low.");
- }
- }
+ /**
+ * Sends a request to the MatchInfo API to get the annotations of
+ * a particular match identified with corpus/resource id, document
+ * id, and position id in the document in one or multiple
+ * foundries.
+ *
+ * @param resourceId
+ * the id of the corpus
+ * @param documentId
+ * the id of the document
+ * @param matchId
+ * the id of the match
+ * @param foundry
+ * the foundry to retrieve annotations from; "*" is used if empty
+ * @return the annotation snippet
+ *
+ * @throws IOException
+ * @throws URISyntaxException
+ */
+ public static String retrieveAnnotations(String resourceId,
+ String documentId, String matchId, String foundry)
+ throws IOException, URISyntaxException {
-
- public static String retrieveAnnotations(KorapMatch match) throws IOException {
+ if (resourceId == null) {
+ throw new NullPointerException("Corpus id of the match is null.");
+ }
+ else if (resourceId.isEmpty()) {
+ throw new IllegalArgumentException(
+ "Corpus id of the match is empty.");
+ }
+
+ if (documentId == null) {
+ throw new NullPointerException("Document id of the match is null.");
+ }
+ else if (documentId.isEmpty()) {
+ throw new IllegalArgumentException(
+ "Document id of the match is empty.");
+ }
+
+ if (matchId == null) {
+ throw new NullPointerException("Position id of the match is null.");
+ }
+ else if (matchId.isEmpty()) {
+ throw new IllegalArgumentException(
+ "Position id of the match is empty.");
+ }
+
+ if (foundry == null | foundry.isEmpty()) {
+ foundry = DEFAULT_FOUNDRY;
+ }
+
HttpUriRequest httpRequest;
- try {
- httpRequest = createMatchInfoRequest(match.getCorpusId(), match.getDocId(), match.getPositionId(), "*");
- }
- catch (URISyntaxException e) {
- throw new IOException("Failed creating http request for retrieving annotations.");
- }
+ httpRequest = createMatchInfoRequest(resourceId, documentId, matchId,
+ foundry);
String annotationSnippet = null;
-
+
CloseableHttpClient client = HttpClients.createDefault();
CloseableHttpResponse response = null;
try {
@@ -251,11 +343,13 @@
BufferedInputStream jsonStream = new BufferedInputStream(response
.getEntity().getContent());
try {
- JsonNode root = objectMapper.readTree(jsonStream);
- annotationSnippet = "<snippet>" + root.at("/snippet").asText() + "</snippet>";
+ JsonNode root = objectMapper.readTree(jsonStream);
+ annotationSnippet = "<snippet>" + root.at("/snippet").asText()
+ + "</snippet>";
}
catch (IOException e) {
- throw new IOException("Failed processing response from KorAP match info API.");
+ throw new IOException(
+ "Failed processing response from KorAP match info API.");
}
finally {
jsonStream.close();
@@ -266,12 +360,28 @@
}
return annotationSnippet;
}
-
+
+ /**
+ * Builds a request URL to send to the KorAP MatchInfo service.
+ *
+ * @param resourceId
+ * the id of the corpus
+ * @param documentId
+ * the id of the document
+ * @param matchId
+ * the id of the match
+ * @param foundry
+ * the foundry to retrieve annotations from
+ * @return a HttpGet request
+ * @throws URISyntaxException
+ */
private static HttpGet createMatchInfoRequest(String resourceId,
- String documentId, String matchId, String foundry) throws URISyntaxException {
+ String documentId, String matchId, String foundry)
+ throws URISyntaxException {
+
StringBuilder sb = new StringBuilder();
sb.append(SERVICE_URI);
-// sb.append("http://localhost:8089/api/v0.1/");
+ // sb.append("http://localhost:8089/api/v0.1/");
sb.append("corpus/");
sb.append(resourceId);
sb.append("/");
diff --git a/src/main/java/de/mannheim/ids/korap/sru/KorapEndpointDescription.java b/src/main/java/de/mannheim/ids/korap/sru/KorapEndpointDescription.java
index ab6bbe7..1a7f219 100644
--- a/src/main/java/de/mannheim/ids/korap/sru/KorapEndpointDescription.java
+++ b/src/main/java/de/mannheim/ids/korap/sru/KorapEndpointDescription.java
@@ -24,6 +24,17 @@
import eu.clarin.sru.server.fcs.Layer;
import eu.clarin.sru.server.fcs.ResourceInfo;
+/**
+ * Contains information for generating a response of SRU explain
+ * operation with endpoint description.
+ *
+ * Example:
+ * {@code http://localhost:8080/KorapSRU?operation=explain&x-fcs-endpoint-description=true}
+ *
+ *
+ * @author margaretha
+ *
+ */
public class KorapEndpointDescription implements EndpointDescription {
private List<DataView> dataviews;
@@ -32,7 +43,8 @@
private List<String> defaultDataviews;
private List<Layer> layers;
-
+ private Layer textLayer;
+
private List<AnnotationLayer> annotationLayers;
public KorapEndpointDescription (ServletContext context)
@@ -44,7 +56,8 @@
if (simpleEndpointDescription != null) {
setSupportedLayers(simpleEndpointDescription
.getSupportedLayers());
- setAnnotationLayers(simpleEndpointDescription.getSupportedLayers());
+ setAnnotationLayers(simpleEndpointDescription
+ .getSupportedLayers());
setSupportedDataViews(simpleEndpointDescription
.getSupportedDataViews());
setDefaultDataViews(simpleEndpointDescription
@@ -153,23 +166,24 @@
public void setAnnotationLayers(List<Layer> layers) {
annotationLayers = new ArrayList<AnnotationLayer>(layers.size());
-
+
String layerCode;
-
+
for (Layer l : layers) {
-
+
String type = l.getType();
- if (type.equals(AnnotationLayer.TYPE.TEXT.toString())){
+ if (type.equals(AnnotationLayer.TYPE.TEXT.toString())) {
layerCode = type;
+ this.textLayer = l;
}
- else{
+ else {
StringBuilder sb = new StringBuilder();
String qualifier = l.getQualifier();
if (qualifier != null) {
sb.append(qualifier);
-
- if (type.equals(AnnotationLayer.TYPE.POS.toString())) {
+
+ if (type.equals(AnnotationLayer.TYPE.POS.toString())) {
sb.append("/p");
}
else if (type.equals(AnnotationLayer.TYPE.LEMMA.toString())) {
@@ -182,9 +196,17 @@
layerCode = sb.toString();
}
- AnnotationLayer annotationLayer = new AnnotationLayer(
- layerCode, l.getResultId());
+ AnnotationLayer annotationLayer = new AnnotationLayer(layerCode,
+ l.getResultId());
annotationLayers.add(annotationLayer);
}
}
+
+ public Layer getTextLayer() {
+ return textLayer;
+ }
+
+ public void setTextLayer(Layer textLayer) {
+ this.textLayer = textLayer;
+ }
}
diff --git a/src/main/java/de/mannheim/ids/korap/sru/KorapMatchHandler.java b/src/main/java/de/mannheim/ids/korap/sru/KorapMatchHandler.java
index 1cd91ee..39df47d 100644
--- a/src/main/java/de/mannheim/ids/korap/sru/KorapMatchHandler.java
+++ b/src/main/java/de/mannheim/ids/korap/sru/KorapMatchHandler.java
@@ -4,6 +4,11 @@
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
+/** Handler for parsing the match snippet from KorAP search API.
+ *
+ * @author margaretha
+ *
+ */
public class KorapMatchHandler extends DefaultHandler{
private KorapMatch match;
diff --git a/src/main/java/de/mannheim/ids/korap/sru/KorapSRU.java b/src/main/java/de/mannheim/ids/korap/sru/KorapSRU.java
index 871c3d2..4e1c4db 100644
--- a/src/main/java/de/mannheim/ids/korap/sru/KorapSRU.java
+++ b/src/main/java/de/mannheim/ids/korap/sru/KorapSRU.java
@@ -18,12 +18,16 @@
import eu.clarin.sru.server.SRURequest;
import eu.clarin.sru.server.SRUSearchResultSet;
import eu.clarin.sru.server.SRUServerConfig;
+import eu.clarin.sru.server.SRUVersion;
import eu.clarin.sru.server.fcs.Constants;
import eu.clarin.sru.server.fcs.DataView;
import eu.clarin.sru.server.fcs.EndpointDescription;
import eu.clarin.sru.server.fcs.SimpleEndpointSearchEngineBase;
/**
+ * KorAP search engine endpoint implementation supporting the SRU
+ * explain and searchRetrieve operations.
+ *
* @author margaretha
* */
public class KorapSRU extends SimpleEndpointSearchEngineBase {
@@ -34,7 +38,7 @@
public static String redirectBaseURI;
public static KorapClient korapClient;
private KorapEndpointDescription korapEndpointDescription;
- private SRUServerConfig serverConfig;
+ // private SRUServerConfig serverConfig;
private Logger logger = (Logger) LoggerFactory.getLogger(KorapSRU.class);
@@ -50,7 +54,7 @@
protected void doInit(ServletContext context, SRUServerConfig config,
SRUQueryParserRegistry.Builder parserRegistryBuilder,
Map<String, String> params) throws SRUConfigException {
- serverConfig = config;
+ // serverConfig = config;
korapClient = new KorapClient(config.getNumberOfRecords(),
config.getMaximumRecords());
@@ -73,28 +77,53 @@
checkRequestRecordSchema(request);
- List<String> dataviews = korapEndpointDescription.getDefaultDataViews();
- if (request.getExtraRequestDataNames().contains("x-fcs-dataviews")) {
- String extraDataview = getRequestDataView(
- request.getExtraRequestData("x-fcs-dataviews"), diagnostics);
- if (extraDataview != null) dataviews.add(extraDataview);
- }
+ List<String> dataviews = createRequestDataview(request, diagnostics);
+ QueryLanguage queryLanguage = parseQueryLanguage(request);
- boolean isRewitesAllowed = false;
- if (request.getExtraRequestDataNames().contains("x-fcs-rewrites-allowed")) {
- isRewitesAllowed = getRequestDataView(
- request.getExtraRequestData("x-fcs-rewrites-allowed"), diagnostics).
- equals("true");
- }
-
String queryType = request.getQueryType();
logger.info("Query language: " + queryType);
- QueryLanguage queryLanguage;
+
+ String queryStr = request.getQuery().getRawQuery();
+ if ((queryStr == null) || queryStr.isEmpty()) {
+ throw new SRUException(SRUConstants.SRU_EMPTY_TERM_UNSUPPORTED,
+ "An empty term is not supported.");
+ }
+ logger.info("korapsru query: " + queryStr);
+
+ String version = parseVersion(request.getVersion());
+
+ KorapResult korapResult = sendQuery(queryStr, request, version,
+ queryLanguage);
+ checkKorapResultError(korapResult, queryLanguage,
+ isRewitesAllowed(request), diagnostics);
+
+ return new KorapSRUSearchResultSet(diagnostics, korapResult, dataviews,
+ korapEndpointDescription.getTextLayer(),
+ korapEndpointDescription.getAnnotationLayers());
+ }
+
+ private String parseVersion(SRUVersion version) throws SRUException {
+ if (version == SRUVersion.VERSION_1_1) {
+ return "1.1";
+ }
+ else if (version == SRUVersion.VERSION_1_2) {
+ return "1.2";
+ }
+ else if (version == SRUVersion.VERSION_2_0) {
+ return "2.0";
+ }
+ else {
+ throw new SRUException(SRUConstants.SRU_UNSUPPORTED_VERSION);
+ }
+ }
+
+ private QueryLanguage parseQueryLanguage(SRURequest request)
+ throws SRUException {
if (request.isQueryType(Constants.FCS_QUERY_TYPE_CQL)) {
- queryLanguage = QueryLanguage.CQL;
+ return QueryLanguage.CQL;
}
else if (request.isQueryType(Constants.FCS_QUERY_TYPE_FCS)) {
- queryLanguage = QueryLanguage.FCSQL;
+ return QueryLanguage.FCSQL;
}
else {
throw new SRUException(
@@ -103,30 +132,55 @@
+ request.getQueryType()
+ "' are not supported by this CLARIN-FCS Endpoint.");
}
+ }
- String queryStr = null;
- queryStr = request.getQuery().getRawQuery();
- if ((queryStr == null) || queryStr.isEmpty()) {
- throw new SRUException(SRUConstants.SRU_EMPTY_TERM_UNSUPPORTED,
- "An empty term is not supported.");
+ private boolean isRewitesAllowed(SRURequest request) {
+ if (request.getExtraRequestDataNames().contains(
+ "x-fcs-rewrites-allowed")) {
+
+ String rewrites = request
+ .getExtraRequestData("x-fcs-rewrites-allowed");
+ if (rewrites != null && !rewrites.isEmpty()) {
+
+ if (rewrites.equals("true")) return true;
+ }
}
- logger.info("korapsru query: " + queryStr);
+ return false;
+ }
- String version = null;
- switch (request.getVersion()) {
- case VERSION_1_1:
- version = "1.1";
- case VERSION_1_2:
- version = "1.2";
- case VERSION_2_0:
- version = "2.0";
- default:
- serverConfig.getDefaultVersion();
+ private List<String> createRequestDataview(SRURequest request,
+ SRUDiagnosticList diagnostics) {
+
+ List<String> dataviews = korapEndpointDescription.getDefaultDataViews();
+
+ if (request.getExtraRequestDataNames().contains("x-fcs-dataviews")) {
+ String requestDataview = request
+ .getExtraRequestData("x-fcs-dataviews");
+ if (requestDataview != null & !requestDataview.isEmpty()) {
+ for (DataView dv : korapEndpointDescription
+ .getSupportedDataViews()) {
+ if (dv.getIdentifier().equals(requestDataview)) {
+ dataviews.add(requestDataview);
+ }
+ }
+ diagnostics.addDiagnostic(
+ Constants.FCS_DIAGNOSTIC_REQUESTED_DATA_VIEW_INVALID,
+ "The requested Data View " + requestDataview
+ + " is not supported.",
+ "Using the default Data View(s): "
+ + korapEndpointDescription
+ .getDefaultDataViews() + " .");
+ }
}
- KorapResult korapResult = new KorapResult();
+ return dataviews;
+ }
+
+ private KorapResult sendQuery(String queryStr, SRURequest request,
+ String version, QueryLanguage queryLanguage) throws SRUException {
+
try {
- korapResult = korapClient.query(queryStr, queryLanguage, version,
+ return korapClient.query(queryStr, queryLanguage, version,
request.getStartRecord(), request.getMaximumRecords(),
getCorporaList(request));
}
@@ -164,7 +218,11 @@
throw new SRUException(SRUConstants.SRU_GENERAL_SYSTEM_ERROR,
e.getMessage());
}
+ }
+ private void checkKorapResultError(KorapResult korapResult,
+ QueryLanguage queryLanguage, boolean isRewitesAllowed,
+ SRUDiagnosticList diagnostics) throws SRUException {
if (korapResult.getErrors() != null) {
for (List<Object> error : korapResult.getErrors()) {
int errorCode = (int) error.get(0);
@@ -216,13 +274,20 @@
SRUConstants.SRU_QUERY_SYNTAX_ERROR,
(String) error.get(1));
}
+ case 780:
+ throw new SRUException(
+ SRUConstants.SRU_RESULT_SET_NOT_CREATED_TOO_MANY_MATCHING_RECORDS,
+ (String) error.get(1));
case 781:
- if (isRewitesAllowed){
- diagnostics.addDiagnostic(FCSConstants.FCS_QUERY_REWRITTEN,"",(String) error.get(1));
+ if (isRewitesAllowed) {
+ diagnostics.addDiagnostic(
+ FCSConstants.FCS_QUERY_REWRITTEN, "",
+ (String) error.get(1));
}
else {
throw new SRUException(
- SRUConstants.SRU_RESULT_SET_NOT_CREATED_TOO_MANY_MATCHING_RECORDS);
+ SRUConstants.SRU_RESULT_SET_NOT_CREATED_TOO_MANY_MATCHING_RECORDS,
+ "Too many matching records.");
}
default:
break;
@@ -230,9 +295,6 @@
}
}
-
- return new KorapSRUSearchResultSet(diagnostics, korapResult, dataviews,
- korapEndpointDescription);
}
private String[] getCorporaList(SRURequest request) {
@@ -265,22 +327,4 @@
}
}
- private String getRequestDataView(String requestDataview,
- SRUDiagnosticList diagnostics) {
- if (requestDataview != null & !requestDataview.isEmpty()) {
- for (DataView dv : korapEndpointDescription.getSupportedDataViews()) {
- if (dv.getIdentifier().equals(requestDataview)) {
- return requestDataview;
- }
- }
- diagnostics.addDiagnostic(
- Constants.FCS_DIAGNOSTIC_REQUESTED_DATA_VIEW_INVALID,
- "The requested Data View " + requestDataview
- + " is not supported.",
- "Using the default Data View(s): "
- + korapEndpointDescription.getDefaultDataViews()
- + " .");
- }
- return null;
- }
}
diff --git a/src/main/java/de/mannheim/ids/korap/sru/KorapSRUSearchResultSet.java b/src/main/java/de/mannheim/ids/korap/sru/KorapSRUSearchResultSet.java
index a260584..7f0d3b6 100644
--- a/src/main/java/de/mannheim/ids/korap/sru/KorapSRUSearchResultSet.java
+++ b/src/main/java/de/mannheim/ids/korap/sru/KorapSRUSearchResultSet.java
@@ -3,6 +3,7 @@
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
@@ -26,6 +27,13 @@
import eu.clarin.sru.server.fcs.Layer;
import eu.clarin.sru.server.fcs.XMLStreamWriterHelper;
+/**
+ * Prepares and creates a search result set for a search retrieve URL
+ * call.
+ *
+ * @author margaretha
+ *
+ */
public class KorapSRUSearchResultSet extends SRUSearchResultSet {
private Logger logger = (Logger) LoggerFactory
@@ -34,15 +42,28 @@
private int i = -1;
private KorapResult korapResult;
private List<String> dataviews;
- private KorapEndpointDescription endpointDescription;
private SAXParser saxParser;
+ private Layer textLayer;
+ private AnnotationHandler annotationHandler;
- Layer textLayer;
-
+ /**
+ * Constructs a KorapSRUSearchResultSet for the given KorapResult.
+ *
+ * @param diagnostics
+ * a list of SRU diagnostics
+ * @param korapResult
+ * the query result
+ * @param dataviews
+ * the required dataviews to generate
+ * @param textlayer
+ * the text layer
+ * @param annotationLayers
+ * the list of annotation layers
+ * @throws SRUException
+ */
public KorapSRUSearchResultSet (SRUDiagnosticList diagnostics,
- KorapResult korapResult, List<String> dataviews,
- KorapEndpointDescription korapEndpointDescription)
- throws SRUException {
+ KorapResult korapResult, List<String> dataviews, Layer textlayer,
+ List<AnnotationLayer> annotationLayers) throws SRUException {
super(diagnostics);
SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
@@ -55,9 +76,8 @@
this.korapResult = korapResult;
this.dataviews = dataviews;
- this.endpointDescription = korapEndpointDescription;
-
- textLayer = endpointDescription.getSupportedLayers().get(0);
+ this.textLayer = textlayer;
+ annotationHandler = new AnnotationHandler(annotationLayers);
}
@Override
@@ -87,8 +107,7 @@
@Override
public void writeRecord(XMLStreamWriter writer) throws XMLStreamException {
- KorapMatch match;
- match = parseMatch();
+ KorapMatch match = korapResult.getMatch(i);
match.parseMatchId();
XMLStreamWriterHelper.writeStartResource(writer, match.getMatchId(),
@@ -104,6 +123,14 @@
XMLStreamWriterHelper.writeEndResource(writer);
}
+ /**
+ * Parses the current match snippet from KorAP search API into
+ * keyword, left context and right context.
+ *
+ * @return a KorapMatch
+ * @throws XMLStreamException
+ */
+ @Deprecated
private KorapMatch parseMatch() throws XMLStreamException {
KorapMatch match = korapResult.getMatch(i);
String snippet = "<snippet>" + match.getSnippet() + "</snippet>";
@@ -117,22 +144,47 @@
return match;
}
+ /**
+ * Retrieves and parses the annotations of a match from KorAP
+ * MatchInfo API.
+ *
+ * @param match
+ * a KorapMatch, must not be null
+ * @return a list of annotation layers containing the match
+ * annotations.
+ * @throws NullPointerException
+ * if the given match is null
+ * @throws XMLStreamException
+ * if the annotation snippet cannot be retrieved or parsed
+ */
private List<AnnotationLayer> parseAnnotations(KorapMatch match)
throws XMLStreamException {
- AnnotationHandler annotationHandler = new AnnotationHandler(
- endpointDescription.getAnnotationLayers());
+ if (match == null) {
+ throw new NullPointerException("KorapMatch is null.");
+ }
+
try {
- String annotationSnippet = KorapClient.retrieveAnnotations(match);
- InputStream is = new ByteArrayInputStream(annotationSnippet.getBytes());
+ String annotationSnippet = KorapClient.retrieveAnnotations(
+ match.getCorpusId(), match.getDocId(),
+ match.getPositionId(), "*");
+ // Encode explicitly: getBytes() without a charset uses the
+ // platform default, which makes the bytes handed to the SAX
+ // parser machine-dependent for non-ASCII text.
+ InputStream is = new ByteArrayInputStream(annotationSnippet
+ .getBytes(java.nio.charset.StandardCharsets.UTF_8));
saxParser.parse(is, annotationHandler);
}
- catch (SAXException | IOException e) {
+ catch (SAXException | IOException | URISyntaxException e) {
throw new XMLStreamException(e);
}
return annotationHandler.getAnnotationLayers();
}
+ /**
+ * Writes advanced data views, namely segment annotations for each
+ * annotation layer.
+ *
+ * @param writer
+ * an XMLStreamWriter
+ * @param annotationLayers
+ * a list of annotation layers
+ * @throws XMLStreamException
+ */
private void writeAdvancedDataView(XMLStreamWriter writer,
List<AnnotationLayer> annotationLayers) throws XMLStreamException {
@@ -148,6 +200,15 @@
}
}
+ /**
+ * Adds all annotations to the AdvancedDataViewWriter.
+ *
+ * @param helper
+ * an AdvancedDataViewWriter
+ * @param annotationLayers
+ * a list of annotation layers containing match
+ * annotations.
+ */
private void addAnnotationsToWriter(AdvancedDataViewWriter helper,
List<AnnotationLayer> annotationLayers) {
@@ -168,13 +229,7 @@
// for (Annotation annotation : annotations){
Annotation annotation = annotations.get(0);
- // if
- // (annotationLayer.getLayerCode().equals(AnnotationLayer.TYPE.TEXT.toString())){
- // logger.info(annotation.getStart()+" "+
- // annotation.getEnd()+" "+
- // annotation.getValue());
- // }
- if (annotation.getHitLevel()>0) {
+ if (annotation.getHitLevel() > 0) {
helper.addSpan(annotationLayer.getLayerId(),
annotation.getStart(), annotation.getEnd(),
annotation.getValue(), annotation.getHitLevel());
diff --git a/src/main/webapp/WEB-INF/web.xml b/src/main/webapp/WEB-INF/web.xml
index d651b31..45ff7f1 100644
--- a/src/main/webapp/WEB-INF/web.xml
+++ b/src/main/webapp/WEB-INF/web.xml
@@ -10,10 +10,6 @@
<display-name>KorAP SRU/CQL Service (HTTP Interface)</display-name>
<servlet-name>KorapSRU</servlet-name>
<servlet-class>eu.clarin.sru.server.utils.SRUServerServlet</servlet-class>
- <!-- <init-param>
- <param-name>de.mannheim.ids.korap.sru.indexDir</param-name>
- <param-value>/usr/local/java/apps/korapsru/index</param-value>
- </init-param> -->
<init-param>
<param-name>eu.clarin.sru.server.numberOfRecords</param-name>
<param-value>25</param-value>