| package de.mannheim.ids.korap.sru; |
| |
| import java.io.FileNotFoundException; |
| import java.io.IOException; |
| import java.util.List; |
| import java.util.Map; |
| |
| import javax.servlet.ServletContext; |
| |
| import org.apache.http.client.HttpResponseException; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import eu.clarin.sru.server.SRUConfigException; |
| import eu.clarin.sru.server.SRUConstants; |
| import eu.clarin.sru.server.SRUDiagnosticList; |
| import eu.clarin.sru.server.SRUException; |
| import eu.clarin.sru.server.SRUQueryParserRegistry; |
| import eu.clarin.sru.server.SRURequest; |
| import eu.clarin.sru.server.SRUSearchResultSet; |
| import eu.clarin.sru.server.SRUServerConfig; |
| import eu.clarin.sru.server.SRUVersion; |
| import eu.clarin.sru.server.fcs.Constants; |
| import eu.clarin.sru.server.fcs.DataView; |
| import eu.clarin.sru.server.fcs.EndpointDescription; |
| import eu.clarin.sru.server.fcs.SimpleEndpointSearchEngineBase; |
| |
| /** |
| * KorAP search engine endpoint implementation supporting SRU calls |
| * with operation explain and search retrieve. |
| * |
| * @author margaretha |
| * */ |
| public class KorapSRU extends SimpleEndpointSearchEngineBase { |
| |
| public static final String CLARIN_FCS_RECORD_SCHEMA = "http://clarin.eu/fcs/resource"; |
| public static final String KORAP_WEB_URL = "http://korap.ids-mannheim.de/kalamar"; |
| |
| public static String redirectBaseURI; |
| public static KorapClient korapClient; |
| private KorapEndpointDescription korapEndpointDescription; |
| // private SRUServerConfig serverConfig; |
| |
| private Logger logger = (Logger) LoggerFactory.getLogger(KorapSRU.class); |
| |
| @Override |
| protected EndpointDescription createEndpointDescription( |
| ServletContext context, SRUServerConfig config, |
| Map<String, String> params) throws SRUConfigException { |
| korapEndpointDescription = new KorapEndpointDescription(context); |
| return korapEndpointDescription; |
| } |
| |
| @Override |
| protected void doInit(ServletContext context, SRUServerConfig config, |
| SRUQueryParserRegistry.Builder parserRegistryBuilder, |
| Map<String, String> params) throws SRUConfigException { |
| // serverConfig = config; |
| try { |
| korapClient = new KorapClient(config.getNumberOfRecords(), |
| config.getMaximumRecords()); |
| } |
| catch (FileNotFoundException e) { |
| throw new SRUConfigException(e.getMessage()); |
| } |
| |
| StringBuilder sb = new StringBuilder(); |
| sb.append(config.getTransports()); |
| sb.append("://"); |
| sb.append(config.getHost()); |
| if (config.getPort() != 80) { |
| sb.append(":").append(config.getPort()); |
| } |
| sb.append("/").append(config.getDatabase()); |
| sb.append("/").append("redirect/"); |
| this.redirectBaseURI = sb.toString(); |
| } |
| |
| @Override |
| public SRUSearchResultSet search(SRUServerConfig config, |
| SRURequest request, SRUDiagnosticList diagnostics) |
| throws SRUException { |
| |
| checkRequestRecordSchema(request); |
| |
| List<String> dataviews = createRequestDataview(request, diagnostics); |
| QueryLanguage queryLanguage = parseQueryLanguage(request); |
| |
| String queryType = request.getQueryType(); |
| logger.info("Query language: " + queryType); |
| |
| String queryStr = request.getQuery().getRawQuery(); |
| if ((queryStr == null) || queryStr.isEmpty()) { |
| throw new SRUException(SRUConstants.SRU_EMPTY_TERM_UNSUPPORTED, |
| "An empty term is not supported."); |
| } |
| logger.info("korapsru query: " + queryStr); |
| |
| String version = parseVersion(request.getVersion()); |
| |
| KorapResult korapResult = sendQuery(queryStr, request, version, |
| queryLanguage); |
| checkKorapResultError(korapResult, queryLanguage, |
| isRewitesAllowed(request), diagnostics); |
| logger.info("Number of records: "+korapResult.getTotalResults()); |
| |
| return new KorapSRUSearchResultSet(diagnostics, korapResult, dataviews, |
| korapEndpointDescription.getTextLayer(), |
| korapEndpointDescription.getAnnotationLayers()); |
| } |
| |
| private String parseVersion(SRUVersion version) throws SRUException { |
| if (version == SRUVersion.VERSION_1_1) { |
| return "1.1"; |
| } |
| else if (version == SRUVersion.VERSION_1_2) { |
| return "1.2"; |
| } |
| else if (version == SRUVersion.VERSION_2_0) { |
| return "2.0"; |
| } |
| else { |
| throw new SRUException(SRUConstants.SRU_UNSUPPORTED_VERSION); |
| } |
| } |
| |
| private QueryLanguage parseQueryLanguage(SRURequest request) |
| throws SRUException { |
| if (request.isQueryType(Constants.FCS_QUERY_TYPE_CQL)) { |
| return QueryLanguage.CQL; |
| } |
| else if (request.isQueryType(Constants.FCS_QUERY_TYPE_FCS)) { |
| return QueryLanguage.FCSQL; |
| } |
| else { |
| throw new SRUException( |
| SRUConstants.SRU_CANNOT_PROCESS_QUERY_REASON_UNKNOWN, |
| "Queries with queryType '" |
| + request.getQueryType() |
| + "' are not supported by this CLARIN-FCS Endpoint."); |
| } |
| } |
| |
| private boolean isRewitesAllowed(SRURequest request) { |
| if (request.getExtraRequestDataNames().contains( |
| "x-fcs-rewrites-allowed")) { |
| |
| String rewrites = request |
| .getExtraRequestData("x-fcs-rewrites-allowed"); |
| if (rewrites != null && !rewrites.isEmpty()) { |
| |
| if (rewrites.equals("true")) return true; |
| } |
| } |
| return false; |
| } |
| |
| private List<String> createRequestDataview(SRURequest request, |
| SRUDiagnosticList diagnostics) { |
| |
| List<String> dataviews = korapEndpointDescription.getDefaultDataViews(); |
| |
| if (request.getExtraRequestDataNames().contains("x-fcs-dataviews")) { |
| String requestDataview = request |
| .getExtraRequestData("x-fcs-dataviews"); |
| if (requestDataview != null & !requestDataview.isEmpty()) { |
| for (DataView dv : korapEndpointDescription |
| .getSupportedDataViews()) { |
| if (dv.getIdentifier().equals(requestDataview)) { |
| if (!dataviews.contains(requestDataview)){ |
| dataviews.add(requestDataview); |
| } |
| return dataviews; |
| } |
| } |
| diagnostics.addDiagnostic( |
| Constants.FCS_DIAGNOSTIC_REQUESTED_DATA_VIEW_INVALID, |
| "The requested Data View " + requestDataview |
| + " is not supported.", |
| "Using the default Data View(s): " |
| + korapEndpointDescription |
| .getDefaultDataViews() + " ."); |
| } |
| } |
| |
| return dataviews; |
| } |
| |
| private KorapResult sendQuery(String queryStr, SRURequest request, |
| String version, QueryLanguage queryLanguage) throws SRUException { |
| |
| try { |
| return korapClient.query(queryStr, queryLanguage, version, |
| request.getStartRecord(), request.getMaximumRecords(), |
| getCorporaList(request)); |
| } |
| catch (HttpResponseException e) { |
| logger.warn("HttpResponseException: " + e.getStatusCode() + " " |
| + e.getMessage()); |
| switch (e.getStatusCode()) { |
| case 16: |
| throw new SRUException(SRUConstants.SRU_UNSUPPORTED_INDEX, |
| e.getMessage()); |
| case 19: |
| throw new SRUException( |
| SRUConstants.SRU_UNSUPPORTED_RELATION, |
| e.getMessage()); |
| case 20: |
| throw new SRUException( |
| SRUConstants.SRU_UNSUPPORTED_RELATION_MODIFIER, |
| e.getMessage()); |
| case 27: |
| throw new SRUException( |
| SRUConstants.SRU_EMPTY_TERM_UNSUPPORTED, |
| e.getMessage()); |
| case 48: |
| throw new SRUException( |
| SRUConstants.SRU_QUERY_FEATURE_UNSUPPORTED, |
| e.getMessage()); |
| default: |
| throw new SRUException( |
| SRUConstants.SRU_GENERAL_SYSTEM_ERROR, |
| e.getMessage()); |
| } |
| |
| } |
| catch (IOException e) { |
| throw new SRUException(SRUConstants.SRU_GENERAL_SYSTEM_ERROR, |
| e.getMessage()); |
| } |
| } |
| |
| private void checkKorapResultError(KorapResult korapResult, |
| QueryLanguage queryLanguage, boolean isRewitesAllowed, |
| SRUDiagnosticList diagnostics) throws SRUException { |
| if (korapResult.getErrors() != null) { |
| for (List<Object> error : korapResult.getErrors()) { |
| int errorCode = (int) error.get(0); |
| |
| switch (errorCode) { |
| case 301: |
| throw new SRUException( |
| SRUConstants.SRU_EMPTY_TERM_UNSUPPORTED, |
| (String) error.get(1)); |
| case 302: |
| if (queryLanguage == QueryLanguage.FCSQL) { |
| throw new SRUException( |
| FCSConstants.FCS_GENERAL_QUERY_SYNTAX_ERROR, |
| (String) error.get(1)); |
| } |
| else { |
| throw new SRUException( |
| SRUConstants.SRU_QUERY_SYNTAX_ERROR, |
| (String) error.get(1)); |
| } |
| case 306: |
| throw new SRUException( |
| SRUConstants.SRU_QUERY_FEATURE_UNSUPPORTED, |
| (String) error.get(1)); |
| case 307: |
| throw new SRUException( |
| SRUConstants.SRU_UNSUPPORTED_PARAMETER_VALUE, |
| (String) error.get(1)); |
| case 309: |
| throw new SRUException( |
| SRUConstants.SRU_MANDATORY_PARAMETER_NOT_SUPPLIED, |
| (String) error.get(1)); |
| case 310: |
| throw new SRUException( |
| SRUConstants.SRU_UNSUPPORTED_VERSION, |
| (String) error.get(1)); |
| case 311: |
| throw new SRUException( |
| FCSConstants.FCS_QUERY_TOO_COMPLEX, |
| (String) error.get(1)); |
| case 399: |
| if (queryLanguage == QueryLanguage.FCSQL) { |
| throw new SRUException( |
| FCSConstants.FCS_GENERAL_QUERY_SYNTAX_ERROR, |
| (String) error.get(1)); |
| } |
| else { |
| throw new SRUException( |
| SRUConstants.SRU_QUERY_SYNTAX_ERROR, |
| (String) error.get(1)); |
| } |
| case 780: |
| throw new SRUException( |
| SRUConstants.SRU_RESULT_SET_NOT_CREATED_TOO_MANY_MATCHING_RECORDS, |
| (String) error.get(1)); |
| case 781: |
| if (isRewitesAllowed) { |
| diagnostics.addDiagnostic( |
| FCSConstants.FCS_QUERY_REWRITTEN, "", |
| (String) error.get(1)); |
| } |
| else { |
| throw new SRUException( |
| SRUConstants.SRU_RESULT_SET_NOT_CREATED_TOO_MANY_MATCHING_RECORDS, |
| "Too many matching records."); |
| } |
| default: |
| break; |
| } |
| |
| } |
| } |
| } |
| |
| private String[] getCorporaList(SRURequest request) { |
| try { |
| String corpusPids = request.getExtraRequestData("x-fcs-context"); |
| if (!corpusPids.isEmpty() && corpusPids != null) { |
| if (corpusPids.contains(",")) { |
| return corpusPids.split(","); |
| } |
| return new String[] { corpusPids }; |
| } |
| return null; |
| } |
| catch (NullPointerException e) { |
| return null; |
| } |
| } |
| |
| private void checkRequestRecordSchema(SRURequest request) |
| throws SRUException { |
| final String recordSchemaIdentifier = request |
| .getRecordSchemaIdentifier(); |
| if ((recordSchemaIdentifier != null) |
| && !recordSchemaIdentifier.equals(CLARIN_FCS_RECORD_SCHEMA)) { |
| throw new SRUException( |
| SRUConstants.SRU_UNKNOWN_SCHEMA_FOR_RETRIEVAL, |
| recordSchemaIdentifier, "Record schema \"" |
| + recordSchemaIdentifier |
| + "\" is not supported by this endpoint."); |
| } |
| } |
| |
| } |