Handle resource pid & resolve resources (virtual corpora) using map #62
Change-Id: I0bba01e4417876b625ae6da5d135d412d4aeba43
diff --git a/ChangeLog b/ChangeLog
index 1653c1c..87ef4b7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,9 +1,15 @@
+1.0.8-SNAPSHOT
+
+- handle resource pid and resolve resources (virtual corpora) using map (#62)
+
1.0.7-SNAPSHOT
- update libraries including fcs-simple-endpoint 1.8.0
- add Dockerfile
- add institution
- improve landingPageURI with corpus query
+- fix the language of resource description
+
1.0.6-SNAPSHOT
diff --git a/pom.xml b/pom.xml
index 12590c1..567d403 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
<groupId>de.mannheim.ids</groupId>
<artifactId>KorapSRU</artifactId>
- <version>1.0.7</version>
+ <version>1.0.8-SNAPSHOT</version>
<packaging>war</packaging>
<name>KorapSRU</name>
diff --git a/src/main/java/de/ids_mannheim/korap/sru/KorapClient.java b/src/main/java/de/ids_mannheim/korap/sru/KorapClient.java
index fccc24a..1c313c0 100644
--- a/src/main/java/de/ids_mannheim/korap/sru/KorapClient.java
+++ b/src/main/java/de/ids_mannheim/korap/sru/KorapClient.java
@@ -7,7 +7,9 @@
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
@@ -30,6 +32,8 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import de.ids_mannheim.korap.util.RedirectStrategy;
+import eu.clarin.sru.server.SRUConstants;
+import eu.clarin.sru.server.SRUException;
/**
* Client to KorAP public services supporting calls to the resource,
@@ -50,6 +54,9 @@
private static ObjectMapper objectMapper = new ObjectMapper();
private static Logger logger =
(Logger) LoggerFactory.getLogger(KorapClient.class);
+
+ // Cache mapping a resource pid to the corpus query (cq) that is cut out
+ // of the resource's landing page URL (the text following "cq=").
+ // It is filled while the resource list response is parsed and read when
+ // the corpora of a search request are resolved to a corpus query.
+ // NOTE(review): public, static and a plain HashMap - presumably shared by
+ // concurrently handled requests; confirm whether it needs synchronization.
+ public static Map<String, String> virtualCorpora = new HashMap<>();
/**
* Constructs a KorapClient with the given number of records per
@@ -105,6 +112,16 @@
InputStream jsonStream = response.getEntity().getContent();
try {
resources = objectMapper.readValue(jsonStream, KorapResource[].class);
+
+ // update vc map
+ if (resources.length > virtualCorpora.size()) {
+ for (KorapResource r : resources) {
+ String[] urlParts = r.getLandingPage().split("cq=");
+ if (urlParts.length > 1 && !urlParts[1].isEmpty()) {
+ virtualCorpora.put(r.getResourceId(), urlParts[1]);
+ }
+ }
+ }
}
catch (JsonParseException | JsonMappingException e) {
throw e;
@@ -140,10 +157,11 @@
*
* @throws HttpResponseException
* @throws IOException
+ * @throws SRUException
*/
public KorapResult query (String query, QueryLanguage queryLanguage,
String version, int startRecord, int maximumRecords,
- String[] corpora) throws HttpResponseException, IOException {
+ String[] corpora) throws IOException, SRUException {
if (query == null) {
throw new NullPointerException("Query is null.");
@@ -274,10 +292,13 @@
* @param corpora
* @return a HttpGet request
* @throws URISyntaxException
+ * @throws IOException
+ * @throws SRUException
*/
- private HttpGet createSearchRequest (String query,
- QueryLanguage queryLanguage, String version, int startRecord,
- int maximumRecords, String[] corpora) throws URISyntaxException {
+ private HttpGet createSearchRequest (String query,
+ QueryLanguage queryLanguage, String version, int startRecord,
+ int maximumRecords, String[] corpora)
+ throws URISyntaxException, IOException, SRUException {
if (maximumRecords <= 0) {
maximumRecords = defaultNumOfRecords;
@@ -288,37 +309,80 @@
maximumRecords = defaultMaxRecords;
}
- String corpusQuery = "";
- if (corpora != null && corpora.length > 0) {
- for (int i = 0; i < corpora.length; i++) {
- corpusQuery += "corpusSigle=" + corpora[i];
- if (i != corpora.length - 1) {
- corpusQuery += "|";
- }
- }
- }
+ String corpusQuery = resolveVirtualCorpus(corpora);
+// if (corpora != null && corpora.length > 0) {
+// for (int i = 0; i < corpora.length; i++) {
+// corpusQuery += "corpusSigle=" + corpora[i];
+// if (i != corpora.length - 1) {
+// corpusQuery += "|";
+// }
+// }
+// }
- List<NameValuePair> params = new ArrayList<NameValuePair>();
- params.add(new BasicNameValuePair("q", query));
- params.add(new BasicNameValuePair("ql", queryLanguage.toString()));
- if (!corpusQuery.isEmpty()){
- params.add(new BasicNameValuePair("cq", corpusQuery));
- }
- params.add(new BasicNameValuePair("v", version));
- params.add(new BasicNameValuePair("context", DEFAULT_CONTEXT_TYPE));
- params.add(new BasicNameValuePair("count",
- String.valueOf(maximumRecords)));
- params.add(
- new BasicNameValuePair("offset", String.valueOf(startRecord)));
-
- URIBuilder builder = new URIBuilder(serviceUri + "/search");
- builder.addParameters(params);
-
- URI uri = builder.build();
+ URI uri = createSearchUri(query, queryLanguage, version, startRecord,
+ maximumRecords, corpusQuery, false);
+
logger.info("Query URI: " + uri.toString());
HttpGet request = new HttpGet(uri);
return request;
}
+
+    /**
+     * Resolves the requested resource pids to a single KorAP corpus query.
+     *
+     * Each pid is looked up in {@link #virtualCorpora}. When a pid is not
+     * cached, the resources are retrieved once more (which is expected to
+     * refresh the map) and the lookup is repeated.
+     *
+     * A single resource yields its corpus query unchanged. Several
+     * resources are combined as a union, each one parenthesized so that
+     * operators inside an individual corpus query keep their meaning.
+     * The result never starts with a dangling operator.
+     *
+     * @param corpora
+     *            the requested resource pids, may be null or empty
+     * @return the corpus query, or an empty string when no resource was
+     *         requested
+     * @throws URISyntaxException
+     *             propagated from retrieving the resources
+     * @throws IOException
+     *             propagated from retrieving the resources
+     * @throws SRUException
+     *             if a pid is unknown even after the resources have been
+     *             retrieved again
+     */
+    private String resolveVirtualCorpus (String[] corpora)
+            throws URISyntaxException, IOException, SRUException {
+        if (corpora == null || corpora.length == 0) {
+            return "";
+        }
+
+        List<String> corpusQueries = new ArrayList<String>();
+        for (String pid : corpora) {
+            String cq = virtualCorpora.get(pid);
+            if (cq == null) {
+                // The cached map may be stale: reload the resources once
+                // and try again before giving up on this pid.
+                retrieveResources();
+                cq = virtualCorpora.get(pid);
+            }
+            if (cq == null) {
+                throw new SRUException(
+                        SRUConstants.SRU_GENERAL_SYSTEM_ERROR,
+                        "Virtual corpus with pid: " + pid
+                                + " is not found.");
+            }
+            corpusQueries.add(cq);
+        }
+
+        if (corpusQueries.size() == 1) {
+            return corpusQueries.get(0);
+        }
+
+        // Searching in several resources means searching in their union,
+        // as the previous corpusSigle based implementation did with "|".
+        StringBuilder corpusQuery = new StringBuilder();
+        for (String cq : corpusQueries) {
+            if (corpusQuery.length() > 0) {
+                corpusQuery.append(" | ");
+            }
+            corpusQuery.append('(').append(cq).append(')');
+        }
+        return corpusQuery.toString();
+    }
+
+    /**
+     * Builds the URI of a KorAP search request.
+     *
+     * The query parameters are added in this order: q, ql, cq (only when
+     * the corpus query is not empty), v, context, count, offset and
+     * access-rewrite-disabled=true (only when authenticationRequired is
+     * set).
+     *
+     * @param query
+     *            the query string
+     * @param queryLanguage
+     *            the query language, sent as its string form
+     * @param version
+     *            the query language version
+     * @param startRecord
+     *            sent as the offset parameter
+     * @param maximumRecords
+     *            sent as the count parameter
+     * @param corpusQuery
+     *            the corpus query, must not be null; omitted when empty
+     * @param authenticationRequired
+     *            whether to add the access-rewrite-disabled parameter
+     * @return the search URI below the service URI
+     * @throws URISyntaxException
+     *             if the URI cannot be built
+     */
+    private URI createSearchUri (String query, QueryLanguage queryLanguage,
+            String version, int startRecord, int maximumRecords,
+            String corpusQuery, boolean authenticationRequired)
+            throws URISyntaxException {
+
+        List<NameValuePair> queryParams = new ArrayList<NameValuePair>();
+        queryParams.add(new BasicNameValuePair("q", query));
+        queryParams.add(
+                new BasicNameValuePair("ql", queryLanguage.toString()));
+
+        boolean hasCorpusQuery = !corpusQuery.isEmpty();
+        if (hasCorpusQuery) {
+            queryParams.add(new BasicNameValuePair("cq", corpusQuery));
+        }
+
+        String count = String.valueOf(maximumRecords);
+        String offset = String.valueOf(startRecord);
+        queryParams.add(new BasicNameValuePair("v", version));
+        queryParams.add(
+                new BasicNameValuePair("context", DEFAULT_CONTEXT_TYPE));
+        queryParams.add(new BasicNameValuePair("count", count));
+        queryParams.add(new BasicNameValuePair("offset", offset));
+
+        if (authenticationRequired) {
+            queryParams.add(
+                    new BasicNameValuePair("access-rewrite-disabled", "true"));
+        }
+
+        URIBuilder searchUriBuilder = new URIBuilder(serviceUri + "/search");
+        searchUriBuilder.addParameters(queryParams);
+        return searchUriBuilder.build();
+    }
/**
* Sends a request to the MatchInfo API to get the annotations of
diff --git a/src/main/resources/resources.json b/src/main/resources/resources.json
deleted file mode 100644
index eb27b3f..0000000
--- a/src/main/resources/resources.json
+++ /dev/null
@@ -1,20 +0,0 @@
-[
- {
- "id": "WPD17",
- "description": "German Wikipedia article corpus 2017",
- "name": "Wikipedia articles 2017",
- "data": {}
- },
- {
- "id": "WDD17",
- "description": "German Wikipedia talk corpus 2017",
- "name": "Wikipedia talk corpus 2017",
- "data": {}
- },
- {
- "id": "WUD17",
- "description": "German Wikipedia user talk corpus 2017",
- "name": "Wikipedia user talk 2017",
- "data": {}
- }
-]
\ No newline at end of file
diff --git a/src/main/webapp/WEB-INF/endpoint-description.xml b/src/main/webapp/WEB-INF/endpoint-description.xml
index ed90d1f..80b8f02 100644
--- a/src/main/webapp/WEB-INF/endpoint-description.xml
+++ b/src/main/webapp/WEB-INF/endpoint-description.xml
@@ -12,7 +12,6 @@
<SupportedDataViews>
<SupportedDataView id="hits" delivery-policy="send-by-default">application/x-clarin-fcs-hits+xml</SupportedDataView>
<SupportedDataView id="adv" delivery-policy="send-by-default">application/x-clarin-fcs-adv+xml</SupportedDataView>
- <SupportedDataView id="kwic" delivery-policy="need-to-request">application/x-clarin-fcs-kwic+xml</SupportedDataView>
</SupportedDataViews>
<SupportedLayers>
<SupportedLayer id="l1" result-id="http://clarin.ids-mannheim.de/korapsru/layers/text">text</SupportedLayer>
diff --git a/src/test/java/de/ids_mannheim/korap/test/BaseTest.java b/src/test/java/de/ids_mannheim/korap/test/BaseTest.java
index 560a11e..3fc8726 100644
--- a/src/test/java/de/ids_mannheim/korap/test/BaseTest.java
+++ b/src/test/java/de/ids_mannheim/korap/test/BaseTest.java
@@ -44,13 +44,13 @@
mockServer.stop();
}
- protected void createRetrieveResource () throws IOException {
+ protected void createExpectationForRetrieveResource () throws IOException {
String korapResources = IOUtils.toString(
ClassLoader.getSystemResourceAsStream(
"korap-api-responses/resources.json"),
StandardCharsets.UTF_8);
- mockClient.reset()
+ mockClient
.when(request().withMethod("GET").withPath("/resource"))
.respond(response()
.withHeader(new Header("Content-Type",
@@ -66,7 +66,7 @@
"korap-api-responses/" + jsonFilename),
StandardCharsets.UTF_8);
- mockClient.reset()
+ mockClient
.when(request().withMethod("GET").withPath("/search")
.withQueryStringParameter("q", query)
.withQueryStringParameter("ql", queryLanguage)
diff --git a/src/test/java/de/ids_mannheim/korap/test/KorapClientTest.java b/src/test/java/de/ids_mannheim/korap/test/KorapClientTest.java
index 30b5ec3..0206da7 100644
--- a/src/test/java/de/ids_mannheim/korap/test/KorapClientTest.java
+++ b/src/test/java/de/ids_mannheim/korap/test/KorapClientTest.java
@@ -13,6 +13,7 @@
import de.ids_mannheim.korap.sru.KorapResource;
import de.ids_mannheim.korap.sru.KorapResult;
import de.ids_mannheim.korap.sru.QueryLanguage;
+import eu.clarin.sru.server.SRUException;
/**
* The tests are based on the sample corpus from the Goethe corpus.
@@ -29,10 +30,11 @@
c = new KorapClient("http://localhost:1080", 25, 50);
}
- @Test
- public void testCQLQuery () throws HttpResponseException, IOException {
+ @Test
+ public void testCQLQuery ()
+ throws HttpResponseException, IOException, SRUException {
- createExpectationForSearch("der", "cql", "1.2", "50",
+ createExpectationForSearch("der", "cql", "1.2", "50",
"search-der.jsonld");
result = c.query("der", QueryLanguage.CQL, "1.2", 51, 1, null);
@@ -49,8 +51,9 @@
}
- @Test
- public void testOrQuery () throws HttpResponseException, IOException {
+ @Test
+ public void testOrQuery ()
+ throws HttpResponseException, IOException, SRUException {
createExpectationForSearch("(\"blaue\"|\"grüne\")", "fcsql", "2.0", "0",
"search-or.jsonld");
@@ -106,11 +109,14 @@
@Test
public void testRetrieveResource ()
throws HttpResponseException, Exception {
- createRetrieveResource();
+ createExpectationForRetrieveResource();
KorapResource[] resources = c.retrieveResources();
assertEquals(3, resources.length);
- assertEquals("WPD17", resources[0].getResourceId());
- assertEquals("WDD17", resources[1].getResourceId());
- assertEquals("WUD17", resources[2].getResourceId());
+ assertEquals("http://hdl.handle.net/10932/00-03B6-558F-4E10-6201-1",
+ resources[0].getResourceId());
+ assertEquals("http://hdl.handle.net/10932/00-03B6-558F-5EA0-6301-B",
+ resources[1].getResourceId());
+ assertEquals("http://hdl.handle.net/10932/00-03B6-558F-6EF0-6401-F",
+ resources[2].getResourceId());
}
}
diff --git a/src/test/java/de/ids_mannheim/korap/test/KorapSRUTest.java b/src/test/java/de/ids_mannheim/korap/test/KorapSRUTest.java
index ef6511a..d04b0f2 100644
--- a/src/test/java/de/ids_mannheim/korap/test/KorapSRUTest.java
+++ b/src/test/java/de/ids_mannheim/korap/test/KorapSRUTest.java
@@ -40,7 +40,7 @@
"korap-api-responses/search-fein.jsonld"),
StandardCharsets.UTF_8);
- mockClient.reset()
+ mockClient
.when(request().withMethod("GET").withPath("/search")
.withQueryStringParameter("q", "fein")
.withQueryStringParameter("ql", "cql")
@@ -53,6 +53,27 @@
"application/json; charset=utf-8"))
.withBody(searchResult).withStatusCode(200));
}
+
+    /**
+     * Registers a mock server expectation for a GET /search request for
+     * the CQL query "fein" restricted by cq "corpusSigle=unknown". The
+     * response has status 200 and the body of the
+     * search-fein-unknown-cq.jsonld fixture.
+     *
+     * @throws IOException
+     *             if the fixture cannot be read
+     */
+    private void createExpectationForSearchFeinWithUnknownCq () throws IOException {
+        InputStream fixture = ClassLoader.getSystemResourceAsStream(
+                "korap-api-responses/search-fein-unknown-cq.jsonld");
+        String responseBody =
+                IOUtils.toString(fixture, StandardCharsets.UTF_8);
+        Header jsonContentType = new Header("Content-Type",
+                "application/json; charset=utf-8");
+
+        mockClient
+                .when(request().withMethod("GET").withPath("/search")
+                        .withQueryStringParameter("q", "fein")
+                        .withQueryStringParameter("ql", "cql")
+                        .withQueryStringParameter("v", "1.2")
+                        .withQueryStringParameter("context", "sentence")
+                        .withQueryStringParameter("count", "25")
+                        .withQueryStringParameter("offset", "0")
+                        .withQueryStringParameter("cq","corpusSigle=unknown"))
+                .respond(response()
+                        .withHeader(jsonContentType)
+                        .withBody(responseBody)
+                        .withStatusCode(200));
+    }
private void createExpectationForMatchInfoFein () throws IOException {
String matchInfoResult = IOUtils.toString(
@@ -124,17 +145,45 @@
public void searchRetrieveWithResourceId ()
throws IOException, URISyntaxException, IllegalStateException,
SAXException, ParserConfigurationException {
- createExpectationForSearchFein();
+ createExpectationForRetrieveResource();
+ createExpectationForSearchFein();
createExpectationForMatchInfoFein();
ClientResponse response = resource()
.queryParam("operation", "searchRetrieve")
.queryParam("query", "fein").queryParam("version", "1.2")
- .queryParam("x-fcs-context", "GOE").get(ClientResponse.class);
+ .queryParam("x-fcs-context", "http://hdl.handle.net/10932/00-03B6-558F-4E10-6201-1").get(ClientResponse.class);
InputStream entity = response.getEntity(InputStream.class);
checkSearchRetrieveResponseSRUVersion1_2(entity);
}
+
+    /**
+     * A searchRetrieve request whose x-fcs-context is not a known resource
+     * pid must be answered with the general system error diagnostic and a
+     * message naming the pid.
+     */
+    @Test
+    public void searchRetrieveWithUnknownResourceId ()
+            throws IOException, URISyntaxException, IllegalStateException,
+            SAXException, ParserConfigurationException {
+
+        createExpectationForRetrieveResource();
+        createExpectationForSearchFeinWithUnknownCq();
+        createExpectationForMatchInfoFein();
+
+        ClientResponse sruResponse = resource()
+                .queryParam("operation", "searchRetrieve")
+                .queryParam("query", "fein")
+                .queryParam("version", "1.2")
+                .queryParam("x-fcs-context", "unknown")
+                .get(ClientResponse.class);
+
+        InputStream responseStream = sruResponse.getEntity(InputStream.class);
+        docBuilder = factory.newDocumentBuilder();
+        Document responseDoc = docBuilder.parse(responseStream);
+
+        // the first diagnostic carries the SRU general system error URI
+        NodeList uriNodes = responseDoc.getElementsByTagName("diag:uri");
+        String firstUri = uriNodes.item(0).getTextContent();
+        assertEquals("info:srw/diagnostic/1/1", firstUri);
+
+        // ... and a message that names the unresolved pid
+        NodeList messageNodes =
+                responseDoc.getElementsByTagName("diag:message");
+        String firstMessage = messageNodes.item(0).getTextContent();
+        assertEquals("Virtual corpus with pid: unknown is not found.",
+                firstMessage);
+    }
@Test
public void searchRetrieveFCSQLTest ()
@@ -279,7 +328,7 @@
throws URISyntaxException, ClientProtocolException, IOException,
IllegalStateException, SAXException {
- createRetrieveResource();
+ createExpectationForRetrieveResource();
ClientResponse response = resource().queryParam("operation", "explain")
.queryParam("x-fcs-endpoint-description", "true")
diff --git a/src/test/resources/korap-api-responses/resources.json b/src/test/resources/korap-api-responses/resources.json
index 64eb69c..929572a 100644
--- a/src/test/resources/korap-api-responses/resources.json
+++ b/src/test/resources/korap-api-responses/resources.json
@@ -1,59 +1,65 @@
[
- {
- "resourceId": "WPD17",
- "titles": {
- "de": "Deutsche Wikipedia Artikel 2017",
- "en": "German Wikipedia Articles 2017"
- },
- "description": "A collection of articles of German Wikipedia from July 1st, 2017.",
- "languages": ["deu"],
- "layers": {
- "18": "marmot/m",
- "19": "marmot/p",
- "23": "opennlp/p",
- "11": "corenlp/p",
- "27": "tt/l",
- "28": "tt/p"
- },
- "institution" : "Wikimedia Foundation",
- "landingPage" : "https://korap.ids-mannheim.de?corpusSigle=WPD17"
+ {
+ "resourceId": "http://hdl.handle.net/10932/00-03B6-558F-4E10-6201-1",
+ "titles": {
+ "de": "Deutsche Wikipedia Artikel 2017",
+ "en": "German Wikipedia Articles 2017"
},
- {
- "resourceId": "WDD17",
- "titles": {
- "de": "Deutsche Wikipedia-Diskussionskorpus 2017",
- "en": "German Wikipedia talk corpus 2017"
- },
- "description": "A collection of talk pages of German Wikipedia from July 1st, 2017.",
- "languages": ["deu"],
- "layers": {
- "18": "marmot/m",
- "19": "marmot/p",
- "23": "opennlp/p",
- "11": "corenlp/p",
- "27": "tt/l",
- "28": "tt/p"
- },
- "institution" : "Wikimedia Foundation",
- "landingPage" : "https://korap.ids-mannheim.de?corpusSigle=WDD17"
+ "description": "A collection of articles of German Wikipedia from July 1st, 2017.",
+ "languages": [
+ "deu"
+ ],
+ "layers": {
+ "1": "opennlp/p",
+ "14": "marmot/m",
+ "15": "marmot/p",
+ "29": "corenlp/p",
+ "30": "tt/l",
+ "31": "tt/p"
},
- {
- "resourceId": "WUD17",
- "titles": {
- "de": "Deutsche Wikipedia-Benutzerdiskussionskorpus 2017",
- "en": "German Wikipedia talk corpus 2017"
- },
- "description": "A collection of user talk pages of German Wikipedia from July 1st, 2017.",
- "languages": ["deu"],
- "layers": {
- "18": "marmot/m",
- "19": "marmot/p",
- "23": "opennlp/p",
- "11": "corenlp/p",
- "27": "tt/l",
- "28": "tt/p"
- },
- "institution" : "Wikimedia Foundation",
- "landingPage" : "https://korap.ids-mannheim.de?corpusSigle=WUD17"
- }
+ "institution": "IDS Mannheim",
+ "landingPage": "https://korap.ids-mannheim.de?cq=corpusSigle=WPD17"
+ },
+ {
+ "resourceId": "http://hdl.handle.net/10932/00-03B6-558F-5EA0-6301-B",
+ "titles": {
+ "de": "Deutsche Wikipedia-Diskussionskorpus 2017",
+ "en": "German Wikipedia talk corpus 2017"
+ },
+ "description": "A collection of talk pages of German Wikipedia from July 1st, 2017.",
+ "languages": [
+ "deu"
+ ],
+ "layers": {
+ "1": "opennlp/p",
+ "14": "marmot/m",
+ "15": "marmot/p",
+ "29": "corenlp/p",
+ "30": "tt/l",
+ "31": "tt/p"
+ },
+ "institution": "IDS Mannheim",
+ "landingPage": "https://korap.ids-mannheim.de?cq=corpusSigle=WDD17"
+ },
+ {
+ "resourceId": "http://hdl.handle.net/10932/00-03B6-558F-6EF0-6401-F",
+ "titles": {
+ "de": "Deutsche Wikipedia-Benutzerdiskussionskorpus 2017",
+ "en": "German Wikipedia user talk corpus 2017"
+ },
+ "description": "A collection of user talk pages of German Wikipedia from July 1st, 2017.",
+ "languages": [
+ "deu"
+ ],
+ "layers": {
+ "1": "opennlp/p",
+ "14": "marmot/m",
+ "15": "marmot/p",
+ "29": "corenlp/p",
+ "30": "tt/l",
+ "31": "tt/p"
+ },
+ "institution": "IDS Mannheim",
+ "landingPage": "https://korap.ids-mannheim.de?cq=corpusSigle=WUD17"
+ }
]
\ No newline at end of file
diff --git a/src/test/resources/korap-api-responses/search-fein-unknown-cq.jsonld b/src/test/resources/korap-api-responses/search-fein-unknown-cq.jsonld
new file mode 100644
index 0000000..f886d50
--- /dev/null
+++ b/src/test/resources/korap-api-responses/search-fein-unknown-cq.jsonld
@@ -0,0 +1,95 @@
+{
+ "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
+ "meta": {
+ "count": 25,
+ "startIndex": 0,
+ "timeout": 10000,
+ "context": {
+ "left": [
+ "token",
+ 6
+ ],
+ "right": [
+ "token",
+ 6
+ ]
+ },
+ "fields": [
+ "ID",
+ "UID",
+ "textSigle",
+ "corpusID",
+ "author",
+ "title",
+ "subTitle",
+ "textClass",
+ "pubPlace",
+ "pubDate",
+ "availability",
+ "layerInfos",
+ "docSigle",
+ "corpusSigle"
+ ],
+ "version": "0.63.3",
+ "benchmark": "0.19016714 s",
+ "totalResources": 0,
+ "totalResults": 0,
+ "serialQuery": "tokens:s:ich",
+ "itemsPerPage": 25
+ },
+ "query": {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "match": "match:eq",
+ "layer": "orth",
+ "key": "ich",
+ "foundry": "opennlp",
+ "rewrites": [
+ {
+ "@type": "koral:rewrite",
+ "src": "Kustvakt",
+ "editor": "Kustvakt",
+ "operation": "operation:injection",
+ "scope": "foundry",
+ "_comment": "Default foundry has been added."
+ }
+ ]
+ }
+ },
+ "collection": {
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "match": "match:eq",
+ "type": "type:regex",
+ "value": "CC.*",
+ "key": "availability"
+ },
+ {
+ "@type": "koral:doc",
+ "match": "match:eq",
+ "value": "unknown",
+ "key": "corpusSigle"
+ }
+ ],
+ "rewrites": [
+ {
+ "@type": "koral:rewrite",
+ "src": "Kustvakt",
+ "editor": "Kustvakt",
+ "operation": "operation:override",
+ "original": {
+ "@type": "koral:doc",
+ "match": "match:eq",
+ "value": "unknown",
+ "key": "corpusSigle"
+ },
+ "_comment": "Free corpus access policy has been added."
+ }
+ ]
+ },
+ "matches": []
+}
\ No newline at end of file