Add cache for virtual corpus access (#63).
Change-Id: I2b8ccc1602e12748d33f01791466f34b1abc6caf
diff --git a/ChangeLog b/ChangeLog
index 0153d19..70ec602 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,6 +2,7 @@
- handle resource pid and resolve resources (virtual corpora) using map (#62)
- handle empty description.
+- add cache for virtual corpus access (#63).
1.0.7-SNAPSHOT
diff --git a/src/main/java/de/ids_mannheim/korap/sru/KorapClient.java b/src/main/java/de/ids_mannheim/korap/sru/KorapClient.java
index 1c313c0..b86739d 100644
--- a/src/main/java/de/ids_mannheim/korap/sru/KorapClient.java
+++ b/src/main/java/de/ids_mannheim/korap/sru/KorapClient.java
@@ -6,6 +6,7 @@
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
+import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -56,7 +57,9 @@
(Logger) LoggerFactory.getLogger(KorapClient.class);
// pid : cq
- public static Map<String, String> virtualCorpora = new HashMap<>();
+ public static Map<String, String> virtualCorpusQueries = new HashMap<>();
+ // pid : free access (true when the resource's required access is "FREE")
+ public static Map<String, Boolean> virtualCorpusAccesses = new HashMap<>();
/**
* Constructs a KorapClient with the given number of records per
@@ -114,11 +117,16 @@
resources = objectMapper.readValue(jsonStream, KorapResource[].class);
// update vc map
- if (resources.length > virtualCorpora.size()) {
+ if (resources.length > virtualCorpusQueries.size()) {
for (KorapResource r : resources) {
String[] urlParts = r.getLandingPage().split("cq=");
+ boolean freeAccess = false;
+ if (r.getRequiredAccess().equals("FREE")) {
+ freeAccess = true;
+ }
if (urlParts.length > 1 && !urlParts[1].isEmpty()) {
- virtualCorpora.put(r.getResourceId(), urlParts[1]);
+ virtualCorpusQueries.put(r.getResourceId(), urlParts[1]);
+ virtualCorpusAccesses.put(r.getResourceId(), freeAccess);
}
}
}
@@ -196,16 +204,10 @@
try {
result = objectMapper.readValue(jsonStream, KorapResult.class);
}
-// catch (IOException e) {
-// throw new IOException("Failed processing response.");
-// }
finally {
jsonStream.close();
}
}
-// catch (IOException e) {
-// throw new IOException("Failed executing HTTP request.", e);
-// }
finally {
if (response != null) {
response.close();
@@ -282,7 +284,7 @@
* @param query
* a query string
* @param queryLanguage
- * the query language
+ * the query language
* @param version
* the query language version
* @param startRecord
@@ -309,55 +311,50 @@
maximumRecords = defaultMaxRecords;
}
- String corpusQuery = resolveVirtualCorpus(corpora);
-// if (corpora != null && corpora.length > 0) {
-// for (int i = 0; i < corpora.length; i++) {
-// corpusQuery += "corpusSigle=" + corpora[i];
-// if (i != corpora.length - 1) {
-// corpusQuery += "|";
-// }
-// }
-// }
-
- URI uri = createSearchUri(query, queryLanguage, version, startRecord,
- maximumRecords, corpusQuery, false);
-
- logger.info("Query URI: " + uri.toString());
- HttpGet request = new HttpGet(uri);
- return request;
- }
-
- private String resolveVirtualCorpus (String[] corpora)
- throws URISyntaxException, IOException, SRUException {
- String corpusQuery = "";
+ String corpusQuery = "";
+ boolean freeAccess = true;
if (corpora != null && corpora.length > 0) {
for (int i = 0; i < corpora.length; i++) {
String pid = corpora[i];
- String cq = virtualCorpora.get(pid);
- if (cq != null) {
- corpusQuery += " & " + cq;
- }
- else {
+ String cq = virtualCorpusQueries.get(pid);
+
+ if (cq == null) {
retrieveResources();
- cq = virtualCorpora.get(pid);
- if (cq != null) {
- corpusQuery += " & " + cq;
- }
- else {
+ cq = virtualCorpusQueries.get(pid);
+ if (cq == null) {
throw new SRUException(
SRUConstants.SRU_GENERAL_SYSTEM_ERROR,
"Virtual corpus with pid: " + pid
+ " is not found.");
}
- }
+ }
+ cq = URLDecoder.decode(cq, "utf-8");
+ if (i == 0) {
+ corpusQuery = cq;
+ }
+ else {
+ corpusQuery += " & " + cq;
+ }
+
+ if (!virtualCorpusAccesses.get(pid)) {
+ freeAccess = false;
+ }
}
}
- return corpusQuery;
- }
+
+ URI uri = createSearchUri(query, queryLanguage, version, startRecord,
+ maximumRecords, corpusQuery, freeAccess);
+
+// logger.info("Query URI: " + uri.toString());
+ System.out.println(uri.toString());
+ HttpGet request = new HttpGet(uri);
+ return request;
+ }
+
private URI createSearchUri (String query, QueryLanguage queryLanguage,
String version, int startRecord, int maximumRecords,
- String corpusQuery, boolean authenticationRequired)
+ String corpusQuery, boolean freeAccess)
throws URISyntaxException {
List<NameValuePair> params = new ArrayList<NameValuePair>();
@@ -373,7 +370,7 @@
params.add(
new BasicNameValuePair("offset", String.valueOf(startRecord)));
- if (authenticationRequired) {
+ if (!freeAccess) {
params.add(
new BasicNameValuePair("access-rewrite-disabled", "true"));
}
diff --git a/src/main/java/de/ids_mannheim/korap/sru/KorapResource.java b/src/main/java/de/ids_mannheim/korap/sru/KorapResource.java
index 43eee16..2f1c892 100644
--- a/src/main/java/de/ids_mannheim/korap/sru/KorapResource.java
+++ b/src/main/java/de/ids_mannheim/korap/sru/KorapResource.java
@@ -14,6 +14,7 @@
private Map<Integer, String> layers;
private String institution;
private String landingPage;
+ private String requiredAccess;
public String getResourceId () {
return resourceId;
@@ -57,5 +58,11 @@
public void setLandingPage (String landingPage) {
this.landingPage = landingPage;
}
+ public String getRequiredAccess () {
+ return requiredAccess;
+ }
+ public void setRequiredAccess (String requiredAccess) {
+ this.requiredAccess = requiredAccess;
+ }
}
diff --git a/src/test/java/de/ids_mannheim/korap/test/BaseTest.java b/src/test/java/de/ids_mannheim/korap/test/BaseTest.java
index 3fc8726..1dbf484 100644
--- a/src/test/java/de/ids_mannheim/korap/test/BaseTest.java
+++ b/src/test/java/de/ids_mannheim/korap/test/BaseTest.java
@@ -58,27 +58,54 @@
.withBody(korapResources).withStatusCode(200));
}
- protected void createExpectationForSearch (String query,
- String queryLanguage, String version, String offset,
- String jsonFilename) throws IOException {
- String searchResult = IOUtils.toString(
- ClassLoader.getSystemResourceAsStream(
- "korap-api-responses/" + jsonFilename),
- StandardCharsets.UTF_8);
+ protected void createExpectationForSearch (String query,
+ String queryLanguage, String version, String offset,
+ String jsonFilename) throws IOException {
+ String searchResult = IOUtils.toString(
+ ClassLoader.getSystemResourceAsStream(
+ "korap-api-responses/" + jsonFilename),
+ StandardCharsets.UTF_8);
- mockClient
+ mockClient
+ .when(request().withMethod("GET").withPath("/search")
+ .withQueryStringParameter("q", query)
+ .withQueryStringParameter("ql", queryLanguage)
+ .withQueryStringParameter("v", version)
+ .withQueryStringParameter("context", "sentence")
+ .withQueryStringParameter("count", "1")
+ .withQueryStringParameter("offset", offset))
+ .respond(response()
+ .withHeader(new Header("Content-Type",
+ "application/json; charset=utf-8"))
+ .withBody(searchResult).withStatusCode(200));
+
+ }
+
+ protected void createExpectationForSearch (String query,
+ String queryLanguage, String version, String offset, String cq,
+ boolean accessRewriteDisabled, String jsonFilename)
+ throws IOException {
+ String searchResult = IOUtils.toString(
+ ClassLoader.getSystemResourceAsStream(
+ "korap-api-responses/" + jsonFilename),
+ StandardCharsets.UTF_8);
+
+ mockClient
.when(request().withMethod("GET").withPath("/search")
.withQueryStringParameter("q", query)
.withQueryStringParameter("ql", queryLanguage)
+ .withQueryStringParameter("cq", cq)
.withQueryStringParameter("v", version)
.withQueryStringParameter("context", "sentence")
.withQueryStringParameter("count", "1")
- .withQueryStringParameter("offset", offset))
+ .withQueryStringParameter("offset", offset)
+ .withQueryStringParameter("access-rewrite-disabled",
+ String.valueOf(accessRewriteDisabled)))
.respond(response()
.withHeader(new Header("Content-Type",
"application/json; charset=utf-8"))
.withBody(searchResult).withStatusCode(200));
- }
+ }
protected void createExpectationForMatchInfo (String jsonFilename,
String uriPath) throws IOException {
diff --git a/src/test/java/de/ids_mannheim/korap/test/KorapClientTest.java b/src/test/java/de/ids_mannheim/korap/test/KorapClientTest.java
index 0206da7..94e19e9 100644
--- a/src/test/java/de/ids_mannheim/korap/test/KorapClientTest.java
+++ b/src/test/java/de/ids_mannheim/korap/test/KorapClientTest.java
@@ -7,6 +7,7 @@
import org.apache.http.client.HttpResponseException;
import org.junit.Test;
+import org.mockserver.model.HttpRequest;
import de.ids_mannheim.korap.sru.KorapClient;
import de.ids_mannheim.korap.sru.KorapMatch;
@@ -119,4 +120,16 @@
assertEquals("http://hdl.handle.net/10932/00-03B6-558F-6EF0-6401-F",
resources[2].getResourceId());
}
+
+ @Test
+ public void testSearchRequest () throws IOException, SRUException {
+ createExpectationForRetrieveResource();
+ createExpectationForSearch("\"Freizeit\"", "fcsql", "2.0", "0",
+ "textType = /.*[Rr]oman/", true,
+ "search-public-metadata.jsonld");
+ KorapResult result = c.query("\"Freizeit\"", QueryLanguage.FCSQL, "2.0",
+ 1, 1, new String[] { "Romane" });
+ assertEquals(702, result.getTotalResults());
+ assertEquals(0, result.getMatchSize());
+ }
}
diff --git a/src/test/resources/korap-api-responses/search-public-metadata.jsonld b/src/test/resources/korap-api-responses/search-public-metadata.jsonld
new file mode 100644
index 0000000..6ca6a1d
--- /dev/null
+++ b/src/test/resources/korap-api-responses/search-public-metadata.jsonld
@@ -0,0 +1,210 @@
+{
+ "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
+ "meta": {
+ "count": 1,
+ "startIndex": 0,
+ "timeout": 90000,
+ "context": "base/s:s",
+ "fields": [
+ "ID",
+ "UID",
+ "textSigle",
+ "corpusID",
+ "author",
+ "title",
+ "subTitle",
+ "textClass",
+ "pubPlace",
+ "pubDate",
+ "availability",
+ "layerInfos",
+ "docSigle",
+ "corpusSigle"
+ ],
+ "version": "0.64.0",
+ "benchmark": "2.79172047 s",
+ "totalResources": -1,
+ "totalResults": 702,
+ "serialQuery": "SpanMultiTermQueryWrapper(tokens:/s:Freizeit/)",
+ "itemsPerPage": 1
+ },
+ "query": {
+ "@type": "koral:token",
+ "wrap": {
+ "@type": "koral:term",
+ "key": "Freizeit",
+ "foundry": "opennlp",
+ "layer": "orth",
+ "type": "type:regex",
+ "match": "match:eq"
+ }
+ },
+ "collection": {
+ "@type": "koral:docGroup",
+ "operation": "operation:and",
+ "operands": [
+ {
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "match": "match:eq",
+ "type": "type:regex",
+ "value": "CC.*",
+ "key": "availability"
+ },
+ {
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "match": "match:eq",
+ "type": "type:regex",
+ "value": "ACA.*",
+ "key": "availability"
+ },
+ {
+ "operands": [
+ {
+ "@type": "koral:doc",
+ "match": "match:eq",
+ "type": "type:regex",
+ "value": "QAO-NC",
+ "key": "availability"
+ },
+ {
+ "@type": "koral:doc",
+ "match": "match:eq",
+ "type": "type:regex",
+ "value": "QAO.*",
+ "key": "availability"
+ }
+ ],
+ "@type": "koral:docGroup",
+ "operation": "operation:or"
+ }
+ ],
+ "@type": "koral:docGroup",
+ "operation": "operation:or"
+ }
+ ],
+ "@type": "koral:docGroup",
+ "operation": "operation:or"
+ },
+ {
+ "@type": "koral:doc",
+ "match": "match:eq",
+ "type": "type:regex",
+ "value": ".*[Rr]oman",
+ "key": "textType"
+ }
+ ],
+ "rewrites": [
+ {
+ "@type": "koral:rewrite",
+ "src": "Kustvakt",
+ "editor": "Kustvakt",
+ "operation": "operation:override",
+ "original": {
+ "@type": "koral:doc",
+ "match": "match:eq",
+ "type": "type:regex",
+ "value": ".*[Rr]oman",
+ "key": "textType"
+ },
+ "_comment": "All corpus access policy has been added."
+ }
+ ]
+ },
+ "matches": [
+ {
+ "@context": "http://korap.ids-mannheim.de/ns/KoralQuery/v0.3/context.jsonld",
+ "meta": {},
+ "hasSnippet": false,
+ "hasTokens": false,
+ "matchID": "match-GR1/TL1/09008-p9047-9048x_tCjqTBKUA_ThfzoAYf4uSWtbbm2eLBIvf0CNfdYtNWo",
+ "fields": [
+ {
+ "@type": "koral:field",
+ "key": "ID"
+ },
+ {
+ "@type": "koral:field",
+ "key": "textSigle",
+ "type": "type:string",
+ "value": "GR1/TL1/09008"
+ },
+ {
+ "@type": "koral:field",
+ "key": "corpusID"
+ },
+ {
+ "@type": "koral:field",
+ "key": "author",
+ "type": "type:text",
+ "value": "de Groot, Anne"
+ },
+ {
+ "@type": "koral:field",
+ "key": "title",
+ "type": "type:text",
+ "value": "Dein Vater wird uns liebgewinnen"
+ },
+ {
+ "@type": "koral:field",
+ "key": "subTitle",
+ "type": "type:text",
+ "value": "Delias Leidensweg"
+ },
+ {
+ "@type": "koral:field",
+ "key": "textClass"
+ },
+ {
+ "@type": "koral:field",
+ "key": "pubPlace",
+ "type": "type:string",
+ "value": "Hamburg"
+ },
+ {
+ "@type": "koral:field",
+ "key": "pubDate",
+ "type": "type:date",
+ "value": "1990"
+ },
+ {
+ "@type": "koral:field",
+ "key": "availability",
+ "type": "type:string",
+ "value": "QAO-NC"
+ },
+ {
+ "@type": "koral:field",
+ "key": "layerInfos",
+ "type": "type:store",
+ "value": "corenlp/c=spans corenlp/p=tokens corenlp/s=spans dereko/s=spans malt/d=rels marmot/m=tokens marmot/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens"
+ },
+ {
+ "@type": "koral:field",
+ "key": "docSigle",
+ "type": "type:string",
+ "value": "GR1/TL1"
+ },
+ {
+ "@type": "koral:field",
+ "key": "corpusSigle",
+ "type": "type:string",
+ "value": "GR1"
+ }
+ ],
+ "textSigle": "GR1/TL1/09008",
+ "author": "de Groot, Anne",
+ "title": "Dein Vater wird uns liebgewinnen",
+ "subTitle": "Delias Leidensweg",
+ "pubPlace": "Hamburg",
+ "pubDate": "1990",
+ "availability": "QAO-NC",
+ "layerInfos": "corenlp/c=spans corenlp/p=tokens corenlp/s=spans dereko/s=spans malt/d=rels marmot/m=tokens marmot/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens",
+ "docSigle": "GR1/TL1",
+ "corpusSigle": "GR1"
+ }
+ ]
+}
\ No newline at end of file