blob: ea6d5c426ebae25d3c6be6350164501e8c30e2b0 [file] [log] [blame]
Marc Kupietzfd9e7492019-11-08 15:45:18 +01001################################################################################
2# Use setClassUnion to define the unholy NULL-data union as a virtual class.
3################################################################################
4setClassUnion("characterOrNULL", c("character", "NULL"))
Marc Kupietza4675722022-02-23 23:55:15 +01005setClassUnion("listOrNULL", c("list", "NULL"))
Marc Kupietzfd9e7492019-11-08 15:45:18 +01006
Marc Kupietze95108e2019-09-18 13:23:58 +02007#' Class KorAPConnection
Marc Kupietz25aebc32019-09-16 18:40:50 +02008#'
Marc Kupietz67edcb52021-09-20 21:54:24 +02009#' `KorAPConnection` objects represent the connection to a KorAP server.
10#' New `KorAPConnection` objects can be created by `new("KorAPConnection")`.
Marc Kupietze95108e2019-09-18 13:23:58 +020011#'
Marc Kupietz0a96b282019-10-01 11:05:31 +020012#' @import R.cache
Marc Kupietze95108e2019-09-18 13:23:58 +020013#' @import utils
14#' @import methods
Marc Kupietza81343d2022-09-06 12:32:10 +020015#' @slot KorAPUrl URL of the web user interface of the KorAP server used in the connection.
16#' @slot apiVersion requested KorAP API version.
17#' @slot indexRevision indexRevision code as reported from API via `X-Index-Revision` HTTP header.
18#' @slot apiUrl full URL of API including version.
19#' @slot accessToken OAuth2 access token.
20#' @slot userAgent user agent string used for connection the API.
21#' @slot timeout tineout in seconds for API requests (this does not influence server internal timeouts)
22#' @slot verbose logical that decides whether operations will default to be verbose.
23#' @slot cache logical that decides if API calls are cached locally.
24#' @slot welcome list containing HTTP response received from KorAP server welcome function.
25
Marc Kupietze95108e2019-09-18 13:23:58 +020026#' @export
Marc Kupietza4675722022-02-23 23:55:15 +010027KorAPConnection <- setClass("KorAPConnection", slots=c(KorAPUrl="character", apiVersion="character", indexRevision="characterOrNULL", apiUrl="character", accessToken="characterOrNULL", userAgent="character", timeout="numeric", verbose="logical", cache="logical", welcome="listOrNULL"))
Marc Kupietze95108e2019-09-18 13:23:58 +020028
29#' @param .Object KorAPConnection object
Marc Kupietza81343d2022-09-06 12:32:10 +020030#' @param KorAPUrl URL of the web user interface of the KorAP server instance you want to access.
Marc Kupietze95108e2019-09-18 13:23:58 +020031#' @param apiVersion which version of KorAP's API you want to connect to.
32#' @param apiUrl URL of the KorAP web service.
Marc Kupietz132f0052023-04-16 14:23:05 +020033#' @param accessToken OAuth2 access token. For queries on corpus parts with restricted
34#' access (e.g. textual queries on IPR protected data), you need to authorize
35#' your application with an access token.
36#' How to obtain an access token for the DeReKo KorAP instance is explained in the
37#' [authorization section](https://github.com/KorAP/RKorAPClient#authorization)
38#' of the RKorAPClient Readme on GitHub.
39#'
40#' To use authorization based on an access token
41#' in subsequent queries, initialize your KorAP connection with:
42#'
43#' ```
44#' kco <- new("KorAPConnection", accessToken="<access token>")
45#' ```
46#'
Marc Kupietz4862b862019-11-07 10:13:53 +010047#' In order to make the API
Marc Kupietz67edcb52021-09-20 21:54:24 +020048#' token persistent for the currently used `KorAPUrl` (you can have one
Marc Kupietz132f0052023-04-16 14:23:05 +020049#' token per KorAPUrl / KorAP server instance), use:
50#'
51#' ```
52#' persistAccessToken(kco)
53#' ```
54#'
55#' This will store it in your keyring using the
Marc Kupietz67edcb52021-09-20 21:54:24 +020056#' [keyring()] package. Subsequent new("KorAPConnection") calls will
Marc Kupietz4862b862019-11-07 10:13:53 +010057#' then automatically retrieve the token from your keying. To stop using a
Marc Kupietz67edcb52021-09-20 21:54:24 +020058#' persisted token, call `clearAccessToken(kco)`. Please note that for
Marc Kupietz4862b862019-11-07 10:13:53 +010059#' DeReKo, authorized queries will behave differently inside and outside the
60#' IDS, because of the special license situation. This concerns also cached
61#' results which do not take into account from where a request was issued. If
Marc Kupietz67edcb52021-09-20 21:54:24 +020062#' you experience problems or unexpected results, please try `kco <-
63#' new("KorAPConnection", cache=FALSE)` or use
64#' [clearCache()] to clear the cache completely.
Marc Kupietz132f0052023-04-16 14:23:05 +020065#'
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +020066#' @param userAgent user agent string.
Marc Kupietza81343d2022-09-06 12:32:10 +020067#' @param timeout tineout in seconds for API requests (this does not influence server internal timeouts).
68#' @param verbose logical that decides whether following operations will default to
Marc Kupietz4862b862019-11-07 10:13:53 +010069#' be verbose.
Marc Kupietza81343d2022-09-06 12:32:10 +020070#' @param cache logical that decides if API calls are cached locally. You can clear
Marc Kupietz67edcb52021-09-20 21:54:24 +020071#' the cache with [clearCache()].
72#' @return [KorAPConnection()] object that can be used e.g. with
73#' [corpusQuery()]
Marc Kupietze95108e2019-09-18 13:23:58 +020074#'
75#' @examples
Marc Kupietz6ae76052021-09-21 10:34:00 +020076#' \dontrun{
77#'
Marc Kupietz5a519822019-09-20 21:43:52 +020078#' kcon <- new("KorAPConnection", verbose = TRUE)
Marc Kupietze95108e2019-09-18 13:23:58 +020079#' kq <- corpusQuery(kcon, "Ameisenplage")
Marc Kupietz5a519822019-09-20 21:43:52 +020080#' kq <- fetchAll(kq)
Marc Kupietz05b22772020-02-18 21:58:42 +010081#' }
Marc Kupietz7915dc42019-09-12 17:44:58 +020082#'
Marc Kupietz4862b862019-11-07 10:13:53 +010083#' \dontrun{
Marc Kupietz6ae76052021-09-21 10:34:00 +020084#'
Marc Kupietzb956b812019-11-25 17:53:13 +010085#' kcon <- new("KorAPConnection", verbose = TRUE, accessToken="e739u6eOzkwADQPdVChxFg")
Marc Kupietz4862b862019-11-07 10:13:53 +010086#' kq <- corpusQuery(kcon, "Ameisenplage", metadataOnly=FALSE)
87#' kq <- fetchAll(kq)
88#' kq@collectedMatches$snippet
89#' }
Marc Kupietz7915dc42019-09-12 17:44:58 +020090#'
Marc Kupietze95108e2019-09-18 13:23:58 +020091#' @rdname KorAPConnection-class
Marc Kupietz632cbd42019-09-06 16:04:51 +020092#' @export
Marc Kupietze95108e2019-09-18 13:23:58 +020093setMethod("initialize", "KorAPConnection",
Marc Kupietz6a3185b2021-12-07 10:23:16 +010094 function(.Object, KorAPUrl = "https://korap.ids-mannheim.de/", apiVersion = 'v1.0', apiUrl, accessToken = getAccessToken(KorAPUrl), userAgent = "R-KorAP-Client", timeout=240, verbose = FALSE, cache = TRUE) {
Marc Kupietze95108e2019-09-18 13:23:58 +020095 .Object <- callNextMethod()
96 m <- regexpr("https?://[^?]+", KorAPUrl, perl = TRUE)
97 .Object@KorAPUrl <- regmatches(KorAPUrl, m)
98 if (!endsWith(.Object@KorAPUrl, '/')) {
99 .Object@KorAPUrl <- paste0(.Object@KorAPUrl, "/")
100 }
101 if (missing(apiUrl)) {
102 .Object@apiUrl = paste0(.Object@KorAPUrl, 'api/', apiVersion, '/')
103 } else {
104 .Object@apiUrl = apiUrl
105 }
Marc Kupietzb956b812019-11-25 17:53:13 +0100106 .Object@accessToken = accessToken
Marc Kupietze95108e2019-09-18 13:23:58 +0200107 .Object@apiVersion = apiVersion
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +0200108 .Object@userAgent = userAgent
109 .Object@timeout = timeout
Marc Kupietz5a519822019-09-20 21:43:52 +0200110 .Object@verbose = verbose
Marc Kupietz0a96b282019-10-01 11:05:31 +0200111 .Object@cache = cache
Marc Kupietza4675722022-02-23 23:55:15 +0100112 .Object@welcome = apiCall(.Object, .Object@apiUrl, json = FALSE, cache = FALSE, getHeaders = TRUE)
113 if (!is.null(.Object@welcome)) {
114 message(.Object@welcome[[2]])
115 }
116 .Object@indexRevision <- .Object@welcome[[1]][["x-index-revision"]]
Marc Kupietze95108e2019-09-18 13:23:58 +0200117 .Object
118 })
119
Marc Kupietza96537f2019-11-09 23:07:44 +0100120
Marc Kupietzb956b812019-11-25 17:53:13 +0100121accessTokenServiceName <- "RKorAPClientAccessToken"
Marc Kupietz4862b862019-11-07 10:13:53 +0100122
Marc Kupietzb956b812019-11-25 17:53:13 +0100123setGeneric("persistAccessToken", function(kco, ...) standardGeneric("persistAccessToken") )
Marc Kupietz4862b862019-11-07 10:13:53 +0100124
Marc Kupietzb956b812019-11-25 17:53:13 +0100125#' @aliases persistAccessToken
Marc Kupietz4862b862019-11-07 10:13:53 +0100126#' @rdname KorAPConnection-class
127#' @import keyring
128#' @export
129#' @examples
130#' \dontrun{
Marc Kupietz6ae76052021-09-21 10:34:00 +0200131#'
Marc Kupietzb956b812019-11-25 17:53:13 +0100132#' kco <- new("KorAPConnection", accessToken="e739u6eOzkwADQPdVChxFg")
133#' persistAccessToken(kco)
Marc Kupietz4862b862019-11-07 10:13:53 +0100134#' }
135#'
Marc Kupietzb956b812019-11-25 17:53:13 +0100136setMethod("persistAccessToken", "KorAPConnection", function(kco, accessToken = kco@accessToken) {
137 if (is.null(accessToken))
138 stop("It seems that you have not supplied any access token that could be persisted.", call. = FALSE)
Marc Kupietz4862b862019-11-07 10:13:53 +0100139
Marc Kupietzb956b812019-11-25 17:53:13 +0100140 kco@accessToken <- accessToken
141 key_set_with_value(accessTokenServiceName, kco@KorAPUrl, accessToken)
Marc Kupietz4862b862019-11-07 10:13:53 +0100142})
143
Marc Kupietzb956b812019-11-25 17:53:13 +0100144setGeneric("clearAccessToken", function(kco) standardGeneric("clearAccessToken") )
Marc Kupietz4862b862019-11-07 10:13:53 +0100145
Marc Kupietzb956b812019-11-25 17:53:13 +0100146#' @aliases clearAccessToken
Marc Kupietz4862b862019-11-07 10:13:53 +0100147#' @rdname KorAPConnection-class
148#' @import keyring
149#' @export
150#' @examples
151#' \dontrun{
Marc Kupietz6ae76052021-09-21 10:34:00 +0200152#'
Marc Kupietz4862b862019-11-07 10:13:53 +0100153#' kco <- new("KorAPConnection")
Marc Kupietzb956b812019-11-25 17:53:13 +0100154#' clearAccessToken(kco)
Marc Kupietz4862b862019-11-07 10:13:53 +0100155#' }
156#'
Marc Kupietzb956b812019-11-25 17:53:13 +0100157setMethod("clearAccessToken", "KorAPConnection", function(kco) {
158 key_delete(accessTokenServiceName, kco@KorAPUrl)
Marc Kupietz4862b862019-11-07 10:13:53 +0100159})
160
161#' @import keyring
Marc Kupietzb956b812019-11-25 17:53:13 +0100162getAccessToken <- function(KorAPUrl) {
Marc Kupietz59e449b2019-12-12 12:53:54 +0100163 keyList <- tryCatch(withCallingHandlers(key_list(service = accessTokenServiceName),
Marc Kupietzddce5562019-11-24 15:45:38 +0100164 warning = function(w) invokeRestart("muffleWarning"),
Marc Kupietz59e449b2019-12-12 12:53:54 +0100165 error = function(e) return(NULL)),
166 error = function(e) { })
Marc Kupietz01c24772021-07-14 18:27:36 +0200167 if (KorAPUrl %in% keyList$username)
Marc Kupietzb956b812019-11-25 17:53:13 +0100168 key_get(accessTokenServiceName, KorAPUrl)
Marc Kupietzfd9e7492019-11-08 15:45:18 +0100169 else
170 NULL
Marc Kupietz4862b862019-11-07 10:13:53 +0100171}
Marc Kupietz0a96b282019-10-01 11:05:31 +0200172
Marc Kupietz581a29b2021-09-04 20:51:04 +0200173
174warnIfNoAccessToken <- function(kco) {
175 if (is.null(kco@accessToken)) {
176 warning(
177 paste0(
178 "In order to receive KWICSs also from corpora with restricted licenses, you need an access token.\n",
179 "To generate an access token, login to KorAP and navigite to KorAP's OAuth settings <",
180 kco@KorAPUrl,
181 "settings/oauth#page-top>"
182 )
183 )
184 }
185}
186
Marc Kupietz0a96b282019-10-01 11:05:31 +0200187KorAPCacheSubDir <- function() {
Marc Kupietz70b2c722020-02-18 13:32:09 +0100188 paste0("RKorAPClient_",
189 gsub(
190 "^([0-9]+\\.[0-9]+).*",
191 "\\1",
192 packageVersion("RKorAPClient"),
193 perl = TRUE
194 ))
Marc Kupietz0a96b282019-10-01 11:05:31 +0200195}
196
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +0200197setGeneric("apiCall", function(kco, ...) standardGeneric("apiCall") )
198
Marc Kupietz4de53ec2019-10-04 09:12:00 +0200199## quiets concerns of R CMD check re: the .'s that appear in pipelines
200if(getRversion() >= "2.15.1") utils::globalVariables(c("."))
201
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +0200202#' @aliases apiCall
203#' @rdname KorAPConnection-class
204#' @param kco KorAPConnection object
205#' @param url request url
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100206#' @param json logical that determines if json result is expected
Marc Kupietzb49afa02020-06-04 15:50:29 +0200207#' @param getHeaders logical that determines if headers and content should be returned (as a list)
Marc Kupietz69cc54a2019-09-30 12:06:54 +0200208#' @importFrom jsonlite fromJSON
Marc Kupietza4675722022-02-23 23:55:15 +0100209#' @importFrom curl has_internet
Marc Kupietz69cc54a2019-09-30 12:06:54 +0200210#' @export
Marc Kupietza4675722022-02-23 23:55:15 +0100211setMethod("apiCall", "KorAPConnection", function(kco, url, json = TRUE, getHeaders = FALSE, cache = kco@cache, timeout=kco@timeout) {
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100212 result <- ""
213 if (cache) {
Marc Kupietzb49afa02020-06-04 15:50:29 +0200214 result <- R.cache::loadCache(dir=KorAPCacheSubDir(), key=list(url, kco@accessToken, kco@indexRevision))
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100215 if (!is.null(result)) {
216 if (!is.null(result$meta))
217 result$meta$cached <- "local"
218 return(result)
Marc Kupietz0a96b282019-10-01 11:05:31 +0200219 }
220 }
Marc Kupietza4675722022-02-23 23:55:15 +0100221
222 # From https://community.rstudio.com/t/internet-resources-should-fail-gracefully/49199/11
223 # Thanks to kvasilopoulos
224 try_GET <- function(x, ...) {
225 tryCatch(
226 GET(url = x, timeout(timeout), ...),
227 error = function(e) conditionMessage(e),
228 warning = function(w) conditionMessage(w)
229 )
230 }
231 is_response <- function(x) {
232 class(x) == "response"
233 }
234
235 # First check internet connection
236 if (!curl::has_internet()) {
237 message("No internet connection.")
238 return(invisible(NULL))
239 }
240
Marc Kupietzb956b812019-11-25 17:53:13 +0100241 if (!is.null(kco@accessToken))
Marc Kupietza4675722022-02-23 23:55:15 +0100242 resp <- try_GET(url, user_agent(kco@userAgent), timeout(timeout), add_headers(Authorization = paste("Bearer", kco@accessToken)))
Marc Kupietz4862b862019-11-07 10:13:53 +0100243 else
Marc Kupietza4675722022-02-23 23:55:15 +0100244 resp <- try_GET(url, user_agent(kco@userAgent), timeout(timeout))
245
246 if (!is_response(resp)) {
247 message(resp)
248 return(invisible(NULL))
249 }
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100250 if (json || status_code(resp) != 200) {
251 if (json && !http_type(resp) %in% c("application/json", "application/ld+json")) {
Marc Kupietz04814f22023-04-16 17:13:27 +0200252 message("API did not return json")
Marc Kupietza4675722022-02-23 23:55:15 +0100253 return(invisible(NULL))
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100254 }
Marc Kupietz04814f22023-04-16 17:13:27 +0200255
256 result <- tryCatch(jsonlite::fromJSON(content(resp, "text", encoding = "UTF-8")), error = function(e) {return(NULL)})
257 if (!is.atomic(result) && !is.null(result$warnings)) {
Marc Kupietza4675722022-02-23 23:55:15 +0100258 msg <- if (nrow(result$warnings) > 1)
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100259 sapply(result$warnings, function(warning) paste(sprintf("%s: %s", warning[1], warning[2]), sep="\n"))
260 else
261 sprintf("%s: %s", result$warnings[1], result$warnings[2])
Marc Kupietza4675722022-02-23 23:55:15 +0100262 message(msg)
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100263 }
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +0200264 }
265 if (status_code(resp) != 200) {
Marc Kupietzb7d8c272020-01-31 18:51:50 +0100266 if (kco@verbose) {
267 cat("\n")
268 }
Marc Kupietza4675722022-02-23 23:55:15 +0100269 msg <- sprintf("%s KorAP API request failed", status_code(resp))
Marc Kupietz04814f22023-04-16 17:13:27 +0200270 if (!is.atomic(result) && !is.null(result$errors)) {
Marc Kupietza4675722022-02-23 23:55:15 +0100271 errormsg <- unlist(result$errors)
272 msg <- sprintf("%s: %s %s", msg, errormsg[5], errormsg[2])
Marc Kupietzb7d8c272020-01-31 18:51:50 +0100273 }
Marc Kupietza4675722022-02-23 23:55:15 +0100274 message(msg)
275 return(invisible(NULL))
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +0200276 }
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100277 if (!json) {
278 result <- content(resp, "text", encoding = "UTF-8")
Marc Kupietz0a96b282019-10-01 11:05:31 +0200279 }
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100280 if (cache) {
Marc Kupietzb49afa02020-06-04 15:50:29 +0200281 R.cache::saveCache(result, key = list(url, kco@accessToken, kco@indexRevision), dir = KorAPCacheSubDir(), compress = TRUE)
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100282 }
Marc Kupietzb49afa02020-06-04 15:50:29 +0200283 if (getHeaders) {
284 list(httr::headers(resp), result)
285 } else {
286 result
287 }
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +0200288})
289
Marc Kupietz0a96b282019-10-01 11:05:31 +0200290setGeneric("clearCache", function(kco) standardGeneric("clearCache") )
291
292#' @aliases clearCache
293#' @rdname KorAPConnection-class
294#' @export
295setMethod("clearCache", "KorAPConnection", function(kco) {
296 R.cache::clearCache(dir=KorAPCacheSubDir())
297})
298
Marc Kupietze95108e2019-09-18 13:23:58 +0200299#' @rdname KorAPConnection-class
300#' @param object KorAPConnection object
301#' @export
302setMethod("show", "KorAPConnection", function(object) {
303 cat("<KorAPConnection>", "\n")
304 cat("apiUrl: ", object@apiUrl, "\n")
305})
306
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +0200307##' Funtion KorAPConnection()
308##'
309##' Wrappper function for new("KorAPConnection")
310##'
311##' @rdname KorAPConnection-constructor
312##' @name KorAPConnection-constructor
313##' @export
314## XKorAPConnection <- function(...) new("KorAPConnection", ...)