blob: 8921e442638c6076ded8378dae924cd68960f1ec [file] [log] [blame]
Marc Kupietzfd9e7492019-11-08 15:45:18 +01001################################################################################
2# Use setClassUnion to define the unholy NULL-data union as a virtual class.
3################################################################################
4setClassUnion("characterOrNULL", c("character", "NULL"))
Marc Kupietza4675722022-02-23 23:55:15 +01005setClassUnion("listOrNULL", c("list", "NULL"))
Marc Kupietzfd9e7492019-11-08 15:45:18 +01006
Marc Kupietze95108e2019-09-18 13:23:58 +02007#' Class KorAPConnection
Marc Kupietz25aebc32019-09-16 18:40:50 +02008#'
Marc Kupietz67edcb52021-09-20 21:54:24 +02009#' `KorAPConnection` objects represent the connection to a KorAP server.
10#' New `KorAPConnection` objects can be created by `new("KorAPConnection")`.
Marc Kupietze95108e2019-09-18 13:23:58 +020011#'
Marc Kupietz0a96b282019-10-01 11:05:31 +020012#' @import R.cache
Marc Kupietze95108e2019-09-18 13:23:58 +020013#' @import utils
14#' @import methods
Marc Kupietza81343d2022-09-06 12:32:10 +020015#' @slot KorAPUrl URL of the web user interface of the KorAP server used in the connection.
16#' @slot apiVersion requested KorAP API version.
17#' @slot indexRevision indexRevision code as reported from API via `X-Index-Revision` HTTP header.
18#' @slot apiUrl full URL of API including version.
19#' @slot accessToken OAuth2 access token.
20#' @slot userAgent user agent string used for connection the API.
21#' @slot timeout tineout in seconds for API requests (this does not influence server internal timeouts)
22#' @slot verbose logical that decides whether operations will default to be verbose.
23#' @slot cache logical that decides if API calls are cached locally.
24#' @slot welcome list containing HTTP response received from KorAP server welcome function.
25
Marc Kupietze95108e2019-09-18 13:23:58 +020026#' @export
Marc Kupietza4675722022-02-23 23:55:15 +010027KorAPConnection <- setClass("KorAPConnection", slots=c(KorAPUrl="character", apiVersion="character", indexRevision="characterOrNULL", apiUrl="character", accessToken="characterOrNULL", userAgent="character", timeout="numeric", verbose="logical", cache="logical", welcome="listOrNULL"))
Marc Kupietze95108e2019-09-18 13:23:58 +020028
29#' @param .Object KorAPConnection object
Marc Kupietza81343d2022-09-06 12:32:10 +020030#' @param KorAPUrl URL of the web user interface of the KorAP server instance you want to access.
Marc Kupietze95108e2019-09-18 13:23:58 +020031#' @param apiVersion which version of KorAP's API you want to connect to.
32#' @param apiUrl URL of the KorAP web service.
Marc Kupietzb956b812019-11-25 17:53:13 +010033#' @param accessToken OAuth2 access token. To use authorization based on an access token
Marc Kupietz43a6ade2020-02-18 17:01:44 +010034#' in subsequent queries, initialize your KorAP connection with
Marc Kupietz67edcb52021-09-20 21:54:24 +020035#' `kco <- new("KorAPConnection", accessToken="<access token>")`.
Marc Kupietz4862b862019-11-07 10:13:53 +010036#' In order to make the API
Marc Kupietz67edcb52021-09-20 21:54:24 +020037#' token persistent for the currently used `KorAPUrl` (you can have one
Marc Kupietz4862b862019-11-07 10:13:53 +010038#' token per KorAPUrl / KorAP server instance), use
Marc Kupietz67edcb52021-09-20 21:54:24 +020039#' `persistAccessToken(kco)`. This will store it in your keyring using the
40#' [keyring()] package. Subsequent new("KorAPConnection") calls will
Marc Kupietz4862b862019-11-07 10:13:53 +010041#' then automatically retrieve the token from your keying. To stop using a
Marc Kupietz67edcb52021-09-20 21:54:24 +020042#' persisted token, call `clearAccessToken(kco)`. Please note that for
Marc Kupietz4862b862019-11-07 10:13:53 +010043#' DeReKo, authorized queries will behave differently inside and outside the
44#' IDS, because of the special license situation. This concerns also cached
45#' results which do not take into account from where a request was issued. If
Marc Kupietz67edcb52021-09-20 21:54:24 +020046#' you experience problems or unexpected results, please try `kco <-
47#' new("KorAPConnection", cache=FALSE)` or use
48#' [clearCache()] to clear the cache completely.
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +020049#' @param userAgent user agent string.
Marc Kupietza81343d2022-09-06 12:32:10 +020050#' @param timeout tineout in seconds for API requests (this does not influence server internal timeouts).
51#' @param verbose logical that decides whether following operations will default to
Marc Kupietz4862b862019-11-07 10:13:53 +010052#' be verbose.
Marc Kupietza81343d2022-09-06 12:32:10 +020053#' @param cache logical that decides if API calls are cached locally. You can clear
Marc Kupietz67edcb52021-09-20 21:54:24 +020054#' the cache with [clearCache()].
55#' @return [KorAPConnection()] object that can be used e.g. with
56#' [corpusQuery()]
Marc Kupietze95108e2019-09-18 13:23:58 +020057#'
58#' @examples
Marc Kupietz6ae76052021-09-21 10:34:00 +020059#' \dontrun{
60#'
Marc Kupietz5a519822019-09-20 21:43:52 +020061#' kcon <- new("KorAPConnection", verbose = TRUE)
Marc Kupietze95108e2019-09-18 13:23:58 +020062#' kq <- corpusQuery(kcon, "Ameisenplage")
Marc Kupietz5a519822019-09-20 21:43:52 +020063#' kq <- fetchAll(kq)
Marc Kupietz05b22772020-02-18 21:58:42 +010064#' }
Marc Kupietz7915dc42019-09-12 17:44:58 +020065#'
Marc Kupietz4862b862019-11-07 10:13:53 +010066#' \dontrun{
Marc Kupietz6ae76052021-09-21 10:34:00 +020067#'
Marc Kupietzb956b812019-11-25 17:53:13 +010068#' kcon <- new("KorAPConnection", verbose = TRUE, accessToken="e739u6eOzkwADQPdVChxFg")
Marc Kupietz4862b862019-11-07 10:13:53 +010069#' kq <- corpusQuery(kcon, "Ameisenplage", metadataOnly=FALSE)
70#' kq <- fetchAll(kq)
71#' kq@collectedMatches$snippet
72#' }
Marc Kupietz7915dc42019-09-12 17:44:58 +020073#'
Marc Kupietze95108e2019-09-18 13:23:58 +020074#' @rdname KorAPConnection-class
Marc Kupietz632cbd42019-09-06 16:04:51 +020075#' @export
Marc Kupietze95108e2019-09-18 13:23:58 +020076setMethod("initialize", "KorAPConnection",
Marc Kupietz6a3185b2021-12-07 10:23:16 +010077 function(.Object, KorAPUrl = "https://korap.ids-mannheim.de/", apiVersion = 'v1.0', apiUrl, accessToken = getAccessToken(KorAPUrl), userAgent = "R-KorAP-Client", timeout=240, verbose = FALSE, cache = TRUE) {
Marc Kupietze95108e2019-09-18 13:23:58 +020078 .Object <- callNextMethod()
79 m <- regexpr("https?://[^?]+", KorAPUrl, perl = TRUE)
80 .Object@KorAPUrl <- regmatches(KorAPUrl, m)
81 if (!endsWith(.Object@KorAPUrl, '/')) {
82 .Object@KorAPUrl <- paste0(.Object@KorAPUrl, "/")
83 }
84 if (missing(apiUrl)) {
85 .Object@apiUrl = paste0(.Object@KorAPUrl, 'api/', apiVersion, '/')
86 } else {
87 .Object@apiUrl = apiUrl
88 }
Marc Kupietzb956b812019-11-25 17:53:13 +010089 .Object@accessToken = accessToken
Marc Kupietze95108e2019-09-18 13:23:58 +020090 .Object@apiVersion = apiVersion
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +020091 .Object@userAgent = userAgent
92 .Object@timeout = timeout
Marc Kupietz5a519822019-09-20 21:43:52 +020093 .Object@verbose = verbose
Marc Kupietz0a96b282019-10-01 11:05:31 +020094 .Object@cache = cache
Marc Kupietza4675722022-02-23 23:55:15 +010095 .Object@welcome = apiCall(.Object, .Object@apiUrl, json = FALSE, cache = FALSE, getHeaders = TRUE)
96 if (!is.null(.Object@welcome)) {
97 message(.Object@welcome[[2]])
98 }
99 .Object@indexRevision <- .Object@welcome[[1]][["x-index-revision"]]
Marc Kupietze95108e2019-09-18 13:23:58 +0200100 .Object
101 })
102
Marc Kupietza96537f2019-11-09 23:07:44 +0100103
Marc Kupietzb956b812019-11-25 17:53:13 +0100104accessTokenServiceName <- "RKorAPClientAccessToken"
Marc Kupietz4862b862019-11-07 10:13:53 +0100105
Marc Kupietzb956b812019-11-25 17:53:13 +0100106setGeneric("persistAccessToken", function(kco, ...) standardGeneric("persistAccessToken") )
Marc Kupietz4862b862019-11-07 10:13:53 +0100107
Marc Kupietzb956b812019-11-25 17:53:13 +0100108#' @aliases persistAccessToken
Marc Kupietz4862b862019-11-07 10:13:53 +0100109#' @rdname KorAPConnection-class
110#' @import keyring
111#' @export
112#' @examples
113#' \dontrun{
Marc Kupietz6ae76052021-09-21 10:34:00 +0200114#'
Marc Kupietzb956b812019-11-25 17:53:13 +0100115#' kco <- new("KorAPConnection", accessToken="e739u6eOzkwADQPdVChxFg")
116#' persistAccessToken(kco)
Marc Kupietz4862b862019-11-07 10:13:53 +0100117#' }
118#'
Marc Kupietzb956b812019-11-25 17:53:13 +0100119setMethod("persistAccessToken", "KorAPConnection", function(kco, accessToken = kco@accessToken) {
120 if (is.null(accessToken))
121 stop("It seems that you have not supplied any access token that could be persisted.", call. = FALSE)
Marc Kupietz4862b862019-11-07 10:13:53 +0100122
Marc Kupietzb956b812019-11-25 17:53:13 +0100123 kco@accessToken <- accessToken
124 key_set_with_value(accessTokenServiceName, kco@KorAPUrl, accessToken)
Marc Kupietz4862b862019-11-07 10:13:53 +0100125})
126
Marc Kupietzb956b812019-11-25 17:53:13 +0100127setGeneric("clearAccessToken", function(kco) standardGeneric("clearAccessToken") )
Marc Kupietz4862b862019-11-07 10:13:53 +0100128
Marc Kupietzb956b812019-11-25 17:53:13 +0100129#' @aliases clearAccessToken
Marc Kupietz4862b862019-11-07 10:13:53 +0100130#' @rdname KorAPConnection-class
131#' @import keyring
132#' @export
133#' @examples
134#' \dontrun{
Marc Kupietz6ae76052021-09-21 10:34:00 +0200135#'
Marc Kupietz4862b862019-11-07 10:13:53 +0100136#' kco <- new("KorAPConnection")
Marc Kupietzb956b812019-11-25 17:53:13 +0100137#' clearAccessToken(kco)
Marc Kupietz4862b862019-11-07 10:13:53 +0100138#' }
139#'
Marc Kupietzb956b812019-11-25 17:53:13 +0100140setMethod("clearAccessToken", "KorAPConnection", function(kco) {
141 key_delete(accessTokenServiceName, kco@KorAPUrl)
Marc Kupietz4862b862019-11-07 10:13:53 +0100142})
143
144#' @import keyring
Marc Kupietzb956b812019-11-25 17:53:13 +0100145getAccessToken <- function(KorAPUrl) {
Marc Kupietz59e449b2019-12-12 12:53:54 +0100146 keyList <- tryCatch(withCallingHandlers(key_list(service = accessTokenServiceName),
Marc Kupietzddce5562019-11-24 15:45:38 +0100147 warning = function(w) invokeRestart("muffleWarning"),
Marc Kupietz59e449b2019-12-12 12:53:54 +0100148 error = function(e) return(NULL)),
149 error = function(e) { })
Marc Kupietz01c24772021-07-14 18:27:36 +0200150 if (KorAPUrl %in% keyList$username)
Marc Kupietzb956b812019-11-25 17:53:13 +0100151 key_get(accessTokenServiceName, KorAPUrl)
Marc Kupietzfd9e7492019-11-08 15:45:18 +0100152 else
153 NULL
Marc Kupietz4862b862019-11-07 10:13:53 +0100154}
Marc Kupietz0a96b282019-10-01 11:05:31 +0200155
Marc Kupietz581a29b2021-09-04 20:51:04 +0200156
157warnIfNoAccessToken <- function(kco) {
158 if (is.null(kco@accessToken)) {
159 warning(
160 paste0(
161 "In order to receive KWICSs also from corpora with restricted licenses, you need an access token.\n",
162 "To generate an access token, login to KorAP and navigite to KorAP's OAuth settings <",
163 kco@KorAPUrl,
164 "settings/oauth#page-top>"
165 )
166 )
167 }
168}
169
Marc Kupietz0a96b282019-10-01 11:05:31 +0200170KorAPCacheSubDir <- function() {
Marc Kupietz70b2c722020-02-18 13:32:09 +0100171 paste0("RKorAPClient_",
172 gsub(
173 "^([0-9]+\\.[0-9]+).*",
174 "\\1",
175 packageVersion("RKorAPClient"),
176 perl = TRUE
177 ))
Marc Kupietz0a96b282019-10-01 11:05:31 +0200178}
179
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +0200180setGeneric("apiCall", function(kco, ...) standardGeneric("apiCall") )
181
Marc Kupietz4de53ec2019-10-04 09:12:00 +0200182## quiets concerns of R CMD check re: the .'s that appear in pipelines
183if(getRversion() >= "2.15.1") utils::globalVariables(c("."))
184
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +0200185#' @aliases apiCall
186#' @rdname KorAPConnection-class
187#' @param kco KorAPConnection object
188#' @param url request url
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100189#' @param json logical that determines if json result is expected
Marc Kupietzb49afa02020-06-04 15:50:29 +0200190#' @param getHeaders logical that determines if headers and content should be returned (as a list)
Marc Kupietz69cc54a2019-09-30 12:06:54 +0200191#' @importFrom jsonlite fromJSON
Marc Kupietza4675722022-02-23 23:55:15 +0100192#' @importFrom curl has_internet
Marc Kupietz69cc54a2019-09-30 12:06:54 +0200193#' @export
Marc Kupietza4675722022-02-23 23:55:15 +0100194setMethod("apiCall", "KorAPConnection", function(kco, url, json = TRUE, getHeaders = FALSE, cache = kco@cache, timeout=kco@timeout) {
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100195 result <- ""
196 if (cache) {
Marc Kupietzb49afa02020-06-04 15:50:29 +0200197 result <- R.cache::loadCache(dir=KorAPCacheSubDir(), key=list(url, kco@accessToken, kco@indexRevision))
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100198 if (!is.null(result)) {
199 if (!is.null(result$meta))
200 result$meta$cached <- "local"
201 return(result)
Marc Kupietz0a96b282019-10-01 11:05:31 +0200202 }
203 }
Marc Kupietza4675722022-02-23 23:55:15 +0100204
205 # From https://community.rstudio.com/t/internet-resources-should-fail-gracefully/49199/11
206 # Thanks to kvasilopoulos
207 try_GET <- function(x, ...) {
208 tryCatch(
209 GET(url = x, timeout(timeout), ...),
210 error = function(e) conditionMessage(e),
211 warning = function(w) conditionMessage(w)
212 )
213 }
214 is_response <- function(x) {
215 class(x) == "response"
216 }
217
218 # First check internet connection
219 if (!curl::has_internet()) {
220 message("No internet connection.")
221 return(invisible(NULL))
222 }
223
Marc Kupietzb956b812019-11-25 17:53:13 +0100224 if (!is.null(kco@accessToken))
Marc Kupietza4675722022-02-23 23:55:15 +0100225 resp <- try_GET(url, user_agent(kco@userAgent), timeout(timeout), add_headers(Authorization = paste("Bearer", kco@accessToken)))
Marc Kupietz4862b862019-11-07 10:13:53 +0100226 else
Marc Kupietza4675722022-02-23 23:55:15 +0100227 resp <- try_GET(url, user_agent(kco@userAgent), timeout(timeout))
228
229 if (!is_response(resp)) {
230 message(resp)
231 return(invisible(NULL))
232 }
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100233 if (json || status_code(resp) != 200) {
234 if (json && !http_type(resp) %in% c("application/json", "application/ld+json")) {
Marc Kupietz04814f22023-04-16 17:13:27 +0200235 message("API did not return json")
Marc Kupietza4675722022-02-23 23:55:15 +0100236 return(invisible(NULL))
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100237 }
Marc Kupietz04814f22023-04-16 17:13:27 +0200238
239 result <- tryCatch(jsonlite::fromJSON(content(resp, "text", encoding = "UTF-8")), error = function(e) {return(NULL)})
240 if (!is.atomic(result) && !is.null(result$warnings)) {
Marc Kupietza4675722022-02-23 23:55:15 +0100241 msg <- if (nrow(result$warnings) > 1)
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100242 sapply(result$warnings, function(warning) paste(sprintf("%s: %s", warning[1], warning[2]), sep="\n"))
243 else
244 sprintf("%s: %s", result$warnings[1], result$warnings[2])
Marc Kupietza4675722022-02-23 23:55:15 +0100245 message(msg)
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100246 }
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +0200247 }
248 if (status_code(resp) != 200) {
Marc Kupietzb7d8c272020-01-31 18:51:50 +0100249 if (kco@verbose) {
250 cat("\n")
251 }
Marc Kupietza4675722022-02-23 23:55:15 +0100252 msg <- sprintf("%s KorAP API request failed", status_code(resp))
Marc Kupietz04814f22023-04-16 17:13:27 +0200253 if (!is.atomic(result) && !is.null(result$errors)) {
Marc Kupietza4675722022-02-23 23:55:15 +0100254 errormsg <- unlist(result$errors)
255 msg <- sprintf("%s: %s %s", msg, errormsg[5], errormsg[2])
Marc Kupietzb7d8c272020-01-31 18:51:50 +0100256 }
Marc Kupietza4675722022-02-23 23:55:15 +0100257 message(msg)
258 return(invisible(NULL))
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +0200259 }
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100260 if (!json) {
261 result <- content(resp, "text", encoding = "UTF-8")
Marc Kupietz0a96b282019-10-01 11:05:31 +0200262 }
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100263 if (cache) {
Marc Kupietzb49afa02020-06-04 15:50:29 +0200264 R.cache::saveCache(result, key = list(url, kco@accessToken, kco@indexRevision), dir = KorAPCacheSubDir(), compress = TRUE)
Marc Kupietzb2b32a32020-03-24 13:56:50 +0100265 }
Marc Kupietzb49afa02020-06-04 15:50:29 +0200266 if (getHeaders) {
267 list(httr::headers(resp), result)
268 } else {
269 result
270 }
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +0200271})
272
Marc Kupietz0a96b282019-10-01 11:05:31 +0200273setGeneric("clearCache", function(kco) standardGeneric("clearCache") )
274
275#' @aliases clearCache
276#' @rdname KorAPConnection-class
277#' @export
278setMethod("clearCache", "KorAPConnection", function(kco) {
279 R.cache::clearCache(dir=KorAPCacheSubDir())
280})
281
Marc Kupietze95108e2019-09-18 13:23:58 +0200282#' @rdname KorAPConnection-class
283#' @param object KorAPConnection object
284#' @export
285setMethod("show", "KorAPConnection", function(object) {
286 cat("<KorAPConnection>", "\n")
287 cat("apiUrl: ", object@apiUrl, "\n")
288})
289
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +0200290##' Funtion KorAPConnection()
291##'
292##' Wrappper function for new("KorAPConnection")
293##'
294##' @rdname KorAPConnection-constructor
295##' @name KorAPConnection-constructor
296##' @export
297## XKorAPConnection <- function(...) new("KorAPConnection", ...)