################################################################################
# characterOrNULL: virtual class union that lets a slot hold either a character
# vector or NULL (used below for the optional indexRevision and accessToken).
################################################################################
setClassUnion("characterOrNULL", members = c("character", "NULL"))

#' Class KorAPConnection
#'
#' `KorAPConnection` objects represent the connection to a KorAP server.
#' New `KorAPConnection` objects can be created by `new("KorAPConnection")`.
#'
#' @import R.cache
#' @import utils
#' @import methods
#' @export
KorAPConnection <- setClass(
  "KorAPConnection",
  slots = c(
    KorAPUrl = "character",
    apiVersion = "character",
    indexRevision = "characterOrNULL",
    apiUrl = "character",
    accessToken = "characterOrNULL",
    userAgent = "character",
    timeout = "numeric",
    verbose = "logical",
    cache = "logical"
  )
)
Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 16 | |
#' @param .Object KorAPConnection object
#' @param KorAPUrl the URL of the KorAP server instance you want to access.
#'   Anything from a `?` onwards is dropped and a trailing slash is appended
#'   if it is missing.
#' @param apiVersion which version of KorAP's API you want to connect to.
#' @param apiUrl URL of the KorAP web service. If missing, it is derived from
#'   `KorAPUrl` and `apiVersion`.
#' @param accessToken OAuth2 access token. To use authorization based on an access token
#'   in subsequent queries, initialize your KorAP connection with
#'   `kco <- new("KorAPConnection", accessToken="<access token>")`.
#'   In order to make the API
#'   token persistent for the currently used `KorAPUrl` (you can have one
#'   token per KorAPUrl / KorAP server instance), use
#'   `persistAccessToken(kco)`. This will store it in your keyring using the
#'   [keyring()] package. Subsequent new("KorAPConnection") calls will
#'   then automatically retrieve the token from your keyring. To stop using a
#'   persisted token, call `clearAccessToken(kco)`. Please note that for
#'   DeReKo, authorized queries will behave differently inside and outside the
#'   IDS, because of the special license situation. This concerns also cached
#'   results which do not take into account from where a request was issued. If
#'   you experience problems or unexpected results, please try `kco <-
#'   new("KorAPConnection", cache=FALSE)` or use
#'   [clearCache()] to clear the cache completely.
#' @param userAgent user agent string.
#' @param timeout time out in seconds.
#' @param verbose logical. Decides whether following operations will default to
#'   be verbose.
#' @param cache logical. Decides if API calls are cached locally. You can clear
#'   the cache with [clearCache()].
#' @return [KorAPConnection()] object that can be used e.g. with
#'   [corpusQuery()]
#'
#' @examples
#' \donttest{
#' kcon <- new("KorAPConnection", verbose = TRUE)
#' kq <- corpusQuery(kcon, "Ameisenplage")
#' kq <- fetchAll(kq)
#' }
#'
#' \dontrun{
#' kcon <- new("KorAPConnection", verbose = TRUE, accessToken="e739u6eOzkwADQPdVChxFg")
#' kq <- corpusQuery(kcon, "Ameisenplage", metadataOnly=FALSE)
#' kq <- fetchAll(kq)
#' kq@collectedMatches$snippet
#' }
#'
#' @rdname KorAPConnection-class
#' @export
setMethod("initialize", "KorAPConnection",
          function(.Object, KorAPUrl = "https://korap.ids-mannheim.de/", apiVersion = 'v1.0', apiUrl, accessToken = getAccessToken(KorAPUrl), userAgent = "R-KorAP-Client", timeout=110, verbose = FALSE, cache = TRUE) {
            .Object <- callNextMethod()

            # Fail early with a readable message instead of the cryptic
            # "argument is of length zero" that an unmatched URL would cause below.
            if (!is.character(KorAPUrl) || length(KorAPUrl) != 1L || is.na(KorAPUrl)) {
              stop("KorAPUrl must be a single character string.", call. = FALSE)
            }
            urlMatch <- regexpr("https?://[^?]+", KorAPUrl, perl = TRUE)
            if (urlMatch == -1L) {
              stop("KorAPUrl must contain an http(s) URL, got: ", KorAPUrl, call. = FALSE)
            }

            # Normalize: keep only the part before any query string and make
            # sure the URL ends with a slash.
            normalizedUrl <- regmatches(KorAPUrl, urlMatch)
            if (!endsWith(normalizedUrl, "/")) {
              normalizedUrl <- paste0(normalizedUrl, "/")
            }
            .Object@KorAPUrl <- normalizedUrl

            if (missing(apiUrl)) {
              .Object@apiUrl <- paste0(normalizedUrl, "api/", apiVersion, "/")
            } else {
              .Object@apiUrl <- apiUrl
            }

            # persistAccessToken() stores tokens under the normalized URL, so a
            # token that was not passed explicitly must be looked up with the
            # normalized URL as well (the signature default would use the raw one).
            if (missing(accessToken)) {
              accessToken <- getAccessToken(normalizedUrl)
            }
            .Object@accessToken <- accessToken
            .Object@apiVersion <- apiVersion
            .Object@userAgent <- userAgent
            .Object@timeout <- timeout
            .Object@verbose <- verbose
            .Object@cache <- cache

            # Contact the API root once (never cached): print the returned text
            # and remember the index revision header, which is part of the
            # cache key used by apiCall().
            welcome <- apiCall(.Object, .Object@apiUrl, json = FALSE, cache = FALSE, getHeaders = TRUE)
            message(welcome[[2]])
            .Object@indexRevision <- welcome[[1]][["x-index-revision"]]
            .Object
          })
| 86 | |
Marc Kupietz | a96537f | 2019-11-09 23:07:44 +0100 | [diff] [blame] | 87 | |
# Service name under which access tokens are stored in the keyring; the
# connection's KorAPUrl is used as the user name of each entry.
accessTokenServiceName <- "RKorAPClientAccessToken"

setGeneric("persistAccessToken", function(kco, ...) standardGeneric("persistAccessToken") )

#' @aliases persistAccessToken
#' @rdname KorAPConnection-class
#' @import keyring
#' @export
#' @examples
#' \dontrun{
#' kco <- new("KorAPConnection", accessToken="e739u6eOzkwADQPdVChxFg")
#' persistAccessToken(kco)
#' }
#'
setMethod("persistAccessToken", "KorAPConnection", function(kco, accessToken = kco@accessToken) {
  if (is.null(accessToken))
    stop("It seems that you have not supplied any access token that could be persisted.", call. = FALSE)

  # The slot assignment also checks that the token is a character value.
  kco@accessToken <- accessToken
  key_set_with_value(accessTokenServiceName, kco@KorAPUrl, accessToken)

  # R copies `kco` on modification, so the updated connection has to be
  # returned; otherwise a token passed via `accessToken` would be persisted
  # but never reach the caller's connection object.
  invisible(kco)
})
| 109 | |
setGeneric("clearAccessToken", function(kco) standardGeneric("clearAccessToken"))

#' @aliases clearAccessToken
#' @rdname KorAPConnection-class
#' @import keyring
#' @export
#' @examples
#' \dontrun{
#' kco <- new("KorAPConnection")
#' clearAccessToken(kco)
#' }
#'
setMethod("clearAccessToken", "KorAPConnection", function(kco) {
  # Keyring entries are addressed by the package's service name together with
  # the connection's KorAP URL as user name.
  serverUrl <- kco@KorAPUrl
  key_delete(service = accessTokenServiceName, username = serverUrl)
})
| 125 | |
#' @import keyring
getAccessToken <- function(KorAPUrl) {
  # Look up the access token stored for `KorAPUrl`, or NULL if there is none.
  # Listing the keyring is best effort: warnings are muffled and an error is
  # turned into NULL, which then simply means "no stored token".
  storedKeys <- tryCatch(
    withCallingHandlers(
      key_list(service = accessTokenServiceName),
      warning = function(w) invokeRestart("muffleWarning")
    ),
    error = function(e) NULL
  )
  if (!(KorAPUrl %in% storedKeys$username)) {
    return(NULL)
  }
  key_get(accessTokenServiceName, KorAPUrl)
}
Marc Kupietz | 0a96b28 | 2019-10-01 11:05:31 +0200 | [diff] [blame] | 137 | |
Marc Kupietz | 581a29b | 2021-09-04 20:51:04 +0200 | [diff] [blame] | 138 | |
# Emit a warning when the connection `kco` carries no access token, pointing
# the user to the OAuth settings page below the connection's KorAPUrl.
# Does nothing when a token is set.
warnIfNoAccessToken <- function(kco) {
  if (is.null(kco@accessToken)) {
    warning(
      paste0(
        "In order to receive KWICs also from corpora with restricted licenses, you need an access token.\n",
        "To generate an access token, log in to KorAP and navigate to KorAP's OAuth settings <",
        kco@KorAPUrl,
        "settings/oauth#page-top>"
      ),
      # The call of this internal helper is of no use to the user.
      call. = FALSE
    )
  }
}
| 151 | |
# Name of the R.cache subdirectory used by this package: "RKorAPClient_"
# followed by the major.minor part of the installed RKorAPClient version.
KorAPCacheSubDir <- function() {
  installedVersion <- packageVersion("RKorAPClient")
  # Keep only the leading "<major>.<minor>" and drop everything after it.
  majorMinor <- gsub("^([0-9]+\\.[0-9]+).*", "\\1", installedVersion, perl = TRUE)
  paste0("RKorAPClient_", majorMinor)
}
| 161 | |
setGeneric("apiCall", function(kco, ...) standardGeneric("apiCall") )

## quiets concerns of R CMD check re: the .'s that appear in pipelines
if(getRversion() >= "2.15.1") utils::globalVariables(c("."))

# Format the `warnings` / `errors` member of a parsed API response as one
# string per notification. `template` is a sprintf() format with two `%s`
# placeholders that receive the first two fields of each notification.
# Depending on how jsonlite simplified the JSON, `notifications` may be a
# matrix or data frame (one notification per row), a list (one notification
# per element) or a plain vector (a single notification).
formatKorAPNotifications <- function(notifications, template) {
  entries <- if (is.matrix(notifications) || is.data.frame(notifications)) {
    lapply(seq_len(nrow(notifications)),
           function(i) unlist(notifications[i, ], use.names = FALSE))
  } else if (is.list(notifications)) {
    lapply(notifications, unlist, use.names = FALSE)
  } else {
    list(notifications)
  }
  vapply(entries, function(entry) {
    entry <- as.character(entry)
    sprintf(template, entry[1], entry[2])
  }, character(1))
}

#' @aliases apiCall
#' @rdname KorAPConnection-class
#' @param kco KorAPConnection object
#' @param url request url
#' @param json logical that determines if json result is expected
#' @param getHeaders logical that determines if headers and content should be returned (as a list)
#' @importFrom jsonlite fromJSON
#' @export
setMethod("apiCall", "KorAPConnection", function(kco, url, json = TRUE, getHeaders = FALSE, cache = kco@cache) {
  cacheKey <- list(url, kco@accessToken, kco@indexRevision)

  # Headers are not stored in the cache, so a request for headers always goes
  # to the server; otherwise a cache hit would return a bare result instead
  # of list(headers, result).
  if (cache && !getHeaders) {
    cached <- R.cache::loadCache(dir = KorAPCacheSubDir(), key = cacheKey)
    if (!is.null(cached)) {
      # Plain-text results (json = FALSE) are atomic; `$` would fail on them.
      if (is.list(cached) && !is.null(cached$meta))
        cached$meta$cached <- "local"
      return(cached)
    }
  }

  if (!is.null(kco@accessToken))
    resp <- httr::GET(url, httr::user_agent(kco@userAgent), httr::timeout(kco@timeout), httr::add_headers(Authorization = paste("Bearer", kco@accessToken)))
  else
    resp <- httr::GET(url, httr::user_agent(kco@userAgent), httr::timeout(kco@timeout))

  status <- httr::status_code(resp)
  failed <- status != 200
  result <- ""

  if (json || failed) {
    isJson <- httr::http_type(resp) %in% c("application/json", "application/ld+json")
    if (json && !failed && !isJson) {
      stop("API did not return json", call. = FALSE)
    }
    body <- httr::content(resp, "text", encoding = "UTF-8")
    result <- if (failed) {
      # Error responses are not necessarily JSON; an unparsable body must not
      # hide the HTTP status that is reported below.
      tryCatch(jsonlite::fromJSON(body), error = function(e) NULL)
    } else {
      jsonlite::fromJSON(body)
    }
    if (is.list(result) && !is.null(result$warnings)) {
      warningLines <- formatKorAPNotifications(result$warnings, "%s: %s")
      warning(paste(warningLines, collapse = "\n"), call. = FALSE)
    }
  }

  if (failed) {
    if (kco@verbose) {
      cat("\n")
    }
    failure <- sprintf("%s KorAP API request failed", status)
    if (is.list(result) && !is.null(result$errors)) {
      errorLines <- formatKorAPNotifications(result$errors, "%s %s")
      failure <- sprintf("%s - %s", failure, paste(errorLines, collapse = "; "))
    }
    stop(failure, call. = FALSE)
  }

  if (!json) {
    result <- httr::content(resp, "text", encoding = "UTF-8")
  }
  if (cache) {
    R.cache::saveCache(result, key = cacheKey, dir = KorAPCacheSubDir(), compress = TRUE)
  }
  if (getHeaders) {
    list(httr::headers(resp), result)
  } else {
    result
  }
})
| 224 | |
setGeneric("clearCache", function(kco) standardGeneric("clearCache"))

#' @aliases clearCache
#' @rdname KorAPConnection-class
#' @export
setMethod("clearCache", "KorAPConnection", function(kco) {
  # Clears the cache subdirectory used by this package; `kco` only selects
  # the method and is not otherwise used.
  cacheDir <- KorAPCacheSubDir()
  R.cache::clearCache(dir = cacheDir)
})
| 233 | |
#' @rdname KorAPConnection-class
#' @param object KorAPConnection object
#' @export
setMethod("show", "KorAPConnection", function(object) {
  # Two lines: a class tag, then the API URL of the connection.
  classTag <- "<KorAPConnection>"
  cat(classTag, "\n", sep = " ")
  cat("apiUrl: ", object@apiUrl, "\n", sep = " ")
})
| 241 | |
##' Function KorAPConnection()
##'
##' Wrapper function for new("KorAPConnection")
##'
##' @rdname KorAPConnection-constructor
##' @name KorAPConnection-constructor
##' @export
## XKorAPConnection <- function(...) new("KorAPConnection", ...)