| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 1 | #' Class KorAPQuery | 
|  | 2 | #' | 
| Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 3 | #' This class provides methods to perform different kinds of queries on the KorAP API server. | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 4 | #' `KorAPQuery` objects, which are typically created by the [corpusQuery()] method, | 
| Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 5 | #' represent the current state of a query to a KorAP server. | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 6 | #' | 
|  | 7 | #' @include KorAPConnection.R | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 8 | #' @import httr | 
|  | 9 | #' | 
| Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 10 | #' @include RKorAPClient-package.R | 
| Marc Kupietz | 5bbc9db | 2019-08-30 16:30:45 +0200 | [diff] [blame] | 11 |  | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 12 | #' @export | 
KorAPQuery <- setClass(
  "KorAPQuery",
  # Every slot is untyped (class "ANY"); the values are filled in by the
  # `initialize` method and updated by corpusQuery() / fetchNext().
  slots = c(
    korapConnection  = "ANY",
    request          = "ANY",
    vc               = "ANY",
    totalResults     = "ANY",
    nextStartIndex   = "ANY",
    fields           = "ANY",
    requestUrl       = "ANY",
    webUIRequestUrl  = "ANY",
    apiResponse      = "ANY",
    collectedMatches = "ANY",
    hasMoreMatches   = "ANY"
  )
)
| Marc Kupietz | 5bbc9db | 2019-08-30 16:30:45 +0200 | [diff] [blame] | 26 |  | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 27 | #' Method initialize | 
|  | 28 | #' | 
|  | 29 | #' @rdname KorAPQuery-class | 
|  | 30 | #' @param .Object … | 
| Marc Kupietz | b897218 | 2019-09-20 21:33:46 +0200 | [diff] [blame] | 31 | #' @param korapConnection KorAPConnection object | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 32 | #' @param request query part of the request URL | 
|  | 33 | #' @param vc definition of a virtual corpus | 
|  | 34 | #' @param totalResults number of hits the query has yielded | 
|  | 35 | #' @param nextStartIndex at what index to start the next fetch of query results | 
|  | 36 | #' @param fields what data / metadata fields should be collected | 
|  | 37 | #' @param requestUrl complete URL of the API request | 
|  | 38 | #' @param webUIRequestUrl URL of a web frontend request corresponding to the API request | 
|  | 39 | #' @param apiResponse data-frame representation of the JSON response of the API request | 
| Marc Kupietz | 7776dec | 2019-09-27 16:59:02 +0200 | [diff] [blame] | 40 | #' @param hasMoreMatches logical that signals if more query results can be fetched | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 41 | #' @param collectedMatches matches already fetched from the KorAP-API-server | 
| Marc Kupietz | 97a1bca | 2019-10-04 22:52:09 +0200 | [diff] [blame] | 42 | #' | 
|  | 43 | #' @importFrom tibble tibble | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 44 | #' @export | 
setMethod(
  "initialize", "KorAPQuery",
  function(.Object,
           korapConnection = NULL,
           request = NULL,
           vc = "",
           totalResults = 0,
           nextStartIndex = 0,
           fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace",
                      "availability", "textClass", "snippet", "tokens"),
           requestUrl = "",
           webUIRequestUrl = "",
           apiResponse = NULL,
           hasMoreMatches = FALSE,
           collectedMatches = NULL) {
    # Run the inherited initializer first, then set every slot explicitly so
    # that arguments the caller omitted end up with the defaults above.
    .Object <- callNextMethod()

    # Connection and request description
    .Object@korapConnection <- korapConnection
    .Object@request <- request
    .Object@vc <- vc

    # Paging state
    .Object@totalResults <- totalResults
    .Object@nextStartIndex <- nextStartIndex

    # What to fetch and from where
    .Object@fields <- fields
    .Object@requestUrl <- requestUrl
    .Object@webUIRequestUrl <- webUIRequestUrl

    # Results obtained so far
    .Object@apiResponse <- apiResponse
    .Object@hasMoreMatches <- hasMoreMatches
    .Object@collectedMatches <- collectedMatches

    .Object
  }
)
| Marc Kupietz | 632cbd4 | 2019-09-06 16:04:51 +0200 | [diff] [blame] | 63 |  | 
# S4 generics. Those taking `kco` have a method for "KorAPConnection" below,
# those taking `kqo` have a method for "KorAPQuery".
setGeneric("corpusQuery", function(kco, ...) {
  standardGeneric("corpusQuery")
})
setGeneric("fetchAll", function(kqo, ...) {
  standardGeneric("fetchAll")
})
setGeneric("fetchNext", function(kqo, ...) {
  standardGeneric("fetchNext")
})
setGeneric("fetchRest", function(kqo, ...) {
  standardGeneric("fetchRest")
})
setGeneric("frequencyQuery", function(kco, ...) {
  standardGeneric("frequencyQuery")
})

# Page size used by fetchNext(): both the default `maxFetch` and the upper
# bound of the `count` parameter of a single API request.
maxResultsPerPage <- 50

## quiets concerns of R CMD check re: the .'s that appear in pipelines
if (getRversion() >= "2.15.1") {
  utils::globalVariables(".")
}
| Marc Kupietz | 632cbd4 | 2019-09-06 16:04:51 +0200 | [diff] [blame] | 74 |  | 
| Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 75 | #' Corpus query | 
|  | 76 | #' | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 77 | #' **`corpusQuery`** performs a corpus query via a connection to a KorAP-API-server | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 78 | #' | 
| Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 79 | #' @rdname KorAPQuery-class | 
|  | 80 | #' @aliases corpusQuery | 
|  | 81 | #' | 
|  | 82 | #' @importFrom urltools url_encode | 
|  | 83 | #' @importFrom purrr pmap | 
|  | 84 | #' @importFrom dplyr bind_rows | 
|  | 85 | #' | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 86 | #' @param kco [KorAPConnection()] object (obtained e.g. from `new("KorAPConnection")`) | 
|  | 87 | #' @param query string that contains the corpus query. The query language depends on the `ql` parameter. Either `query` must be provided or `KorAPUrl`. | 
| Marc Kupietz | 632cbd4 | 2019-09-06 16:04:51 +0200 | [diff] [blame] | 88 | #' @param vc string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible. | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 89 | #' @param KorAPUrl instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in `KorAPConnection`) to provide all necessary information for the query. | 
| Marc Kupietz | 132f005 | 2023-04-16 14:23:05 +0200 | [diff] [blame] | 90 | #' @param metadataOnly logical that determines whether queries should return only metadata without any snippets. This can also be useful to prevent access rewrites. Note that the default value is TRUE. | 
|  | 91 | #'    If you want your corpus queries to return not only metadata, but also KWICS, you need to authorize | 
|  | 92 | #'    your RKorAPClient application as explained in the | 
|  | 93 | #'   [authorization section](https://github.com/KorAP/RKorAPClient#authorization) | 
|  | 94 | #'   of the RKorAPClient Readme on GitHub and set the `metadataOnly` parameter to | 
|  | 95 | #'   `FALSE`. | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 96 | #' @param ql string to choose the query language (see [section on Query Parameters](https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET#user-content-parameters) in the Kustvakt-Wiki for possible values). | 
| Akron | 5e13546 | 2019-09-27 16:31:38 +0200 | [diff] [blame] | 97 | #' @param fields (meta)data fields that will be fetched for every match. | 
| Marc Kupietz | 43a6ade | 2020-02-18 17:01:44 +0100 | [diff] [blame] | 98 | #' @param accessRewriteFatal abort if query or given vc had to be rewritten due to insufficient rights (not yet implemented). | 
| Marc Kupietz | 25aebc3 | 2019-09-16 18:40:50 +0200 | [diff] [blame] | 99 | #' @param verbose print some info | 
| Marc Kupietz | 4de53ec | 2019-10-04 09:12:00 +0200 | [diff] [blame] | 100 | #' @param as.df return result as data frame instead of as S4 object? | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 101 | #' @param expand logical that decides if `query` and `vc` parameters are expanded to all of their combinations | 
| Marc Kupietz | d9b2fd7 | 2023-04-17 19:08:50 +0200 | [diff] [blame] | 102 | #' @param context string that specifies the size of the left and the right context returned in `snippet` | 
|  | 103 | #'        (provided that `metadataOnly` is set to `FALSE` and that the necessary access rights are met). | 
|  | 104 | #'        The format of the context size specification (e.g. `3-token,3-token`) is described in the [Service: Search GET documentation of the Kustvakt Wiki](https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET). | 
|  | 105 | #'        If the parameter is not set, the default context size specification of the KorAP server instance will be used. | 
|  | 106 | #'        Note that you cannot overrule the maximum context size set in the KorAP server instance, | 
|  | 107 | #'        as this is typically legally motivated. | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 108 | #' @return Depending on the `as.df` parameter, a table or a [KorAPQuery()] object that, among other information, contains the total number of results in `@totalResults`. The resulting object can be used to fetch all query results (with [fetchAll()]) or the next page of results (with [fetchNext()]). | 
|  | 109 | #' A corresponding URL to be used within a web browser is contained in `@webUIRequestUrl`. | 
|  | 110 | #' Please make sure to check `$collection$rewrites` to see if any unforeseen access rewrites of the query's virtual corpus had to be performed. | 
| Marc Kupietz | 632cbd4 | 2019-09-06 16:04:51 +0200 | [diff] [blame] | 111 | #' | 
|  | 112 | #' @examples | 
| Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 113 | #' \dontrun{ | 
|  | 114 | #' | 
| Marc Kupietz | 603491f | 2019-09-18 14:01:02 +0200 | [diff] [blame] | 115 | #' # Fetch metadata of every query hit for "Ameisenplage" and show a summary | 
| Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 116 | #' new("KorAPConnection") %>% corpusQuery("Ameisenplage") %>% fetchAll() | 
| Marc Kupietz | 657d8e7 | 2020-02-25 18:31:50 +0100 | [diff] [blame] | 117 | #' } | 
| Marc Kupietz | 3c531f6 | 2019-09-13 12:17:24 +0200 | [diff] [blame] | 118 | #' | 
| Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 119 | #' \dontrun{ | 
|  | 120 | #' | 
| Marc Kupietz | 603491f | 2019-09-18 14:01:02 +0200 | [diff] [blame] | 121 | #' # Use the copy of a KorAP-web-frontend URL for an API query of "Ameise" in a virtual corpus | 
|  | 122 | #' # and show the number of query hits (but don't fetch them). | 
| Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 123 | #' | 
|  | 124 | #' new("KorAPConnection", verbose = TRUE) %>% | 
|  | 125 | #'  corpusQuery(KorAPUrl = | 
|  | 126 | #'    "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp") | 
| Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 127 | #' } | 
|  | 128 | #' | 
|  | 129 | #' \dontrun{ | 
| Marc Kupietz | 3c531f6 | 2019-09-13 12:17:24 +0200 | [diff] [blame] | 130 | #' | 
| Marc Kupietz | 603491f | 2019-09-18 14:01:02 +0200 | [diff] [blame] | 131 | #' # Plot the time/frequency curve of "Ameisenplage" | 
| Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 132 | #' new("KorAPConnection", verbose=TRUE) %>% | 
|  | 133 | #'   { . ->> kco } %>% | 
|  | 134 | #'   corpusQuery("Ameisenplage") %>% | 
|  | 135 | #'   fetchAll() %>% | 
|  | 136 | #'   slot("collectedMatches") %>% | 
|  | 137 | #'   mutate(year = lubridate::year(pubDate)) %>% | 
| Marc Kupietz | 19e2ebd | 2019-10-07 11:45:30 +0200 | [diff] [blame] | 138 | #'   dplyr::select(year) %>% | 
| Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 139 | #'   group_by(year) %>% | 
| Marc Kupietz | cb3c59e | 2020-06-02 10:10:43 +0200 | [diff] [blame] | 140 | #'   summarise(Count = dplyr::n()) %>% | 
| Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 141 | #'   mutate(Freq = mapply(function(f, y) | 
|  | 142 | #'     f / corpusStats(kco, paste("pubDate in", y))@tokens, Count, year)) %>% | 
| Marc Kupietz | 19e2ebd | 2019-10-07 11:45:30 +0200 | [diff] [blame] | 143 | #'   dplyr::select(-Count) %>% | 
| Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 144 | #'   complete(year = min(year):max(year), fill = list(Freq = 0)) %>% | 
|  | 145 | #'   plot(type = "l") | 
| Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 146 | #' } | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 147 | #' @seealso [KorAPConnection()], [fetchNext()], [fetchRest()], [fetchAll()], [corpusStats()] | 
| Marc Kupietz | 632cbd4 | 2019-09-06 16:04:51 +0200 | [diff] [blame] | 148 | #' | 
|  | 149 | #' @references | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 150 | #' <https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026> | 
| Marc Kupietz | 632cbd4 | 2019-09-06 16:04:51 +0200 | [diff] [blame] | 151 | #' | 
|  | 152 | #' @export | 
setMethod("corpusQuery", "KorAPConnection",
  function(kco,
           query = if (missing(KorAPUrl))
             stop("At least one of the parameters query and KorAPUrl must be specified.", call. = FALSE)
           else
             httr::parse_url(KorAPUrl)$query$q,
           vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq,
           KorAPUrl,
           metadataOnly = TRUE,
           ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql,
           fields = c(
             "corpusSigle",
             "textSigle",
             "pubDate",
             "pubPlace",
             "availability",
             "textClass",
             "snippet",
             "tokens"
           ),
           accessRewriteFatal = TRUE,
           verbose = kco@verbose,
           expand = length(vc) != length(query),
           as.df = FALSE,
           context = NULL) {
    # Evaluate `query` first so that the "must be specified" error of its
    # default is the first thing a caller sees when nothing was supplied.
    force(query)
    if (is.null(query)) {
      stop("KorAPUrl does not contain a query (URL parameter q).", call. = FALSE)
    }
    # A KorAPUrl without `cq` / `ql` parameters yields NULL for these
    # arguments. Fall back to the values that are used when no KorAPUrl is
    # given, so that the request and the result always carry both.
    if (is.null(vc)) {
      vc <- ""
    }
    if (is.null(ql)) {
      ql <- "poliqarp"
    }

    # Several queries and/or virtual corpora: run one scalar query per
    # combination (or per pair if `expand` is FALSE) and stack the resulting
    # one-row data frames. This branch always returns a data frame.
    if (length(query) > 1 || length(vc) > 1) {
      grid <- if (expand) {
        tidyr::expand_grid(query = query, vc = vc)
      } else {
        tibble(query = query, vc = vc)
      }
      perCombination <- purrr::pmap(grid, function(query, vc, ...) {
        # Forward the caller's settings so that each scalar query is built
        # exactly as it would be when called directly.
        corpusQuery(
          kco,
          query = query,
          vc = vc,
          metadataOnly = metadataOnly,
          ql = ql,
          fields = fields,
          accessRewriteFatal = accessRewriteFatal,
          verbose = verbose,
          as.df = TRUE,
          context = context
        )
      })
      return(bind_rows(perCombination))
    }

    # Content fields are not requested in metadata-only mode.
    contentFields <- c("snippet", "tokens")
    if (metadataOnly) {
      fields <- fields[!fields %in% contentFields]
    }

    # Optional URL parameters; each one is either "" or "&name=value".
    contextParam <- if (!metadataOnly && !is.null(context) && context != "") {
      paste0("&context=", url_encode(enc2utf8(context)))
    } else {
      ""
    }
    vcParam <- if (vc != "") {
      paste0("&cq=", url_encode(enc2utf8(vc)))
    } else {
      ""
    }
    showTokensParam <- if (!metadataOnly) "&show-tokens=true" else ""

    request <- paste0(
      "?q=", url_encode(enc2utf8(query)),
      contextParam,
      vcParam,
      showTokensParam,
      "&ql=", ql
    )
    webUIRequestUrl <- paste0(kco@KorAPUrl, request)
    requestUrl <- paste0(
      kco@apiUrl,
      "search",
      request,
      "&fields=",
      paste(fields, collapse = ","),
      if (metadataOnly) "&access-rewrite-disabled=true" else ""
    )

    log_info(verbose, "Searching \"", query, "\" in \"", vc, "\"", sep = "")
    # count=0: only the number of hits is needed here, matches are fetched
    # later by fetchNext() and friends.
    res <- apiCall(kco, paste0(requestUrl, "&count=0"))
    if (is.null(res)) {
      log_info(verbose, " [failed]\n")
      message("API call failed.")
      totalResults <- 0
    } else {
      totalResults <- as.integer(res$meta$totalResults)
      log_info(verbose, ": ", totalResults, " hits")
      if (!is.null(res$meta$cached)) {
        log_info(verbose, " [cached]\n")
      } else {
        log_info(verbose, ", took ", res$meta$benchmark, "\n", sep = "")
      }
    }

    if (as.df) {
      data.frame(
        query = query,
        totalResults = totalResults,
        vc = vc,
        webUIRequestUrl = webUIRequestUrl,
        stringsAsFactors = FALSE
      )
    } else {
      KorAPQuery(
        korapConnection = kco,
        nextStartIndex = 0,
        fields = fields,
        requestUrl = requestUrl,
        request = request,
        totalResults = totalResults,
        vc = vc,
        apiResponse = res,
        webUIRequestUrl = webUIRequestUrl,
        hasMoreMatches = (totalResults > 0)
      )
    }
  })
| Marc Kupietz | 5bbc9db | 2019-08-30 16:30:45 +0200 | [diff] [blame] | 242 |  | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 243 | #' Fetch the next bunch of results of a KorAP query. | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 244 | #' | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 245 | #' **`fetchNext`** fetches the next bunch of results of a KorAP query. | 
| Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 246 | #' | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 247 | #' @param kqo object obtained from [corpusQuery()] | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 248 | #' @param offset start offset for query results to fetch | 
|  | 249 | #' @param maxFetch maximum number of query results to fetch | 
| Marc Kupietz | 25aebc3 | 2019-09-16 18:40:50 +0200 | [diff] [blame] | 250 | #' @param verbose print progress information if true | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 251 | #' @param randomizePageOrder fetch result pages in pseudo random order if true. Use [set.seed()] to set seed for reproducible results. | 
|  | 252 | #' @return The `kqo` input object with updated slots `collectedMatches`, `apiResponse`, `nextStartIndex`, `hasMoreMatches` | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 253 | #' | 
| Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 254 | #' @examples | 
| Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 255 | #' \dontrun{ | 
|  | 256 | #' | 
|  | 257 | #' q <- new("KorAPConnection") %>% corpusQuery("Ameisenplage") %>% fetchNext() | 
| Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 258 | #' q@collectedMatches | 
| Marc Kupietz | 657d8e7 | 2020-02-25 18:31:50 +0100 | [diff] [blame] | 259 | #' } | 
| Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 260 | #' | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 261 | #' @references | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 262 | #' <https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026> | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 263 | #' | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 264 | #' @aliases fetchNext | 
|  | 265 | #' @rdname KorAPQuery-class | 
| Marc Kupietz | e8bd49b | 2024-06-28 07:24:44 +0200 | [diff] [blame^] | 266 | #' @importFrom dplyr rowwise mutate bind_rows select summarise n select | 
|  | 267 | #' @importFrom tibble enframe | 
|  | 268 | #' @importFrom tidyr unnest unchop pivot_wider | 
|  | 269 | #' @importFrom purrr map | 
| Marc Kupietz | 632cbd4 | 2019-09-06 16:04:51 +0200 | [diff] [blame] | 270 | #' @export | 
setMethod("fetchNext", "KorAPQuery", function(kqo,
                                              offset = kqo@nextStartIndex,
                                              maxFetch = maxResultsPerPage,
                                              verbose = kqo@korapConnection@verbose,
                                              randomizePageOrder = FALSE) {
  # Nothing to do for empty result sets or when `offset` is already past the
  # end. Note that `offset` is only used for this check: the page that is
  # actually requested is derived from the number of matches collected so far.
  if (kqo@totalResults == 0 || offset >= kqo@totalResults) {
    return(kqo)
  }

  # USE_KORAP_API (environment variable): when set to a number below 1, the
  # `fields` column of the response is ignored and the matches are used as-is.
  use_korap_api <- Sys.getenv("USE_KORAP_API", unset = NA)
  results <- 0
  pubDate <- NULL # https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
  collectedMatches <- kqo@collectedMatches

  if (randomizePageOrder) {
    # Zero-based page indices in random order.
    # NOTE(review): head() rejects n = NA, so randomizePageOrder combined with
    # maxFetch = NA (as passed by fetchAll()/fetchRest()) looks unsupported --
    # confirm before relying on it.
    pages <- head(sample.int(ceiling(kqo@totalResults / maxResultsPerPage)), maxFetch) - 1
  }

  if (is.null(collectedMatches)) {
    collectedMatches <- data.frame()
  }

  repeat {
    # One-based number of the next page, derived from what has been collected.
    page <- nrow(collectedMatches) %/% maxResultsPerPage + 1
    pageIndex <- if (randomizePageOrder) pages[page] else page - 1
    currentOffset <- pageIndex * maxResultsPerPage
    pageSize <- min(
      if (!is.na(maxFetch)) maxFetch - results else maxResultsPerPage,
      maxResultsPerPage
    )
    query <- paste0(
      kqo@requestUrl,
      "&count=", pageSize,
      # The offset is a double; format it explicitly, otherwise values such as
      # 100000 would be pasted into the URL as "1e+05".
      "&offset=", format(currentOffset, scientific = FALSE, trim = TRUE),
      "&cutoff=true"
    )
    res <- apiCall(kqo@korapConnection, query)
    if (length(res$matches) == 0) {
      break
    }

    if ("fields" %in% colnames(res$matches) && (is.na(use_korap_api) || as.numeric(use_korap_api) >= 1.0)) {
      if (verbose) cat("Using fields API: ")
      # Turn the per-match key/value lists into one row per match and one
      # column per key; multi-valued fields are collapsed into one string.
      currentMatches <- tibble::enframe(res$matches$fields) %>%
        tidyr::unnest(cols = value) %>%
        tidyr::pivot_wider(names_from = key, id_cols = name, names_repair = "unique") %>%
        dplyr::mutate(dplyr::across(where(is.list), ~ purrr::map(.x, ~ if (length(.x) < 2) unlist(.x) else paste(.x, collapse = " ")))) %>%
        tidyr::unchop(where(is.list)) %>%
        dplyr::select(-name)
      if ("snippet" %in% colnames(res$matches)) {
        currentMatches$snippet <- res$matches$snippet
      }
    } else {
      currentMatches <- res$matches
    }

    # Make sure every requested field exists as a column, then restrict the
    # page to the requested fields in the requested order.
    for (field in kqo@fields) {
      if (!field %in% colnames(currentMatches)) {
        currentMatches[, field] <- NA
      }
    }
    currentMatches <- currentMatches %>% select(kqo@fields)

    if (!is.list(collectedMatches)) {
      collectedMatches <- currentMatches
    } else {
      collectedMatches <- bind_rows(collectedMatches, currentMatches)
    }

    if (verbose) {
      cat(paste0(
        "Retrieved page ",
        ceiling(nrow(collectedMatches) / res$meta$itemsPerPage),
        "/",
        if (!is.na(maxFetch) && maxFetch < kqo@totalResults)
          sprintf("%d (%d)", ceiling(maxFetch / res$meta$itemsPerPage), ceiling(kqo@totalResults / res$meta$itemsPerPage))
        else
          sprintf("%d", ceiling(kqo@totalResults / res$meta$itemsPerPage)),
        ' in ',
        res$meta$benchmark,
        '\n'
      ))
    }

    results <- results + res$meta$itemsPerPage
    # Stop when everything has been collected or the per-call cap is reached
    # (maxFetch = NA means no cap).
    if (nrow(collectedMatches) >= kqo@totalResults || (!is.na(maxFetch) && results >= maxFetch)) {
      break
    }
  }

  nextStartIndex <- min(res$meta$startIndex + res$meta$itemsPerPage, kqo@totalResults)
  KorAPQuery(nextStartIndex = nextStartIndex,
             korapConnection = kqo@korapConnection,
             fields = kqo@fields,
             requestUrl = kqo@requestUrl,
             request = kqo@request,
             totalResults = kqo@totalResults,
             vc = kqo@vc,
             webUIRequestUrl = kqo@webUIRequestUrl,
             hasMoreMatches = (kqo@totalResults > nextStartIndex),
             apiResponse = res,
             collectedMatches = collectedMatches)
})
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 361 |  | 
|  | 362 | #' Fetch all results of a KorAP query. | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 363 | #' | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 364 | #' **`fetchAll`** fetches all results of a KorAP query. | 
| Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 365 | #' | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 366 | #' @examples | 
| Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 367 | #' \dontrun{ | 
|  | 368 | #' | 
| Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 369 | #' q <- new("KorAPConnection") %>% corpusQuery("Ameisenplage") %>% fetchAll() | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 370 | #' q@collectedMatches | 
| Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 371 | #' } | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 372 | #' | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 373 | #' @aliases fetchAll | 
|  | 374 | #' @rdname KorAPQuery-class | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 375 | #' @export | 
setMethod(
  "fetchAll", "KorAPQuery",
  function(kqo, verbose = kqo@korapConnection@verbose, ...) {
    # Delegates to fetchNext() with offset 0 and no cap on the number of
    # results (maxFetch = NA); further arguments are passed through.
    fetchNext(kqo, offset = 0, maxFetch = NA, verbose = verbose, ...)
  }
)
|  | 379 |  | 
#' Fetches the remaining results of a KorAP query.
#'
#' **`fetchRest`** delegates to `fetchNext()` with `maxFetch = NA`, leaving
#' the offset at the default chosen by `fetchNext()`.
#'
#' @examples
#' \dontrun{
#'
#' q <- new("KorAPConnection") %>% corpusQuery("Ameisenplage") %>% fetchRest()
#' q@collectedMatches
#' }
#'
#' @aliases fetchRest
#' @rdname KorAPQuery-class
#' @export
setMethod(
  "fetchRest",
  "KorAPQuery",
  function(kqo, verbose = kqo@korapConnection@verbose, ...) {
    # No explicit offset: fetchNext() decides where to continue.
    fetchNext(
      kqo,
      maxFetch = NA,
      verbose = verbose,
      ...
    )
  }
)
|  | 395 |  | 
#' Query relative frequency of search term(s)
#'
#' **`frequencyQuery`** combines [corpusQuery()], [corpusStats()] and
#' [ci()] to compute a table with the relative frequencies and
#' confidence intervals of one or multiple search terms across one or multiple
#' virtual corpora.
#'
#' @aliases frequencyQuery
#' @rdname KorAPQuery-class
#' @examples
#' \dontrun{
#'
#' new("KorAPConnection", verbose = TRUE) %>%
#'   frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003))
#' }
#'
#' @param kco [KorAPConnection()] object (obtained e.g. from `new("KorAPConnection")`)
#' @param query string that contains the corpus query. The query language depends on the `ql` parameter. Either `query` must be provided or `KorAPUrl`.
#' @param conf.level confidence level of the returned confidence interval (passed through [ci()]  to [prop.test()]).
#' @param as.alternatives LOGICAL that specifies if the query terms should be treated as alternatives. If `as.alternatives` is TRUE, the sum over all query hits, instead of the respective vc token sizes is used as total for the calculation of relative frequencies.
#' @export
setMethod(
  "frequencyQuery",
  "KorAPConnection",
  function(kco,
           query,
           vc = "",
           conf.level = 0.95,
           as.alternatives = FALSE,
           ...) {
    if (as.alternatives) {
      # Denominator: sum of the hit counts of all query terms that share the
      # same value in the `vc` column.
      hits <- corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...)
      frequencies <- hits %>%
        group_by(vc) %>%
        mutate(total = sum(totalResults))
    } else {
      # Denominator: the token count reported by corpusStats() for `vc`.
      hits <- corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...)
      frequencies <- hits %>%
        mutate(total = corpusStats(kco, vc = vc, as.df = TRUE)$tokens)
    }
    ci(frequencies, conf.level = conf.level)
  }
)
|  | 429 |  | 
| Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 430 |  | 
#' buildWebUIRequestUrl
#'
#' Builds the web UI request URL for a query by appending the URL-encoded
#' query string (`q`, optionally `cq`, and `ql`) to `kco@KorAPUrl`.
#'
#' @rdname KorAPQuery-class
#' @importFrom urltools url_encode
#' @export
buildWebUIRequestUrl <- function(kco,
                                 query = if (missing(KorAPUrl))
                                   stop("At least one of the parameters query and KorAPUrl must be specified.", call. = FALSE)
                                 else
                                   httr::parse_url(KorAPUrl)$query$q,
                                 vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq,
                                 KorAPUrl,
                                 metadataOnly = TRUE,
                                 ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql,
                                 fields = c(
                                   "corpusSigle",
                                   "textSigle",
                                   "pubDate",
                                   "pubPlace",
                                   "availability",
                                   "textClass",
                                   "snippet",
                                   "tokens"
                                 ),
                                 accessRewriteFatal = TRUE) {
  # `metadataOnly`, `fields` and `accessRewriteFatal` are kept in the signature
  # for backward compatibility; they do not influence the returned URL. The
  # API request URL that used to be assembled from them here was never
  # returned or stored, so that dead computation has been dropped.
  encoded_query <- urltools::url_encode(enc2utf8(as.character(query)))

  # ifelse() rather than if/else: this keeps the element-wise treatment of `vc`
  # (and the zero-length result when `vc` is NULL, which paste0() then treats
  # as an empty string).
  vc_part <- ifelse(
    vc != "",
    paste0("&cq=", urltools::url_encode(enc2utf8(vc))),
    ""
  )

  paste0(kco@KorAPUrl, "?q=", encoded_query, vc_part, "&ql=", ql)
}
|  | 480 |  | 
#' format()
#'
#' Prints a summary of a KorAPQuery object to the console: the query, the
#' virtual corpus (if one is set), a summary of the collected matches (if any)
#' and the values of the `totalResults` and `nextStartIndex` slots.
#'
#' @rdname KorAPQuery-class
#' @param x KorAPQuery object
#' @param ... further arguments passed to or from other methods
#' @export
format.KorAPQuery <- function(x, ...) {
  separator <- "=============================================================================================================="

  cat("<KorAPQuery>\n")

  # The query and the virtual corpus are recovered from the stored request.
  params <- parse_url(x@request)$query
  cat("           Query: ", params$q, "\n")

  has_vc <- !is.null(params$cq) && params$cq != ""
  if (has_vc) {
    cat("  Virtual corpus: ", params$cq, "\n")
  }

  matches <- x@collectedMatches
  if (!is.null(matches)) {
    cat(separator, "\n")
    print(summary(matches))
    cat(separator, "\n")
  }

  cat("   Total results: ", x@totalResults, "\n")
  cat(" Fetched results: ", x@nextStartIndex, "\n")
}
|  | 502 |  | 
#' show()
#'
#' Displays a KorAPQuery object by delegating to `format()`.
#'
#' @rdname KorAPQuery-class
#' @param object KorAPQuery object
#' @export
setMethod(
  "show",
  "KorAPQuery",
  function(object) format(object)
)
| Marc Kupietz | 006b47c | 2021-01-13 17:00:59 +0100 | [diff] [blame] | 511 |  |