| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 1 | #' Class KorAPQuery | 
 | 2 | #' | 
| Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 3 | #' This class provides methods to perform different kinds of queries on the KorAP API server. | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 4 | #' `KorAPQuery` objects, which are typically created by the [corpusQuery()] method, | 
| Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 5 | #' represent the current state of a query to a KorAP server. | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 6 | #' | 
 | 7 | #' @include KorAPConnection.R | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 8 | #' @import httr | 
 | 9 | #' | 
| Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 10 | #' @include RKorAPClient-package.R | 
| Marc Kupietz | 5bbc9db | 2019-08-30 16:30:45 +0200 | [diff] [blame] | 11 |  | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 12 | #' @export | 
 | 13 | KorAPQuery <- setClass("KorAPQuery", slots = c( | 
| Marc Kupietz | b897218 | 2019-09-20 21:33:46 +0200 | [diff] [blame] | 14 |   "korapConnection", | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 15 |   "request", | 
 | 16 |   "vc", | 
 | 17 |   "totalResults", | 
 | 18 |   "nextStartIndex", | 
 | 19 |   "fields", | 
 | 20 |   "requestUrl", | 
 | 21 |   "webUIRequestUrl", | 
 | 22 |   "apiResponse", | 
 | 23 |   "collectedMatches", | 
 | 24 |   "hasMoreMatches" | 
 | 25 | )) | 
| Marc Kupietz | 5bbc9db | 2019-08-30 16:30:45 +0200 | [diff] [blame] | 26 |  | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 27 | #' Method initialize | 
 | 28 | #' | 
 | 29 | #' @rdname KorAPQuery-class | 
 | 30 | #' @param .Object … | 
| Marc Kupietz | b897218 | 2019-09-20 21:33:46 +0200 | [diff] [blame] | 31 | #' @param korapConnection KorAPConnection object | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 32 | #' @param request query part of the request URL | 
 | 33 | #' @param vc definition of a virtual corpus | 
 | 34 | #' @param totalResults number of hits the query has yielded | 
 | 35 | #' @param nextStartIndex at what index to start the next fetch of query results | 
 | 36 | #' @param fields what data / metadata fields should be collected | 
 | 37 | #' @param requestUrl complete URL of the API request | 
 | 38 | #' @param webUIRequestUrl URL of a web frontend request corresponding to the API request | 
 | 39 | #' @param apiResponse data-frame representation of the JSON response of the API request | 
| Marc Kupietz | 7776dec | 2019-09-27 16:59:02 +0200 | [diff] [blame] | 40 | #' @param hasMoreMatches logical that signals if more query results can be fetched | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 41 | #' @param collectedMatches matches already fetched from the KorAP-API-server | 
| Marc Kupietz | 97a1bca | 2019-10-04 22:52:09 +0200 | [diff] [blame] | 42 | #' | 
 | 43 | #' @importFrom tibble tibble | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 44 | #' @export | 
 | 45 | setMethod("initialize", "KorAPQuery", | 
| Marc Kupietz | b897218 | 2019-09-20 21:33:46 +0200 | [diff] [blame] | 46 |           function(.Object, korapConnection = NULL, request = NULL, vc="", totalResults=0, nextStartIndex=0, fields=c("corpusSigle", "textSigle", "pubDate",  "pubPlace", | 
| Marc Kupietz | 2078bde | 2023-08-27 16:46:15 +0200 | [diff] [blame] | 47 |                                                                               "availability", "textClass", "snippet", "tokens"), | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 48 |                    requestUrl="", webUIRequestUrl = "", apiResponse = NULL, hasMoreMatches= FALSE, collectedMatches = NULL) { | 
 | 49 |             .Object <- callNextMethod() | 
| Marc Kupietz | b897218 | 2019-09-20 21:33:46 +0200 | [diff] [blame] | 50 |             .Object@korapConnection = korapConnection | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 51 |             .Object@request = request | 
 | 52 |             .Object@vc = vc | 
 | 53 |             .Object@totalResults = totalResults | 
 | 54 |             .Object@nextStartIndex = nextStartIndex | 
 | 55 |             .Object@fields = fields | 
 | 56 |             .Object@requestUrl = requestUrl | 
 | 57 |             .Object@webUIRequestUrl = webUIRequestUrl | 
 | 58 |             .Object@apiResponse = apiResponse | 
 | 59 |             .Object@hasMoreMatches = hasMoreMatches | 
 | 60 |             .Object@collectedMatches = collectedMatches | 
 | 61 |             .Object | 
 | 62 |           }) | 
| Marc Kupietz | 632cbd4 | 2019-09-06 16:04:51 +0200 | [diff] [blame] | 63 |  | 
# Generics taking a connection object (`kco`) as first argument.
setGeneric(
  "corpusQuery",
  function(kco, ...) standardGeneric("corpusQuery")
)
setGeneric(
  "frequencyQuery",
  function(kco, ...) standardGeneric("frequencyQuery")
)

# Generics taking a query object (`kqo`) as first argument.
setGeneric(
  "fetchAll",
  function(kqo, ...) standardGeneric("fetchAll")
)
setGeneric(
  "fetchNext",
  function(kqo, ...) standardGeneric("fetchNext")
)
setGeneric(
  "fetchRest",
  function(kqo, ...) standardGeneric("fetchRest")
)

# Number of results requested per API call (one "page").
maxResultsPerPage <- 50

# Keep R CMD check quiet about the `.` placeholder used in pipelines.
if (getRversion() >= "2.15.1") {
  utils::globalVariables(".")
}
| Marc Kupietz | 632cbd4 | 2019-09-06 16:04:51 +0200 | [diff] [blame] | 74 |  | 
| Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 75 | #' Corpus query | 
 | 76 | #' | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 77 | #' **`corpusQuery`** performs a corpus query via a connection to a KorAP-API-server | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 78 | #' | 
| Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 79 | #' @rdname KorAPQuery-class | 
 | 80 | #' @aliases corpusQuery | 
 | 81 | #' | 
 | 82 | #' @importFrom urltools url_encode | 
 | 83 | #' @importFrom purrr pmap | 
 | 84 | #' @importFrom dplyr bind_rows | 
 | 85 | #' | 
#' @param kco [KorAPConnection()] object (obtained e.g. from `new("KorAPConnection")`)
 | 87 | #' @param query string that contains the corpus query. The query language depends on the `ql` parameter. Either `query` must be provided or `KorAPUrl`. | 
| Marc Kupietz | 632cbd4 | 2019-09-06 16:04:51 +0200 | [diff] [blame] | 88 | #' @param vc string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible. | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 89 | #' @param KorAPUrl instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in `KorAPConnection`) to provide all necessary information for the query. | 
| Marc Kupietz | 132f005 | 2023-04-16 14:23:05 +0200 | [diff] [blame] | 90 | #' @param metadataOnly logical that determines whether queries should return only metadata without any snippets. This can also be useful to prevent access rewrites. Note that the default value is TRUE. | 
 | 91 | #'    If you want your corpus queries to return not only metadata, but also KWICS, you need to authorize | 
 | 92 | #'    your RKorAPClient application as explained in the | 
 | 93 | #'   [authorization section](https://github.com/KorAP/RKorAPClient#authorization) | 
 | 94 | #'   of the RKorAPClient Readme on GitHub and set the `metadataOnly` parameter to | 
 | 95 | #'   `FALSE`. | 
#' @param ql string to choose the query language (see [section on Query Parameters](https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET#user-content-parameters) in the Kustvakt-Wiki for possible values).
| Akron | 5e13546 | 2019-09-27 16:31:38 +0200 | [diff] [blame] | 97 | #' @param fields (meta)data fields that will be fetched for every match. | 
| Marc Kupietz | 43a6ade | 2020-02-18 17:01:44 +0100 | [diff] [blame] | 98 | #' @param accessRewriteFatal abort if query or given vc had to be rewritten due to insufficient rights (not yet implemented). | 
| Marc Kupietz | 25aebc3 | 2019-09-16 18:40:50 +0200 | [diff] [blame] | 99 | #' @param verbose print some info | 
| Marc Kupietz | 4de53ec | 2019-10-04 09:12:00 +0200 | [diff] [blame] | 100 | #' @param as.df return result as data frame instead of as S4 object? | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 101 | #' @param expand logical that decides if `query` and `vc` parameters are expanded to all of their combinations | 
| Marc Kupietz | d9b2fd7 | 2023-04-17 19:08:50 +0200 | [diff] [blame] | 102 | #' @param context string that specifies the size of the left and the right context returned in `snippet` | 
#'        (provided that `metadataOnly` is set to `FALSE` and that the necessary access rights are met).
#'        The format of the context size specification (e.g. `3-token,3-token`) is described in the [Service: Search GET documentation of the Kustvakt Wiki](https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET).
#'        If the parameter is not set, the default context size specification of the KorAP server instance will be used.
 | 106 | #'        Note that you cannot overrule the maximum context size set in the KorAP server instance, | 
 | 107 | #'        as this is typically legally motivated. | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 108 | #' @return Depending on the `as.df` parameter, a table or a [KorAPQuery()] object that, among other information, contains the total number of results in `@totalResults`. The resulting object can be used to fetch all query results (with [fetchAll()]) or the next page of results (with [fetchNext()]). | 
#' A corresponding URL to be used within a web browser is contained in `@webUIRequestUrl`.
 | 110 | #' Please make sure to check `$collection$rewrites` to see if any unforeseen access rewrites of the query's virtual corpus had to be performed. | 
| Marc Kupietz | 632cbd4 | 2019-09-06 16:04:51 +0200 | [diff] [blame] | 111 | #' | 
 | 112 | #' @examples | 
| Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 113 | #' \dontrun{ | 
 | 114 | #' | 
| Marc Kupietz | 603491f | 2019-09-18 14:01:02 +0200 | [diff] [blame] | 115 | #' # Fetch metadata of every query hit for "Ameisenplage" and show a summary | 
| Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 116 | #' new("KorAPConnection") %>% corpusQuery("Ameisenplage") %>% fetchAll() | 
| Marc Kupietz | 657d8e7 | 2020-02-25 18:31:50 +0100 | [diff] [blame] | 117 | #' } | 
| Marc Kupietz | 3c531f6 | 2019-09-13 12:17:24 +0200 | [diff] [blame] | 118 | #' | 
| Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 119 | #' \dontrun{ | 
 | 120 | #' | 
| Marc Kupietz | 603491f | 2019-09-18 14:01:02 +0200 | [diff] [blame] | 121 | #' # Use the copy of a KorAP-web-frontend URL for an API query of "Ameise" in a virtual corpus | 
 | 122 | #' # and show the number of query hits (but don't fetch them). | 
| Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 123 | #' | 
 | 124 | #' new("KorAPConnection", verbose = TRUE) %>% | 
 | 125 | #'  corpusQuery(KorAPUrl = | 
 | 126 | #'    "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp") | 
| Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 127 | #' } | 
 | 128 | #' | 
 | 129 | #' \dontrun{ | 
| Marc Kupietz | 3c531f6 | 2019-09-13 12:17:24 +0200 | [diff] [blame] | 130 | #' | 
| Marc Kupietz | 603491f | 2019-09-18 14:01:02 +0200 | [diff] [blame] | 131 | #' # Plot the time/frequency curve of "Ameisenplage" | 
| Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 132 | #' new("KorAPConnection", verbose=TRUE) %>% | 
 | 133 | #'   { . ->> kco } %>% | 
 | 134 | #'   corpusQuery("Ameisenplage") %>% | 
 | 135 | #'   fetchAll() %>% | 
 | 136 | #'   slot("collectedMatches") %>% | 
 | 137 | #'   mutate(year = lubridate::year(pubDate)) %>% | 
| Marc Kupietz | 19e2ebd | 2019-10-07 11:45:30 +0200 | [diff] [blame] | 138 | #'   dplyr::select(year) %>% | 
| Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 139 | #'   group_by(year) %>% | 
| Marc Kupietz | cb3c59e | 2020-06-02 10:10:43 +0200 | [diff] [blame] | 140 | #'   summarise(Count = dplyr::n()) %>% | 
| Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 141 | #'   mutate(Freq = mapply(function(f, y) | 
 | 142 | #'     f / corpusStats(kco, paste("pubDate in", y))@tokens, Count, year)) %>% | 
| Marc Kupietz | 19e2ebd | 2019-10-07 11:45:30 +0200 | [diff] [blame] | 143 | #'   dplyr::select(-Count) %>% | 
| Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 144 | #'   complete(year = min(year):max(year), fill = list(Freq = 0)) %>% | 
 | 145 | #'   plot(type = "l") | 
| Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 146 | #' } | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 147 | #' @seealso [KorAPConnection()], [fetchNext()], [fetchRest()], [fetchAll()], [corpusStats()] | 
| Marc Kupietz | 632cbd4 | 2019-09-06 16:04:51 +0200 | [diff] [blame] | 148 | #' | 
 | 149 | #' @references | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 150 | #' <https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026> | 
| Marc Kupietz | 632cbd4 | 2019-09-06 16:04:51 +0200 | [diff] [blame] | 151 | #' | 
 | 152 | #' @export | 
setMethod("corpusQuery", "KorAPConnection",
          function(kco,
                   query = if (missing(KorAPUrl))
                     stop("At least one of the parameters query and KorAPUrl must be specified.", call. = FALSE)
                   else
                     httr::parse_url(KorAPUrl)$query$q,
                   vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq,
                   KorAPUrl,
                   metadataOnly = TRUE,
                   ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql,
                   fields = c(
                     "corpusSigle",
                     "textSigle",
                     "pubDate",
                     "pubPlace",
                     "availability",
                     "textClass",
                     "snippet",
                     "tokens"
                   ),
                   accessRewriteFatal = TRUE,
                   verbose = kco@verbose,
                   expand = length(vc) != length(query),
                   as.df = FALSE,
                   context = NULL) {
  # A pasted KorAPUrl does not have to carry every parameter: a missing `q`
  # cannot be recovered from, whereas a missing `cq` / `ql` simply means
  # "whole corpus" / "default query language".
  if (is.null(query)) {
    stop("The given KorAPUrl does not contain a query (parameter q).", call. = FALSE)
  }
  if (is.null(vc)) {
    vc <- ""
  }
  if (is.null(ql)) {
    ql <- "poliqarp"
  }

  # Several queries and/or virtual corpora: run one count-only query per
  # combination and return the stacked data frames (`as.df` is implied).
  # NOTE(review): `metadataOnly`, `fields` and `context` are not forwarded to
  # the per-combination calls, which therefore use their defaults - confirm
  # that this is intended.
  if (length(query) > 1 || length(vc) > 1) {
    grid <- if (expand) {
      tidyr::expand_grid(query = query, vc = vc)
    } else {
      tibble(query = query, vc = vc)
    }
    return(
      purrr::pmap(grid, function(query, vc, ...) {
        corpusQuery(kco, query = query, vc = vc, ql = ql, verbose = verbose, as.df = TRUE)
      }) %>%
        bind_rows()
    )
  }

  # Single query against a single virtual corpus.
  contentFields <- c("snippet", "tokens")
  if (metadataOnly) {
    fields <- fields[!fields %in% contentFields]
  }

  # Optional URL parameters; each is either "" or a complete "&key=value".
  contextParam <- if (!metadataOnly && !is.null(context) && context != "") {
    paste0("&context=", url_encode(enc2utf8(context)))
  } else {
    ""
  }
  vcParam <- if (!is.na(vc) && vc != "") {
    paste0("&cq=", url_encode(enc2utf8(vc)))
  } else {
    ""
  }
  tokensParam <- if (!metadataOnly) "&show-tokens=true" else ""

  request <- paste0(
    "?q=", url_encode(enc2utf8(query)),
    contextParam,
    vcParam,
    tokensParam,
    "&ql=", ql
  )
  webUIRequestUrl <- paste0(kco@KorAPUrl, request)
  requestUrl <- paste0(
    kco@apiUrl,
    "search",
    request,
    "&fields=",
    paste(fields, collapse = ","),
    if (metadataOnly) "&access-rewrite-disabled=true" else ""
  )

  log_info(verbose, "Searching \"", query, "\" in \"", vc, "\"", sep = "")
  # `count=0`: only the number of hits is needed at this point, the matches
  # themselves are retrieved later by the fetch methods.
  res <- apiCall(kco, paste0(requestUrl, "&count=0"))
  if (is.null(res)) {
    log_info(verbose, " [failed]\n")
    message("API call failed.")
    totalResults <- 0
  } else {
    totalResults <- as.integer(res$meta$totalResults)
    log_info(verbose, ": ", totalResults, " hits")
    if (!is.null(res$meta$cached)) {
      log_info(verbose, " [cached]\n")
    } else {
      log_info(verbose, ", took ", res$meta$benchmark, "\n", sep = "")
    }
  }

  if (as.df) {
    data.frame(
      query = query,
      totalResults = totalResults,
      vc = vc,
      webUIRequestUrl = webUIRequestUrl,
      stringsAsFactors = FALSE
    )
  } else {
    KorAPQuery(
      korapConnection = kco,
      nextStartIndex = 0,
      fields = fields,
      requestUrl = requestUrl,
      request = request,
      totalResults = totalResults,
      vc = vc,
      apiResponse = res,
      webUIRequestUrl = webUIRequestUrl,
      hasMoreMatches = (totalResults > 0)
    )
  }
})
| Marc Kupietz | 5bbc9db | 2019-08-30 16:30:45 +0200 | [diff] [blame] | 242 |  | 
| Marc Kupietz | 05a6079 | 2024-12-07 16:23:31 +0100 | [diff] [blame] | 243 | #' @importFrom purrr map | 
 | 244 | repair_data_strcuture <- function(x) { | 
 | 245 |   if (is.list(x)) | 
 | 246 |     as.character (purrr::map(x, ~ if (length(.x) > 1) { | 
 | 247 |       paste(.x, collapse = " ") | 
 | 248 |     } else { | 
 | 249 |       .x | 
 | 250 |     })) | 
 | 251 |   else | 
 | 252 |     ifelse(is.na(x), "", x) | 
 | 253 | } | 
 | 254 |  | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 255 | #' Fetch the next bunch of results of a KorAP query. | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 256 | #' | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 257 | #' **`fetchNext`** fetches the next bunch of results of a KorAP query. | 
| Marc Kupietz | 3f57528 | 2019-10-04 14:46:04 +0200 | [diff] [blame] | 258 | #' | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 259 | #' @param kqo object obtained from [corpusQuery()] | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 260 | #' @param offset start offset for query results to fetch | 
 | 261 | #' @param maxFetch maximum number of query results to fetch | 
| Marc Kupietz | 25aebc3 | 2019-09-16 18:40:50 +0200 | [diff] [blame] | 262 | #' @param verbose print progress information if true | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 263 | #' @param randomizePageOrder fetch result pages in pseudo random order if true. Use [set.seed()] to set seed for reproducible results. | 
 | 264 | #' @return The `kqo` input object with updated slots `collectedMatches`, `apiResponse`, `nextStartIndex`, `hasMoreMatches` | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 265 | #' | 
| Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 266 | #' @examples | 
| Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 267 | #' \dontrun{ | 
 | 268 | #' | 
 | 269 | #' q <- new("KorAPConnection") %>% corpusQuery("Ameisenplage") %>% fetchNext() | 
| Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 270 | #' q@collectedMatches | 
| Marc Kupietz | 657d8e7 | 2020-02-25 18:31:50 +0100 | [diff] [blame] | 271 | #' } | 
| Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 272 | #' | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 273 | #' @references | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 274 | #' <https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026> | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 275 | #' | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 276 | #' @aliases fetchNext | 
 | 277 | #' @rdname KorAPQuery-class | 
| Marc Kupietz | e8bd49b | 2024-06-28 07:24:44 +0200 | [diff] [blame] | 278 | #' @importFrom dplyr rowwise mutate bind_rows select summarise n select | 
| Marc Kupietz | f488112 | 2024-12-17 14:55:39 +0100 | [diff] [blame^] | 279 | #' @importFrom tibble enframe add_column | 
 | 280 | #' @importFrom stringr word | 
| Marc Kupietz | e8bd49b | 2024-06-28 07:24:44 +0200 | [diff] [blame] | 281 | #' @importFrom tidyr unnest unchop pivot_wider | 
 | 282 | #' @importFrom purrr map | 
| Marc Kupietz | 632cbd4 | 2019-09-06 16:04:51 +0200 | [diff] [blame] | 283 | #' @export | 
setMethod("fetchNext", "KorAPQuery", function(kqo,
                                              offset = kqo@nextStartIndex,
                                              maxFetch = maxResultsPerPage,
                                              verbose = kqo@korapConnection@verbose,
                                              randomizePageOrder = FALSE) {
  # NOTE(review): `offset` only takes part in this early exit; the position
  # of the actual requests is derived from the matches collected so far.
  if (kqo@totalResults == 0 || offset >= kqo@totalResults) {
    return(kqo)
  }
  use_korap_api <- Sys.getenv("USE_KORAP_API", unset = NA)
  results <- 0
  pubDate <- NULL # https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
  res <- NULL # stays NULL if the loop ends before any request was made

  collectedMatches <- kqo@collectedMatches
  if (is.null(collectedMatches)) {
    collectedMatches <- data.frame()
  }

  # Numbers must reach the URL in plain notation: paste0(100000) would
  # otherwise yield "1e+05".
  urlNumber <- function(x) format(x, scientific = FALSE, trim = TRUE)

  if (randomizePageOrder) {
    # Zero-based page numbers in random order.
    pages <- sample.int(ceiling(kqo@totalResults / maxResultsPerPage)) - 1
    # maxFetch is NA when everything is to be fetched (see fetchAll);
    # head() must not be called with NA.
    if (!is.na(maxFetch)) {
      pages <- utils::head(pages, maxFetch)
    }
  }

  repeat {
    if (randomizePageOrder) {
      page <- nrow(collectedMatches) %/% maxResultsPerPage + 1
      if (page > length(pages)) {
        # No random page left; do not send a request with offset=NA.
        break
      }
      currentOffset <- pages[page] * maxResultsPerPage
    } else {
      # Continue right after the matches collected so far. Rounding down to
      # a page boundary would fetch rows twice whenever an earlier fetch was
      # not a multiple of the page size.
      currentOffset <- nrow(collectedMatches)
    }
    pageSize <- min(
      if (!is.na(maxFetch)) maxFetch - results else maxResultsPerPage,
      maxResultsPerPage
    )
    query <- paste0(
      kqo@requestUrl,
      "&count=", urlNumber(pageSize),
      "&offset=", urlNumber(currentOffset),
      "&cutoff=true"
    )
    res <- apiCall(kqo@korapConnection, query)
    if (length(res$matches) == 0) {
      break
    }

    if ("fields" %in% colnames(res$matches) && (is.na(use_korap_api) || as.numeric(use_korap_api) >= 1.0)) {
      if (verbose) cat("Using fields API: ")
      # One key/value data frame per match -> one row per match, one column
      # per key.
      currentMatches <- res$matches$fields %>%
        purrr::map(~ mutate(.x, value = repair_data_strcuture(value))) %>%
        tibble::enframe() %>%
        tidyr::unnest(cols = value) %>%
        tidyr::pivot_wider(names_from = key, id_cols = name, names_repair = "unique") %>%
        dplyr::select(-name)
      if ("snippet" %in% colnames(res$matches)) {
        currentMatches$snippet <- res$matches$snippet
      }
    } else {
      currentMatches <- res$matches
    }

    # Requested fields that the response does not contain become NA columns,
    # so that every page has the same set of columns.
    for (field in kqo@fields) {
      if (!field %in% colnames(currentMatches)) {
        currentMatches[, field] <- NA
      }
    }
    # The two numbers behind "-p" at the end of the match ID become
    # matchStart and matchEnd (the second one reduced by 1).
    currentMatches <- currentMatches %>%
      select(dplyr::all_of(kqo@fields)) %>%
      mutate(
        tmp_positions = gsub(".*-p(\\d+)-(\\d+)", "\\1 \\2", res$matches$matchID),
        matchStart = as.integer(stringr::word(tmp_positions, 1)),
        matchEnd = as.integer(stringr::word(tmp_positions, 2)) - 1
      ) %>%
      select(-tmp_positions)

    collectedMatches <- bind_rows(collectedMatches, currentMatches)

    if (verbose) {
      cat(paste0(
        "Retrieved page ",
        ceiling(nrow(collectedMatches) / res$meta$itemsPerPage),
        "/",
        if (!is.na(maxFetch) && maxFetch < kqo@totalResults)
          sprintf("%d (%d)", ceiling(maxFetch / res$meta$itemsPerPage), ceiling(kqo@totalResults / res$meta$itemsPerPage))
        else
          sprintf("%d", ceiling(kqo@totalResults / res$meta$itemsPerPage)),
        ' in ',
        res$meta$benchmark,
        '\n'
      ))
    }

    results <- results + res$meta$itemsPerPage
    if (nrow(collectedMatches) >= kqo@totalResults || (!is.na(maxFetch) && results >= maxFetch)) {
      break
    }
  }

  # If no (successful) response is available, `res$meta$...` is NULL and the
  # min() below evaluates to totalResults, i.e. hasMoreMatches becomes FALSE.
  nextStartIndex <- min(res$meta$startIndex + res$meta$itemsPerPage, kqo@totalResults)
  KorAPQuery(
    nextStartIndex = nextStartIndex,
    korapConnection = kqo@korapConnection,
    fields = kqo@fields,
    requestUrl = kqo@requestUrl,
    request = kqo@request,
    totalResults = kqo@totalResults,
    vc = kqo@vc,
    webUIRequestUrl = kqo@webUIRequestUrl,
    hasMoreMatches = (kqo@totalResults > nextStartIndex),
    apiResponse = res,
    collectedMatches = collectedMatches
  )
})
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 381 |  | 
 | 382 | #' Fetch all results of a KorAP query. | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 383 | #' | 
| Marc Kupietz | 67edcb5 | 2021-09-20 21:54:24 +0200 | [diff] [blame] | 384 | #' **`fetchAll`** fetches all results of a KorAP query. | 
| Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 385 | #' | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 386 | #' @examples | 
| Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 387 | #' \dontrun{ | 
 | 388 | #' | 
| Marc Kupietz | 69cc54a | 2019-09-30 12:06:54 +0200 | [diff] [blame] | 389 | #' q <- new("KorAPConnection") %>% corpusQuery("Ameisenplage") %>% fetchAll() | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 390 | #' q@collectedMatches | 
| Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 391 | #' } | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 392 | #' | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 393 | #' @aliases fetchAll | 
 | 394 | #' @rdname KorAPQuery-class | 
| Marc Kupietz | 62da2b5 | 2019-09-12 17:43:34 +0200 | [diff] [blame] | 395 | #' @export | 
| Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 396 | setMethod("fetchAll", "KorAPQuery", function(kqo, verbose = kqo@korapConnection@verbose, ...) { | 
 | 397 |   return(fetchNext(kqo, offset = 0, maxFetch = NA, verbose = verbose, ...)) | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 398 | }) | 
 | 399 |  | 
 | 400 | #' Fetches the remaining results of a KorAP query. | 
 | 401 | #' | 
 | 402 | #' @examples | 
| Marc Kupietz | 6ae7605 | 2021-09-21 10:34:00 +0200 | [diff] [blame] | 403 | #' \dontrun{ | 
 | 404 | #' | 
| Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 405 | #' q <- new("KorAPConnection") %>% corpusQuery("Ameisenplage") %>% fetchRest() | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 406 | #' q@collectedMatches | 
| Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 407 | #' } | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 408 | #' | 
 | 409 | #' @aliases fetchRest | 
 | 410 | #' @rdname KorAPQuery-class | 
 | 411 | #' @export | 
| Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 412 | setMethod("fetchRest", "KorAPQuery", function(kqo, verbose = kqo@korapConnection@verbose, ...) { | 
 | 413 |   return(fetchNext(kqo, maxFetch = NA, verbose = verbose, ...)) | 
| Marc Kupietz | e95108e | 2019-09-18 13:23:58 +0200 | [diff] [blame] | 414 | }) | 
 | 415 |  | 
#' Query relative frequency of search term(s)
#'
#' **`frequencyQuery`** combines [corpusQuery()], [corpusStats()] and
#' [ci()] to compute a table with the relative frequencies and
#' confidence intervals of one or multiple search terms across one or multiple
#' virtual corpora.
#'
#' @aliases frequencyQuery
#' @rdname KorAPQuery-class
#' @examples
#' \dontrun{
#'
#' new("KorAPConnection", verbose = TRUE) %>%
#'   frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003))
#' }
#'
#' @param kco [KorAPConnection()] object (obtained e.g. from `new("KorAPConnection")`)
#' @param query string that contains the corpus query. The query language depends on the `ql` parameter. Either `query` must be provided or `KorAPUrl`.
#' @param conf.level confidence level of the returned confidence interval (passed through [ci()]  to [prop.test()]).
#' @param as.alternatives LOGICAL that specifies if the query terms should be treated as alternatives. If `as.alternatives` is TRUE, the sum over all query hits, instead of the respective vc token sizes is used as total for the calculation of relative frequencies.
#' @export
setMethod("frequencyQuery", "KorAPConnection",
  function(kco, query, vc = "", conf.level = 0.95, as.alternatives = FALSE, ...) {
    # Both variants start from the same metadata-only query result.
    hits <- corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...)

    withTotals <- if (as.alternatives) {
      # Alternatives: the total is the sum of all hits within the same vc.
      mutate(group_by(hits, vc), total = sum(totalResults))
    } else {
      # Otherwise the total is the token count reported by corpusStats().
      # `vc` inside mutate() is evaluated in the context of `hits`.
      mutate(hits, total = corpusStats(kco, vc = vc, as.df = TRUE)$tokens)
    }

    ci(withTotals, conf.level = conf.level)
  }
)
 | 449 |  | 
#' buildWebUIRequestUrlFromString
#'
#' @rdname KorAPQuery-class
#' @importFrom urltools url_encode
#' @export
buildWebUIRequestUrlFromString <- function(KorAPUrl,
                                           query,
                                           vc = "",
                                           ql = "poliqarp") {
  # A connection object may be passed instead of the URL string; in that case
  # its KorAPUrl slot is used. inherits() also accepts subclasses of
  # KorAPConnection, which a comparison against class() would reject.
  if (inherits(KorAPUrl, "KorAPConnection")) {
    KorAPUrl <- KorAPUrl@KorAPUrl
  }

  encodedQuery <- urltools::url_encode(enc2utf8(as.character(query)))

  # The cq parameter is only emitted for a non-empty vc. ifelse() (rather than
  # if/else) is kept deliberately so that a vector of vcs yields a vector of
  # URLs.
  vcParameter <- ifelse(
    vc != "",
    paste0("&cq=", urltools::url_encode(enc2utf8(vc))),
    ""
  )

  request <- paste0("?q=", encodedQuery, vcParameter, "&ql=", ql)
  paste0(KorAPUrl, request)
}
| Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 476 |  | 
 | 477 | #' buildWebUIRequestUrl | 
 | 478 | #' | 
 | 479 | #' @rdname KorAPQuery-class | 
| Marc Kupietz | 38a9d68 | 2024-12-06 16:17:09 +0100 | [diff] [blame] | 480 | #' @importFrom httr parse_url | 
| Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 481 | #' @export | 
 | 482 | buildWebUIRequestUrl <- function(kco, | 
 | 483 |                                  query = if (missing(KorAPUrl)) | 
 | 484 |                                    stop("At least one of the parameters query and KorAPUrl must be specified.", call. = FALSE) | 
 | 485 |                                  else | 
 | 486 |                                    httr::parse_url(KorAPUrl)$query$q, | 
 | 487 |                                  vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq, | 
 | 488 |                                  KorAPUrl, | 
| Marc Kupietz | 38a9d68 | 2024-12-06 16:17:09 +0100 | [diff] [blame] | 489 |                                  ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql) { | 
 | 490 |  | 
 | 491 |   buildWebUIRequestUrlFromString(kco@KorAPUrl, query, vc, ql) | 
| Marc Kupietz | dbd431a | 2021-08-29 12:17:45 +0200 | [diff] [blame] | 492 | } | 
 | 493 |  | 
#' format()
#' @rdname KorAPQuery-class
#' @param x KorAPQuery object
#' @param ... further arguments passed to or from other methods
#' @export
format.KorAPQuery <- function(x, ...) {
  cat("<KorAPQuery>\n")

  # The query string and the virtual corpus are recovered from the request URL.
  requestParts <- parse_url(x@request)
  virtualCorpus <- requestParts$query$cq

  cat("           Query: ", requestParts$query$q, "\n")
  if (!is.null(virtualCorpus) && virtualCorpus != "") {
    cat("  Virtual corpus: ", virtualCorpus, "\n")
  }

  # A summary of the collected matches is framed by two separator lines.
  if (!is.null(x@collectedMatches)) {
    separator <- "=============================================================================================================="
    cat(separator, "\n")
    print(summary(x@collectedMatches))
    cat(separator, "\n")
  }

  cat("   Total results: ", x@totalResults, "\n")
  cat(" Fetched results: ", x@nextStartIndex, "\n")
}
 | 515 |  | 
#' show()
#'
#' @rdname KorAPQuery-class
#' @param object KorAPQuery object
#' @export
setMethod(
  "show",
  "KorAPQuery",
  # Printing a KorAPQuery is handled entirely by its format() method.
  function(object) format(object)
)
| Marc Kupietz | 006b47c | 2021-01-13 17:00:59 +0100 | [diff] [blame] | 524 |  |