blob: b2926d762620b6ea02b1797243c306f5e7e41114 [file] [log] [blame]
Marc Kupietze95108e2019-09-18 13:23:58 +02001#' Class KorAPQuery
2#'
Marc Kupietz43a6ade2020-02-18 17:01:44 +01003#' \code{KorAPQuery} objects represent the current state of a query to a KorAP server.
Marc Kupietze95108e2019-09-18 13:23:58 +02004#' New \code{KorAPQuery} objects are typically created by the \code{\link{corpusQuery}} method.
5#'
6#' @include KorAPConnection.R
Marc Kupietze95108e2019-09-18 13:23:58 +02007#' @import httr
8#'
Marc Kupietz4de53ec2019-10-04 09:12:00 +02009#' @include RKorAPClient.R
Marc Kupietz5bbc9db2019-08-30 16:30:45 +020010
Marc Kupietze95108e2019-09-18 13:23:58 +020011#' @export
12KorAPQuery <- setClass("KorAPQuery", slots = c(
Marc Kupietzb8972182019-09-20 21:33:46 +020013 "korapConnection",
Marc Kupietze95108e2019-09-18 13:23:58 +020014 "request",
15 "vc",
16 "totalResults",
17 "nextStartIndex",
18 "fields",
19 "requestUrl",
20 "webUIRequestUrl",
21 "apiResponse",
22 "collectedMatches",
23 "hasMoreMatches"
24))
Marc Kupietz5bbc9db2019-08-30 16:30:45 +020025
Marc Kupietze95108e2019-09-18 13:23:58 +020026#' Method initialize
27#'
28#' @rdname KorAPQuery-class
29#' @param .Object …
Marc Kupietzb8972182019-09-20 21:33:46 +020030#' @param korapConnection KorAPConnection object
Marc Kupietze95108e2019-09-18 13:23:58 +020031#' @param request query part of the request URL
32#' @param vc definition of a virtual corpus
33#' @param totalResults number of hits the query has yielded
34#' @param nextStartIndex at what index to start the next fetch of query results
35#' @param fields what data / metadata fields should be collected
36#' @param requestUrl complete URL of the API request
37#' @param webUIRequestUrl URL of a web frontend request corresponding to the API request
38#' @param apiResponse data-frame representation of the JSON response of the API request
Marc Kupietz7776dec2019-09-27 16:59:02 +020039#' @param hasMoreMatches logical that signals if more query results can be fetched
Marc Kupietze95108e2019-09-18 13:23:58 +020040#' @param collectedMatches matches already fetched from the KorAP-API-server
Marc Kupietz97a1bca2019-10-04 22:52:09 +020041#'
42#' @importFrom tibble tibble
Marc Kupietze95108e2019-09-18 13:23:58 +020043#' @export
44setMethod("initialize", "KorAPQuery",
Marc Kupietzb8972182019-09-20 21:33:46 +020045 function(.Object, korapConnection = NULL, request = NULL, vc="", totalResults=0, nextStartIndex=0, fields=c("corpusSigle", "textSigle", "pubDate", "pubPlace",
Marc Kupietze95108e2019-09-18 13:23:58 +020046 "availability", "textClass", "snippet"),
47 requestUrl="", webUIRequestUrl = "", apiResponse = NULL, hasMoreMatches= FALSE, collectedMatches = NULL) {
48 .Object <- callNextMethod()
Marc Kupietzb8972182019-09-20 21:33:46 +020049 .Object@korapConnection = korapConnection
Marc Kupietze95108e2019-09-18 13:23:58 +020050 .Object@request = request
51 .Object@vc = vc
52 .Object@totalResults = totalResults
53 .Object@nextStartIndex = nextStartIndex
54 .Object@fields = fields
55 .Object@requestUrl = requestUrl
56 .Object@webUIRequestUrl = webUIRequestUrl
57 .Object@apiResponse = apiResponse
58 .Object@hasMoreMatches = hasMoreMatches
59 .Object@collectedMatches = collectedMatches
60 .Object
61 })
Marc Kupietz632cbd42019-09-06 16:04:51 +020062
Marc Kupietze95108e2019-09-18 13:23:58 +020063setGeneric("corpusQuery", function(kco, ...) standardGeneric("corpusQuery") )
64setGeneric("fetchAll", function(kqo, ...) standardGeneric("fetchAll") )
65setGeneric("fetchNext", function(kqo, ...) standardGeneric("fetchNext") )
66setGeneric("fetchRest", function(kqo, ...) standardGeneric("fetchRest") )
Marc Kupietz3f575282019-10-04 14:46:04 +020067setGeneric("frequencyQuery", function(kco, ...) standardGeneric("frequencyQuery") )
Marc Kupietze95108e2019-09-18 13:23:58 +020068
69maxResultsPerPage <- 50
Marc Kupietz62da2b52019-09-12 17:43:34 +020070
Marc Kupietz632cbd42019-09-06 16:04:51 +020071QueryParameterFromUrl <- function(url, parameter) {
72 regex <- paste0(".*[?&]", parameter, "=([^&]*).*")
73 if (grepl(regex, url)) {
74 return(gsub(regex, '\\1', url, perl = TRUE))
75 } else {
76 return("")
77 }
Marc Kupietz5bbc9db2019-08-30 16:30:45 +020078}
79
Marc Kupietz4de53ec2019-10-04 09:12:00 +020080## quiets concerns of R CMD check re: the .'s that appear in pipelines
81if(getRversion() >= "2.15.1") utils::globalVariables(c("."))
Marc Kupietz632cbd42019-09-06 16:04:51 +020082
Marc Kupietze95108e2019-09-18 13:23:58 +020083#' Method corpusQuery
84#'
85#' Perform a corpus query via a connection to a KorAP-API-server.
86#'
87#' @param kco \code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")}
Akron5e135462019-09-27 16:31:38 +020088#' @param query string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.
Marc Kupietz632cbd42019-09-06 16:04:51 +020089#' @param vc string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.
90#' @param KorAPUrl instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in \code{KorAPConnection}) to provide all necessary information for the query.
Marc Kupietz7776dec2019-09-27 16:59:02 +020091#' @param metadataOnly logical that determines whether queries should return only metadata without any snippets. This can also be useful to prevent access rewrites. Note that the default value is TRUE, unless the connection is authorized (currently not possible).
Marc Kupietz3c531f62019-09-13 12:17:24 +020092#' @param ql string to choose the query language (see \href{https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET#user-content-parameters}{section on Query Parameters} in the Kustvakt-Wiki for possible values.
Akron5e135462019-09-27 16:31:38 +020093#' @param fields (meta)data fields that will be fetched for every match.
Marc Kupietz43a6ade2020-02-18 17:01:44 +010094#' @param accessRewriteFatal abort if query or given vc had to be rewritten due to insufficient rights (not yet implemented).
Marc Kupietz25aebc32019-09-16 18:40:50 +020095#' @param verbose print some info
Marc Kupietz4de53ec2019-10-04 09:12:00 +020096#' @param as.df return result as data frame instead of as S4 object?
Marc Kupietz43a6ade2020-02-18 17:01:44 +010097#' @param expand logical that decides if \code{query} and \code{vc} parameters are expanded to all of their combinations
Marc Kupietz3f575282019-10-04 14:46:04 +020098#' @return Depending on the \code{as.df} parameter, a table or a \code{\link{KorAPQuery}} object that, among other information, contains the total number of results in \code{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link{fetchAll}}) or the next page of results (with \code{\link{fetchNext}}).
Marc Kupietze95108e2019-09-18 13:23:58 +020099#' A corresponding URL to be used within a web browser is contained in \code{@webUIRequestUrl}
Marc Kupietz43a6ade2020-02-18 17:01:44 +0100100#' Please make sure to check \code{$collection$rewrites} to see if any unforeseen access rewrites of the query's virtual corpus had to be performed.
Marc Kupietz632cbd42019-09-06 16:04:51 +0200101#'
102#' @examples
Marc Kupietz603491f2019-09-18 14:01:02 +0200103#' # Fetch metadata of every query hit for "Ameisenplage" and show a summary
Marc Kupietz657d8e72020-02-25 18:31:50 +0100104#' \donttest{
Marc Kupietz69cc54a2019-09-30 12:06:54 +0200105#' new("KorAPConnection") %>% corpusQuery("Ameisenplage") %>% fetchAll()
Marc Kupietz657d8e72020-02-25 18:31:50 +0100106#' }
Marc Kupietz3c531f62019-09-13 12:17:24 +0200107#'
Marc Kupietz603491f2019-09-18 14:01:02 +0200108#' # Use the copy of a KorAP-web-frontend URL for an API query of "Ameise" in a virtual corpus
109#' # and show the number of query hits (but don't fetch them).
Marc Kupietz69cc54a2019-09-30 12:06:54 +0200110#'
111#' new("KorAPConnection", verbose = TRUE) %>%
112#' corpusQuery(KorAPUrl =
113#' "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp")
Marc Kupietz3c531f62019-09-13 12:17:24 +0200114#'
Marc Kupietz603491f2019-09-18 14:01:02 +0200115#' # Plot the time/frequency curve of "Ameisenplage"
Marc Kupietz05b22772020-02-18 21:58:42 +0100116#' \donttest{
Marc Kupietz69cc54a2019-09-30 12:06:54 +0200117#' new("KorAPConnection", verbose=TRUE) %>%
118#' { . ->> kco } %>%
119#' corpusQuery("Ameisenplage") %>%
120#' fetchAll() %>%
121#' slot("collectedMatches") %>%
122#' mutate(year = lubridate::year(pubDate)) %>%
Marc Kupietz19e2ebd2019-10-07 11:45:30 +0200123#' dplyr::select(year) %>%
Marc Kupietz69cc54a2019-09-30 12:06:54 +0200124#' group_by(year) %>%
125#' summarise(Count = n()) %>%
126#' mutate(Freq = mapply(function(f, y)
127#' f / corpusStats(kco, paste("pubDate in", y))@tokens, Count, year)) %>%
Marc Kupietz19e2ebd2019-10-07 11:45:30 +0200128#' dplyr::select(-Count) %>%
Marc Kupietz69cc54a2019-09-30 12:06:54 +0200129#' complete(year = min(year):max(year), fill = list(Freq = 0)) %>%
130#' plot(type = "l")
Marc Kupietz05b22772020-02-18 21:58:42 +0100131#' }
Marc Kupietze95108e2019-09-18 13:23:58 +0200132#' @seealso \code{\link{KorAPConnection}}, \code{\link{fetchNext}}, \code{\link{fetchRest}}, \code{\link{fetchAll}}, \code{\link{corpusStats}}
Marc Kupietz632cbd42019-09-06 16:04:51 +0200133#'
134#' @references
135#' \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
136#'
Marc Kupietze95108e2019-09-18 13:23:58 +0200137#' @aliases corpusQuery
Marc Kupietz632cbd42019-09-06 16:04:51 +0200138#' @export
Marc Kupietze95108e2019-09-18 13:23:58 +0200139setMethod("corpusQuery", "KorAPConnection",
Marc Kupietza96537f2019-11-09 23:07:44 +0100140 function(kco,
141 query = if (missing(KorAPUrl))
142 stop("At least one of the parameters query and KorAPUrl must be specified.", call. = FALSE)
143 else
144 httr::parse_url(KorAPUrl)$query$q,
145 vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq,
146 KorAPUrl,
147 metadataOnly = TRUE,
148 ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql,
149 fields = c(
150 "corpusSigle",
151 "textSigle",
152 "pubDate",
153 "pubPlace",
154 "availability",
155 "textClass",
156 "snippet"
157 ),
158 accessRewriteFatal = TRUE,
159 verbose = kco@verbose,
160 expand = length(vc) != length(query),
161 as.df = FALSE) {
162 if (length(query) > 1 || length(vc) > 1) {
163
164 grid <- {
165 if (expand)
166 expand_grid(query=query, vc=vc) else tibble(query=query, vc=vc) }
Marc Kupietz3f575282019-10-04 14:46:04 +0200167 return(
Marc Kupietz4de53ec2019-10-04 09:12:00 +0200168 do.call(rbind,
Marc Kupietz3f575282019-10-04 14:46:04 +0200169 Map(function(q, cq) corpusQuery(kco, query=q, vc=cq, ql=ql,
170 verbose=verbose, as.df = TRUE), grid$query, grid$vc)) %>%
171 remove_rownames()
Marc Kupietza96537f2019-11-09 23:07:44 +0100172 )
173 } else {
174 contentFields <- c("snippet")
175 if (metadataOnly) {
176 fields <- fields[!fields %in% contentFields]
177 }
178 request <-
179 paste0('?q=',
180 URLencode(query, reserved = TRUE),
181 if (vc != '') paste0('&cq=', URLencode(vc, reserved = TRUE)) else '', '&ql=', ql)
182 webUIRequestUrl <- paste0(kco@KorAPUrl, request)
183 requestUrl <- paste0(
184 kco@apiUrl,
185 'search',
186 request,
187 '&fields=',
188 paste(fields, collapse = ","),
189 if (metadataOnly) '&access-rewrite-disabled=true' else ''
190 )
191 log.info(verbose, "Searching \"", query, "\" in \"", vc, "\"", sep =
192 "")
193 res = apiCall(kco, paste0(requestUrl, '&count=0'))
Marc Kupietzf5769b62019-12-13 09:19:45 +0100194 if(!is.null(res$meta$cached))
195 log.info(verbose, " [cached]\n")
196 else
197 log.info(verbose, " took ", res$meta$benchmark, "\n", sep = "")
Marc Kupietza96537f2019-11-09 23:07:44 +0100198 if (as.df)
199 data.frame(
200 query = query,
201 totalResults = res$meta$totalResults,
202 vc = vc,
203 webUIRequestUrl = webUIRequestUrl,
204 stringsAsFactors = FALSE
205 )
206 else
207 KorAPQuery(
208 korapConnection = kco,
209 nextStartIndex = 0,
210 fields = fields,
211 requestUrl = requestUrl,
212 request = request,
213 totalResults = res$meta$totalResults,
214 vc = vc,
215 apiResponse = res,
216 webUIRequestUrl = webUIRequestUrl,
217 hasMoreMatches = (res$meta$totalResults > 0),
218 )
219 }
Marc Kupietz4de53ec2019-10-04 09:12:00 +0200220 })
Marc Kupietz5bbc9db2019-08-30 16:30:45 +0200221
Marc Kupietz62da2b52019-09-12 17:43:34 +0200222#' Fetch the next bunch of results of a KorAP query.
Marc Kupietze95108e2019-09-18 13:23:58 +0200223#'
Marc Kupietz3f575282019-10-04 14:46:04 +0200224#' \bold{\code{fetchNext}} fetches the next bunch of results of a KorAP query.
225#'
Marc Kupietze95108e2019-09-18 13:23:58 +0200226#' @param kqo object obtained from \code{\link{corpusQuery}}
Marc Kupietz62da2b52019-09-12 17:43:34 +0200227#' @param offset start offset for query results to fetch
228#' @param maxFetch maximum number of query results to fetch
Marc Kupietz25aebc32019-09-16 18:40:50 +0200229#' @param verbose print progress information if true
Marc Kupietze95108e2019-09-18 13:23:58 +0200230#' @return The \code{kqo} input object with updated slots \code{collectedMatches}, \code{apiResponse}, \code{nextStartIndex}, \code{hasMoreMatches}
Marc Kupietz62da2b52019-09-12 17:43:34 +0200231#'
Marc Kupietz05b22772020-02-18 21:58:42 +0100232#' @examples
Marc Kupietz657d8e72020-02-25 18:31:50 +0100233#' \donttest{q <- new("KorAPConnection") %>% corpusQuery("Ameisenplage") %>% fetchNext()
Marc Kupietz05b22772020-02-18 21:58:42 +0100234#' q@collectedMatches
Marc Kupietz657d8e72020-02-25 18:31:50 +0100235#' }
Marc Kupietz05b22772020-02-18 21:58:42 +0100236#'
Marc Kupietz62da2b52019-09-12 17:43:34 +0200237#' @references
238#' \url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
239#'
Marc Kupietze95108e2019-09-18 13:23:58 +0200240#' @aliases fetchNext
241#' @rdname KorAPQuery-class
Marc Kupietz97a1bca2019-10-04 22:52:09 +0200242#' @importFrom purrr map_dfr
Marc Kupietz19e2ebd2019-10-07 11:45:30 +0200243#' @importFrom dplyr rowwise bind_rows select
Marc Kupietz632cbd42019-09-06 16:04:51 +0200244#' @export
Marc Kupietzf6f71312019-09-23 18:35:27 +0200245setMethod("fetchNext", "KorAPQuery", function(kqo, offset = kqo@nextStartIndex, maxFetch = maxResultsPerPage, verbose = kqo@korapConnection@verbose) {
Marc Kupietze95108e2019-09-18 13:23:58 +0200246 if (kqo@totalResults == 0 || offset >= kqo@totalResults) {
247 return(kqo)
Marc Kupietz62da2b52019-09-12 17:43:34 +0200248 }
Marc Kupietz5bbc9db2019-08-30 16:30:45 +0200249
250 page <- 1
251 results <- 0
Marc Kupietz25aebc32019-09-16 18:40:50 +0200252 pubDate <- NULL # https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
Marc Kupietze95108e2019-09-18 13:23:58 +0200253 collectedMatches <- kqo@collectedMatches
Marc Kupietz62da2b52019-09-12 17:43:34 +0200254
Marc Kupietz5bbc9db2019-08-30 16:30:45 +0200255 repeat {
Marc Kupietza96537f2019-11-09 23:07:44 +0100256 res <- apiCall(kqo@korapConnection, paste0(kqo@requestUrl, '&count=', min(if (!is.na(maxFetch)) maxFetch - results else maxResultsPerPage, maxResultsPerPage) ,'&offset=', offset + results))
Marc Kupietze95108e2019-09-18 13:23:58 +0200257 if (res$meta$totalResults == 0) { return(kqo) }
258 for (field in kqo@fields) {
Marc Kupietz5bbc9db2019-08-30 16:30:45 +0200259 if (!field %in% colnames(res$matches)) {
260 res$matches[, field] <- NA
261 }
262 }
Marc Kupietz69cc54a2019-09-30 12:06:54 +0200263 currentMatches <-
264 kqo@fields %>%
265 map_dfr( ~tibble(!!.x := logical() ) ) %>%
266 bind_rows(res$matches) %>%
Marc Kupietz19e2ebd2019-10-07 11:45:30 +0200267 dplyr::select(kqo@fields)
Marc Kupietz36d12d92019-09-27 18:13:27 +0200268 if ("pubDate" %in% kqo@fields) {
Marc Kupietz69cc54a2019-09-30 12:06:54 +0200269 currentMatches$pubDate <- currentMatches$pubDate %>% as.Date(format = "%Y-%m-%d")
270 factorCols <- currentMatches %>% select(-pubDate) %>% colnames()
Marc Kupietz36d12d92019-09-27 18:13:27 +0200271 } else {
272 factorCols <- colnames(currentMatches)
273 }
Marc Kupietz5bbc9db2019-08-30 16:30:45 +0200274 currentMatches[factorCols] <- lapply(currentMatches[factorCols], factor)
Marc Kupietz62da2b52019-09-12 17:43:34 +0200275 if (!is.list(collectedMatches)) {
276 collectedMatches <- currentMatches
Marc Kupietz5bbc9db2019-08-30 16:30:45 +0200277 } else {
Marc Kupietz62da2b52019-09-12 17:43:34 +0200278 collectedMatches <- rbind(collectedMatches, currentMatches)
Marc Kupietz5bbc9db2019-08-30 16:30:45 +0200279 }
Marc Kupietzc2c59bd2019-08-30 16:50:49 +0200280 if (verbose) {
Marc Kupietzf6f71312019-09-23 18:35:27 +0200281 cat(paste0("Retrieved page ", page, "/", ceiling((res$meta$totalResults) / res$meta$itemsPerPage), ' in ', res$meta$benchmark, '\n'))
Marc Kupietzc2c59bd2019-08-30 16:50:49 +0200282 }
Marc Kupietz5bbc9db2019-08-30 16:30:45 +0200283 page <- page + 1
284 results <- results + res$meta$itemsPerPage
Marc Kupietz62da2b52019-09-12 17:43:34 +0200285 if (offset + results >= res$meta$totalResults || (!is.na(maxFetch) && results >= maxFetch)) {
Marc Kupietz5bbc9db2019-08-30 16:30:45 +0200286 break
287 }
288 }
Marc Kupietze95108e2019-09-18 13:23:58 +0200289 nextStartIndex <- min(res$meta$startIndex + res$meta$itemsPerPage, res$meta$totalResults)
290 KorAPQuery(nextStartIndex = nextStartIndex,
Marc Kupietzd0d3e9b2019-09-24 17:36:03 +0200291 korapConnection = kqo@korapConnection,
Marc Kupietze95108e2019-09-18 13:23:58 +0200292 fields = kqo@fields,
293 requestUrl = kqo@requestUrl,
294 request = kqo@request,
295 totalResults = res$meta$totalResults,
296 vc = kqo@vc,
297 webUIRequestUrl = kqo@webUIRequestUrl,
298 hasMoreMatches = (res$meta$totalResults > nextStartIndex),
299 apiResponse = res,
300 collectedMatches = collectedMatches)
301})
Marc Kupietz62da2b52019-09-12 17:43:34 +0200302
303#' Fetch all results of a KorAP query.
Marc Kupietz62da2b52019-09-12 17:43:34 +0200304#'
305#' @examples
Marc Kupietz05b22772020-02-18 21:58:42 +0100306#' \donttest{
Marc Kupietz69cc54a2019-09-30 12:06:54 +0200307#' q <- new("KorAPConnection") %>% corpusQuery("Ameisenplage") %>% fetchAll()
Marc Kupietze95108e2019-09-18 13:23:58 +0200308#' q@collectedMatches
Marc Kupietz05b22772020-02-18 21:58:42 +0100309#' }
Marc Kupietz62da2b52019-09-12 17:43:34 +0200310#'
Marc Kupietze95108e2019-09-18 13:23:58 +0200311#' @aliases fetchAll
312#' @rdname KorAPQuery-class
Marc Kupietz62da2b52019-09-12 17:43:34 +0200313#' @export
Marc Kupietzf6f71312019-09-23 18:35:27 +0200314setMethod("fetchAll", "KorAPQuery", function(kqo, verbose = kqo@korapConnection@verbose) {
Marc Kupietze95108e2019-09-18 13:23:58 +0200315 return(fetchNext(kqo, offset = 0, maxFetch = NA, verbose = verbose))
316})
317
318#' Fetches the remaining results of a KorAP query.
319#'
320#' @examples
Marc Kupietz05b22772020-02-18 21:58:42 +0100321#' \donttest{
322#' q <- new("KorAPConnection") %>% corpusQuery("Ameisenplage") %>% fetchRest()
Marc Kupietze95108e2019-09-18 13:23:58 +0200323#' q@collectedMatches
Marc Kupietz05b22772020-02-18 21:58:42 +0100324#' }
Marc Kupietze95108e2019-09-18 13:23:58 +0200325#'
326#' @aliases fetchRest
327#' @rdname KorAPQuery-class
328#' @export
Marc Kupietzf6f71312019-09-23 18:35:27 +0200329setMethod("fetchRest", "KorAPQuery", function(kqo, verbose = kqo@korapConnection@verbose) {
Marc Kupietze95108e2019-09-18 13:23:58 +0200330 return(fetchNext(kqo, maxFetch = NA, verbose = verbose))
331})
332
Marc Kupietz3f575282019-10-04 14:46:04 +0200333#' Query relative frequency of search term(s)
334#'
335#' \bold{\code{frequencyQuery}} combines \code{\link{corpusQuery}}, \code{\link{corpusStats}} and
336#' \code{\link{ci}} to compute a table with the relative frequencies and
337#' confidence intervals of one ore multiple search terms across one or multiple
338#' virtual corpora.
339#'
340#' @aliases frequencyQuery
341#' @rdname KorAPQuery-class
342#' @examples
Marc Kupietz05b22772020-02-18 21:58:42 +0100343#' \donttest{
Marc Kupietz3f575282019-10-04 14:46:04 +0200344#' new("KorAPConnection", verbose = TRUE) %>%
345#' frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003))
Marc Kupietz05b22772020-02-18 21:58:42 +0100346#' }
Marc Kupietz3f575282019-10-04 14:46:04 +0200347#'
348#' @param kco \code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")}
349#' @param query string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.
Marc Kupietz43a6ade2020-02-18 17:01:44 +0100350#' @param conf.level confidence level of the returned confidence interval (passed through \code{\link{ci}} to \code{\link{prop.test}}).
Marc Kupietz71d6e052019-11-22 18:42:10 +0100351#' @param as.alternatives LOGICAL that specifies if the query terms should be treated as alternatives. If \code{as.alternatives} is TRUE, the sum over all query hits, instead of the respective vc token sizes is used as total for the calculation of relative frequencies.
Marc Kupietz3f575282019-10-04 14:46:04 +0200352#' @export
353setMethod("frequencyQuery", "KorAPConnection",
Marc Kupietz71d6e052019-11-22 18:42:10 +0100354 function(kco, query, vc = "", conf.level = 0.95, as.alternatives = FALSE, ...) {
355 (if (as.alternatives) {
356 corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...) %>%
357 group_by(vc) %>%
358 mutate(total = sum(totalResults))
359 } else {
360 corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...) %>%
361 mutate(total = corpusStats(kco, vc=vc, as.df=TRUE)$tokens)
362 } ) %>%
Marc Kupietz0c29cea2019-10-09 08:44:36 +0200363 ci(conf.level = conf.level)
Marc Kupietz3f575282019-10-04 14:46:04 +0200364})
365
Marc Kupietze95108e2019-09-18 13:23:58 +0200366#´ format()
367#' @rdname KorAPQuery-class
368#' @param x KorAPQuery object
369#' @param ... further arguments passed to or from other methods
370#' @export
371format.KorAPQuery <- function(x, ...) {
372 cat("<KorAPQuery>\n")
373 q <- x
374 aurl = parse_url(q@request)
375 cat(" Query: ", aurl$query$q, "\n")
376 if (!is.null(aurl$query$vc) && aurl$query$vc != "") {
377 cat("Virtual corpus: ", aurl$query$vc, "\n")
378 }
379 if (!is.null(q@collectedMatches)) {
380 cat("==============================================================================================================", "\n")
381 print(summary(q@collectedMatches))
382 cat("==============================================================================================================", "\n")
383 }
384 cat(" Total results: ", q@totalResults, "\n")
385 cat(" Fetched results: ", q@nextStartIndex, "\n")
Marc Kupietz62da2b52019-09-12 17:43:34 +0200386}
387
Marc Kupietze95108e2019-09-18 13:23:58 +0200388#' show()
Marc Kupietz62da2b52019-09-12 17:43:34 +0200389#'
Marc Kupietze95108e2019-09-18 13:23:58 +0200390#' @rdname KorAPQuery-class
391#' @param object KorAPQuery object
Marc Kupietz62da2b52019-09-12 17:43:34 +0200392#' @export
Marc Kupietze95108e2019-09-18 13:23:58 +0200393setMethod("show", "KorAPQuery", function(object) {
394 format(object)
395})