Update and separate frequencyQuery documentation
Change-Id: I2fc31fd0e4d49001a739bcee63e30dac4a608e82
diff --git a/R/KorAPQuery.R b/R/KorAPQuery.R
index b79b585..02f6685 100644
--- a/R/KorAPQuery.R
+++ b/R/KorAPQuery.R
@@ -98,14 +98,14 @@
#' @param accessRewriteFatal abort if query or given vc had to be rewritten due to insufficient rights (not yet implemented).
#' @param verbose print some info
#' @param as.df return result as data frame instead of as S4 object?
-#' @param expand logical that decides if `query` and `vc` parameters are expanded to all of their combinations
+#' @param expand logical that decides if `query` and `vc` parameters are expanded to all of their combinations. Defaults to `TRUE`, iff `query` and `vc` have different lengths
#' @param context string that specifies the size of the left and the right context returned in `snippet`
#' (provided that `metadataOnly` is set to `false` and that the necessary access right are met).
#' The format of the context size specifcation (e.g. `3-token,3-token`) is described in the [Service: Search GET documentation of the Kustvakt Wiki](https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET).
#' If the parameter is not set, the default context size secification of the KorAP server instance will be used.
#' Note that you cannot overrule the maximum context size set in the KorAP server instance,
#' as this is typically legally motivated.
-#' @return Depending on the `as.df` parameter, a table or a [KorAPQuery()] object that, among other information, contains the total number of results in `@totalResults`. The resulting object can be used to fetch all query results (with [fetchAll()]) or the next page of results (with [fetchNext()]).
+#' @return Depending on the `as.df` parameter, a tibble or a [KorAPQuery()] object that, among other information, contains the total number of results in `@totalResults`. The resulting object can be used to fetch all query results (with [fetchAll()]) or the next page of results (with [fetchNext()]).
#' A corresponding URL to be used within a web browser is contained in `@webUIRequestUrl`
#' Please make sure to check `$collection$rewrites` to see if any unforeseen access rewrites of the query's virtual corpus had to be performed.
#'
@@ -421,35 +421,48 @@
return(fetchNext(kqo, maxFetch = NA, verbose = verbose, ...))
})
-#' Query relative frequency of search term(s)
+#' Query frequencies of search expressions in virtual corpora
#'
#' **`frequencyQuery`** combines [corpusQuery()], [corpusStats()] and
-#' [ci()] to compute a table with the relative frequencies and
+#' [ci()] to compute a tibble with the absolute and relative frequencies and
#' confidence intervals of one ore multiple search terms across one or multiple
#' virtual corpora.
#'
#' @aliases frequencyQuery
-#' @rdname KorAPQuery-class
#' @examples
#' \dontrun{
#'
-#' KorAPConnection(verbose = TRUE) %>%
+#' KorAPConnection(verbose = TRUE) |>
#' frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003))
#' }
#'
+# @inheritParams corpusQuery
#' @param kco [KorAPConnection()] object (obtained e.g. from `KorAPConnection()`
-#' @param query string that contains the corpus query. The query language depends on the `ql` parameter. Either `query` must be provided or `KorAPUrl`.
+#' @param query corpus query string(s) (can be a vector). The query language depends on the `ql` parameter. Either `query` must be provided or `KorAPUrl`.
+#' @param vc virtual corpus definition(s) (can be a vector)
#' @param conf.level confidence level of the returned confidence interval (passed through [ci()] to [prop.test()]).
#' @param as.alternatives LOGICAL that specifies if the query terms should be treated as alternatives. If `as.alternatives` is TRUE, the sum over all query hits, instead of the respective vc token sizes is used as total for the calculation of relative frequencies.
+#' @param ... further arguments passed to or from other methods (see [corpusQuery()]), most notably `expand`, a logical that decides if `query` and `vc` parameters are expanded to all of their combinations. It defaults to `TRUE`, if `query` and `vc` have different lengths, and to `FALSE` otherwise.
#' @export
+#'
+#' @return A tibble, with each row containing the following result columns for query and vc combinations:
+#' - **query**: the query string used for the frequency analysis.
+#' - **totalResults**: absolute frequency of query matches in the vc.
+#' - **vc**: virtual corpus used for the query.
+#' - **webUIRequestUrl**: URL of the corresponding web UI request with respect to query and vc.
+#' - **total**: total number of tokens in the vc or, if `as.alternatives` is `TRUE`, the sum of all query hits in the vc.
+#' - **f**: relative frequency of query matches in the vc.
+#' - **conf.low**: lower bound of the confidence interval for the relative frequency, given `conf.level`.
+#' - **conf.high**: upper bound of the confidence interval for the relative frequency, given `conf.level`.
+
setMethod("frequencyQuery", "KorAPConnection",
function(kco, query, vc = "", conf.level = 0.95, as.alternatives = FALSE, ...) {
(if (as.alternatives) {
- corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...) %>%
+ corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...) |>
group_by(vc) %>%
mutate(total = sum(totalResults))
} else {
- corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...) %>%
+ corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...) |>
mutate(total = corpusStats(kco, vc=vc, as.df=TRUE)$tokens)
} ) %>%
ci(conf.level = conf.level)
diff --git a/man/KorAPQuery-class.Rd b/man/KorAPQuery-class.Rd
index 792919e..e6705e3 100644
--- a/man/KorAPQuery-class.Rd
+++ b/man/KorAPQuery-class.Rd
@@ -13,8 +13,6 @@
\alias{fetchAll}
\alias{fetchRest,KorAPQuery-method}
\alias{fetchRest}
-\alias{frequencyQuery,KorAPConnection-method}
-\alias{frequencyQuery}
\alias{buildWebUIRequestUrlFromString}
\alias{buildWebUIRequestUrl}
\alias{format.KorAPQuery}
@@ -67,15 +65,6 @@
\S4method{fetchRest}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...)
-\S4method{frequencyQuery}{KorAPConnection}(
- kco,
- query,
- vc = "",
- conf.level = 0.95,
- as.alternatives = FALSE,
- ...
-)
-
buildWebUIRequestUrlFromString(KorAPUrl, query, vc = "", ql = "poliqarp")
buildWebUIRequestUrl(
@@ -136,7 +125,7 @@
\item{verbose}{print progress information if true}
-\item{expand}{logical that decides if \code{query} and \code{vc} parameters are expanded to all of their combinations}
+\item{expand}{logical that decides if \code{query} and \code{vc} parameters are expanded to all of their combinations. Defaults to \code{TRUE}, iff \code{query} and \code{vc} have different lengths}
\item{as.df}{return result as data frame instead of as S4 object?}
@@ -157,16 +146,12 @@
\item{...}{further arguments passed to or from other methods}
-\item{conf.level}{confidence level of the returned confidence interval (passed through \code{\link[=ci]{ci()}} to \code{\link[=prop.test]{prop.test()}}).}
-
-\item{as.alternatives}{LOGICAL that specifies if the query terms should be treated as alternatives. If \code{as.alternatives} is TRUE, the sum over all query hits, instead of the respective vc token sizes is used as total for the calculation of relative frequencies.}
-
\item{x}{KorAPQuery object}
\item{object}{KorAPQuery object}
}
\value{
-Depending on the \code{as.df} parameter, a table or a \code{\link[=KorAPQuery]{KorAPQuery()}} object that, among other information, contains the total number of results in \verb{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link[=fetchAll]{fetchAll()}}) or the next page of results (with \code{\link[=fetchNext]{fetchNext()}}).
+Depending on the \code{as.df} parameter, a tibble or a \code{\link[=KorAPQuery]{KorAPQuery()}} object that, among other information, contains the total number of results in \verb{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link[=fetchAll]{fetchAll()}}) or the next page of results (with \code{\link[=fetchNext]{fetchNext()}}).
A corresponding URL to be used within a web browser is contained in \verb{@webUIRequestUrl}
Please make sure to check \verb{$collection$rewrites} to see if any unforeseen access rewrites of the query's virtual corpus had to be performed.
@@ -182,11 +167,6 @@
\strong{\code{fetchNext}} fetches the next bunch of results of a KorAP query.
\strong{\code{fetchAll}} fetches all results of a KorAP query.
-
-\strong{\code{frequencyQuery}} combines \code{\link[=corpusQuery]{corpusQuery()}}, \code{\link[=corpusStats]{corpusStats()}} and
-\code{\link[=ci]{ci()}} to compute a table with the relative frequencies and
-confidence intervals of one ore multiple search terms across one or multiple
-virtual corpora.
}
\examples{
\dontrun{
@@ -241,12 +221,6 @@
q@collectedMatches
}
-\dontrun{
-
-KorAPConnection(verbose = TRUE) \%>\%
- frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003))
-}
-
}
\references{
\url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
diff --git a/man/frequencyQuery-KorAPConnection-method.Rd b/man/frequencyQuery-KorAPConnection-method.Rd
new file mode 100644
index 0000000..9f934e6
--- /dev/null
+++ b/man/frequencyQuery-KorAPConnection-method.Rd
@@ -0,0 +1,56 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/KorAPQuery.R
+\name{frequencyQuery,KorAPConnection-method}
+\alias{frequencyQuery,KorAPConnection-method}
+\alias{frequencyQuery}
+\title{Query frequencies of search expressions in virtual corpora}
+\usage{
+\S4method{frequencyQuery}{KorAPConnection}(
+ kco,
+ query,
+ vc = "",
+ conf.level = 0.95,
+ as.alternatives = FALSE,
+ ...
+)
+}
+\arguments{
+\item{kco}{\code{\link[=KorAPConnection]{KorAPConnection()}} object (obtained e.g. from \code{KorAPConnection()}}
+
+\item{query}{corpus query string(s) (can be a vector). The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.}
+
+\item{vc}{virtual corpus definition(s) (can be a vector)}
+
+\item{conf.level}{confidence level of the returned confidence interval (passed through \code{\link[=ci]{ci()}} to \code{\link[=prop.test]{prop.test()}}).}
+
+\item{as.alternatives}{LOGICAL that specifies if the query terms should be treated as alternatives. If \code{as.alternatives} is TRUE, the sum over all query hits, instead of the respective vc token sizes is used as total for the calculation of relative frequencies.}
+
+\item{...}{further arguments passed to or from other methods (see \code{\link[=corpusQuery]{corpusQuery()}}), most notably \code{expand}, a logical that decides if \code{query} and \code{vc} parameters are expanded to all of their combinations. It defaults to \code{TRUE}, if \code{query} and \code{vc} have different lengths, and to \code{FALSE} otherwise.}
+}
+\value{
+A tibble, with each row containing the following result columns for query and vc combinations:
+\itemize{
+\item \strong{query}: the query string used for the frequency analysis.
+\item \strong{totalResults}: absolute frequency of query matches in the vc.
+\item \strong{vc}: virtual corpus used for the query.
+\item \strong{webUIRequestUrl}: URL of the corresponding web UI request with respect to query and vc.
+\item \strong{total}: total number of tokens in the vc or, if \code{as.alternatives} is \code{TRUE}, the sum of all query hits in the vc.
+\item \strong{f}: relative frequency of query matches in the vc.
+\item \strong{conf.low}: lower bound of the confidence interval for the relative frequency, given \code{conf.level}.
+\item \strong{conf.high}: upper bound of the confidence interval for the relative frequency, given \code{conf.level}.
+}
+}
+\description{
+\strong{\code{frequencyQuery}} combines \code{\link[=corpusQuery]{corpusQuery()}}, \code{\link[=corpusStats]{corpusStats()}} and
+\code{\link[=ci]{ci()}} to compute a tibble with the absolute and relative frequencies and
+confidence intervals of one ore multiple search terms across one or multiple
+virtual corpora.
+}
+\examples{
+\dontrun{
+
+KorAPConnection(verbose = TRUE) |>
+ frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003))
+}
+
+}