Add function frequencyQuery (corpusQuery + corpusStats + ci)
Change-Id: Icb7ed900ea588f606a812d1e3122867a81cefaa2
diff --git a/R/KorAPQuery.R b/R/KorAPQuery.R
index f71a4f3..868089c 100644
--- a/R/KorAPQuery.R
+++ b/R/KorAPQuery.R
@@ -65,6 +65,7 @@
setGeneric("fetchAll", function(kqo, ...) standardGeneric("fetchAll") )
setGeneric("fetchNext", function(kqo, ...) standardGeneric("fetchNext") )
setGeneric("fetchRest", function(kqo, ...) standardGeneric("fetchRest") )
+setGeneric("frequencyQuery", function(kco, ...) standardGeneric("frequencyQuery") )
maxResultsPerPage <- 50
@@ -94,7 +95,8 @@
#' @param accessRewriteFatal abort if query or given vc had to be rewritten due to insufficent rights (not yet implemented).
#' @param verbose print some info
#' @param as.df return result as data frame instead of as S4 object?
-#' @return A \code{\link{KorAPQuery}} object that, among other information, contains the total number of results in \code{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link{fetchAll}}) or the next page of results (with \code{\link{fetchNext}}).
+#' @param expand logical that deicdes if \code{query} and \code{vc} parameters are expanded to all of their combinations
+#' @return Depending on the \code{as.df} parameter, a table or a \code{\link{KorAPQuery}} object that, among other information, contains the total number of results in \code{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link{fetchAll}}) or the next page of results (with \code{\link{fetchNext}}).
#' A corresponding URL to be used within a web browser is contained in \code{@webUIRequestUrl}
#' Please make sure to check \code{$collection$rewrites} to see if any unforseen access rewrites of the query's virtual corpus had to be performed.
#'
@@ -145,13 +147,15 @@
"availability", "textClass", "snippet"),
accessRewriteFatal = TRUE,
verbose = kco@verbose,
+ expand = length(vc) != length(query),
as.df = FALSE) {
- ifelse(length(query) > 1 , {
- #grid <- expand_grid(query=query, vc=vc)
- return(
+ ifelse(length(query) > 1 || length(vc) > 1, {
+ grid <- { if (expand) expand_grid(query=query, vc=vc) else tibble(query=query, vc=vc) }
+ return(
do.call(rbind,
- Map(function(q, cq) corpusQuery(kco, query=q, vc=cq,
- verbose=verbose, as.df = TRUE), query, vc))
+ Map(function(q, cq) corpusQuery(kco, query=q, vc=cq, ql=ql,
+ verbose=verbose, as.df = TRUE), grid$query, grid$vc)) %>%
+ remove_rownames()
)}, {
contentFields <- c("snippet")
fields <- fields[!fields %in% contentFields]
@@ -185,6 +189,8 @@
#' Fetch the next bunch of results of a KorAP query.
#'
+#' \bold{\code{fetchNext}} fetches the next bunch of results of a KorAP query.
+#'
#' @param kqo object obtained from \code{\link{corpusQuery}}
#' @param offset start offset for query results to fetch
#' @param maxFetch maximum number of query results to fetch
@@ -281,6 +287,29 @@
return(fetchNext(kqo, maxFetch = NA, verbose = verbose))
})
+#' Query relative frequency of search term(s)
+#'
+#' \bold{\code{frequencyQuery}} combines \code{\link{corpusQuery}}, \code{\link{corpusStats}} and
+#' \code{\link{ci}} to compute a table with the relative frequencies and
+#' confidence intervals of one ore multiple search terms across one or multiple
+#' virtual corpora.
+#'
+#' @aliases frequencyQuery
+#' @rdname KorAPQuery-class
+#' @examples
+#' new("KorAPConnection", verbose = TRUE) %>%
+#' frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003))
+#'
+#' @param kco \code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")}
+#' @param query string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.
+#' @export
+setMethod("frequencyQuery", "KorAPConnection",
+ function(kco, query, vc = "", ...) {
+ corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df=TRUE, ...) %>%
+ mutate(tokens=corpusStats(kco, vc=vc, as.df=TRUE)$tokens) %>%
+ ci()
+})
+
#´ format()
#' @rdname KorAPQuery-class
#' @param x KorAPQuery object
diff --git a/R/ci.R b/R/ci.R
index b9dd987..1d54977 100644
--- a/R/ci.R
+++ b/R/ci.R
@@ -2,8 +2,12 @@
#' Add confidence interval and relative frequency variables
#'
#' Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame:
-#' 1. relative frequency (\code{f}) 2. lower bound of a confidence interval
-#' (\code{ci.low}) 3. upper bound of a confidence interval
+#' 1. relative frequency (\code{f})
+#' 2. lower bound of a confidence interval (\code{ci.low})
+#' 3. upper bound of a confidence interval
+#'
+#' @seealso
+#' \code{ci} is alread included in \code{\link{frequencyQuery}}
#'
#' @param df table with columns for absolute and total frequencies.
#' @param x column with the observed absolute frequency.
@@ -13,6 +17,7 @@
#'
#' @export
#' @importFrom stats prop.test
+#' @importFrom tibble remove_rownames
#' @examples
#' library(ggplot2)
#' kco <- new("KorAPConnection", verbose=TRUE)