Add as.alternative parameter to frequencyQuery method
And also rename "tokens" to the more generic "total" in the result
tibble.
Change-Id: Iae2ec16307f993e5b95792f2f66c6a38dc9ff67d
diff --git a/R/KorAPQuery.R b/R/KorAPQuery.R
index f337d4a..7a59ae7 100644
--- a/R/KorAPQuery.R
+++ b/R/KorAPQuery.R
@@ -332,11 +332,18 @@
#' @param kco \code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")}
#' @param query string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.
#' @param conf.level confidence level of the returned confidence interval (passed throgh \code{\link{ci}} to \code{\link{prop.test}}).
+#' @param as.alternatives LOGICAL that specifies if the query terms should be treated as alternatives. If \code{as.alternatives} is TRUE, the sum over all query hits, instead of the respective vc token sizes is used as total for the calculation of relative frequencies.
#' @export
setMethod("frequencyQuery", "KorAPConnection",
- function(kco, query, vc = "", conf.level = 0.95, ...) {
- corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...) %>%
- mutate(tokens=corpusStats(kco, vc=vc, as.df=TRUE)$tokens) %>%
+ function(kco, query, vc = "", conf.level = 0.95, as.alternatives = FALSE, ...) {
+ (if (as.alternatives) {
+ corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...) %>%
+ group_by(vc) %>%
+ mutate(total = sum(totalResults))
+ } else {
+ corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...) %>%
+ mutate(total = corpusStats(kco, vc=vc, as.df=TRUE)$tokens)
+ } ) %>%
ci(conf.level = conf.level)
})