Add as.alternative parameter to frequencyQuery method

And also rename "tokens" to the more generic "total" in the result
tibble.

Change-Id: Iae2ec16307f993e5b95792f2f66c6a38dc9ff67d
diff --git a/R/KorAPQuery.R b/R/KorAPQuery.R
index f337d4a..7a59ae7 100644
--- a/R/KorAPQuery.R
+++ b/R/KorAPQuery.R
@@ -332,11 +332,18 @@
 #' @param kco \code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")}
 #' @param query string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.
 #' @param conf.level confidence level of the returned confidence interval (passed throgh \code{\link{ci}}  to \code{\link{prop.test}}).
+#' @param as.alternatives LOGICAL that specifies if the query terms should be treated as alternatives. If \code{as.alternatives} is TRUE, the sum over all query hits, instead of the respective vc token sizes is used as total for the calculation of relative frequencies.
 #' @export
 setMethod("frequencyQuery", "KorAPConnection",
-  function(kco, query, vc = "", conf.level = 0.95, ...) {
-      corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...) %>%
-      mutate(tokens=corpusStats(kco, vc=vc, as.df=TRUE)$tokens) %>%
+  function(kco, query, vc = "", conf.level = 0.95, as.alternatives = FALSE, ...) {
+      (if (as.alternatives) {
+        corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...) %>%
+        group_by(vc) %>%
+        mutate(total = sum(totalResults))
+      } else {
+        corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...) %>%
+        mutate(total = corpusStats(kco, vc=vc, as.df=TRUE)$tokens)
+      } ) %>%
       ci(conf.level = conf.level)
 })
 
diff --git a/R/ci.R b/R/ci.R
index b5d06dc..5410b3b 100644
--- a/R/ci.R
+++ b/R/ci.R
@@ -24,12 +24,12 @@
 #' kco <- new("KorAPConnection", verbose=TRUE)
 #' expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) %>%
 #'   bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in %d", .$year))) %>%
-#'   mutate(tokens=corpusStats(kco, vc=vc)$tokens) %>%
+#'   mutate(total=corpusStats(kco, vc=vc)$tokens) %>%
 #'   ci() %>%
 #'   ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
 #'     geom_point() + geom_line() + geom_ribbon(alpha=.3)
 #'
-ci <- function(df, x = totalResults, N = tokens, conf.level = 0.95) {
+ci <- function(df, x = totalResults, N = total, conf.level = 0.95) {
   x <- enquo(x)
   N <- enquo(N)
   df %>%
@@ -42,10 +42,10 @@
 }
 
 ## Mute notes: "Undefined global functions or variables:"
-globalVariables(c("totalResults", "tokens", "estimate", "tst"))
+globalVariables(c("totalResults", "total", "estimate", "tst"))
 
 
-# ci.old <- function(df, x = totalResults, N = tokens, conf.level = 0.95) {
+# ci.old <- function(df, x = totalResults, N = total, conf.level = 0.95) {
 #   x <- deparse(substitute(x))
 #   N <- deparse(substitute(N))
 #   df <- data.frame(df)
diff --git a/demo/alternativesOverTime.R b/demo/alternativesOverTime.R
index dca55b3..eb85ac8 100755
--- a/demo/alternativesOverTime.R
+++ b/demo/alternativesOverTime.R
@@ -9,9 +9,7 @@
 
 alternativesOverTime <- function(alternatives, years, kco = new("KorAPConnection", verbose=TRUE)) {
   df <- expand_grid(Variant = alternatives, year = years) %>%
-    cbind(corpusQuery(kco, .$Variant, sprintf("textType = /Zeit.*/ & pubDate in %d", .$year))) %>%
-    group_by(year) %>% mutate(tokens = sum(totalResults)) %>%
-    ci() %>%
+    cbind(frequencyQuery(kco, .$Variant, sprintf("textType = /Zeit.*/ & pubDate in %d", .$year), as.alternatives=TRUE)) %>%
     rename(share=f)
   g <- ggplot(data = df, mapping = aes(x = year, y = share, colour = Variant, fill = Variant)) +
     geom_freq_by_year_ci() +
diff --git a/demo/frequenciesOverDomains.R b/demo/frequenciesOverDomains.R
index a991e25..cf02649 100755
--- a/demo/frequenciesOverDomains.R
+++ b/demo/frequenciesOverDomains.R
@@ -13,7 +13,7 @@
     group_by(Domain) %>%
     dplyr::filter(!is.na(Domain)) %>%
     summarise(count = dplyr::n()) %>%
-    mutate(tokens = (corpusStats(con, sprintf("textClass = /%s.*/", .$Domain)))$tokens) %>%
+    mutate(total = (corpusStats(con, sprintf("textClass = /%s.*/", .$Domain)))$tokens) %>%
     ci(x = count) %>%
     ipm() %>%
     { df <<- . } %>%
diff --git a/man/KorAPQuery-class.Rd b/man/KorAPQuery-class.Rd
index 2f343f6..7749151 100644
--- a/man/KorAPQuery-class.Rd
+++ b/man/KorAPQuery-class.Rd
@@ -34,7 +34,7 @@
   verbose = kqo@korapConnection@verbose)
 
 \S4method{frequencyQuery}{KorAPConnection}(kco, query, vc = "",
-  conf.level = 0.95, ...)
+  conf.level = 0.95, as.alternatives = FALSE, ...)
 
 \method{format}{KorAPQuery}(x, ...)
 
@@ -79,6 +79,8 @@
 
 \item{conf.level}{confidence level of the returned confidence interval (passed throgh \code{\link{ci}}  to \code{\link{prop.test}}).}
 
+\item{as.alternatives}{LOGICAL that specifies if the query terms should be treated as alternatives. If \code{as.alternatives} is TRUE, the sum over all query hits, instead of the respective vc token sizes is used as total for the calculation of relative frequencies.}
+
 \item{...}{further arguments passed to or from other methods}
 
 \item{x}{KorAPQuery object}
diff --git a/man/ci.Rd b/man/ci.Rd
index 381adb0..ee6e18d 100644
--- a/man/ci.Rd
+++ b/man/ci.Rd
@@ -4,7 +4,7 @@
 \alias{ci}
 \title{Add confidence interval and relative frequency variables}
 \usage{
-ci(df, x = totalResults, N = tokens, conf.level = 0.95)
+ci(df, x = totalResults, N = total, conf.level = 0.95)
 }
 \arguments{
 \item{df}{table with columns for absolute and total frequencies.}
@@ -27,7 +27,7 @@
 kco <- new("KorAPConnection", verbose=TRUE)
 expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) \%>\%
   bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in \%d", .$year))) \%>\%
-  mutate(tokens=corpusStats(kco, vc=vc)$tokens) \%>\%
+  mutate(total=corpusStats(kco, vc=vc)$tokens) \%>\%
   ci() \%>\%
   ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
     geom_point() + geom_line() + geom_ribbon(alpha=.3)