blob: b9dd9874304a3217c75e85767ad0b04d2792e7fa [file] [log] [blame]
Marc Kupietzdcc1de62019-10-04 09:10:36 +02001
2#' Add confidence interval and relative frequency variables
3#'
4#' Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame:
5#' 1. relative frequency (\code{f}) 2. lower bound of a confidence interval
6#' (\code{ci.low}) 3. upper bound of a confidence interval
7#'
8#' @param df table with columns for absolute and total frequencies.
9#' @param x column with the observed absolute frequency.
10#' @param N column with the total frequncies
11#' @param conf.level confidence level of the returned confidence interval. Must
12#' be a single number between 0 and 1.
13#'
14#' @export
15#' @importFrom stats prop.test
16#' @examples
17#' library(ggplot2)
18#' kco <- new("KorAPConnection", verbose=TRUE)
19#' expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) %>%
20#' bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in %d", .$year))) %>%
21#' mutate(tokens=corpusStats(kco, vc=vc)$tokens) %>%
22#' ci() %>%
23#' ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
24#' geom_point() + geom_line() + geom_ribbon(alpha=.3)
25#'
26ci <- function(df, x = totalResults, N = tokens, conf.level = 0.95) {
27 x <- enquo(x)
28 N <- enquo(N)
29 df %>%
30 rowwise %>%
31 mutate(tst = list(broom::tidy(prop.test(!!x, !!N, conf.level = conf.level)) %>%
32 select("estimate", starts_with("conf.")) %>%
33 rename(f = estimate)
34 )) %>%
35 tidyr::unnest(tst)
36}
37
38
39## quiets concerns of R CMD check re: the .'s that appear in pipelines
40if(getRversion() >= "2.15.1") utils::globalVariables(c("."))
41
42
43# ci.old <- function(df, x = totalResults, N = tokens, conf.level = 0.95) {
44# x <- deparse(substitute(x))
45# N <- deparse(substitute(N))
46# df <- data.frame(df)
47# df$f <- df[,x] / df[,N]
48# df[, c("conf.low", "conf.high")] <- t(sapply(Map(function(a, b) prop.test(a, b, conf.level = conf.level), df[,x], df[,N]), "[[","conf.int"))
49# return(df)
50# }