Blame - R/ci.R - KorAP/RKorAPClient

blob: b9dd9874304a3217c75e85767ad0b04d2792e7fa [file] [log] [blame]

Marc Kupietz	dcc1de6	2019-10-04 09:10:36 +0200	[diff] [blame]	1
				2	#' Add confidence interval and relative frequency variables
				3	#'
				4	#' Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame:
				5	#' 1. relative frequency (\code{f}) 2. lower bound of a confidence interval
				6	#' (\code{ci.low}) 3. upper bound of a confidence interval
				7	#'
				8	#' @param df table with columns for absolute and total frequencies.
				9	#' @param x column with the observed absolute frequency.
				10	#' @param N column with the total frequncies
				11	#' @param conf.level confidence level of the returned confidence interval. Must
				12	#' be a single number between 0 and 1.
				13	#'
				14	#' @export
				15	#' @importFrom stats prop.test
				16	#' @examples
				17	#' library(ggplot2)
				18	#' kco <- new("KorAPConnection", verbose=TRUE)
				19	#' expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) %>%
				20	#' bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in %d", .$year))) %>%
				21	#' mutate(tokens=corpusStats(kco, vc=vc)$tokens) %>%
				22	#' ci() %>%
				23	#' ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
				24	#' geom_point() + geom_line() + geom_ribbon(alpha=.3)
				25	#'
				26	ci <- function(df, x = totalResults, N = tokens, conf.level = 0.95) {
				27	x <- enquo(x)
				28	N <- enquo(N)
				29	df %>%
				30	rowwise %>%
				31	mutate(tst = list(broom::tidy(prop.test(!!x, !!N, conf.level = conf.level)) %>%
				32	select("estimate", starts_with("conf.")) %>%
				33	rename(f = estimate)
				34	)) %>%
				35	tidyr::unnest(tst)
				36	}
				37
				38
				39	## quiets concerns of R CMD check re: the .'s that appear in pipelines
				40	if(getRversion() >= "2.15.1") utils::globalVariables(c("."))
				41
				42
				43	# ci.old <- function(df, x = totalResults, N = tokens, conf.level = 0.95) {
				44	# x <- deparse(substitute(x))
				45	# N <- deparse(substitute(N))
				46	# df <- data.frame(df)
				47	# df$f <- df[,x] / df[,N]
				48	# df[, c("conf.low", "conf.high")] <- t(sapply(Map(function(a, b) prop.test(a, b, conf.level = conf.level), df[,x], df[,N]), "[[","conf.int"))
				49	# return(df)
				50	# }