Blame - R/ci.R - KorAP/RKorAPClient

blob: 797252c10f4cec99200fa8c86adb5ca91b8e1206 [file] [log] [blame]

Marc Kupietz	dcc1de6	2019-10-04 09:10:36 +0200	[diff] [blame]	1
				2	#' Add confidence interval and relative frequency variables
				3	#'
				4	#' Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame:
Marc Kupietz	3f57528	2019-10-04 14:46:04 +0200	[diff] [blame]	5	#' 1. relative frequency (\code{f})
				6	#' 2. lower bound of a confidence interval (\code{ci.low})
				7	#' 3. upper bound of a confidence interval
				8	#'
				9	#' @seealso
				10	#' \code{ci} is alread included in \code{\link{frequencyQuery}}
Marc Kupietz	dcc1de6	2019-10-04 09:10:36 +0200	[diff] [blame]	11	#'
				12	#' @param df table with columns for absolute and total frequencies.
				13	#' @param x column with the observed absolute frequency.
				14	#' @param N column with the total frequncies
				15	#' @param conf.level confidence level of the returned confidence interval. Must
				16	#' be a single number between 0 and 1.
				17	#'
				18	#' @export
				19	#' @importFrom stats prop.test
Marc Kupietz	3f57528	2019-10-04 14:46:04 +0200	[diff] [blame]	20	#' @importFrom tibble remove_rownames
Marc Kupietz	dcc1de6	2019-10-04 09:10:36 +0200	[diff] [blame]	21	#' @examples
				22	#' library(ggplot2)
				23	#' kco <- new("KorAPConnection", verbose=TRUE)
				24	#' expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) %>%
				25	#' bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in %d", .$year))) %>%
				26	#' mutate(tokens=corpusStats(kco, vc=vc)$tokens) %>%
				27	#' ci() %>%
				28	#' ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
				29	#' geom_point() + geom_line() + geom_ribbon(alpha=.3)
				30	#'
				31	ci <- function(df, x = totalResults, N = tokens, conf.level = 0.95) {
				32	x <- enquo(x)
				33	N <- enquo(N)
				34	df %>%
				35	rowwise %>%
				36	mutate(tst = list(broom::tidy(prop.test(!!x, !!N, conf.level = conf.level)) %>%
				37	select("estimate", starts_with("conf.")) %>%
				38	rename(f = estimate)
				39	)) %>%
				40	tidyr::unnest(tst)
				41	}
				42
Marc Kupietz	7d61387	2019-10-04 22:47:20 +0200	[diff] [blame]	43	## Mute notes: "Undefined global functions or variables:"
				44	globalVariables(c("totalResults", "tokens", "estimate", "tst"))
Marc Kupietz	dcc1de6	2019-10-04 09:10:36 +0200	[diff] [blame]	45
				46
				47	# ci.old <- function(df, x = totalResults, N = tokens, conf.level = 0.95) {
				48	# x <- deparse(substitute(x))
				49	# N <- deparse(substitute(N))
				50	# df <- data.frame(df)
				51	# df$f <- df[,x] / df[,N]
				52	# df[, c("conf.low", "conf.high")] <- t(sapply(Map(function(a, b) prop.test(a, b, conf.level = conf.level), df[,x], df[,N]), "[[","conf.int"))
				53	# return(df)
				54	# }