Add ci function

The ci function adds confidence interval and
relative frequencies to a table.

Change-Id: Id702522d35e557f20540bc24301195d0513b7138
diff --git a/DESCRIPTION b/DESCRIPTION
index 276c531..506ee08 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -12,6 +12,9 @@
 RoxygenNote: 6.1.1
 Imports:
     R.cache,
+    broom,
+    ggplot2,
+    tibble,
     magrittr,
     tidyr,
     dplyr,
@@ -27,4 +30,5 @@
     'KorAPCorpusStats.R'
     'RKorAPClient.R'
     'KorAPQuery.R'
+    'ci.R'
     'reexports.R'
diff --git a/NAMESPACE b/NAMESPACE
index 3074a48..3488a72 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,11 +2,15 @@
 
 S3method(format,KorAPQuery)
 export("%>%")
+export(as_tibble)
+export(bind_cols)
+export(ci)
 export(complete)
 export(group_by)
 export(mutate)
 export(select)
 export(summarise)
+export(tidy)
 export(year)
 exportClasses(KorAPConnection)
 exportClasses(KorAPCorpusStats)
@@ -27,6 +31,7 @@
 import(purrr)
 import(tidyr)
 import(utils)
+importFrom(broom,tidy)
 importFrom(dplyr,group_by)
 importFrom(dplyr,mutate)
 importFrom(dplyr,select)
@@ -34,4 +39,5 @@
 importFrom(jsonlite,fromJSON)
 importFrom(lubridate,year)
 importFrom(magrittr,"%>%")
+importFrom(stats,prop.test)
 importFrom(tidyr,complete)
diff --git a/R/ci.R b/R/ci.R
new file mode 100644
index 0000000..b9dd987
--- /dev/null
+++ b/R/ci.R
@@ -0,0 +1,50 @@
+
+#' Add confidence interval and relative frequency variables
+#'
+#' Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame:
+#' 1. relative frequency (\code{f}) 2. lower bound of a confidence interval
+#' (\code{conf.low}) 3. upper bound of a confidence interval (\code{conf.high})
+#'
+#' @param df table with columns for absolute and total frequencies.
+#' @param x  column with the observed absolute frequency.
+#' @param N  column with the total frequencies
+#' @param conf.level confidence level of the returned confidence interval. Must
+#'   be a single number between 0 and 1.
+#'
+#' @export
+#' @importFrom stats prop.test
+#' @examples
+#' library(ggplot2)
+#' kco <- new("KorAPConnection", verbose=TRUE)
+#' expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) %>%
+#'   bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in %d", .$year))) %>%
+#'   mutate(tokens=corpusStats(kco, vc=vc)$tokens) %>%
+#'   ci() %>%
+#'   ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
+#'     geom_point() + geom_line() + geom_ribbon(alpha=.3)
+#'
+ci <- function(df, x = totalResults, N = tokens, conf.level = 0.95) {
+  x <- enquo(x)  # capture the column expressions unevaluated so they can be
+  N <- enquo(N)  # spliced into the mutate() call below with !!
+  df %>%
+    rowwise %>%  # evaluate prop.test() once per row instead of on whole columns
+    mutate(tst = list(broom::tidy(prop.test(!!x, !!N, conf.level = conf.level)) %>%  # tidied test result, kept as list-column `tst`
+                        select("estimate", starts_with("conf.")) %>%  # keep only the estimate and the conf.* bounds
+                        rename(f = estimate)  # the estimate is exposed as relative frequency `f`
+    )) %>%
+    tidyr::unnest(tst)  # expand the list-column into regular columns of the result
+}
+
+
+## Declare "." as a known global so that R CMD check does not flag the placeholder used in pipelines (the guard skips this on R < 2.15.1)
+if(getRversion() >= "2.15.1")  utils::globalVariables(c("."))
+
+
+# ci.old <- function(df, x = totalResults, N = tokens, conf.level = 0.95) {
+#   x <- deparse(substitute(x))
+#   N <- deparse(substitute(N))
+#   df <- data.frame(df)
+#   df$f <- df[,x] / df[,N]
+#   df[, c("conf.low", "conf.high")] <- t(sapply(Map(function(a, b) prop.test(a, b, conf.level = conf.level), df[,x], df[,N]), "[[","conf.int"))
+#   return(df)
+# }
diff --git a/man/ci.Rd b/man/ci.Rd
new file mode 100644
index 0000000..e4f7e54
--- /dev/null
+++ b/man/ci.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ci.R
+\name{ci}
+\alias{ci}
+\title{Add confidence interval and relative frequency variables}
+\usage{
+ci(df, x = totalResults, N = tokens, conf.level = 0.95)
+}
+\arguments{
+\item{df}{table with columns for absolute and total frequencies.}
+
+\item{x}{column with the observed absolute frequency.}
+
+\item{N}{column with the total frequencies}
+
+\item{conf.level}{confidence level of the returned confidence interval. Must
+be a single number between 0 and 1.}
+}
+\description{
+Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame:
+1. relative frequency (\code{f}) 2. lower bound of a confidence interval
+(\code{conf.low}) 3. upper bound of a confidence interval (\code{conf.high})
+}
+\examples{
+library(ggplot2)
+kco <- new("KorAPConnection", verbose=TRUE)
+expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) \%>\%
+  bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in \%d", .$year))) \%>\%
+  mutate(tokens=corpusStats(kco, vc=vc)$tokens) \%>\%
+  ci() \%>\%
+  ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
+    geom_point() + geom_line() + geom_ribbon(alpha=.3)
+
+}