blob: d9433a32edda19465f01d04a206ac9f86063a148 [file] [log] [blame]
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ci.R, R/misc.R
\name{ci}
\alias{ci}
\alias{misc-functions}
\alias{ipm}
\alias{percent}
\alias{queryStringToLabel}
\alias{geom_freq_by_year_ci}
\title{Add confidence interval and relative frequency variables}
\usage{
ci(df, x = totalResults, N = total, conf.level = 0.95)
ipm(df)
percent(df)
queryStringToLabel(data, pubDateOnly = FALSE, excludePubDate = FALSE)
geom_freq_by_year_ci(mapping = aes(ymin = conf.low, ymax = conf.high), ...)
}
\arguments{
\item{df}{table returned from \code{\link[=frequencyQuery]{frequencyQuery()}}}
\item{x}{column with the observed absolute frequency.}
\item{N}{column with the total frequencies}
\item{conf.level}{confidence level of the returned confidence interval. Must
be a single number between 0 and 1.}
\item{data}{string or vector of query or vc definition strings}
\item{pubDateOnly}{discard all but the publication date}
\item{excludePubDate}{discard publication date constraints}
\item{mapping}{Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.}
\item{...}{Other arguments passed to geom_ribbon, geom_line, and geom_click_point.}
}
\value{
original table with additional column \code{ipm} and converted columns \code{conf.low} and \code{conf.high}
original table with converted columns \code{f}, \code{conf.low} and \code{conf.high}
string or vector of strings with clipped off common prefixes and suffixes
}
\description{
Using \code{\link[=prop.test]{prop.test()}}, \code{ci} adds three columns to a data frame:
\enumerate{
\item relative frequency (\code{f})
\item lower bound of a confidence interval (\code{conf.low})
\item upper bound of a confidence interval (\code{conf.high})
}
Convenience function for converting frequency tables to instances per
million.
Convenience function for converting frequency tables of alternative variants
(generated with \code{as.alternatives=TRUE}) to percent.
Converts a vector of query or vc strings to typically appropriate legend labels
by clipping off prefixes and suffixes that are common to all query strings.
Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using ggplot2.
\strong{Warning:} This function may be moved to a new package.
}
\details{
Given a table with columns \code{f}, \code{conf.low}, and \code{conf.high}, \code{ipm} adds a column \code{ipm}
and multiplies \code{conf.low} and \code{conf.high} by 10^6.
}
\examples{
\dontrun{
library(ggplot2)
kco <- new("KorAPConnection", verbose=TRUE)
expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) \%>\%
bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in \%d", .$year))) \%>\%
mutate(total=corpusStats(kco, vc=vc)$tokens) \%>\%
ci() \%>\%
ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
geom_point() + geom_line() + geom_ribbon(alpha=.3)
}
\dontrun{
new("KorAPConnection") \%>\% frequencyQuery("Test", paste0("pubDate in ", 2000:2002)) \%>\% ipm()
}
\dontrun{
new("KorAPConnection") \%>\%
frequencyQuery(c("Tollpatsch", "Tolpatsch"),
vc=paste0("pubDate in ", 2000:2002),
as.alternatives = TRUE) \%>\%
percent()
}
queryStringToLabel(paste("textType = /Zeit.*/ & pubDate in", c(2010:2019)))
queryStringToLabel(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]"))
queryStringToLabel(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]"))
\dontrun{
library(ggplot2)
kco <- new("KorAPConnection", verbose=TRUE)
expand_grid(condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
year = (2005:2011)) \%>\%
cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
paste0(.$condition," & pubDate in ", .$year))) \%>\%
ipm() \%>\%
ggplot(aes(year, ipm, fill = condition, color = condition)) +
geom_freq_by_year_ci()
}
}
\seealso{
\code{ci} is already included in \code{\link[=frequencyQuery]{frequencyQuery()}}
}