blob: d9433a32edda19465f01d04a206ac9f86063a148 [file] [log] [blame]
Marc Kupietza6e4ee62021-03-05 09:00:15 +01001% Generated by roxygen2: do not edit by hand
2% Please edit documentation in R/ci.R, R/misc.R
3\name{ci}
4\alias{ci}
5\alias{misc-functions}
6\alias{ipm}
7\alias{percent}
8\alias{queryStringToLabel}
9\alias{geom_freq_by_year_ci}
Marc Kupietza6e4ee62021-03-05 09:00:15 +010010\title{Add confidence interval and relative frequency variables}
11\usage{
12ci(df, x = totalResults, N = total, conf.level = 0.95)
13
14ipm(df)
15
16percent(df)
17
18queryStringToLabel(data, pubDateOnly = FALSE, excludePubDate = FALSE)
19
20geom_freq_by_year_ci(mapping = aes(ymin = conf.low, ymax = conf.high), ...)
Marc Kupietza6e4ee62021-03-05 09:00:15 +010021}
22\arguments{
Marc Kupietz67edcb52021-09-20 21:54:24 +020023\item{df}{table returned from \code{\link[=frequencyQuery]{frequencyQuery()}}}
Marc Kupietza6e4ee62021-03-05 09:00:15 +010024
25\item{x}{column with the observed absolute frequency.}
26
27\item{N}{column with the total frequencies}
28
29\item{conf.level}{confidence level of the returned confidence interval. Must
30be a single number between 0 and 1.}
31
32\item{data}{string or vector of query or vc definition strings}
33
34\item{pubDateOnly}{discard all but the publication date}
35
36\item{excludePubDate}{discard publication date constraints}
37
38\item{mapping}{Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.}
39
Marc Kupietz5fb892e2021-03-05 08:18:25 +010040\item{...}{Other arguments passed to geom_ribbon, geom_line, and geom_click_point.}
Marc Kupietza6e4ee62021-03-05 09:00:15 +010041}
42\value{
43original table with additional column \code{ipm} and converted columns \code{conf.low} and \code{conf.high}
44
45original table with converted columns \code{f}, \code{conf.low} and \code{conf.high}
46
47string or vector of strings with clipped off common prefixes and suffixes
48}
49\description{
Marc Kupietz67edcb52021-09-20 21:54:24 +020050Using \code{\link[=prop.test]{prop.test()}}, \code{ci} adds three columns to a data frame:
51\enumerate{
52\item relative frequency (\code{f})
53\item lower bound of a confidence interval (\code{conf.low})
54\item upper bound of a confidence interval (\code{conf.high})
55}
Marc Kupietza6e4ee62021-03-05 09:00:15 +010056
57Convenience function for converting frequency tables to instances per
58million.
59
60Convenience function for converting frequency tables of alternative variants
61(generated with \code{as.alternatives=TRUE}) to percent.
62
63Converts a vector of query or vc strings to typically appropriate legend labels
64by clipping off prefixes and suffixes that are common to all query strings.
65
66Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using ggplot2.
Marc Kupietz67edcb52021-09-20 21:54:24 +020067\strong{Warning:} This function may be moved to a new package.
Marc Kupietza6e4ee62021-03-05 09:00:15 +010068}
69\details{
Marc Kupietz67edcb52021-09-20 21:54:24 +020070Given a table with columns \code{f}, \code{conf.low}, and \code{conf.high}, \code{ipm} adds a column \code{ipm}
Marc Kupietza6e4ee62021-03-05 09:00:15 +010071and multiplies \code{conf.low} and \code{conf.high} by 10^6.
72}
73\examples{
Marc Kupietz6ae76052021-09-21 10:34:00 +020074\dontrun{
75
Marc Kupietza6e4ee62021-03-05 09:00:15 +010076library(ggplot2)
77kco <- new("KorAPConnection", verbose=TRUE)
78expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) \%>\%
79 bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in \%d", .$year))) \%>\%
80 mutate(total=corpusStats(kco, vc=vc)$tokens) \%>\%
81 ci() \%>\%
82 ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
83 geom_point() + geom_line() + geom_ribbon(alpha=.3)
84}
Marc Kupietz6ae76052021-09-21 10:34:00 +020085\dontrun{
86
Marc Kupietza6e4ee62021-03-05 09:00:15 +010087new("KorAPConnection") \%>\% frequencyQuery("Test", paste0("pubDate in ", 2000:2002)) \%>\% ipm()
88}
Marc Kupietz6ae76052021-09-21 10:34:00 +020089\dontrun{
90
Marc Kupietza6e4ee62021-03-05 09:00:15 +010091new("KorAPConnection") \%>\%
92 frequencyQuery(c("Tollpatsch", "Tolpatsch"),
93 vc=paste0("pubDate in ", 2000:2002),
94 as.alternatives = TRUE) \%>\%
95 percent()
96}
97queryStringToLabel(paste("textType = /Zeit.*/ & pubDate in", c(2010:2019)))
98queryStringToLabel(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]"))
99queryStringToLabel(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]"))
100
Marc Kupietz548ac352023-04-18 17:38:37 +0200101\dontrun{
Marc Kupietza6e4ee62021-03-05 09:00:15 +0100102library(ggplot2)
103kco <- new("KorAPConnection", verbose=TRUE)
Marc Kupietz6ae76052021-09-21 10:34:00 +0200104
Marc Kupietza6e4ee62021-03-05 09:00:15 +0100105expand_grid(condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
106 year = (2005:2011)) \%>\%
107 cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
108 paste0(.$condition," & pubDate in ", .$year))) \%>\%
109 ipm() \%>\%
110 ggplot(aes(year, ipm, fill = condition, color = condition)) +
111 geom_freq_by_year_ci()
112}
Marc Kupietza6e4ee62021-03-05 09:00:15 +0100113}
114\seealso{
Marc Kupietz67edcb52021-09-20 21:54:24 +0200115\code{ci} is already included in \code{\link[=frequencyQuery]{frequencyQuery()}}
Marc Kupietza6e4ee62021-03-05 09:00:15 +0100116}