blob: e3c0630bbe890bb4273ec164bf05b8e8a1545c65 [file] [log] [blame]
Marc Kupietza6e4ee62021-03-05 09:00:15 +01001% Generated by roxygen2: do not edit by hand
2% Please edit documentation in R/ci.R, R/misc.R
3\name{ci}
4\alias{ci}
5\alias{misc-functions}
6\alias{ipm}
7\alias{percent}
8\alias{queryStringToLabel}
9\alias{geom_freq_by_year_ci}
Marc Kupietza6e4ee62021-03-05 09:00:15 +010010\title{Add confidence interval and relative frequency variables}
11\usage{
12ci(df, x = totalResults, N = total, conf.level = 0.95)
13
14ipm(df)
15
16percent(df)
17
18queryStringToLabel(data, pubDateOnly = FALSE, excludePubDate = FALSE)
19
20geom_freq_by_year_ci(mapping = aes(ymin = conf.low, ymax = conf.high), ...)
Marc Kupietza6e4ee62021-03-05 09:00:15 +010021}
22\arguments{
23\item{df}{table returned from \code{\link{frequencyQuery}}}
24
25\item{x}{column with the observed absolute frequency.}
26
27\item{N}{column with the total frequencies}
28
29\item{conf.level}{confidence level of the returned confidence interval. Must
30be a single number between 0 and 1.}
31
32\item{data}{string or vector of query or vc definition strings}
33
34\item{pubDateOnly}{discard all but the publication date}
35
36\item{excludePubDate}{discard publication date constraints}
37
38\item{mapping}{Set of aesthetic mappings created by \code{aes()} or \code{aes_()}. If specified and \code{inherit.aes = TRUE} (the default), it is combined with the default mapping at the top level of the plot. You must supply \code{mapping} if there is no plot mapping.}
39
Marc Kupietz5fb892e2021-03-05 08:18:25 +010040\item{...}{Other arguments passed to geom_ribbon, geom_line, and geom_click_point.}
Marc Kupietza6e4ee62021-03-05 09:00:15 +010041}
42\value{
43original table with additional column \code{ipm} and converted columns \code{conf.low} and \code{conf.high}
44
45original table with converted columns \code{f}, \code{conf.low} and \code{conf.high}
46
47string or vector of strings with clipped off common prefixes and suffixes
48}
49\description{
50Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame:
511. relative frequency (\code{f})
522. lower bound of a confidence interval (\code{conf.low})
533. upper bound of a confidence interval (\code{conf.high})
54
55Convenience function for converting frequency tables to instances per
56million.
57
58Convenience function for converting frequency tables of alternative variants
59(generated with \code{as.alternatives=TRUE}) to percent.
60
61Converts a vector of query or vc strings to typically appropriate legend labels
62by clipping off prefixes and suffixes that are common to all query strings.
63
64Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using ggplot2.
65\bold{Warning:} This function may be moved to a new package.
Marc Kupietza6e4ee62021-03-05 09:00:15 +010066}
67\details{
68Given a table with columns \code{f}, \code{conf.low}, and \code{conf.high}, \code{ipm} adds a column \code{ipm}
69and multiplies \code{conf.low} and \code{conf.high} by 10^6.
70}
71\examples{
72\donttest{
73library(ggplot2)
74kco <- new("KorAPConnection", verbose=TRUE)
75expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) \%>\%
76 bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in \%d", .$year))) \%>\%
77 mutate(total=corpusStats(kco, vc=vc)$tokens) \%>\%
78 ci() \%>\%
79 ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
80 geom_point() + geom_line() + geom_ribbon(alpha=.3)
81}
82\donttest{
83new("KorAPConnection") \%>\% frequencyQuery("Test", paste0("pubDate in ", 2000:2002)) \%>\% ipm()
84}
85\donttest{
86new("KorAPConnection") \%>\%
87 frequencyQuery(c("Tollpatsch", "Tolpatsch"),
88 vc=paste0("pubDate in ", 2000:2002),
89 as.alternatives = TRUE) \%>\%
90 percent()
91}
92queryStringToLabel(paste("textType = /Zeit.*/ & pubDate in", c(2010:2019)))
93queryStringToLabel(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]"))
94queryStringToLabel(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]"))
95
96library(ggplot2)
97kco <- new("KorAPConnection", verbose=TRUE)
98\donttest{
99expand_grid(condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
100 year = (2005:2011)) \%>\%
101 cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
102 paste0(.$condition," & pubDate in ", .$year))) \%>\%
103 ipm() \%>\%
104 ggplot(aes(year, ipm, fill = condition, color = condition)) +
105 geom_freq_by_year_ci()
106}
Marc Kupietza6e4ee62021-03-05 09:00:15 +0100107}
108\seealso{
109\code{ci} is already included in \code{\link{frequencyQuery}}
110}