blob: b0d9c0d8574e38aff49a806c2147b6a14bbe3512 [file] [log] [blame]
Marc Kupietza6e4ee62021-03-05 09:00:15 +01001% Generated by roxygen2: do not edit by hand
2% Please edit documentation in R/ci.R, R/misc.R
3\name{ci}
4\alias{ci}
5\alias{misc-functions}
6\alias{ipm}
7\alias{percent}
8\alias{queryStringToLabel}
9\alias{geom_freq_by_year_ci}
10\alias{ggplotly}
11\title{Add confidence interval and relative frequency variables}
12\usage{
13ci(df, x = totalResults, N = total, conf.level = 0.95)
14
15ipm(df)
16
17percent(df)
18
19queryStringToLabel(data, pubDateOnly = FALSE, excludePubDate = FALSE)
20
21geom_freq_by_year_ci(mapping = aes(ymin = conf.low, ymax = conf.high), ...)
22
23ggplotly(p = ggplot2::last_plot(), tooltip = c("x", "y", "colour", "url"), ...)
24}
25\arguments{
26\item{df}{table returned from \code{\link{frequencyQuery}}}
27
28\item{x}{column with the observed absolute frequency.}
29
30\item{N}{column with the total frequencies}
31
32\item{conf.level}{confidence level of the returned confidence interval. Must
33be a single number between 0 and 1.}
34
35\item{data}{string or vector of query or vc definition strings}
36
37\item{pubDateOnly}{discard all but the publication date}
38
39\item{excludePubDate}{discard publication date constraints}
40
41\item{mapping}{Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.}
42
43\item{...}{Other arguments passed to \code{plotly::ggplotly}}
44
45\item{p}{a ggplot object.}
46
47\item{tooltip}{a character vector specifying which aesthetic mappings to show
48in the tooltip. If you want hyperlinks to KorAP queries you need to include
49\code{"url"} here.}
50}
51\value{
52original table with additional column \code{ipm} and converted columns \code{conf.low} and \code{conf.high}
53
54original table with converted columns \code{f}, \code{conf.low} and \code{conf.high}
55
56string or vector of strings with clipped off common prefixes and suffixes
57}
58\description{
59Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame:
601. relative frequency (\code{f})
612. lower bound of a confidence interval (\code{conf.low})
623. upper bound of a confidence interval (\code{conf.high})
63
64Convenience function for converting frequency tables to instances per
65million.
66
67Convenience function for converting frequency tables of alternative variants
68(generated with \code{as.alternatives=TRUE}) to percent.
69
70Converts a vector of query or vc strings to typically appropriate legend labels
71by clipping off prefixes and suffixes that are common to all query strings.
72
73Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using ggplot2.
74\bold{Warning:} This function may be moved to a new package.
75
76\code{RKorAPClient::ggplotly} converts a \code{ggplot2::ggplot()} object to a plotly
77object with hyperlinks from data points to corresponding KorAP queries.
78\bold{Warning:} This function may be moved to a new package.
79}
80\details{
81Given a table with columns \code{f}, \code{conf.low}, and \code{conf.high}, \code{ipm} adds a column \code{ipm}
82and multiplies \code{conf.low} and \code{conf.high} by 10^6.
83}
84\examples{
85\donttest{
86library(ggplot2)
87kco <- new("KorAPConnection", verbose=TRUE)
88expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) \%>\%
89 bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in \%d", .$year))) \%>\%
90 mutate(total=corpusStats(kco, vc=vc)$tokens) \%>\%
91 ci() \%>\%
92 ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
93 geom_point() + geom_line() + geom_ribbon(alpha=.3)
94}
95\donttest{
96new("KorAPConnection") \%>\% frequencyQuery("Test", paste0("pubDate in ", 2000:2002)) \%>\% ipm()
97}
98\donttest{
99new("KorAPConnection") \%>\%
100 frequencyQuery(c("Tollpatsch", "Tolpatsch"),
101 vc=paste0("pubDate in ", 2000:2002),
102 as.alternatives = TRUE) \%>\%
103 percent()
104}
105queryStringToLabel(paste("textType = /Zeit.*/ & pubDate in", c(2010:2019)))
106queryStringToLabel(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]"))
107queryStringToLabel(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]"))
108
109library(ggplot2)
110kco <- new("KorAPConnection", verbose=TRUE)
111\donttest{
112expand_grid(condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
113 year = (2005:2011)) \%>\%
114 cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
115 paste0(.$condition," & pubDate in ", .$year))) \%>\%
116 ipm() \%>\%
117 ggplot(aes(year, ipm, fill = condition, color = condition)) +
118 geom_freq_by_year_ci()
119}
120library(ggplot2)
121kco <- new("KorAPConnection", verbose=TRUE)
122\donttest{year <- (2003:2011)}\dontshow{year <- c(2005)}
123\donttest{condition <- c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/")}\dontshow{condition <- c("textDomain = /Wirtschaft.*/")}
124g <- expand_grid(condition, year) \%>\%
125 cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
126 paste0(.$condition," & pubDate in ", .$year))) \%>\%
127 ipm() \%>\%
128 ggplot(aes(year, ipm, fill = condition, color = condition)) +
129 ## theme_light(base_size = 20) +
130 geom_freq_by_year_ci()
131p <- ggplotly(g)
132print(p)
133## saveWidget(p, file.path(tempdir(), "heuschrecke.html"))
134
135
136}
137\seealso{
138\code{ci} is already included in \code{\link{frequencyQuery}}
139}