| % Generated by roxygen2: do not edit by hand |
| % Please edit documentation in R/ci.R, R/misc.R |
| \name{ci} |
| \alias{ci} |
| \alias{misc-functions} |
| \alias{ipm} |
| \alias{percent} |
| \alias{queryStringToLabel} |
| \alias{geom_freq_by_year_ci} |
| \title{Add confidence interval and relative frequency variables} |
| \usage{ |
| ci(df, x = totalResults, N = total, conf.level = 0.95) |
| |
| ipm(df) |
| |
| percent(df) |
| |
| queryStringToLabel(data, pubDateOnly = FALSE, excludePubDate = FALSE) |
| |
| geom_freq_by_year_ci(mapping = aes(ymin = conf.low, ymax = conf.high), ...) |
| } |
| \arguments{ |
| \item{df}{table returned from \code{\link[=frequencyQuery]{frequencyQuery()}}} |
| |
| \item{x}{column with the observed absolute frequency.} |
| |
| \item{N}{column with the total frequencies} |
| |
| \item{conf.level}{confidence level of the returned confidence interval. Must |
| be a single number between 0 and 1.} |
| |
| \item{data}{string or vector of query or vc definition strings} |
| |
| \item{pubDateOnly}{discard all but the publication date} |
| |
| \item{excludePubDate}{discard publication date constraints} |
| |
| \item{mapping}{Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.} |
| |
| \item{...}{Other arguments passed to geom_ribbon, geom_line, and geom_click_point.} |
| } |
| \value{ |
| original table with additional column \code{ipm} and converted columns \code{conf.low} and \code{conf.high} |
| |
| original table with converted columns \code{f}, \code{conf.low} and \code{conf.high} |
| |
| string or vector of strings with clipped off common prefixes and suffixes |
| } |
| \description{ |
| Using \code{\link[=prop.test]{prop.test()}}, \code{ci} adds three columns to a data frame: |
| \enumerate{ |
| \item relative frequency (\code{f}) |
| \item lower bound of a confidence interval (\code{conf.low}) |
| \item upper bound of a confidence interval (\code{conf.high}) |
| } |
| |
| Convenience function for converting frequency tables to instances per |
| million. |
| |
| Convenience function for converting frequency tables of alternative variants |
| (generated with \code{as.alternatives=TRUE}) to percent. |
| |
| Converts a vector of query or vc strings to typically appropriate legend labels |
| by clipping off prefixes and suffixes that are common to all query strings. |
| |
| Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using ggplot2. |
| \strong{Warning:} This function may be moved to a new package. |
| } |
| \details{ |
| Given a table with columns \code{f}, \code{conf.low}, and \code{conf.high}, \code{ipm} adds a column \code{ipm} |
| and multiplies \code{conf.low} and \code{conf.high} by 10^6. |
| } |
| \examples{ |
| \dontrun{ |
| |
| library(ggplot2) |
| kco <- new("KorAPConnection", verbose=TRUE) |
| expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) \%>\% |
| bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in \%d", .$year))) \%>\% |
| mutate(total=corpusStats(kco, vc=vc)$tokens) \%>\% |
| ci() \%>\% |
| ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) + |
| geom_point() + geom_line() + geom_ribbon(alpha=.3) |
| } |
| \dontrun{ |
| |
| new("KorAPConnection") \%>\% frequencyQuery("Test", paste0("pubDate in ", 2000:2002)) \%>\% ipm() |
| } |
| \dontrun{ |
| |
| new("KorAPConnection") \%>\% |
| frequencyQuery(c("Tollpatsch", "Tolpatsch"), |
| vc=paste0("pubDate in ", 2000:2002), |
| as.alternatives = TRUE) \%>\% |
| percent() |
| } |
| queryStringToLabel(paste("textType = /Zeit.*/ & pubDate in", c(2010:2019))) |
| queryStringToLabel(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]")) |
| queryStringToLabel(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]")) |
| |
| library(ggplot2) |
| kco <- new("KorAPConnection", verbose=TRUE) |
| \dontrun{ |
| |
| expand_grid(condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"), |
| year = (2005:2011)) \%>\% |
| cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]", |
| paste0(.$condition," & pubDate in ", .$year))) \%>\% |
| ipm() \%>\% |
| ggplot(aes(year, ipm, fill = condition, color = condition)) + |
| geom_freq_by_year_ci() |
| } |
| } |
| \seealso{ |
| \code{ci} is already included in \code{\link[=frequencyQuery]{frequencyQuery()}} |
| } |