Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 1 | % Generated by roxygen2: do not edit by hand |
| 2 | % Please edit documentation in R/ci.R, R/misc.R |
| 3 | \name{ci} |
| 4 | \alias{ci} |
| 5 | \alias{misc-functions} |
| 6 | \alias{ipm} |
| 7 | \alias{percent} |
| 8 | \alias{queryStringToLabel} |
| 9 | \alias{geom_freq_by_year_ci} |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 10 | \title{Add confidence interval and relative frequency variables} |
| 11 | \usage{ |
| 12 | ci(df, x = totalResults, N = total, conf.level = 0.95) |
| 13 | |
| 14 | ipm(df) |
| 15 | |
| 16 | percent(df) |
| 17 | |
| 18 | queryStringToLabel(data, pubDateOnly = FALSE, excludePubDate = FALSE) |
| 19 | |
| 20 | geom_freq_by_year_ci(mapping = aes(ymin = conf.low, ymax = conf.high), ...) |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 21 | } |
| 22 | \arguments{ |
| 23 | \item{df}{table returned from \code{\link{frequencyQuery}}} |
| 24 | |
| 25 | \item{x}{column with the observed absolute frequency.} |
| 26 | |
| 27 | \item{N}{column with the total frequencies} |
| 28 | |
| 29 | \item{conf.level}{confidence level of the returned confidence interval. Must |
| 30 | be a single number between 0 and 1.} |
| 31 | |
| 32 | \item{data}{string or vector of query or vc definition strings} |
| 33 | |
| 34 | \item{pubDateOnly}{discard all but the publication date} |
| 35 | |
| 36 | \item{excludePubDate}{discard publication date constraints} |
| 37 | |
| 38 | \item{mapping}{Set of aesthetic mappings created by \code{aes()} or \code{aes_()}. If specified and \code{inherit.aes = TRUE} (the default), it is combined with the default mapping at the top level of the plot. You must supply \code{mapping} if there is no plot mapping.} |
| 39 | |
Marc Kupietz | 5fb892e | 2021-03-05 08:18:25 +0100 | [diff] [blame] | 40 | \item{...}{Other arguments passed to geom_ribbon, geom_line, and geom_click_point.} |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 41 | } |
| 42 | \value{ |
| 43 | original table with additional column \code{ipm} and converted columns \code{conf.low} and \code{conf.high} |
| 44 | |
| 45 | original table with converted columns \code{f}, \code{conf.low} and \code{conf.high} |
| 46 | |
| 47 | string or vector of strings with clipped off common prefixes and suffixes |
| 48 | } |
| 49 | \description{ |
| 50 | Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame: |
| 51 | 1. relative frequency (\code{f}) |
| 52 | 2. lower bound of a confidence interval (\code{conf.low}) |
| 53 | 3. upper bound of a confidence interval (\code{conf.high}) |
| 54 | |
| 55 | Convenience function for converting frequency tables to instances per |
| 56 | million. |
| 57 | |
| 58 | Convenience function for converting frequency tables of alternative variants |
| 59 | (generated with \code{as.alternatives=TRUE}) to percent. |
| 60 | |
| 61 | Converts a vector of query or vc strings to typically appropriate legend labels |
| 62 | by clipping off prefixes and suffixes that are common to all query strings. |
| 63 | |
| 64 | Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using ggplot2. |
| 65 | \bold{Warning:} This function may be moved to a new package. |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 66 | } |
| 67 | \details{ |
| 68 | Given a table with columns \code{f}, \code{conf.low}, and \code{conf.high}, \code{ipm} adds a column \code{ipm} |
| 69 | and multiplies \code{conf.low} and \code{conf.high} by 10^6. |
| 70 | } |
| 71 | \examples{ |
| 72 | \donttest{ |
| 73 | library(ggplot2) |
| 74 | kco <- new("KorAPConnection", verbose=TRUE) |
| 75 | expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) \%>\% |
| 76 | bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in \%d", .$year))) \%>\% |
| 77 | mutate(total=corpusStats(kco, vc=vc)$tokens) \%>\% |
| 78 | ci() \%>\% |
| 79 | ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) + |
| 80 | geom_point() + geom_line() + geom_ribbon(alpha=.3) |
| 81 | } |
| 82 | \donttest{ |
| 83 | new("KorAPConnection") \%>\% frequencyQuery("Test", paste0("pubDate in ", 2000:2002)) \%>\% ipm() |
| 84 | } |
| 85 | \donttest{ |
| 86 | new("KorAPConnection") \%>\% |
| 87 | frequencyQuery(c("Tollpatsch", "Tolpatsch"), |
| 88 | vc=paste0("pubDate in ", 2000:2002), |
| 89 | as.alternatives = TRUE) \%>\% |
| 90 | percent() |
| 91 | } |
| 92 | queryStringToLabel(paste("textType = /Zeit.*/ & pubDate in", c(2010:2019))) |
| 93 | queryStringToLabel(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]")) |
| 94 | queryStringToLabel(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]")) |
| 95 | |
| 96 | library(ggplot2) |
| 97 | kco <- new("KorAPConnection", verbose=TRUE) |
| 98 | \donttest{ |
| 99 | expand_grid(condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"), |
| 100 | year = (2005:2011)) \%>\% |
| 101 | cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]", |
| 102 | paste0(.$condition," & pubDate in ", .$year))) \%>\% |
| 103 | ipm() \%>\% |
| 104 | ggplot(aes(year, ipm, fill = condition, color = condition)) + |
| 105 | geom_freq_by_year_ci() |
| 106 | } |
Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 107 | } |
| 108 | \seealso{ |
| 109 | \code{ci} is already included in \code{\link{frequencyQuery}} |
| 110 | } |