Marc Kupietz | a6e4ee6 | 2021-03-05 09:00:15 +0100 | [diff] [blame] | 1 | % Generated by roxygen2: do not edit by hand |
| 2 | % Please edit documentation in R/ci.R, R/misc.R |
| 3 | \name{ci} |
| 4 | \alias{ci} |
| 5 | \alias{misc-functions} |
| 6 | \alias{ipm} |
| 7 | \alias{percent} |
| 8 | \alias{queryStringToLabel} |
| 9 | \alias{geom_freq_by_year_ci} |
| 10 | \alias{ggplotly} |
| 11 | \title{Add confidence interval and relative frequency variables} |
| 12 | \usage{ |
| 13 | ci(df, x = totalResults, N = total, conf.level = 0.95) |
| 14 | |
| 15 | ipm(df) |
| 16 | |
| 17 | percent(df) |
| 18 | |
| 19 | queryStringToLabel(data, pubDateOnly = FALSE, excludePubDate = FALSE) |
| 20 | |
| 21 | geom_freq_by_year_ci(mapping = aes(ymin = conf.low, ymax = conf.high), ...) |
| 22 | |
| 23 | ggplotly(p = ggplot2::last_plot(), tooltip = c("x", "y", "colour", "url"), ...) |
| 24 | } |
| 25 | \arguments{ |
| 26 | \item{df}{table returned from \code{\link{frequencyQuery}}} |
| 27 | |
| 28 | \item{x}{column with the observed absolute frequency.} |
| 29 | |
| 30 | \item{N}{column with the total frequencies} |
| 31 | |
| 32 | \item{conf.level}{confidence level of the returned confidence interval. Must |
| 33 | be a single number between 0 and 1.} |
| 34 | |
| 35 | \item{data}{string or vector of query or vc definition strings} |
| 36 | |
| 37 | \item{pubDateOnly}{discard all but the publication date} |
| 38 | |
| 39 | \item{excludePubDate}{discard publication date constraints} |
| 40 | |
| 41 | \item{mapping}{Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.} |
| 42 | |
| 43 | \item{...}{Other arguments passed to \code{plotly::ggplotly}} |
| 44 | |
| 45 | \item{p}{a ggplot object.} |
| 46 | |
| 47 | \item{tooltip}{a character vector specifying which aesthetic mappings to show |
| 48 | in the tooltip. If you want hyperlinks to KorAP queries you need to include |
| 49 | \code{"url"} here.} |
| 50 | } |
| 51 | \value{ |
| 52 | original table with additional column \code{ipm} and converted columns \code{conf.low} and \code{conf.high} |
| 53 | |
| 54 | original table with converted columns \code{f}, \code{conf.low} and \code{conf.high} |
| 55 | |
| 56 | string or vector of strings with clipped off common prefixes and suffixes |
| 57 | } |
| 58 | \description{ |
| 59 | Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame: |
| 60 | 1. relative frequency (\code{f}) |
| 61 | 2. lower bound of a confidence interval (\code{conf.low}) |
| 62 | 3. upper bound of a confidence interval (\code{conf.high}) |
| 63 | |
| 64 | Convenience function for converting frequency tables to instances per |
| 65 | million. |
| 66 | |
| 67 | Convenience function for converting frequency tables of alternative variants |
| 68 | (generated with \code{as.alternatives=TRUE}) to percent. |
| 69 | |
| 70 | Converts a vector of query or vc strings to typically appropriate legend labels |
| 71 | by clipping off prefixes and suffixes that are common to all query strings. |
| 72 | |
| 73 | Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using ggplot2. |
| 74 | \bold{Warning:} This function may be moved to a new package. |
| 75 | |
| 76 | \code{RKorAPClient::ggplotly} converts a \code{ggplot2::ggplot()} object to a plotly |
| 77 | object with hyperlinks from data points to corresponding KorAP queries. |
| 78 | \bold{Warning:} This function may be moved to a new package. |
| 79 | } |
| 80 | \details{ |
| 81 | Given a table with columns \code{f}, \code{conf.low}, and \code{conf.high}, \code{ipm} adds a column \code{ipm} |
| 82 | and multiplies \code{conf.low} and \code{conf.high} by 10^6. |
| 83 | } |
| 84 | \examples{ |
| 85 | \donttest{ |
| 86 | library(ggplot2) |
| 87 | kco <- new("KorAPConnection", verbose=TRUE) |
| 88 | expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) \%>\% |
| 89 | bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in \%d", .$year))) \%>\% |
| 90 | mutate(total=corpusStats(kco, vc=vc)$tokens) \%>\% |
| 91 | ci() \%>\% |
| 92 | ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) + |
| 93 | geom_point() + geom_line() + geom_ribbon(alpha=.3) |
| 94 | } |
| 95 | \donttest{ |
| 96 | new("KorAPConnection") \%>\% frequencyQuery("Test", paste0("pubDate in ", 2000:2002)) \%>\% ipm() |
| 97 | } |
| 98 | \donttest{ |
| 99 | new("KorAPConnection") \%>\% |
| 100 | frequencyQuery(c("Tollpatsch", "Tolpatsch"), |
| 101 | vc=paste0("pubDate in ", 2000:2002), |
| 102 | as.alternatives = TRUE) \%>\% |
| 103 | percent() |
| 104 | } |
| 105 | queryStringToLabel(paste("textType = /Zeit.*/ & pubDate in", c(2010:2019))) |
| 106 | queryStringToLabel(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]")) |
| 107 | queryStringToLabel(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]")) |
| 108 | |
| 109 | library(ggplot2) |
| 110 | kco <- new("KorAPConnection", verbose=TRUE) |
| 111 | \donttest{ |
| 112 | expand_grid(condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"), |
| 113 | year = (2005:2011)) \%>\% |
| 114 | cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]", |
| 115 | paste0(.$condition," & pubDate in ", .$year))) \%>\% |
| 116 | ipm() \%>\% |
| 117 | ggplot(aes(year, ipm, fill = condition, color = condition)) + |
| 118 | geom_freq_by_year_ci() |
| 119 | } |
| 120 | library(ggplot2) |
| 121 | kco <- new("KorAPConnection", verbose=TRUE) |
| 122 | \donttest{year <- (2003:2011)}\dontshow{year <- c(2005)} |
| 123 | \donttest{condition <- c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/")}\dontshow{condition <- c("textDomain = /Wirtschaft.*/")} |
| 124 | g <- expand_grid(condition, year) \%>\% |
| 125 | cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]", |
| 126 | paste0(.$condition," & pubDate in ", .$year))) \%>\% |
| 127 | ipm() \%>\% |
| 128 | ggplot(aes(year, ipm, fill = condition, color = condition)) + |
| 129 | ## theme_light(base_size = 20) + |
| 130 | geom_freq_by_year_ci() |
| 131 | p <- ggplotly(g) |
| 132 | print(p) |
| 133 | ## saveWidget(p, file.path(tempdir(), "heuschrecke.html")) |
| 134 | |
| 135 | |
| 136 | } |
| 137 | \seealso{ |
| 138 | \code{ci} is already included in \code{\link{frequencyQuery}} |
| 139 | } |