blob: 4529d7a5552ff23d457679e9deff02a0578aa961 [file] [log] [blame]
Marc Kupietza6e4ee62021-03-05 09:00:15 +01001#' Misc functions
2#'
3#' @name misc-functions
4NULL
5#' NULL
Marc Kupietzbb7d2322019-10-06 21:42:34 +02006
7#' Convert corpus frequency table to instances per million.
8#'
9#' Convenience function for converting frequency tables to instances per
10#' million.
11#'
12#' Given a table with columns \code{f}, \code{conf.low}, and \code{conf.high}, \code{ipm} ads a \code{column ipm}
13#' und multiplies conf.low and \code{conf.high} with 10^6.
14#'
15#' @param df table returned from \code{\link{frequencyQuery}}
16#'
17#' @return original table with additional column \code{ipm} and converted columns \code{conf.low} and \code{conf.high}
18#' @export
19#'
Marc Kupietza6e4ee62021-03-05 09:00:15 +010020#' @rdname misc-functions
Marc Kupietzbb7d2322019-10-06 21:42:34 +020021#' @importFrom dplyr .data
22#'
23#' @examples
Marc Kupietz05b22772020-02-18 21:58:42 +010024#' \donttest{
Marc Kupietzbb7d2322019-10-06 21:42:34 +020025#' new("KorAPConnection") %>% frequencyQuery("Test", paste0("pubDate in ", 2000:2002)) %>% ipm()
Marc Kupietz05b22772020-02-18 21:58:42 +010026#' }
Marc Kupietzbb7d2322019-10-06 21:42:34 +020027ipm <- function(df) {
28 df %>%
29 mutate(ipm = .data$f * 10^6, conf.low = .data$conf.low * 10^6, conf.high = .data$conf.high * 10^6)
30}
31
Marc Kupietz23daf5b2019-11-27 10:28:07 +010032#' Convert corpus frequency table of alternatives to percent
33#'
34#' Convenience function for converting frequency tables of alternative variants
Marc Kupietzcf1771d2020-03-04 16:03:04 +010035#' (generated with \code{as.alternatives=TRUE}) to percent.
Marc Kupietz23daf5b2019-11-27 10:28:07 +010036#'
37#' @param df table returned from \code{\link{frequencyQuery}}
38#'
39#' @return original table with converted columns \code{f}, \code{conf.low} and \code{conf.high}
40#' @export
41#'
42#' @importFrom dplyr .data
43#'
Marc Kupietza6e4ee62021-03-05 09:00:15 +010044#' @rdname misc-functions
Marc Kupietz23daf5b2019-11-27 10:28:07 +010045#' @examples
Marc Kupietz05b22772020-02-18 21:58:42 +010046#' \donttest{
Marc Kupietz23daf5b2019-11-27 10:28:07 +010047#' new("KorAPConnection") %>%
48#' frequencyQuery(c("Tollpatsch", "Tolpatsch"),
49#' vc=paste0("pubDate in ", 2000:2002),
50#' as.alternatives = TRUE) %>%
51#' percent()
Marc Kupietz05b22772020-02-18 21:58:42 +010052#' }
Marc Kupietz23daf5b2019-11-27 10:28:07 +010053percent <- function(df) {
54 df %>%
55 mutate(f = .data$f * 10^2, conf.low = .data$conf.low * 10^2, conf.high = .data$conf.high * 10^2)
56}
57
Marc Kupietz95240e92019-11-27 18:19:04 +010058#' Convert query or vc strings to plot labels
59#'
60#' Converts a vector of query or vc strings to typically appropriate legend labels
61#' by clipping off prefixes and suffixes that are common to all query strings.
62#'
63#' @param data string or vector of query or vc definition strings
Marc Kupietz62d29a12020-01-18 12:38:36 +010064#' @param pubDateOnly discard all but the publication date
65#' @param excludePubDate discard publication date constraints
Marc Kupietz95240e92019-11-27 18:19:04 +010066#' @return string or vector of strings with clipped off common prefixes and suffixes
67#'
Marc Kupietza6e4ee62021-03-05 09:00:15 +010068#' @rdname misc-functions
69#'
Marc Kupietz95240e92019-11-27 18:19:04 +010070#' @examples
71#' queryStringToLabel(paste("textType = /Zeit.*/ & pubDate in", c(2010:2019)))
72#' queryStringToLabel(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]"))
73#' queryStringToLabel(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]"))
74#'
75#' @importFrom PTXQC lcpCount
76#' @importFrom PTXQC lcsCount
77#'
78#' @export
Marc Kupietzcf1771d2020-03-04 16:03:04 +010079queryStringToLabel <- function(data, pubDateOnly = FALSE, excludePubDate = FALSE) {
Marc Kupietz62d29a12020-01-18 12:38:36 +010080 if (pubDateOnly) {
81 data <-substring(data, regexpr("pubDate", data)+7)
82 } else if(excludePubDate) {
83 data <-substring(data, 1, regexpr("pubDate", data))
84 }
Marc Kupietz95240e92019-11-27 18:19:04 +010085 leftCommon = lcpCount(data)
Marc Kupietz62d29a12020-01-18 12:38:36 +010086 while (leftCommon > 0 && grepl("[[:alnum:]/=.*!]", substring(data[1], leftCommon, leftCommon))) {
Marc Kupietz95240e92019-11-27 18:19:04 +010087 leftCommon <- leftCommon - 1
88 }
89 rightCommon = lcsCount(data)
Marc Kupietz62d29a12020-01-18 12:38:36 +010090 while (rightCommon > 0 && grepl("[[:alnum:]/=.*!]", substring(data[1], 1+nchar(data[1]) - rightCommon, 1+nchar(data[1]) - rightCommon))) {
Marc Kupietz95240e92019-11-27 18:19:04 +010091 rightCommon <- rightCommon - 1
92 }
93 substring(data, leftCommon + 1, nchar(data) - rightCommon)
94}
95
Marc Kupietzbb7d2322019-10-06 21:42:34 +020096
Marc Kupietz865760f2019-10-07 19:29:44 +020097## Mute notes: "Undefined global functions or variables:"
98globalVariables(c("conf.high", "conf.low", "onRender", "webUIRequestUrl"))
99
100
101#' Experimental: Plot frequency by year graphs with confidence intervals
Marc Kupietzd68f9712019-10-06 21:48:00 +0200102#'
Marc Kupietz865760f2019-10-07 19:29:44 +0200103#' Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using ggplot2.
104#' \bold{Warning:} This function may be moved to a new package.
105#'
106#' @param mapping Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.
107#' @param ... Other arguments passed to geom_ribbon, geom_line, and geom_click_point.
Marc Kupietzd68f9712019-10-06 21:48:00 +0200108#'
Marc Kupietza6e4ee62021-03-05 09:00:15 +0100109#' @rdname misc-functions
110#'
Marc Kupietzd68f9712019-10-06 21:48:00 +0200111#' @examples
112#' library(ggplot2)
113#' kco <- new("KorAPConnection", verbose=TRUE)
Marc Kupietz05b22772020-02-18 21:58:42 +0100114#' \donttest{
Marc Kupietzd68f9712019-10-06 21:48:00 +0200115#' expand_grid(condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
Marc Kupietz2fbac3d2020-01-18 11:01:21 +0100116#' year = (2005:2011)) %>%
Marc Kupietzd68f9712019-10-06 21:48:00 +0200117#' cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
118#' paste0(.$condition," & pubDate in ", .$year))) %>%
119#' ipm() %>%
Marc Kupietz865760f2019-10-07 19:29:44 +0200120#' ggplot(aes(year, ipm, fill = condition, color = condition)) +
Marc Kupietzd68f9712019-10-06 21:48:00 +0200121#' geom_freq_by_year_ci()
Marc Kupietz05b22772020-02-18 21:58:42 +0100122#' }
Marc Kupietz865760f2019-10-07 19:29:44 +0200123#' @importFrom ggplot2 ggplot aes geom_ribbon geom_line geom_point theme element_text scale_x_continuous
Marc Kupietzd68f9712019-10-06 21:48:00 +0200124#'
125#' @export
Marc Kupietz865760f2019-10-07 19:29:44 +0200126geom_freq_by_year_ci <- function(mapping = aes(ymin=conf.low, ymax=conf.high), ...) {
Marc Kupietzd68f9712019-10-06 21:48:00 +0200127 list(
Marc Kupietz865760f2019-10-07 19:29:44 +0200128 geom_ribbon(mapping,
129 alpha = .3, linetype = 0, show.legend = FALSE, ...),
130 geom_line(...),
131 geom_click_point(aes(url=webUIRequestUrl), ...),
132 theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = c(0.8, 0.2)),
Marc Kupietzd68f9712019-10-06 21:48:00 +0200133 scale_x_continuous(breaks = function(x) seq(ceiling(x[1]), floor(x[2]), by = 1 + floor(((x[2]-x[1])/30)))))
134}
135
Marc Kupietz865760f2019-10-07 19:29:44 +0200136#' @importFrom ggplot2 ggproto aes GeomPoint
137GeomClickPoint <- ggproto(
138 "GeomPoint",
139 GeomPoint,
140 required_aes = c("x", "y"),
141 default_aes = aes(
142 shape = 19, colour = "black", size = 1.5, fill = NA,
143 alpha = NA, stroke = 0.5, url = NA
144 ),
145 extra_params = c("na.rm", "url"),
146 draw_panel = function(data, panel_params,
147 coord, na.rm = FALSE, showpoints = TRUE, url = NULL) {
148 GeomPoint$draw_panel(data, panel_params, coord, na.rm = na.rm)
149 }
150)
151
152#' @importFrom ggplot2 layer
153geom_click_point <- function(mapping = NULL, data = NULL, stat = "identity",
154 position = "identity", na.rm = FALSE, show.legend = NA,
155 inherit.aes = TRUE, url = NA, ...) {
156 layer(
157 geom = GeomClickPoint, mapping = mapping, data = data, stat = stat,
158 position = position, show.legend = show.legend, inherit.aes = inherit.aes,
159 params = list(na.rm = na.rm, ...)
160 )
161}
162
163
164#' @importFrom htmlwidgets onRender
165tooltip2hyperlink <- function(p, attribute="webUIRequestUrl") {
166 pattern <- paste0(attribute, ": ([^<]+)")
167 for(i in grep(attribute, p$x$data)) {
168 x <- p[["x"]][["data"]][[i]][["text"]]
169 m <- regexpr(pattern, x)
170 matches <- sub(paste0(attribute, ": "), "", regmatches(x, m))
171 p$x$data[[i]]$customdata <- matches
172 p[["x"]][["data"]][[i]][["text"]] <- sub(paste0(attribute, ":[^<]*<br ?/?>"), "", p[["x"]][["data"]][[i]][["text"]] )
173 }
174 onRender(p, "function(el, x) { el.on('plotly_click', function(d) { var url=d.points[0].customdata; if(url) { window.open(url, 'korap') } })}")
175}
176
177#' Experimental: Convert ggplot2 to plotly with hyperlinks to KorAP queries
178#'
179#' \code{RKorAPClient::ggplotly} converts a \code{ggplot2::ggplot()} object to a plotly
180#' object with hyperlinks from data points to corresponding KorAP queries.
181#' \bold{Warning:} This function may be moved to a new package.
182#'
183#' @param p a ggplot object.
184#' @param tooltip a character vector specifying which aesthetic mappings to show
185#' in the tooltip. If you want hyperlinks to KorAP queries you need to include
186#' \code{"url"} here.
187#' @param ... Other arguments passed to \code{plotly::ggplotly}
188#'
Marc Kupietza6e4ee62021-03-05 09:00:15 +0100189#' @rdname misc-functions
190#'
Marc Kupietz865760f2019-10-07 19:29:44 +0200191#' @examples
192#' library(ggplot2)
193#' kco <- new("KorAPConnection", verbose=TRUE)
Marc Kupietz657d8e72020-02-25 18:31:50 +0100194#' \donttest{year <- (2003:2011)}\dontshow{year <- c(2005)}
195#' \donttest{condition <- c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/")}\dontshow{condition <- c("textDomain = /Wirtschaft.*/")}
196#' g <- expand_grid(condition, year) %>%
Marc Kupietz865760f2019-10-07 19:29:44 +0200197#' cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
198#' paste0(.$condition," & pubDate in ", .$year))) %>%
199#' ipm() %>%
200#' ggplot(aes(year, ipm, fill = condition, color = condition)) +
Marc Kupietz05b22772020-02-18 21:58:42 +0100201#' ## theme_light(base_size = 20) +
Marc Kupietz865760f2019-10-07 19:29:44 +0200202#' geom_freq_by_year_ci()
203#' p <- ggplotly(g)
204#' print(p)
205#' ## saveWidget(p, paste0(tmpdir(), "heuschrecke.html")
206#'
207#'
208#' @importFrom plotly ggplotly
209#' @importFrom htmlwidgets saveWidget
210#' @export
211ggplotly <- function(p = ggplot2::last_plot(), tooltip = c("x", "y", "colour", "url"), ...) {
212 pp <- plotly::ggplotly(p = p, tooltip = tooltip, ...)
213 tooltip2hyperlink(pp)
214}