Marc Kupietz | bb7d232 | 2019-10-06 21:42:34 +0200 | [diff] [blame] | 1 | |
| 2 | #' Convert corpus frequency table to instances per million. |
| 3 | #' |
| 4 | #' Convenience function for converting frequency tables to instances per |
| 5 | #' million. |
| 6 | #' |
| 7 | #' Given a table with columns \code{f}, \code{conf.low}, and \code{conf.high}, \code{ipm} ads a \code{column ipm} |
| 8 | #' und multiplies conf.low and \code{conf.high} with 10^6. |
| 9 | #' |
| 10 | #' @param df table returned from \code{\link{frequencyQuery}} |
| 11 | #' |
| 12 | #' @return original table with additional column \code{ipm} and converted columns \code{conf.low} and \code{conf.high} |
| 13 | #' @export |
| 14 | #' |
| 15 | #' @importFrom dplyr .data |
| 16 | #' |
| 17 | #' @examples |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 18 | #' \donttest{ |
Marc Kupietz | bb7d232 | 2019-10-06 21:42:34 +0200 | [diff] [blame] | 19 | #' new("KorAPConnection") %>% frequencyQuery("Test", paste0("pubDate in ", 2000:2002)) %>% ipm() |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 20 | #' } |
Marc Kupietz | bb7d232 | 2019-10-06 21:42:34 +0200 | [diff] [blame] | 21 | ipm <- function(df) { |
| 22 | df %>% |
| 23 | mutate(ipm = .data$f * 10^6, conf.low = .data$conf.low * 10^6, conf.high = .data$conf.high * 10^6) |
| 24 | } |
| 25 | |
Marc Kupietz | 23daf5b | 2019-11-27 10:28:07 +0100 | [diff] [blame] | 26 | #' Convert corpus frequency table of alternatives to percent |
| 27 | #' |
| 28 | #' Convenience function for converting frequency tables of alternative variants |
| 29 | #' (generated with \code{as.alternatives=T}) to percent. |
| 30 | #' |
| 31 | #' @param df table returned from \code{\link{frequencyQuery}} |
| 32 | #' |
| 33 | #' @return original table with converted columns \code{f}, \code{conf.low} and \code{conf.high} |
| 34 | #' @export |
| 35 | #' |
| 36 | #' @importFrom dplyr .data |
| 37 | #' |
| 38 | #' @examples |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 39 | #' \donttest{ |
Marc Kupietz | 23daf5b | 2019-11-27 10:28:07 +0100 | [diff] [blame] | 40 | #' new("KorAPConnection") %>% |
| 41 | #' frequencyQuery(c("Tollpatsch", "Tolpatsch"), |
| 42 | #' vc=paste0("pubDate in ", 2000:2002), |
| 43 | #' as.alternatives = TRUE) %>% |
| 44 | #' percent() |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 45 | #' } |
Marc Kupietz | 23daf5b | 2019-11-27 10:28:07 +0100 | [diff] [blame] | 46 | percent <- function(df) { |
| 47 | df %>% |
| 48 | mutate(f = .data$f * 10^2, conf.low = .data$conf.low * 10^2, conf.high = .data$conf.high * 10^2) |
| 49 | } |
| 50 | |
Marc Kupietz | 95240e9 | 2019-11-27 18:19:04 +0100 | [diff] [blame] | 51 | #' Convert query or vc strings to plot labels |
| 52 | #' |
| 53 | #' Converts a vector of query or vc strings to typically appropriate legend labels |
| 54 | #' by clipping off prefixes and suffixes that are common to all query strings. |
| 55 | #' |
| 56 | #' @param data string or vector of query or vc definition strings |
Marc Kupietz | 62d29a1 | 2020-01-18 12:38:36 +0100 | [diff] [blame] | 57 | #' @param pubDateOnly discard all but the publication date |
| 58 | #' @param excludePubDate discard publication date constraints |
Marc Kupietz | 95240e9 | 2019-11-27 18:19:04 +0100 | [diff] [blame] | 59 | #' @return string or vector of strings with clipped off common prefixes and suffixes |
| 60 | #' |
| 61 | #' @examples |
| 62 | #' queryStringToLabel(paste("textType = /Zeit.*/ & pubDate in", c(2010:2019))) |
| 63 | #' queryStringToLabel(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]")) |
| 64 | #' queryStringToLabel(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]")) |
| 65 | #' |
| 66 | #' @importFrom PTXQC lcpCount |
| 67 | #' @importFrom PTXQC lcsCount |
| 68 | #' |
| 69 | #' @export |
Marc Kupietz | 62d29a1 | 2020-01-18 12:38:36 +0100 | [diff] [blame] | 70 | queryStringToLabel <- function(data, pubDateOnly = F, excludePubDate = F) { |
| 71 | if (pubDateOnly) { |
| 72 | data <-substring(data, regexpr("pubDate", data)+7) |
| 73 | } else if(excludePubDate) { |
| 74 | data <-substring(data, 1, regexpr("pubDate", data)) |
| 75 | } |
Marc Kupietz | 95240e9 | 2019-11-27 18:19:04 +0100 | [diff] [blame] | 76 | leftCommon = lcpCount(data) |
Marc Kupietz | 62d29a1 | 2020-01-18 12:38:36 +0100 | [diff] [blame] | 77 | while (leftCommon > 0 && grepl("[[:alnum:]/=.*!]", substring(data[1], leftCommon, leftCommon))) { |
Marc Kupietz | 95240e9 | 2019-11-27 18:19:04 +0100 | [diff] [blame] | 78 | leftCommon <- leftCommon - 1 |
| 79 | } |
| 80 | rightCommon = lcsCount(data) |
Marc Kupietz | 62d29a1 | 2020-01-18 12:38:36 +0100 | [diff] [blame] | 81 | while (rightCommon > 0 && grepl("[[:alnum:]/=.*!]", substring(data[1], 1+nchar(data[1]) - rightCommon, 1+nchar(data[1]) - rightCommon))) { |
Marc Kupietz | 95240e9 | 2019-11-27 18:19:04 +0100 | [diff] [blame] | 82 | rightCommon <- rightCommon - 1 |
| 83 | } |
| 84 | substring(data, leftCommon + 1, nchar(data) - rightCommon) |
| 85 | } |
| 86 | |
Marc Kupietz | bb7d232 | 2019-10-06 21:42:34 +0200 | [diff] [blame] | 87 | |
Marc Kupietz | 865760f | 2019-10-07 19:29:44 +0200 | [diff] [blame] | 88 | ## Mute notes: "Undefined global functions or variables:" |
| 89 | globalVariables(c("conf.high", "conf.low", "onRender", "webUIRequestUrl")) |
| 90 | |
| 91 | |
| 92 | #' Experimental: Plot frequency by year graphs with confidence intervals |
Marc Kupietz | d68f971 | 2019-10-06 21:48:00 +0200 | [diff] [blame] | 93 | #' |
Marc Kupietz | 865760f | 2019-10-07 19:29:44 +0200 | [diff] [blame] | 94 | #' Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using ggplot2. |
| 95 | #' \bold{Warning:} This function may be moved to a new package. |
| 96 | #' |
| 97 | #' @param mapping Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping. |
| 98 | #' @param ... Other arguments passed to geom_ribbon, geom_line, and geom_click_point. |
Marc Kupietz | d68f971 | 2019-10-06 21:48:00 +0200 | [diff] [blame] | 99 | #' |
| 100 | #' @examples |
| 101 | #' library(ggplot2) |
| 102 | #' kco <- new("KorAPConnection", verbose=TRUE) |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 103 | #' \donttest{ |
Marc Kupietz | d68f971 | 2019-10-06 21:48:00 +0200 | [diff] [blame] | 104 | #' expand_grid(condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"), |
Marc Kupietz | 2fbac3d | 2020-01-18 11:01:21 +0100 | [diff] [blame] | 105 | #' year = (2005:2011)) %>% |
Marc Kupietz | d68f971 | 2019-10-06 21:48:00 +0200 | [diff] [blame] | 106 | #' cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]", |
| 107 | #' paste0(.$condition," & pubDate in ", .$year))) %>% |
| 108 | #' ipm() %>% |
Marc Kupietz | 865760f | 2019-10-07 19:29:44 +0200 | [diff] [blame] | 109 | #' ggplot(aes(year, ipm, fill = condition, color = condition)) + |
Marc Kupietz | d68f971 | 2019-10-06 21:48:00 +0200 | [diff] [blame] | 110 | #' geom_freq_by_year_ci() |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 111 | #' } |
Marc Kupietz | 865760f | 2019-10-07 19:29:44 +0200 | [diff] [blame] | 112 | #' @importFrom ggplot2 ggplot aes geom_ribbon geom_line geom_point theme element_text scale_x_continuous |
Marc Kupietz | d68f971 | 2019-10-06 21:48:00 +0200 | [diff] [blame] | 113 | #' |
| 114 | #' @export |
Marc Kupietz | 865760f | 2019-10-07 19:29:44 +0200 | [diff] [blame] | 115 | geom_freq_by_year_ci <- function(mapping = aes(ymin=conf.low, ymax=conf.high), ...) { |
Marc Kupietz | d68f971 | 2019-10-06 21:48:00 +0200 | [diff] [blame] | 116 | list( |
Marc Kupietz | 865760f | 2019-10-07 19:29:44 +0200 | [diff] [blame] | 117 | geom_ribbon(mapping, |
| 118 | alpha = .3, linetype = 0, show.legend = FALSE, ...), |
| 119 | geom_line(...), |
| 120 | geom_click_point(aes(url=webUIRequestUrl), ...), |
| 121 | theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = c(0.8, 0.2)), |
Marc Kupietz | d68f971 | 2019-10-06 21:48:00 +0200 | [diff] [blame] | 122 | scale_x_continuous(breaks = function(x) seq(ceiling(x[1]), floor(x[2]), by = 1 + floor(((x[2]-x[1])/30))))) |
| 123 | } |
| 124 | |
Marc Kupietz | 865760f | 2019-10-07 19:29:44 +0200 | [diff] [blame] | 125 | #' @importFrom ggplot2 ggproto aes GeomPoint |
| 126 | GeomClickPoint <- ggproto( |
| 127 | "GeomPoint", |
| 128 | GeomPoint, |
| 129 | required_aes = c("x", "y"), |
| 130 | default_aes = aes( |
| 131 | shape = 19, colour = "black", size = 1.5, fill = NA, |
| 132 | alpha = NA, stroke = 0.5, url = NA |
| 133 | ), |
| 134 | extra_params = c("na.rm", "url"), |
| 135 | draw_panel = function(data, panel_params, |
| 136 | coord, na.rm = FALSE, showpoints = TRUE, url = NULL) { |
| 137 | GeomPoint$draw_panel(data, panel_params, coord, na.rm = na.rm) |
| 138 | } |
| 139 | ) |
| 140 | |
| 141 | #' @importFrom ggplot2 layer |
| 142 | geom_click_point <- function(mapping = NULL, data = NULL, stat = "identity", |
| 143 | position = "identity", na.rm = FALSE, show.legend = NA, |
| 144 | inherit.aes = TRUE, url = NA, ...) { |
| 145 | layer( |
| 146 | geom = GeomClickPoint, mapping = mapping, data = data, stat = stat, |
| 147 | position = position, show.legend = show.legend, inherit.aes = inherit.aes, |
| 148 | params = list(na.rm = na.rm, ...) |
| 149 | ) |
| 150 | } |
| 151 | |
| 152 | |
| 153 | #' @importFrom htmlwidgets onRender |
| 154 | tooltip2hyperlink <- function(p, attribute="webUIRequestUrl") { |
| 155 | pattern <- paste0(attribute, ": ([^<]+)") |
| 156 | for(i in grep(attribute, p$x$data)) { |
| 157 | x <- p[["x"]][["data"]][[i]][["text"]] |
| 158 | m <- regexpr(pattern, x) |
| 159 | matches <- sub(paste0(attribute, ": "), "", regmatches(x, m)) |
| 160 | p$x$data[[i]]$customdata <- matches |
| 161 | p[["x"]][["data"]][[i]][["text"]] <- sub(paste0(attribute, ":[^<]*<br ?/?>"), "", p[["x"]][["data"]][[i]][["text"]] ) |
| 162 | } |
| 163 | onRender(p, "function(el, x) { el.on('plotly_click', function(d) { var url=d.points[0].customdata; if(url) { window.open(url, 'korap') } })}") |
| 164 | } |
| 165 | |
| 166 | #' Experimental: Convert ggplot2 to plotly with hyperlinks to KorAP queries |
| 167 | #' |
| 168 | #' \code{RKorAPClient::ggplotly} converts a \code{ggplot2::ggplot()} object to a plotly |
| 169 | #' object with hyperlinks from data points to corresponding KorAP queries. |
| 170 | #' \bold{Warning:} This function may be moved to a new package. |
| 171 | #' |
| 172 | #' @param p a ggplot object. |
| 173 | #' @param tooltip a character vector specifying which aesthetic mappings to show |
| 174 | #' in the tooltip. If you want hyperlinks to KorAP queries you need to include |
| 175 | #' \code{"url"} here. |
| 176 | #' @param ... Other arguments passed to \code{plotly::ggplotly} |
| 177 | #' |
| 178 | #' @examples |
| 179 | #' library(ggplot2) |
| 180 | #' kco <- new("KorAPConnection", verbose=TRUE) |
Marc Kupietz | f9ebbff | 2020-02-19 08:28:22 +0100 | [diff] [blame] | 181 | #' \donttest{year = (2003:2011)}\dontshow{year = c(2005)} |
Marc Kupietz | 865760f | 2019-10-07 19:29:44 +0200 | [diff] [blame] | 182 | #' g <- expand_grid(condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"), |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 183 | #' year) %>% |
Marc Kupietz | 865760f | 2019-10-07 19:29:44 +0200 | [diff] [blame] | 184 | #' cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]", |
| 185 | #' paste0(.$condition," & pubDate in ", .$year))) %>% |
| 186 | #' ipm() %>% |
| 187 | #' ggplot(aes(year, ipm, fill = condition, color = condition)) + |
Marc Kupietz | 05b2277 | 2020-02-18 21:58:42 +0100 | [diff] [blame] | 188 | #' ## theme_light(base_size = 20) + |
Marc Kupietz | 865760f | 2019-10-07 19:29:44 +0200 | [diff] [blame] | 189 | #' geom_freq_by_year_ci() |
| 190 | #' p <- ggplotly(g) |
| 191 | #' print(p) |
| 192 | #' ## saveWidget(p, paste0(tmpdir(), "heuschrecke.html") |
| 193 | #' |
| 194 | #' |
| 195 | #' @importFrom plotly ggplotly |
| 196 | #' @importFrom htmlwidgets saveWidget |
| 197 | #' @export |
| 198 | ggplotly <- function(p = ggplot2::last_plot(), tooltip = c("x", "y", "colour", "url"), ...) { |
| 199 | pp <- plotly::ggplotly(p = p, tooltip = tooltip, ...) |
| 200 | tooltip2hyperlink(pp) |
| 201 | } |