blob: 4c40a927f6bc588c31e1bb29290392a539659207 [file] [log] [blame]
Marc Kupietz91145b02020-01-29 15:58:36 +01001#' Experimental: Plot interactive frequency by year graphs with confidence intervals using highcharter
2#'
3#' Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using highcharter.
4#' \bold{Warning:} This function may be moved to a new package.
5#'
6#' @import highcharter
7#' @export
8#'
9#' @param df data frame like the value of a \code{\link{frequencyQuery}}
Marc Kupietz43a6ade2020-02-18 17:01:44 +010010#' @param as.alternatives boolean decides whether queries should be treated as mutually exclusive and exhaustive wrt. to some meaningful class (e.g. spelling variants of a certain word form).
Marc Kupietz91145b02020-01-29 15:58:36 +010011#' @param ylabel defaults to \% if \code{as.alternatives} is \code{true} and to "ipm" otherwise.
Marc Kupietz5b503f42020-04-09 15:26:00 +020012#' @param ... additional arguments passed to \code{\link{hc_add_series}}
Marc Kupietz91145b02020-01-29 15:58:36 +010013#'
14#' @examples
Marc Kupietz657d8e72020-02-25 18:31:50 +010015#' \donttest{year <- c(1990:2018)}\dontshow{year <- c(2013:2013)}
16#' \donttest{alternatives <- c("macht []{0,3} Sinn", "ergibt []{0,3} Sinn")}\dontshow{alternatives <- c("macht []{0,3} Sinn")}
Marc Kupietz91145b02020-01-29 15:58:36 +010017#' new("KorAPConnection", verbose = TRUE) %>%
Marc Kupietz657d8e72020-02-25 18:31:50 +010018#' frequencyQuery(query = alternatives,
Marc Kupietz05b22772020-02-18 21:58:42 +010019#' vc = paste("textType = /Zeit.*/ & pubDate in", year),
Marc Kupietz91145b02020-01-29 15:58:36 +010020#' as.alternatives = TRUE) %>%
21#' hc_freq_by_year_ci(as.alternatives = TRUE)
22#'
Marc Kupietz05b22772020-02-18 21:58:42 +010023#' \donttest{
Marc Kupietz10f65c42020-01-31 15:18:24 +010024#' kco <- new("KorAPConnection", verbose = TRUE)
25#' expand_grid(
26#' condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
27#' year = (2005:2011)
28#' ) %>%
29#' cbind(frequencyQuery(
30#' kco,
31#' "[tt/l=Heuschrecke]",
32#' paste0(.$condition, " & pubDate in ", .$year)
33#' )) %>%
34#' hc_freq_by_year_ci()
Marc Kupietz05b22772020-02-18 21:58:42 +010035#' }
Marc Kupietz10f65c42020-01-31 15:18:24 +010036#'
Marc Kupietz5b503f42020-04-09 15:26:00 +020037hc_freq_by_year_ci <- function(df, as.alternatives = FALSE, ylabel = if(as.alternatives) "%" else "ipm", ...) {
Marc Kupietz91145b02020-01-29 15:58:36 +010038 title <- ""
39 df <- df %>%
40 { if(! as.alternatives) ipm(.) else RKorAPClient::percent(.) }
41
42 if (!"year" %in% colnames(df)) {
Marc Kupietzcf1771d2020-03-04 16:03:04 +010043 df <- df %>% mutate(year = as.integer(queryStringToLabel(df$vc, pubDateOnly = TRUE)))
Marc Kupietz91145b02020-01-29 15:58:36 +010044 }
45 if (!"condition" %in% colnames(df)) {
46 if (length(base::unique(df$query)) > 1) {
47 df <- df %>% mutate(condition = query)
Marc Kupietzcf1771d2020-03-04 16:03:04 +010048 if(length(base::unique(queryStringToLabel(df$vc, excludePubDate = TRUE ))) > 1) {
Marc Kupietz91145b02020-01-29 15:58:36 +010049 df <- df %>% mutate(condition = paste(condition, " & ",
Marc Kupietzcf1771d2020-03-04 16:03:04 +010050 queryStringToLabel(vc, excludePubDate = TRUE )))
Marc Kupietz91145b02020-01-29 15:58:36 +010051 }
52 } else {
Marc Kupietz5d70ffe2020-03-12 11:16:43 +010053 if (length(base::unique(queryStringToLabel(df$vc, excludePubDate = TRUE ))) > 1) {
54 title <- base::unique(df$query)
Marc Kupietzcf1771d2020-03-04 16:03:04 +010055 df <- df %>% mutate(condition = queryStringToLabel(vc, excludePubDate = TRUE ))
Marc Kupietz5d70ffe2020-03-12 11:16:43 +010056 } else {
57 df <- df %>% mutate(condition = query)
Marc Kupietz91145b02020-01-29 15:58:36 +010058 }
59 }
60 }
61 # use the D3 palette which provides 20 attractive and distinguishable colours
62 palette <- c("#1F77B4", "#FF7F0E", "#2CA02C", "#D62728", "#9467BD", "#8C564B", "#E377C2", "#7F7F7F", "#BCBD22", "#17BECF", "#AEC7E8", "#FFBB78", "#98DF8A", "#FF9896", "#C5B0D5", "#C49C94", "#F7B6D2", "#C7C7C7", "#DBDB8D", "#9EDAE5")
63 highcharter::highchart() %>%
64 hc_title(text=title) %>%
65 hc_chart(zoomType="xy") %>%
66 hc_yAxis(
67 title = list(text = if (as.alternatives) "" else ylabel),
68 ceiling = if (as.alternatives) 100 else NULL,
69 floor = 0,
70 labels = if(as.alternatives) list(format = paste0("{value}\U2009", ylabel)) else NULL
71 ) %>%
Marc Kupietzcf1771d2020-03-04 16:03:04 +010072 hc_xAxis(allowDecimals=FALSE) %>%
Marc Kupietz91145b02020-01-29 15:58:36 +010073 hc_add_theme(hc_theme_google(colors=palette)) %>%
74 hc_plotOptions(
Marc Kupietzcf1771d2020-03-04 16:03:04 +010075 series = list(enabled = TRUE),
Marc Kupietz91145b02020-01-29 15:58:36 +010076 line = list(cursor = 'pointer', point = list(events = list(
77 click = JS("function() { window.open(this.click, 'korap'); }")
78 )))) %>%
Marc Kupietzcf1771d2020-03-04 16:03:04 +010079 hc_credits(enabled = TRUE,
Marc Kupietz900d0522020-03-19 13:36:45 +010080 text = "KorAP R Client Package",
81 href = "https://github.com/KorAP/RKorAPClient/") %>%
Marc Kupietzcf1771d2020-03-04 16:03:04 +010082 hc_exporting(enabled = TRUE) %>%
Marc Kupietz91145b02020-01-29 15:58:36 +010083 hc_tooltip(
84 formatter = JS(paste0("function (tooltip) {
85 var str = tooltip.defaultFormatter.call(this, tooltip);
86 if(Array.isArray(str)) {
87 str = str.join('');
88 }
89 for (var i = 0; i < this.points.length; i++) {
90 str = str.replace(/([0-9.,]+.?)", ylabel, "/, this.points[i].point.count+' ($1@)');
91 }
92 return str.replace(/@/g, '", ylabel, "')
93 } ")),
Marc Kupietzcf1771d2020-03-04 16:03:04 +010094 crosshairs = TRUE,
Marc Kupietz91145b02020-01-29 15:58:36 +010095 valueDecimals = 2,
Marc Kupietzcf1771d2020-03-04 16:03:04 +010096 shared = TRUE,
Marc Kupietz91145b02020-01-29 15:58:36 +010097 valueSuffix = paste0('\U2009', ylabel)
98 ) %>%
Marc Kupietz5b503f42020-04-09 15:26:00 +020099 hc_add_series_korap_frequencies(df, as.alternatives, ...)
Marc Kupietz91145b02020-01-29 15:58:36 +0100100}
101
Marc Kupietza4f36532020-02-03 22:50:08 +0100102## Mute notes: "no visible binding for global variable:"
103globalVariables(c("value", "query", "condition", "vc"))
104
Marc Kupietz5b503f42020-04-09 15:26:00 +0200105hc_add_series_korap_frequencies <- function(hc, df, as.alternatives = FALSE, ...) {
Marc Kupietz91145b02020-01-29 15:58:36 +0100106 index <- 0
107 for(q in unique(df$condition)) {
108 dat <- df[df$condition==q,]
109 hc <- hc %>% hc_add_series(
110 marker = list(radius = 2),
111 name = q,
112 data = data.frame(
113 year = dat$year,
114 value = if (as.alternatives) dat$f else dat$ipm,
115 count = dat$totalResults,
116 click = dat$webUIRequestUrl
117 ),
118 hcaes(year, value),
119 type = 'line',
120 colorIndex = index,
Marc Kupietz5b503f42020-04-09 15:26:00 +0200121 zIndex = 1,
122 ...
Marc Kupietz91145b02020-01-29 15:58:36 +0100123 ) %>%
124 hc_add_series(
125 name = "ci",
126 data = dat[,c('year', 'conf.low', 'conf.high')],
127 hcaes(x = year, low = conf.low, high = conf.high),
128 type = 'arearange',
129 fillOpacity = 0.3,
130 lineWidth = 0,
Marc Kupietzcf1771d2020-03-04 16:03:04 +0100131 marker = list(enabled = FALSE),
132 enableMouseTracking = FALSE,
Marc Kupietz91145b02020-01-29 15:58:36 +0100133 linkedTo= ':previous',
134 colorIndex = index,
135 zIndex = 0
136 )
137 index <- index+1
138 }
139 hc
140}
Marc Kupietz882f08c2020-03-18 13:30:31 +0100141
142.onAttach <- function(libname = find.package("RKorAPClient"),
143 pkgname = "RKorAPClient") {
144 packageStartupMessage(
145 "If you intend to use the Highcharts plot options, please note that Highcharts (www.highcharts.com) is a Highsoft software product which is not free for commercial and governmental use."
146 )
147}
148