blob: b33689a847fdc021037bdddc93714f9c76a8ec44 [file] [log] [blame]
Marc Kupietz91145b02020-01-29 15:58:36 +01001#' Experimental: Plot interactive frequency by year graphs with confidence intervals using highcharter
2#'
3#' Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using highcharter.
4#' \bold{Warning:} This function may be moved to a new package.
5#'
6#' @import highcharter
7#' @export
8#'
9#' @param df data frame like the value of a \code{\link{frequencyQuery}}
10#' @param as.alternatives boolean decides whether queries should be treatet queries as mutually exclusive and exahustive wrt. to some meaningful class (e.g. spelling variants of a certain word form).
11#' @param ylabel defaults to \% if \code{as.alternatives} is \code{true} and to "ipm" otherwise.
12#'
13#' @examples
14#' new("KorAPConnection", verbose = TRUE) %>%
15#' frequencyQuery(query = c("macht []{0,3} Sinn", "ergibt []{0,3} Sinn"),
16#' vc = paste("textType = /Zeit.*/ & pubDate in", c(2010:2014)),
17#' as.alternatives = TRUE) %>%
18#' hc_freq_by_year_ci(as.alternatives = TRUE)
19#'
Marc Kupietz10f65c42020-01-31 15:18:24 +010020#' kco <- new("KorAPConnection", verbose = TRUE)
21#' expand_grid(
22#' condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
23#' year = (2005:2011)
24#' ) %>%
25#' cbind(frequencyQuery(
26#' kco,
27#' "[tt/l=Heuschrecke]",
28#' paste0(.$condition, " & pubDate in ", .$year)
29#' )) %>%
30#' hc_freq_by_year_ci()
31#'
Marc Kupietz91145b02020-01-29 15:58:36 +010032hc_freq_by_year_ci <- function(df, as.alternatives = F, ylabel = if(as.alternatives) "%" else "ipm") {
33 title <- ""
34 df <- df %>%
35 { if(! as.alternatives) ipm(.) else RKorAPClient::percent(.) }
36
37 if (!"year" %in% colnames(df)) {
38 df <- df %>% mutate(year = as.integer(queryStringToLabel(df$vc, pubDateOnly = T)))
39 }
40 if (!"condition" %in% colnames(df)) {
41 if (length(base::unique(df$query)) > 1) {
42 df <- df %>% mutate(condition = query)
43 if(length(base::unique(queryStringToLabel(df$vc, excludePubDate = T ))) > 1) {
44 df <- df %>% mutate(condition = paste(condition, " & ",
45 queryStringToLabel(vc, excludePubDate = T )))
46 }
47 } else {
48 title <- base::unique(df$query)
49 if(length(base::unique(queryStringToLabel(df$vc, excludePubDate = T ))) > 1) {
50 df <- df %>% mutate(condition = queryStringToLabel(vc, excludePubDate = T ))
51 }
52 }
53 }
54 # use the D3 palette which provides 20 attractive and distinguishable colours
55 palette <- c("#1F77B4", "#FF7F0E", "#2CA02C", "#D62728", "#9467BD", "#8C564B", "#E377C2", "#7F7F7F", "#BCBD22", "#17BECF", "#AEC7E8", "#FFBB78", "#98DF8A", "#FF9896", "#C5B0D5", "#C49C94", "#F7B6D2", "#C7C7C7", "#DBDB8D", "#9EDAE5")
56 highcharter::highchart() %>%
57 hc_title(text=title) %>%
58 hc_chart(zoomType="xy") %>%
59 hc_yAxis(
60 title = list(text = if (as.alternatives) "" else ylabel),
61 ceiling = if (as.alternatives) 100 else NULL,
62 floor = 0,
63 labels = if(as.alternatives) list(format = paste0("{value}\U2009", ylabel)) else NULL
64 ) %>%
65 hc_xAxis(allowDecimals=F) %>%
66 hc_add_theme(hc_theme_google(colors=palette)) %>%
67 hc_plotOptions(
68 series = list(enabled = T),
69 line = list(cursor = 'pointer', point = list(events = list(
70 click = JS("function() { window.open(this.click, 'korap'); }")
71 )))) %>%
72 hc_credits(enabled = T,
73 text = "KorAP R Client Pakckage",
74 href = "//github.com/KorAP/RKorAPClient/") %>%
75 hc_exporting(enabled = T) %>%
76 hc_tooltip(
77 formatter = JS(paste0("function (tooltip) {
78 var str = tooltip.defaultFormatter.call(this, tooltip);
79 if(Array.isArray(str)) {
80 str = str.join('');
81 }
82 for (var i = 0; i < this.points.length; i++) {
83 str = str.replace(/([0-9.,]+.?)", ylabel, "/, this.points[i].point.count+' ($1@)');
84 }
85 return str.replace(/@/g, '", ylabel, "')
86 } ")),
87 crosshairs = T,
88 valueDecimals = 2,
89 shared = T,
90 valueSuffix = paste0('\U2009', ylabel)
91 ) %>%
92 hc_add_series_korap_frequencies(df, as.alternatives)
93}
94
95hc_add_series_korap_frequencies <- function(hc, df, as.alternatives = F) {
96 index <- 0
97 for(q in unique(df$condition)) {
98 dat <- df[df$condition==q,]
99 hc <- hc %>% hc_add_series(
100 marker = list(radius = 2),
101 name = q,
102 data = data.frame(
103 year = dat$year,
104 value = if (as.alternatives) dat$f else dat$ipm,
105 count = dat$totalResults,
106 click = dat$webUIRequestUrl
107 ),
108 hcaes(year, value),
109 type = 'line',
110 colorIndex = index,
111 zIndex = 1
112 ) %>%
113 hc_add_series(
114 name = "ci",
115 data = dat[,c('year', 'conf.low', 'conf.high')],
116 hcaes(x = year, low = conf.low, high = conf.high),
117 type = 'arearange',
118 fillOpacity = 0.3,
119 lineWidth = 0,
120 marker = list(enabled = F),
121 enableMouseTracking = F,
122 linkedTo= ':previous',
123 colorIndex = index,
124 zIndex = 0
125 )
126 index <- index+1
127 }
128 hc
129}