Merge remote-tracking branch 'origin/highcharter'
Change-Id: I98105c0315fc224838162774a6620d8e30263e68
diff --git a/DESCRIPTION b/DESCRIPTION
index 9f0b936..c47825f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -9,7 +9,7 @@
License: BSD_2_clause + file LICENSE
Encoding: UTF-8
LazyData: false
-RoxygenNote: 7.0.1
+RoxygenNote: 7.0.2
Imports:
R.cache,
broom,
@@ -21,6 +21,7 @@
purrr,
lubridate,
curl,
+ highcharter,
jsonlite,
keyring,
plotly,
@@ -35,5 +36,6 @@
'RKorAPClient.R'
'KorAPQuery.R'
'ci.R'
+ 'highcharter-helper.R'
'misc.R'
'reexports.R'
diff --git a/NAMESPACE b/NAMESPACE
index 8a84580..096d676 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -10,6 +10,7 @@
export(geom_freq_by_year_ci)
export(ggplotly)
export(group_by)
+export(hc_freq_by_year_ci)
export(ipm)
export(mutate)
export(percent)
@@ -34,6 +35,7 @@
exportMethods(persistAccessToken)
exportMethods(show)
import(R.cache)
+import(highcharter)
import(httr)
import(keyring)
import(methods)
diff --git a/R/KorAPCorpusStats.R b/R/KorAPCorpusStats.R
index 589d51e..55dbd15 100644
--- a/R/KorAPCorpusStats.R
+++ b/R/KorAPCorpusStats.R
@@ -45,9 +45,9 @@
paste0(kco@apiUrl,
'statistics?cq=',
URLencode(vc, reserved = TRUE))
- log.info(verbose, "Calculating size of corpus \"", vc, "\"", sep = "")
+ log.info(verbose, "Getting size of virtual corpus \"", vc, "\"", sep = "")
res <- apiCall(kco, url)
- log.info(verbose, "\n")
+ log.info(verbose, ": ", res$tokens, " tokens\n")
if (as.df)
data.frame(vc = vc, res, stringsAsFactors = FALSE)
else
diff --git a/R/ci.R b/R/ci.R
index 5410b3b..5bb81ea 100644
--- a/R/ci.R
+++ b/R/ci.R
@@ -29,16 +29,25 @@
#' ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
#' geom_point() + geom_line() + geom_ribbon(alpha=.3)
#'
-ci <- function(df, x = totalResults, N = total, conf.level = 0.95) {
+ci <- function(df,
+ x = totalResults,
+ N = total,
+ conf.level = 0.95) {
x <- enquo(x)
N <- enquo(N)
+ nas <- df %>%
+ dplyr::filter(total <= 0) %>%
+ mutate(f = NA, conf.low = NA, conf.high = NA)
df %>%
+ dplyr::filter(total > 0) %>%
rowwise %>%
- mutate(tst = list(broom::tidy(prop.test(!!x, !!N, conf.level = conf.level)) %>%
- select("estimate", "conf.low", "conf.high") %>%
- rename(f = estimate)
+ mutate(tst = list(
+ broom::tidy(prop.test(!!x,!!N, conf.level = conf.level)) %>%
+ select(estimate, conf.low, conf.high) %>%
+ rename(f = estimate)
)) %>%
- tidyr::unnest(tst)
+ tidyr::unnest(tst) %>%
+ bind_rows(nas)
}
## Mute notes: "Undefined global functions or variables:"
diff --git a/R/highcharter-helper.R b/R/highcharter-helper.R
new file mode 100644
index 0000000..981d4d7
--- /dev/null
+++ b/R/highcharter-helper.R
@@ -0,0 +1,132 @@
+#' Experimental: Plot interactive frequency by year graphs with confidence intervals using highcharter
+#'
+#' Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using highcharter.
+#' \bold{Warning:} This function may be moved to a new package.
+#'
+#' @import highcharter
+#' @export
+#'
+#' @param df data frame like the value of a \code{\link{frequencyQuery}}
+#' @param as.alternatives boolean decides whether queries should be treatet queries as mutually exclusive and exahustive wrt. to some meaningful class (e.g. spelling variants of a certain word form).
+#' @param ylabel defaults to \% if \code{as.alternatives} is \code{true} and to "ipm" otherwise.
+#'
+#' @examples
+#' new("KorAPConnection", verbose = TRUE) %>%
+#' frequencyQuery(query = c("macht []{0,3} Sinn", "ergibt []{0,3} Sinn"),
+#' vc = paste("textType = /Zeit.*/ & pubDate in", c(2010:2014)),
+#' as.alternatives = TRUE) %>%
+#' hc_freq_by_year_ci(as.alternatives = TRUE)
+#'
+#' kco <- new("KorAPConnection", verbose = TRUE)
+#' expand_grid(
+#' condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
+#' year = (2005:2011)
+#' ) %>%
+#' cbind(frequencyQuery(
+#' kco,
+#' "[tt/l=Heuschrecke]",
+#' paste0(.$condition, " & pubDate in ", .$year)
+#' )) %>%
+#' hc_freq_by_year_ci()
+#'
+hc_freq_by_year_ci <- function(df, as.alternatives = F, ylabel = if(as.alternatives) "%" else "ipm") {
+ title <- ""
+ df <- df %>%
+ { if(! as.alternatives) ipm(.) else RKorAPClient::percent(.) }
+
+ if (!"year" %in% colnames(df)) {
+ df <- df %>% mutate(year = as.integer(queryStringToLabel(df$vc, pubDateOnly = T)))
+ }
+ if (!"condition" %in% colnames(df)) {
+ if (length(base::unique(df$query)) > 1) {
+ df <- df %>% mutate(condition = query)
+ if(length(base::unique(queryStringToLabel(df$vc, excludePubDate = T ))) > 1) {
+ df <- df %>% mutate(condition = paste(condition, " & ",
+ queryStringToLabel(vc, excludePubDate = T )))
+ }
+ } else {
+ title <- base::unique(df$query)
+ if(length(base::unique(queryStringToLabel(df$vc, excludePubDate = T ))) > 1) {
+ df <- df %>% mutate(condition = queryStringToLabel(vc, excludePubDate = T ))
+ }
+ }
+ }
+ # use the D3 palette which provides 20 attractive and distinguishable colours
+ palette <- c("#1F77B4", "#FF7F0E", "#2CA02C", "#D62728", "#9467BD", "#8C564B", "#E377C2", "#7F7F7F", "#BCBD22", "#17BECF", "#AEC7E8", "#FFBB78", "#98DF8A", "#FF9896", "#C5B0D5", "#C49C94", "#F7B6D2", "#C7C7C7", "#DBDB8D", "#9EDAE5")
+ highcharter::highchart() %>%
+ hc_title(text=title) %>%
+ hc_chart(zoomType="xy") %>%
+ hc_yAxis(
+ title = list(text = if (as.alternatives) "" else ylabel),
+ ceiling = if (as.alternatives) 100 else NULL,
+ floor = 0,
+ labels = if(as.alternatives) list(format = paste0("{value}\U2009", ylabel)) else NULL
+ ) %>%
+ hc_xAxis(allowDecimals=F) %>%
+ hc_add_theme(hc_theme_google(colors=palette)) %>%
+ hc_plotOptions(
+ series = list(enabled = T),
+ line = list(cursor = 'pointer', point = list(events = list(
+ click = JS("function() { window.open(this.click, 'korap'); }")
+ )))) %>%
+ hc_credits(enabled = T,
+ text = "KorAP R Client Pakckage",
+ href = "//github.com/KorAP/RKorAPClient/") %>%
+ hc_exporting(enabled = T) %>%
+ hc_tooltip(
+ formatter = JS(paste0("function (tooltip) {
+ var str = tooltip.defaultFormatter.call(this, tooltip);
+ if(Array.isArray(str)) {
+ str = str.join('');
+ }
+ for (var i = 0; i < this.points.length; i++) {
+ str = str.replace(/([0-9.,]+.?)", ylabel, "/, this.points[i].point.count+' ($1@)');
+ }
+ return str.replace(/@/g, '", ylabel, "')
+ } ")),
+ crosshairs = T,
+ valueDecimals = 2,
+ shared = T,
+ valueSuffix = paste0('\U2009', ylabel)
+ ) %>%
+ hc_add_series_korap_frequencies(df, as.alternatives)
+}
+
+## Mute notes: "no visible binding for global variable:"
+globalVariables(c("value", "query", "condition", "vc"))
+
+hc_add_series_korap_frequencies <- function(hc, df, as.alternatives = F) {
+ index <- 0
+ for(q in unique(df$condition)) {
+ dat <- df[df$condition==q,]
+ hc <- hc %>% hc_add_series(
+ marker = list(radius = 2),
+ name = q,
+ data = data.frame(
+ year = dat$year,
+ value = if (as.alternatives) dat$f else dat$ipm,
+ count = dat$totalResults,
+ click = dat$webUIRequestUrl
+ ),
+ hcaes(year, value),
+ type = 'line',
+ colorIndex = index,
+ zIndex = 1
+ ) %>%
+ hc_add_series(
+ name = "ci",
+ data = dat[,c('year', 'conf.low', 'conf.high')],
+ hcaes(x = year, low = conf.low, high = conf.high),
+ type = 'arearange',
+ fillOpacity = 0.3,
+ lineWidth = 0,
+ marker = list(enabled = F),
+ enableMouseTracking = F,
+ linkedTo= ':previous',
+ colorIndex = index,
+ zIndex = 0
+ )
+ index <- index+1
+ }
+ hc
+}
diff --git a/Readme.md b/Readme.md
index 1d845f8..70a92f6 100644
--- a/Readme.md
+++ b/Readme.md
@@ -28,7 +28,8 @@
new("KorAPConnection", verbose=TRUE) %>% corpusQuery("Hello world") %>% fetchAll()
```
-## Example
+## Examples
+### Frequencies over time and domains using ggplot2
```r
library(RKorAPClient)
library(ggplot2)
@@ -42,6 +43,19 @@
```
![](man/figures/Readme-Example-1.png)<!-- -->
+### Percentages over time using [highcharter](http://jkunst.com/highcharter/)
+```r
+library(RKorAPClient)
+query = c("macht []{0,3} Sinn", "ergibt []{0,3} Sinn")
+years = c(1980:2010)
+as.alternatives = TRUE
+vc = "textType = /Zeit.*/ & pubDate in"
+new("KorAPConnection", verbose=T) %>%
+ frequencyQuery(query, paste(vc, years), as.alternatives = as.alternatives) %>%
+ hc_freq_by_year_ci(as.alternatives)
+```
+[![Proportion of "ergibt … Sinn" versus "macht … Sinn" between 1980 and 2010 in newspapers and magazines](man/figures/Readme-Example-2.png)<!-- -->](https://korap.github.io/RKorAPClient/man/figures/Readme-Example-2.html)
+
## Demos
More elaborate R scripts demonstrating the use of the package can be found in the [demo](demo) folder.
diff --git a/demo/highcharter-example.R b/demo/highcharter-example.R
index cdf1362..4d312ba 100644
--- a/demo/highcharter-example.R
+++ b/demo/highcharter-example.R
@@ -1,104 +1,4 @@
library(RKorAPClient)
-library(highcharter)
-
-hc_freq_by_year_ci <- function(df, as.alternatives = F, ylabel = if(as.alternatives) "%" else "ipm") {
- title <- ""
- df <- df %>%
- { if(! as.alternatives) ipm(.) else RKorAPClient::percent(.) }
-
- if (!"year" %in% colnames(df)) {
- df <- df %>% mutate(year = as.integer(queryStringToLabel(df$vc, pubDateOnly = T)))
- }
- if (!"condition" %in% colnames(df)) {
- if (length(base::unique(df$query)) > 1) {
- df <- df %>% mutate(condition = query)
- if(length(base::unique(queryStringToLabel(df$vc, excludePubDate = T ))) > 1) {
- df <- df %>% mutate(condition = paste(condition, " & ",
- queryStringToLabel(vc, excludePubDate = T )))
- }
- } else {
- title <- base::unique(df$query)
- if(length(base::unique(queryStringToLabel(df$vc, excludePubDate = T ))) > 1) {
- df <- df %>% mutate(condition = queryStringToLabel(vc, excludePubDate = T ))
- }
- }
- }
- # use the D3 palette which provides 20 attractive and distinguishable colours
- palette <- c("#1F77B4", "#FF7F0E", "#2CA02C", "#D62728", "#9467BD", "#8C564B", "#E377C2", "#7F7F7F", "#BCBD22", "#17BECF", "#AEC7E8", "#FFBB78", "#98DF8A", "#FF9896", "#C5B0D5", "#C49C94", "#F7B6D2", "#C7C7C7", "#DBDB8D", "#9EDAE5")
- highchart() %>%
- hc_title(text=title) %>%
- hc_chart(zoomType="xy") %>%
- hc_yAxis(
- title = list(text = if (as.alternatives) "" else ylabel),
- ceiling = if (as.alternatives) 100 else NULL,
- floor = 0,
- labels = if(as.alternatives) list(format = paste0("{value}\U2009", ylabel)) else NULL
- ) %>%
- hc_xAxis(allowDecimals=F) %>%
- hc_add_theme(hc_theme_google(colors=palette)) %>%
- hc_plotOptions(
- series = list(enabled = T),
- line = list(cursor = 'pointer', point = list(events = list(
- click = JS("function() { window.open(this.click, 'korap'); }")
- )))) %>%
- hc_credits(enabled = T,
- text = "KorAP R Client Pakckage",
- href = "//github.com/KorAP/RKorAPClient/") %>%
- hc_exporting(enabled = T) %>%
- hc_tooltip(
- formatter = JS(paste0("function (tooltip) {
- var str = tooltip.defaultFormatter.call(this, tooltip);
- if(Array.isArray(str)) {
- str = str.join('');
- }
- for (var i = 0; i < this.points.length; i++) {
- str = str.replace(/([0-9.,]+.?)", ylabel, "/, this.points[i].point.count+' ($1@)');
- }
- return str.replace(/@/g, '", ylabel, "')
- } ")),
- crosshairs = T,
- valueDecimals = 2,
- shared = T,
- valueSuffix = paste0('\U2009', ylabel)
- ) %>%
- hc_add_series_korap_frequencies(df, as.alternatives)
-}
-
-hc_add_series_korap_frequencies <- function(hc, df, as.alternatives = F) {
- index <- 0
- for(q in unique(df$condition)) {
- dat <- df[df$condition==q,]
- hc <- hc %>% hc_add_series(
- marker = list(radius = 2),
- name = q,
- data = data.frame(
- year = dat$year,
- value = if (as.alternatives) dat$f else dat$ipm,
- count = dat$totalResults,
- click = dat$webUIRequestUrl
- ),
- hcaes(year, value),
- type = 'line',
- colorIndex = index,
- zIndex = 1
- ) %>%
- hc_add_series(
- name = "ci",
- data = dat[,c('year', 'conf.low', 'conf.high')],
- hcaes(x = year, low = conf.low, high = conf.high),
- type = 'arearange',
- fillOpacity = 0.3,
- lineWidth = 0,
- marker = list(enabled = F),
- enableMouseTracking = F,
- linkedTo= ':previous',
- colorIndex = index,
- zIndex = 0
- )
- index <- index+1
- }
- hc
-}
plotHighchart <- function(query = "Schlumpf",
years = c(2000:2010),
@@ -120,3 +20,4 @@
h1 <- plotHighchart(c("Leser | Lesern | Lesers", 'Leserin | Leserinnen', 'LeserIn | LeserInnen', '"Leser[_\\*]in.*"'), c(1985:2018), as.alternatives = F)
#plotHighchart(c("Tollpatsch", "Tolpatsch"), c(1991:2018))
+
diff --git a/man/figures/Readme-Example-2.png b/man/figures/Readme-Example-2.png
new file mode 100644
index 0000000..3a9ed19
--- /dev/null
+++ b/man/figures/Readme-Example-2.png
Binary files differ
diff --git a/man/hc_freq_by_year_ci.Rd b/man/hc_freq_by_year_ci.Rd
new file mode 100644
index 0000000..8daa145
--- /dev/null
+++ b/man/hc_freq_by_year_ci.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/highcharter-helper.R
+\name{hc_freq_by_year_ci}
+\alias{hc_freq_by_year_ci}
+\title{Experimental: Plot interactive frequency by year graphs with confidence intervals using highcharter}
+\usage{
+hc_freq_by_year_ci(
+ df,
+ as.alternatives = F,
+ ylabel = if (as.alternatives) "\%" else "ipm"
+)
+}
+\arguments{
+\item{df}{data frame like the value of a \code{\link{frequencyQuery}}}
+
+\item{as.alternatives}{boolean decides whether queries should be treatet queries as mutually exclusive and exahustive wrt. to some meaningful class (e.g. spelling variants of a certain word form).}
+
+\item{ylabel}{defaults to \% if \code{as.alternatives} is \code{true} and to "ipm" otherwise.}
+}
+\description{
+Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using highcharter.
+\bold{Warning:} This function may be moved to a new package.
+}
+\examples{
+new("KorAPConnection", verbose = TRUE) \%>\%
+ frequencyQuery(query = c("macht []{0,3} Sinn", "ergibt []{0,3} Sinn"),
+ vc = paste("textType = /Zeit.*/ & pubDate in", c(2010:2014)),
+ as.alternatives = TRUE) \%>\%
+ hc_freq_by_year_ci(as.alternatives = TRUE)
+
+kco <- new("KorAPConnection", verbose = TRUE)
+expand_grid(
+ condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
+ year = (2005:2011)
+) \%>\%
+ cbind(frequencyQuery(
+ kco,
+ "[tt/l=Heuschrecke]",
+ paste0(.$condition, " & pubDate in ", .$year)
+ )) \%>\%
+ hc_freq_by_year_ci()
+
+}