Merge remote-tracking branch 'origin/highcharter'

Change-Id: I98105c0315fc224838162774a6620d8e30263e68
diff --git a/DESCRIPTION b/DESCRIPTION
index 9f0b936..c47825f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -9,7 +9,7 @@
 License: BSD_2_clause + file LICENSE
 Encoding: UTF-8
 LazyData: false
-RoxygenNote: 7.0.1
+RoxygenNote: 7.0.2
 Imports:
     R.cache,
     broom,
@@ -21,6 +21,7 @@
     purrr,
     lubridate,
     curl,
+    highcharter,
     jsonlite,
     keyring,
     plotly,
@@ -35,5 +36,6 @@
     'RKorAPClient.R'
     'KorAPQuery.R'
     'ci.R'
+    'highcharter-helper.R'
     'misc.R'
     'reexports.R'
diff --git a/NAMESPACE b/NAMESPACE
index 8a84580..096d676 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -10,6 +10,7 @@
 export(geom_freq_by_year_ci)
 export(ggplotly)
 export(group_by)
+export(hc_freq_by_year_ci)
 export(ipm)
 export(mutate)
 export(percent)
@@ -34,6 +35,7 @@
 exportMethods(persistAccessToken)
 exportMethods(show)
 import(R.cache)
+import(highcharter)
 import(httr)
 import(keyring)
 import(methods)
diff --git a/R/KorAPCorpusStats.R b/R/KorAPCorpusStats.R
index 589d51e..55dbd15 100644
--- a/R/KorAPCorpusStats.R
+++ b/R/KorAPCorpusStats.R
@@ -45,9 +45,9 @@
       paste0(kco@apiUrl,
              'statistics?cq=',
              URLencode(vc, reserved = TRUE))
-    log.info(verbose, "Calculating size of corpus \"", vc, "\"", sep = "")
+    log.info(verbose, "Getting size of virtual corpus \"", vc, "\"", sep = "")
     res <- apiCall(kco, url)
-    log.info(verbose, "\n")
+    log.info(verbose, ": ", res$tokens, " tokens\n")
     if (as.df)
       data.frame(vc = vc, res, stringsAsFactors = FALSE)
     else
diff --git a/R/ci.R b/R/ci.R
index 5410b3b..5bb81ea 100644
--- a/R/ci.R
+++ b/R/ci.R
@@ -29,16 +29,25 @@
 #'   ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
 #'     geom_point() + geom_line() + geom_ribbon(alpha=.3)
 #'
-ci <- function(df, x = totalResults, N = total, conf.level = 0.95) {
+ci <- function(df,
+               x = totalResults,
+               N = total,
+               conf.level = 0.95) {
   x <- enquo(x)
   N <- enquo(N)
+  nas <- df %>%
+    dplyr::filter(total <= 0) %>%
+    mutate(f = NA, conf.low = NA, conf.high = NA)
   df %>%
+    dplyr::filter(total > 0) %>%
     rowwise %>%
-    mutate(tst = list(broom::tidy(prop.test(!!x, !!N, conf.level = conf.level)) %>%
-                        select("estimate", "conf.low", "conf.high") %>%
-                        rename(f = estimate)
+    mutate(tst = list(
+      broom::tidy(prop.test(!!x,!!N, conf.level = conf.level)) %>%
+        select(estimate, conf.low, conf.high) %>%
+        rename(f = estimate)
     )) %>%
-    tidyr::unnest(tst)
+    tidyr::unnest(tst) %>%
+    bind_rows(nas)
 }
 
 ## Mute notes: "Undefined global functions or variables:"
diff --git a/R/highcharter-helper.R b/R/highcharter-helper.R
new file mode 100644
index 0000000..981d4d7
--- /dev/null
+++ b/R/highcharter-helper.R
@@ -0,0 +1,132 @@
+#' Experimental: Plot interactive frequency by year graphs with confidence intervals using highcharter
+#'
+#' Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using highcharter.
+#' \bold{Warning:} This function may be moved to a new package.
+#'
+#' @import highcharter
+#' @export
+#'
+#' @param df data frame like the value of a \code{\link{frequencyQuery}}
+#' @param as.alternatives boolean decides whether queries should be treatet queries as mutually exclusive and exahustive wrt. to some meaningful class (e.g. spelling variants of a certain word form).
+#' @param ylabel defaults to \% if \code{as.alternatives} is \code{true} and to "ipm" otherwise.
+#'
+#' @examples
+#' new("KorAPConnection", verbose = TRUE) %>%
+#'   frequencyQuery(query = c("macht []{0,3} Sinn", "ergibt []{0,3} Sinn"),
+#'                  vc = paste("textType = /Zeit.*/ & pubDate in", c(2010:2014)),
+#'                  as.alternatives = TRUE) %>%
+#'   hc_freq_by_year_ci(as.alternatives = TRUE)
+#'
+#' kco <- new("KorAPConnection", verbose = TRUE)
+#' expand_grid(
+#'   condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
+#'   year = (2005:2011)
+#' ) %>%
+#'   cbind(frequencyQuery(
+#'     kco,
+#'     "[tt/l=Heuschrecke]",
+#'     paste0(.$condition, " & pubDate in ", .$year)
+#'   ))  %>%
+#'   hc_freq_by_year_ci()
+#'
+hc_freq_by_year_ci <- function(df, as.alternatives = F, ylabel = if(as.alternatives) "%" else "ipm") {
+  title <- ""
+  df <- df %>%
+    { if(! as.alternatives) ipm(.) else RKorAPClient::percent(.) }
+
+  if (!"year" %in% colnames(df)) {
+    df <- df %>% mutate(year = as.integer(queryStringToLabel(df$vc, pubDateOnly = T)))
+  }
+  if (!"condition" %in% colnames(df)) {
+    if (length(base::unique(df$query)) > 1) {
+      df <- df %>% mutate(condition = query)
+      if(length(base::unique(queryStringToLabel(df$vc, excludePubDate = T ))) > 1) {
+        df <- df %>% mutate(condition = paste(condition, " & ",
+                                              queryStringToLabel(vc, excludePubDate = T )))
+      }
+    } else {
+      title <- base::unique(df$query)
+      if(length(base::unique(queryStringToLabel(df$vc, excludePubDate = T ))) > 1) {
+        df <- df %>% mutate(condition = queryStringToLabel(vc, excludePubDate = T ))
+      }
+    }
+  }
+  # use the D3 palette which provides 20 attractive and distinguishable colours
+  palette <- c("#1F77B4", "#FF7F0E", "#2CA02C", "#D62728", "#9467BD", "#8C564B", "#E377C2", "#7F7F7F", "#BCBD22", "#17BECF", "#AEC7E8", "#FFBB78", "#98DF8A", "#FF9896", "#C5B0D5", "#C49C94", "#F7B6D2", "#C7C7C7", "#DBDB8D", "#9EDAE5")
+  highcharter::highchart() %>%
+    hc_title(text=title) %>%
+    hc_chart(zoomType="xy") %>%
+    hc_yAxis(
+      title = list(text = if (as.alternatives) "" else ylabel),
+      ceiling = if (as.alternatives) 100 else NULL,
+      floor = 0,
+      labels = if(as.alternatives) list(format = paste0("{value}\U2009", ylabel)) else NULL
+    ) %>%
+    hc_xAxis(allowDecimals=F) %>%
+    hc_add_theme(hc_theme_google(colors=palette)) %>%
+    hc_plotOptions(
+      series = list(enabled = T),
+      line = list(cursor = 'pointer', point = list(events = list(
+        click = JS("function() { window.open(this.click, 'korap'); }")
+      )))) %>%
+    hc_credits(enabled = T,
+               text = "KorAP R Client Pakckage",
+               href = "//github.com/KorAP/RKorAPClient/") %>%
+    hc_exporting(enabled = T) %>%
+    hc_tooltip(
+      formatter = JS(paste0("function (tooltip) {
+        var str = tooltip.defaultFormatter.call(this, tooltip);
+        if(Array.isArray(str))  {
+          str = str.join('');
+        }
+       for (var i = 0; i < this.points.length; i++) {
+         str = str.replace(/([0-9.,]+.?)", ylabel, "/, this.points[i].point.count+' ($1@)');
+       }
+       return str.replace(/@/g, '", ylabel, "')
+      } ")),
+      crosshairs =  T,
+      valueDecimals = 2,
+      shared = T,
+      valueSuffix = paste0('\U2009', ylabel)
+    ) %>%
+    hc_add_series_korap_frequencies(df, as.alternatives)
+}
+
+## Mute notes: "no visible binding for global variable:"
+globalVariables(c("value", "query", "condition", "vc"))
+
+hc_add_series_korap_frequencies <- function(hc, df, as.alternatives = F) {
+  index <- 0
+  for(q in unique(df$condition)) {
+    dat <- df[df$condition==q,]
+    hc <- hc %>% hc_add_series(
+      marker = list(radius = 2),
+      name = q,
+      data = data.frame(
+        year = dat$year,
+        value = if (as.alternatives) dat$f else dat$ipm,
+        count = dat$totalResults,
+        click = dat$webUIRequestUrl
+      ),
+      hcaes(year, value),
+      type = 'line',
+      colorIndex = index,
+      zIndex = 1
+    ) %>%
+      hc_add_series(
+        name = "ci",
+        data = dat[,c('year', 'conf.low', 'conf.high')],
+        hcaes(x = year, low = conf.low, high = conf.high),
+        type = 'arearange',
+        fillOpacity = 0.3,
+        lineWidth = 0,
+        marker = list(enabled = F),
+        enableMouseTracking = F,
+        linkedTo= ':previous',
+        colorIndex = index,
+        zIndex = 0
+      )
+    index <- index+1
+  }
+  hc
+}
diff --git a/Readme.md b/Readme.md
index 1d845f8..70a92f6 100644
--- a/Readme.md
+++ b/Readme.md
@@ -28,7 +28,8 @@
 new("KorAPConnection", verbose=TRUE) %>% corpusQuery("Hello world") %>% fetchAll()
 ```
 
-## Example
+## Examples
+### Frequencies over time and domains using ggplot2
 ```r
 library(RKorAPClient)
 library(ggplot2)
@@ -42,6 +43,19 @@
 ```
 ![](man/figures/Readme-Example-1.png)<!-- -->
 
+### Percentages over time using [highcharter](http://jkunst.com/highcharter/)
+```r
+library(RKorAPClient)
+query = c("macht []{0,3} Sinn", "ergibt []{0,3} Sinn")
+years = c(1980:2010)
+as.alternatives = TRUE
+vc = "textType = /Zeit.*/ & pubDate in"
+new("KorAPConnection", verbose=T) %>%
+  frequencyQuery(query, paste(vc, years), as.alternatives = as.alternatives) %>%
+  hc_freq_by_year_ci(as.alternatives)
+```
+[![Proportion of "ergibt … Sinn"  versus "macht … Sinn" between 1980 and 2010 in newspapers and magazines](man/figures/Readme-Example-2.png)<!-- -->](https://korap.github.io/RKorAPClient/man/figures/Readme-Example-2.html)
+
 ## Demos
 
 More elaborate R scripts demonstrating the use of the package can be found in the [demo](demo) folder.
diff --git a/demo/highcharter-example.R b/demo/highcharter-example.R
index cdf1362..4d312ba 100644
--- a/demo/highcharter-example.R
+++ b/demo/highcharter-example.R
@@ -1,104 +1,4 @@
 library(RKorAPClient)
-library(highcharter)
-
-hc_freq_by_year_ci <- function(df, as.alternatives = F, ylabel = if(as.alternatives) "%" else "ipm") {
-  title <- ""
-  df <- df %>%
-    { if(! as.alternatives) ipm(.) else RKorAPClient::percent(.) }
-
-  if (!"year" %in% colnames(df)) {
-    df <- df %>% mutate(year = as.integer(queryStringToLabel(df$vc, pubDateOnly = T)))
-  }
-  if (!"condition" %in% colnames(df)) {
-    if (length(base::unique(df$query)) > 1) {
-      df <- df %>% mutate(condition = query)
-      if(length(base::unique(queryStringToLabel(df$vc, excludePubDate = T ))) > 1) {
-        df <- df %>% mutate(condition = paste(condition, " & ",
-                                              queryStringToLabel(vc, excludePubDate = T )))
-      }
-    } else {
-      title <- base::unique(df$query)
-      if(length(base::unique(queryStringToLabel(df$vc, excludePubDate = T ))) > 1) {
-        df <- df %>% mutate(condition = queryStringToLabel(vc, excludePubDate = T ))
-      }
-    }
-  }
-  # use the D3 palette which provides 20 attractive and distinguishable colours
-  palette <- c("#1F77B4", "#FF7F0E", "#2CA02C", "#D62728", "#9467BD", "#8C564B", "#E377C2", "#7F7F7F", "#BCBD22", "#17BECF", "#AEC7E8", "#FFBB78", "#98DF8A", "#FF9896", "#C5B0D5", "#C49C94", "#F7B6D2", "#C7C7C7", "#DBDB8D", "#9EDAE5")
-  highchart() %>%
-    hc_title(text=title) %>%
-    hc_chart(zoomType="xy") %>%
-    hc_yAxis(
-      title = list(text = if (as.alternatives) "" else ylabel),
-      ceiling = if (as.alternatives) 100 else NULL,
-      floor = 0,
-      labels = if(as.alternatives) list(format = paste0("{value}\U2009", ylabel)) else NULL
-    ) %>%
-    hc_xAxis(allowDecimals=F) %>%
-    hc_add_theme(hc_theme_google(colors=palette)) %>%
-    hc_plotOptions(
-      series = list(enabled = T),
-      line = list(cursor = 'pointer', point = list(events = list(
-      click = JS("function() { window.open(this.click, 'korap'); }")
-    )))) %>%
-    hc_credits(enabled = T,
-               text = "KorAP R Client Pakckage",
-               href = "//github.com/KorAP/RKorAPClient/") %>%
-    hc_exporting(enabled = T) %>%
-    hc_tooltip(
-      formatter = JS(paste0("function (tooltip) {
-        var str = tooltip.defaultFormatter.call(this, tooltip);
-        if(Array.isArray(str))  {
-          str = str.join('');
-        }
-       for (var i = 0; i < this.points.length; i++) {
-         str = str.replace(/([0-9.,]+.?)", ylabel, "/, this.points[i].point.count+' ($1@)');
-       }
-       return str.replace(/@/g, '", ylabel, "')
-      } ")),
-      crosshairs =  T,
-      valueDecimals = 2,
-      shared = T,
-      valueSuffix = paste0('\U2009', ylabel)
-    ) %>%
-    hc_add_series_korap_frequencies(df, as.alternatives)
-}
-
-hc_add_series_korap_frequencies <- function(hc, df, as.alternatives = F) {
-  index <- 0
-  for(q in unique(df$condition)) {
-    dat <- df[df$condition==q,]
-    hc <- hc %>% hc_add_series(
-      marker = list(radius = 2),
-      name = q,
-      data = data.frame(
-        year = dat$year,
-        value = if (as.alternatives) dat$f else dat$ipm,
-        count = dat$totalResults,
-        click = dat$webUIRequestUrl
-      ),
-      hcaes(year, value),
-      type = 'line',
-      colorIndex = index,
-      zIndex = 1
-    ) %>%
-      hc_add_series(
-        name = "ci",
-        data = dat[,c('year', 'conf.low', 'conf.high')],
-        hcaes(x = year, low = conf.low, high = conf.high),
-        type = 'arearange',
-        fillOpacity = 0.3,
-        lineWidth = 0,
-        marker = list(enabled = F),
-        enableMouseTracking = F,
-        linkedTo= ':previous',
-        colorIndex = index,
-        zIndex = 0
-      )
-    index <- index+1
-  }
-  hc
-}
 
 plotHighchart <- function(query = "Schlumpf",
                           years = c(2000:2010),
@@ -120,3 +20,4 @@
 h1 <- plotHighchart(c("Leser | Lesern | Lesers", 'Leserin | Leserinnen', 'LeserIn | LeserInnen', '"Leser[_\\*]in.*"'), c(1985:2018), as.alternatives = F)
 #plotHighchart(c("Tollpatsch", "Tolpatsch"), c(1991:2018))
 
+
diff --git a/man/figures/Readme-Example-2.png b/man/figures/Readme-Example-2.png
new file mode 100644
index 0000000..3a9ed19
--- /dev/null
+++ b/man/figures/Readme-Example-2.png
Binary files differ
diff --git a/man/hc_freq_by_year_ci.Rd b/man/hc_freq_by_year_ci.Rd
new file mode 100644
index 0000000..8daa145
--- /dev/null
+++ b/man/hc_freq_by_year_ci.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/highcharter-helper.R
+\name{hc_freq_by_year_ci}
+\alias{hc_freq_by_year_ci}
+\title{Experimental: Plot interactive frequency by year graphs with confidence intervals using highcharter}
+\usage{
+hc_freq_by_year_ci(
+  df,
+  as.alternatives = F,
+  ylabel = if (as.alternatives) "\%" else "ipm"
+)
+}
+\arguments{
+\item{df}{data frame like the value of a \code{\link{frequencyQuery}}}
+
+\item{as.alternatives}{boolean decides whether queries should be treatet queries as mutually exclusive and exahustive wrt. to some meaningful class (e.g. spelling variants of a certain word form).}
+
+\item{ylabel}{defaults to \% if \code{as.alternatives} is \code{true} and to "ipm" otherwise.}
+}
+\description{
+Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using highcharter.
+\bold{Warning:} This function may be moved to a new package.
+}
+\examples{
+new("KorAPConnection", verbose = TRUE) \%>\%
+  frequencyQuery(query = c("macht []{0,3} Sinn", "ergibt []{0,3} Sinn"),
+                 vc = paste("textType = /Zeit.*/ & pubDate in", c(2010:2014)),
+                 as.alternatives = TRUE) \%>\%
+  hc_freq_by_year_ci(as.alternatives = TRUE)
+
+kco <- new("KorAPConnection", verbose = TRUE)
+expand_grid(
+  condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
+  year = (2005:2011)
+) \%>\%
+  cbind(frequencyQuery(
+    kco,
+    "[tt/l=Heuschrecke]",
+    paste0(.$condition, " & pubDate in ", .$year)
+  ))  \%>\%
+  hc_freq_by_year_ci()
+
+}