alternatibvesOverTime demo: use our vectorized api methods
Change-Id: I8db420cd0340ea48538f50161793ac0e5594716f
diff --git a/demo/alternativesOverTime.R b/demo/alternativesOverTime.R
index 63a058c..00306cd 100755
--- a/demo/alternativesOverTime.R
+++ b/demo/alternativesOverTime.R
@@ -1,41 +1,30 @@
#!/usr/bin/env Rscript
#
-# Plot frequency of alternative expressions or spellings variants over time
+# Plot proportions of alternative expressions or spellings variants over time
#
library(RKorAPClient)
library(ggplot2)
-library(tidyr)
-library(dplyr)
library(plotly)
library(htmlwidgets)
alternativesOverTime <- function(alternatives, years, kco = new("KorAPConnection", verbose=TRUE)) {
- vc = "textType = /Zeit.*/ & pubDate in"
- df <- data.frame(matrix(ncol = length(alternatives), nrow = length(years))) %>%
- setNames(alternatives) %>%
- mutate(year = years) %>%
- pivot_longer(cols = alternatives) %>%
- mutate(value = corpusQuery(kco, query=name, vc=paste(vc, year))$totalResults) %>%
- pivot_wider(id_cols= year, names_from = name) %>%
- mutate(total = rowSums(.[alternatives])) %>%
- pivot_longer(cols = alternatives) %>%
- mutate(share = value / total) %>%
- mutate(url = corpusQuery(kco, query=name, vc=paste(vc, year))$webUIRequestUrl) %>%
- rename(Variant = name)
- df$ci <- t(sapply(Map(prop.test, df$value, df$total), "[[","conf.int"))
- g <- ggplot(data = df, mapping = aes(x = year, y = share, color=Variant, fill=Variant)) +
- geom_ribbon(aes(ymin=ci[, 1], ymax=ci[, 2], color=Variant, fill=Variant), alpha=.3, linetype=0) +
+ df <- expand_grid(Variant = alternatives, year = years) %>%
+ cbind(corpusQuery(kco, .$Variant, sprintf("textType = /Zeit.*/ & pubDate in %d", .$year))) %>%
+ group_by(year) %>% mutate(tokens = sum(totalResults)) %>%
+ ci()
+ g <- ggplot(data = df, mapping = aes(x = year, y = f, color = Variant, fill = Variant)) +
+ geom_ribbon(aes(ymin = conf.low, ymax = conf.high, color = Variant, fill = Variant), alpha = .3, linetype = 0) +
geom_line() +
geom_point() +
ggtitle(paste0(alternatives, collapse = " vs. ")) +
xlab("TIME") +
ylab(sprintf("Observed frequency ratio")) +
- theme(axis.text.x = element_text(angle = 45, hjust = 1)) + scale_x_continuous(breaks=unique(df$year))
+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + scale_x_continuous(breaks = unique(df$year))
pp <- ggplotly(g, tooltip = c("x", "y"))
for (i in 1:length(alternatives)) {
- vdata <- df[df$Variant==alternatives[i],]
- pp$x$data[[2+i]]$customdata <- vdata$url
- pp$x$data[[2+i]]$text <- sprintf("%s<br />absolute: %d / %d", pp$x$data[[2+i]]$text, vdata$value, vdata$total)
+ vdata <- df[df$Variant == alternatives[i],]
+ pp$x$data[[2+i]]$customdata <- vdata$webUIRequestUrl
+ pp$x$data[[2+i]]$text <- sprintf("%s<br />absolute: %d / %d", pp$x$data[[2+i]]$text, vdata$totalResults, vdata$tokens)
}
ppp <- onRender(pp, "function(el, x) { el.on('plotly_click', function(d) { var url=d.points[0].customdata; window.open(url, 'korap') })}")
print(ppp)