blob: ae91ff56b7fd2eeb2a38e74218f2d08c850cd29f [file] [log] [blame]
#!/usr/bin/env Rscript
#
# Plot proportions of alternative expressions or spelling variants over time
#
5library(RKorAPClient)
6library(ggplot2)
Marc Kupietz827a3c12019-09-18 22:09:33 +02007library(plotly)
8library(htmlwidgets)
9
# Plot the relative share of alternative expressions or spelling variants
# over time and return the underlying data.
#
# Arguments:
#   alternatives  character vector of query strings, one per variant
#   years         vector of publication years; each one is formatted with
#                 "%d" into the virtual corpus definition
#   kco           connection object handed to corpusQuery(); defaults to a
#                 new verbose KorAPConnection
#
# Side effect: prints an interactive plotly widget in which clicking a data
# point opens the URL stored in that point's customdata in a window named
# "korap".
#
# Returns: the data frame the plot is built from (one row per combination of
# variant and year).
alternativesOverTime <- function(alternatives, years, kco = new("KorAPConnection", verbose = TRUE)) {
  # One query per (variant, year) combination. `tokens` is set to the summed
  # hits of all variants within a year.
  # NOTE(review): presumably ci() uses `tokens` as the denominator and adds
  # the columns f, conf.low and conf.high used below -- confirm against the
  # installed RKorAPClient version.
  df <- expand_grid(Variant = alternatives, year = years) %>%
    cbind(corpusQuery(kco, .$Variant, sprintf("textType = /Zeit.*/ & pubDate in %d", .$year))) %>%
    group_by(year) %>%
    mutate(tokens = sum(totalResults)) %>%
    ci()

  g <- ggplot(
    data = df,
    mapping = aes(
      x = year, y = f, color = Variant, fill = Variant,
      ymin = conf.low, ymax = conf.high
    )
  ) +
    geom_freq_by_year_ci() +
    ggtitle(paste0(alternatives, collapse = " vs. ")) +
    xlab("TIME") +
    ylab("Observed frequency ratio")

  pp <- ggplotly(g, tooltip = c("x", "y"))

  # seq_along() yields an empty sequence for zero alternatives, whereas
  # 1:length() would iterate over c(1, 0).
  for (i in seq_along(alternatives)) {
    vdata <- df[df$Variant == alternatives[i], ]
    # NOTE(review): the offset of 2 assumes the traces to annotate start at
    # position 3 of the plotly trace list -- confirm for other numbers of
    # alternatives.
    trace <- 2 + i
    pp$x$data[[trace]]$customdata <- vdata$webUIRequestUrl
    pp$x$data[[trace]]$text <- sprintf(
      "%s<br />absolute: %d / %d",
      pp$x$data[[trace]]$text, vdata$totalResults, vdata$tokens
    )
  }

  # Open the URL attached to the clicked point.
  ppp <- onRender(pp, "function(el, x) { el.on('plotly_click', function(d) { var url=d.points[0].customdata; window.open(url, 'korap') })}")
  print(ppp)
  df
}
30
# Compare the two-word and the one-word spelling for the years 1995 to 2018.
df <- alternativesOverTime(
  alternatives = c('so "genannte.?"', '"sogenannte.?"'),
  years = 1995:2018
)