blob: 00306cde4b6c9bf91a003a6cf68dec44261d5e65 [file] [log] [blame]
#!/usr/bin/env Rscript
#
# Plot proportions of alternative expressions or spelling variants over time
#
5library(RKorAPClient)
6library(ggplot2)
Marc Kupietz827a3c12019-09-18 22:09:33 +02007library(plotly)
8library(htmlwidgets)
9
#' Plot the proportions of alternative expressions or spelling variants over time
#'
#' Issues one corpus query per variant/year combination, restricted to texts
#' matching `textType = /Zeit.*/` published in that year, and plots each
#' variant's share per year as an interactive plotly widget. Clicking a data
#' point opens the URL stored in the `webUIRequestUrl` column of the query
#' result in a browser window named 'korap'.
#'
#' @param alternatives Character vector of query strings, one per variant.
#' @param years Vector of publication years to query (formatted with `%d`).
#' @param kco Connection object passed to `corpusQuery()`; defaults to a new
#'   verbose `KorAPConnection`.
#' @return The data frame behind the plot: one row per variant and year with
#'   the query results, a `tokens` column holding the per-year sum of
#'   `totalResults` over all variants, and the columns added by `ci()`.
#'   The widget is printed as a side effect.
alternativesOverTime <- function(alternatives, years, kco = new("KorAPConnection", verbose = TRUE)) {
  # One row per variant/year combination, extended with its query result.
  # `tokens` is the per-year total over all variants, i.e. the denominator
  # of each variant's share.
  df <- expand_grid(Variant = alternatives, year = years) %>%
    cbind(corpusQuery(kco, .$Variant, sprintf("textType = /Zeit.*/ & pubDate in %d", .$year))) %>%
    group_by(year) %>%
    mutate(tokens = sum(totalResults)) %>%
    ci()

  # `f`, `conf.low` and `conf.high` are not created here; presumably they are
  # added by ci() -- verify against the RKorAPClient documentation.
  g <- ggplot(data = df, mapping = aes(x = year, y = f, color = Variant, fill = Variant)) +
    geom_ribbon(
      aes(ymin = conf.low, ymax = conf.high, color = Variant, fill = Variant),
      alpha = .3, linetype = 0
    ) +
    geom_line() +
    geom_point() +
    ggtitle(paste0(alternatives, collapse = " vs. ")) +
    xlab("TIME") +
    ylab("Observed frequency ratio") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    scale_x_continuous(breaks = unique(df$year))

  pp <- ggplotly(g, tooltip = c("x", "y"))

  # Attach the click target and the absolute counts to each variant's trace.
  # The offset skips the ribbon traces; it was hard-coded to 2 before, which
  # only works for exactly two variants.
  # NOTE(review): assumes ggplotly emits one ribbon trace per variant first,
  # followed by the line traces in the order of `alternatives` -- confirm.
  n_variants <- length(alternatives)
  for (i in seq_along(alternatives)) {
    vdata <- df[df$Variant == alternatives[i], ]
    trace <- n_variants + i
    pp$x$data[[trace]]$customdata <- vdata$webUIRequestUrl
    pp$x$data[[trace]]$text <- sprintf(
      "%s<br />absolute: %d / %d",
      pp$x$data[[trace]]$text, vdata$totalResults, vdata$tokens
    )
  }

  # Open the URL stored in customdata when a point is clicked.
  ppp <- onRender(pp, "function(el, x) { el.on('plotly_click', function(d) { var url=d.points[0].customdata; window.open(url, 'korap') })}")
  print(ppp)
  df
}
33
# Compare the two-word and the one-word query variants for the years 1995-2018.
df <- alternativesOverTime(
  alternatives = c('so "genannte.?"', '"sogenannte.?"'),
  years = 1995:2018
)