blob: eb85ac892010ccaa56652b80152bad808010a633 [file] [log] [blame]
#!/usr/bin/env Rscript
#
# Plot proportions of alternative expressions or spelling variants over time
#
5library(RKorAPClient)
6library(ggplot2)
Marc Kupietz827a3c12019-09-18 22:09:33 +02007library(plotly)
8library(htmlwidgets)
9
# Query and plot how alternative expressions are distributed over the years.
#
# Arguments:
#   alternatives  character vector of query strings to compare with each other
#   years         years to query; each one is inserted into the virtual corpus
#                 definition through the "%d" format below
#   kco           connection handed to frequencyQuery(); defaults to a new
#                 verbose KorAPConnection
#
# Side effect: prints the interactive (ggplotly) version of the plot.
#
# Returns the data frame the plot is built from: one row per variant/year
# combination, combined with the frequencyQuery() result, whose column `f` is
# renamed to `share`.
# NOTE(review): `share` is presumably each variant's proportion among the
# alternatives (as.alternatives = TRUE) -- confirm against the RKorAPClient docs.
alternativesOverTime <- function(alternatives, years, kco = new("KorAPConnection", verbose=TRUE)) {
  # Every variant is queried once for every year.
  grid <- expand_grid(Variant = alternatives, year = years)

  # One virtual corpus definition per row, restricted to that row's year.
  corpus_by_year <- sprintf("textType = /Zeit.*/ & pubDate in %d", grid$year)
  frequencies <- frequencyQuery(kco, grid$Variant, corpus_by_year, as.alternatives = TRUE)

  # Attach the query results to the grid and expose `f` under the name `share`.
  df <- rename(cbind(grid, frequencies), share = f)

  aesthetics <- aes(x = year, y = share, colour = Variant, fill = Variant)
  plot_title <- paste0(alternatives, collapse = " vs. ")
  g <- ggplot(data = df, mapping = aesthetics) +
    geom_freq_by_year_ci() +
    ggtitle(plot_title) +
    xlab("TIME") +
    ylab("Observed frequency ratio")

  # Show the interactive plot, then hand the underlying data back to the caller.
  print(RKorAPClient::ggplotly(g))
  df
}
23
# Compare the two-word spelling with the one-word spelling for 1995 to 2018.
df <- alternativesOverTime(
  c('so "genannte.?"', '"sogenannte.?"'),
  1995:2018
)