#!/usr/bin/env Rscript
#
# Plot proportions of alternative expressions or spelling variants over time
#
library(RKorAPClient)
library(ggplot2)

# Query and plot the share of each alternative expression per year.
#
# Arguments:
#   alternatives  vector of KorAP query strings, one per competing variant
#   years         vector of publication years; each value is formatted with
#                 "%d", so it must be a whole number
#   kco           KorAPConnection used for the queries; defaults to a newly
#                 created verbose connection
#
# Side effect: prints the plot.
# Returns the data frame the plot was drawn from: the Variant/year grid
# column-bound to the frequencyQuery() result, with that result's `f` column
# renamed to `share`.
alternativesOverTime <- function(alternatives, years,
                                 kco = new("KorAPConnection", verbose = TRUE)) {
  # One row per (variant, year) pair. Each row is queried against the texts
  # whose textType matches /Zeit.*/ and whose pubDate lies in that year.
  # NOTE(review): as.alternatives = TRUE presumably makes `f` relative to the
  # summed frequency of all alternatives -- confirm in the RKorAPClient docs.
  # `.` only occurs inside the nested call, so magrittr still passes the grid
  # as the first argument of cbind().
  df <- expand_grid(Variant = alternatives, year = years) %>%
    cbind(
      frequencyQuery(
        kco,
        .$Variant,
        sprintf("textType = /Zeit.*/ & pubDate in %d", .$year),
        as.alternatives = TRUE
      )
    ) %>%
    rename(share = f)

  g <- ggplot(
    data = df,
    mapping = aes(x = year, y = share, colour = Variant, fill = Variant)
  ) +
    geom_freq_by_year_ci() +
    ggtitle(paste0(alternatives, collapse = " vs. ")) +
    xlab("TIME") +
    ylab("Observed frequency ratio")

  # Print explicitly: a ggplot object built inside a function is not
  # auto-printed, and the function's value is the data frame, not the plot.
  print(g)
  df
}
20
21df <- alternativesOverTime(c('so "genannte.?"', '"sogenannte.?"'), (1995:2018))