Marc Kupietz | 827a3c1 | 2019-09-18 22:09:33 +0200 | [diff] [blame] | 1 | #!/usr/bin/env Rscript |
| 2 | # |
Marc Kupietz | 1242a5e | 2019-10-05 18:28:01 +0200 | [diff] [blame] | 3 | # Plot proportions of alternative expressions or spelling variants over time |
Marc Kupietz | 827a3c1 | 2019-09-18 22:09:33 +0200 | [diff] [blame] | 4 | # |
| 5 | library(RKorAPClient) |
| 6 | library(ggplot2) |
Marc Kupietz | 827a3c1 | 2019-09-18 22:09:33 +0200 | [diff] [blame] | 7 | |
# Query the frequency of each alternative per year and plot the shares over time.
#
# alternatives: vector of query strings, one per competing variant; joined
#               with " vs. " it also serves as the plot title.
# years:        years to query; each one is inserted via "%d" into the virtual
#               corpus definition, so the values must be whole numbers.
# kco:          connection object handed to frequencyQuery(); by default a new
#               verbose KorAPConnection.
#
# Prints the plot as a side effect and returns the underlying data frame: the
# variant/year grid bound to the query result, with column f renamed to share.
alternativesOverTime <- function(alternatives, years, kco = new("KorAPConnection", verbose=TRUE)) {
  # One row per (variant, year) combination.
  combos <- expand_grid(Variant = alternatives, year = years)
  # One virtual corpus definition per row, restricted to that row's year.
  yearly_vc <- sprintf("textType = /Zeit.*/ & pubDate in %d", combos$year)
  # NOTE(review): as.alternatives = TRUE presumably makes f the share among the
  # queried variants rather than a corpus-relative frequency; confirm in the
  # RKorAPClient documentation.
  hits <- frequencyQuery(kco, combos$Variant, yearly_vc, as.alternatives = TRUE)
  df <- rename(cbind(combos, hits), share = f)

  headline <- paste0(alternatives, collapse = " vs. ")
  g <- ggplot(df, aes(x = year, y = share, colour = Variant, fill = Variant)) +
    geom_freq_by_year_ci() +
    ggtitle(headline) +
    xlab("TIME") +
    ylab("Observed frequency ratio")
  print(g)
  df
}
| 20 | |
# Example run: compare the two query variants below for the years 1995 to 2018.
df <- alternativesOverTime(
  alternatives = c('so "genannte.?"', '"sogenannte.?"'),
  years = 1995:2018
)