blob: dca55b3b2edf7bb4715209664eb157a365dc8371 [file] [log] [blame]
Marc Kupietz827a3c12019-09-18 22:09:33 +02001#!/usr/bin/env Rscript
2#
# Plot proportions of alternative expressions or spelling variants over time
Marc Kupietz827a3c12019-09-18 22:09:33 +02004#
5library(RKorAPClient)
6library(ggplot2)
Marc Kupietz827a3c12019-09-18 22:09:33 +02007library(plotly)
8library(htmlwidgets)
9
#' Plot how alternative expressions compare over time
#'
#' Builds every combination of variant and year, runs one corpus query per
#' combination, and prints an interactive plot of the resulting `share`
#' values by year.
#'
#' @param alternatives Character vector of query strings to compare; also
#'   used (joined by " vs. ") as the plot title.
#' @param years Vector of publication years; each value is formatted with
#'   `%d` into the virtual corpus definition.
#' @param kco Connection object handed to `corpusQuery()`. Defaults to a new
#'   verbose `KorAPConnection`.
#' @return The data frame the plot is drawn from, including the columns
#'   `Variant`, `year`, `tokens` and `share`.
alternativesOverTime <- function(alternatives, years, kco = new("KorAPConnection", verbose=TRUE)) {
  # One row per (variant, year) pair.
  combos <- expand_grid(Variant = alternatives, year = years)

  # Restrict each query to texts whose type matches /Zeit.*/ and that were
  # published in the row's year.
  vc <- sprintf("textType = /Zeit.*/ & pubDate in %d", combos$year)
  hits <- corpusQuery(kco, combos$Variant, vc)

  # `tokens` is the sum of hits over all variants within the same year.
  # NOTE(review): ci() presumably derives the relative frequency `f` (and
  # confidence bounds) from totalResults and tokens -- confirm against the
  # RKorAPClient version in use.
  df <- cbind(combos, hits) %>%
    group_by(year) %>%
    mutate(tokens = sum(totalResults)) %>%
    ci() %>%
    rename(share = f)

  g <- ggplot(
    data = df,
    mapping = aes(x = year, y = share, colour = Variant, fill = Variant)
  ) +
    geom_freq_by_year_ci() +
    ggtitle(paste0(alternatives, collapse = " vs. ")) +
    xlab("TIME") +
    ylab("Observed frequency ratio")

  # Convert to an interactive widget and display it as a side effect.
  print(RKorAPClient::ggplotly(g))

  df
}
25
# Compare the two-word and the one-word spelling for the years 1995 to 2018.
df <- alternativesOverTime(
  alternatives = c('so "genannte.?"', '"sogenannte.?"'),
  years = 1995:2018
)