Marc Kupietz | 827a3c1 | 2019-09-18 22:09:33 +0200 | [diff] [blame] | 1 | #!/usr/bin/env Rscript |
| 2 | # |
# Plot proportions of alternative expressions or spelling variants over time
Marc Kupietz | 827a3c1 | 2019-09-18 22:09:33 +0200 | [diff] [blame] | 4 | # |
| 5 | library(RKorAPClient) |
| 6 | library(ggplot2) |
Marc Kupietz | 827a3c1 | 2019-09-18 22:09:33 +0200 | [diff] [blame] | 7 | library(plotly) |
| 8 | library(htmlwidgets) |
| 9 | |
# Plot how the shares of alternative expressions / spelling variants develop
# over time, and return the underlying data.
#
# Arguments:
#   alternatives  character vector of KorAP query strings, one per variant.
#   years         years to query; each value is formatted with "%d" into the
#                 virtual corpus definition, so they must be whole numbers.
#   kco           KorAPConnection to use (a new verbose connection by default).
#
# Side effects: sends one corpus query per variant/year combination and prints
# an interactive plotly widget; clicking a data point opens the corresponding
# KorAP query in a browser window named 'korap'.
#
# Returns: the data frame behind the plot (one row per variant and year).
alternativesOverTime <- function(alternatives, years, kco = new("KorAPConnection", verbose = TRUE)) {
  # One row per variant/year combination, each queried against the virtual
  # corpus restricted to that publication year.
  df <- expand_grid(Variant = alternatives, year = years) %>%
    cbind(corpusQuery(kco, .$Variant, sprintf("textType = /Zeit.*/ & pubDate in %d", .$year))) %>%
    group_by(year) %>%
    # Denominator per year: combined hits of all variants in that year.
    mutate(tokens = sum(totalResults)) %>%
    # NOTE(review): ci() presumably adds the f / conf.low / conf.high columns
    # that the plot aesthetics below rely on -- confirm against RKorAPClient.
    ci()

  g <- ggplot(
    data = df,
    mapping = aes(
      x = year, y = f, color = Variant, fill = Variant,
      ymin = conf.low, ymax = conf.high
    )
  ) +
    geom_freq_by_year_ci() +
    ggtitle(paste0(alternatives, collapse = " vs. ")) +
    xlab("TIME") +
    ylab("Observed frequency ratio")

  pp <- ggplotly(g, tooltip = c("x", "y"))

  # seq_along() instead of 1:length(): an empty `alternatives` must yield zero
  # iterations rather than looping over c(1, 0).
  for (i in seq_along(alternatives)) {
    vdata <- df[df$Variant == alternatives[i], ]
    # NOTE(review): the offset of 2 assumes the traces that carry the hover
    # points of variant i sit at position 2 + i in the plotly object -- this
    # depends on how geom_freq_by_year_ci() layers its geoms; verify if the
    # geom changes.
    trace <- 2 + i
    # Stash the web UI URL of each query so the click handler can open it.
    pp$x$data[[trace]]$customdata <- vdata$webUIRequestUrl
    # Extend the hover text with the absolute counts behind the ratio.
    pp$x$data[[trace]]$text <- sprintf(
      "%s<br />absolute: %d / %d",
      pp$x$data[[trace]]$text, vdata$totalResults, vdata$tokens
    )
  }

  ppp <- onRender(pp, "function(el, x) { el.on('plotly_click', function(d) { var url=d.points[0].customdata; window.open(url, 'korap') })}")
  print(ppp)
  df
}
| 30 | |
# Example run: compare the two-word spelling with the one-word spelling
# for the publication years 1995 through 2018.
df <- alternativesOverTime(
  alternatives = c('so "genannte.?"', '"sogenannte.?"'),
  years = 1995:2018
)