blob: 443fbe41a3aa9e76c0a5249d09b20e7e8930dcc5 [file] [log] [blame]
Marc Kupietz827a3c12019-09-18 22:09:33 +02001#!/usr/bin/env Rscript
2#
# Plot the frequency of alternative expressions or spelling variants over time
4#
5library(RKorAPClient)
6library(ggplot2)
Marc Kupietz69cc54a2019-09-30 12:06:54 +02007library(tidyr)
8library(dplyr)
Marc Kupietz827a3c12019-09-18 22:09:33 +02009library(plotly)
10library(htmlwidgets)
11
#' Plot the share of alternative expressions or spelling variants over time
#'
#' Runs one KorAP corpus query per combination of an entry in `alternatives`
#' and an entry in `years`, each restricted to the virtual corpus
#' `textType = /Zeit.*/ & pubDate in <year>`. For every year the hit counts of
#' all alternatives are summed, each alternative's share of that sum is
#' computed together with a `prop.test` confidence interval, and the result is
#' printed as an interactive plotly chart whose data points open the
#' corresponding query in the KorAP web UI when clicked.
#'
#' @param alternatives Character vector of KorAP queries to compare.
#' @param years Vector of publication years; used as x axis breaks, so it is
#'   expected to be numeric.
#' @param kco Connection object handed to `corpusQuery()`.
#' @return A tibble with one row per year and alternative and the columns
#'   `year`, `total`, `Variant`, `value`, `share`, `url` and the two-column
#'   matrix `ci` (lower and upper confidence bound of `share`).
alternativesOverTime <- function(alternatives, years, kco = new("KorAPConnection", verbose = TRUE)) {
  stopifnot(length(alternatives) > 0, length(years) > 0)

  vc <- "textType = /Zeit.*/ & pubDate in"

  # One row per (year, alternative); expand.grid varies its first factor
  # fastest, so rows are ordered by year and, within a year, by alternative.
  grid <- expand.grid(
    name = alternatives,
    year = years,
    stringsAsFactors = FALSE,
    KEEP.OUT.ATTRS = FALSE
  )

  # Send every query exactly once and read both the hit count and the web UI
  # link from the same result object.
  queries <- Map(
    function(query, year) corpusQuery(kco, query = query, vc = paste(vc, year)),
    grid$name,
    grid$year
  )

  df <- grid %>%
    mutate(
      value = vapply(queries, function(q) as.numeric(q@totalResults), numeric(1), USE.NAMES = FALSE),
      url = vapply(queries, function(q) as.character(q@webUIRequestUrl), character(1), USE.NAMES = FALSE)
    ) %>%
    group_by(year) %>%
    mutate(total = sum(value)) %>%
    ungroup() %>%
    mutate(share = value / total) %>%
    select(year, total, Variant = name, value, share, url)

  # Confidence interval of each share, stored as an n x 2 matrix column.
  df$ci <- t(vapply(
    Map(prop.test, df$value, df$total),
    function(test) as.numeric(test$conf.int),
    numeric(2)
  ))

  g <- ggplot(data = df, mapping = aes(x = year, y = share, color = Variant, fill = Variant)) +
    geom_ribbon(aes(ymin = ci[, 1], ymax = ci[, 2], color = Variant, fill = Variant), alpha = .3, linetype = 0) +
    geom_line() +
    geom_point() +
    ggtitle(paste0(alternatives, collapse = " vs. ")) +
    xlab("TIME") +
    ylab("Observed frequency ratio") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    scale_x_continuous(breaks = unique(df$year))

  pp <- ggplotly(g, tooltip = c("x", "y"))

  # NOTE(review): assumes ggplotly emits one trace per variant and layer, in
  # layer order (ribbons first, then lines), so the line traces start right
  # after the ribbon traces. For two alternatives this is the offset of 2 used
  # previously - confirm the trace layout when changing layers.
  n_variants <- length(alternatives)
  for (i in seq_along(alternatives)) {
    trace <- n_variants + i
    vdata <- df[df$Variant == alternatives[i], ]
    pp$x$data[[trace]]$customdata <- vdata$url
    pp$x$data[[trace]]$text <- sprintf("%s<br />absolute: %d / %d", pp$x$data[[trace]]$text, vdata$value, vdata$total)
  }

  # Open the KorAP query behind a data point when it is clicked.
  ppp <- onRender(pp, "function(el, x) { el.on('plotly_click', function(d) { var url=d.points[0].customdata; window.open(url, 'korap') })}")
  print(ppp)
  df
}
44
# Compare the two-word and the one-word spelling in newspaper texts published
# between 1995 and 2018.
df <- alternativesOverTime(
  alternatives = c('so "genannte.?"', '"sogenannte.?"'),
  years = 1995:2018
)