blob: 3f85f8e92aa7262d9e4a005517b04283d0cdcc7c [file] [log] [blame]
Marc Kupietz9d57d4b2019-09-25 20:02:37 +02001#!/usr/bin/env Rscript
2#
# Plot the frequency of an expression under multiple conditions over time
4#
5#library(devtools)
6#install_git("https://korap.ids-mannheim.de/gerrit/KorAP/RKorAPClient", upgrade="never")
7library(RKorAPClient)
8library(ggplot2)
9library(reshape2)
10#library(plotly)
11
# Plot the frequency of a query under several conditions, year by year.
#
# For every combination of `conditions` and `years` a virtual corpus
# definition of the form "<condition> & pubDate in <year>" is built and passed,
# together with `query`, to frequencyQuery(). The result is drawn as one
# line (with points and a ribbon between conf.low and conf.high) per condition.
#
# Arguments:
#   query      query string whose frequency is plotted; also shown in the
#              y-axis label.
#   conditions character vector of virtual corpus constraints, one series each.
#   years      integer-valued vector of publication years (formatted with %d).
#   kco        connection object handed to frequencyQuery(); by default a new
#              verbose "KorAPConnection".
#
# Returns the value of print(g), i.e. the plot is displayed as a side effect.
# The result of frequencyQuery() is expected to provide the columns `f`,
# `conf.low` and `conf.high` used by the plot.
conditionsOverTime <- function(query, conditions, years, kco = new("KorAPConnection", verbose = TRUE)) {
  # Keep the queried data under a local name: the x-axis breaks below need
  # the actual year values, and nothing called `df` exists in this scope.
  # `.` only appears inside nested calls, so the pipe still passes the grid
  # as the first argument of cbind().
  freqs <- expand_grid(condition = conditions, year = years) %>%
    cbind(frequencyQuery(kco, query, sprintf("%s & pubDate in %d", .$condition, .$year)))

  g <- ggplot(freqs, aes(x = year, y = f, fill = condition, color = condition)) +
    geom_point() +
    geom_line() +
    # linetype = 0 suppresses the ribbon outline so only the shaded band shows.
    geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = condition, color = condition), alpha = .3, linetype = 0) +
    xlab("TIME") +
    labs(color = "Virtual Corpus", fill = "Virtual Corpus") +
    ylab(sprintf("Observed frequency of \u201c%s\u201d", query)) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    # One tick per queried year.
    scale_x_continuous(breaks = unique(freqs$year))
  print(g)
  # print(ggplotly(g, tooltip = c("x", "y")))
}
Marc Kupietz9d57d4b2019-09-25 20:02:37 +020026#df <- conditionsOverTime("wegen dem [tt/p=NN]", c("textClass = /sport.*/", "textClass=/politik.*/", "textClass=/kultur.*/"), (1995:2005))
Marc Kupietz296e4932019-10-04 22:51:11 +020027
# Example run: frequency of the lemma "Heuschrecke" in three text classes,
# for each year from 2002 to 2018.
conditionsOverTime(
  query = "[tt/l=Heuschrecke]",
  conditions = c(
    "textClass = /natur.*/",
    "textClass=/politik.*/",
    "textClass=/wirtschaft.*/"
  ),
  years = (2002:2018)
)