Tidy up the frequenciesOverTime demo and render its plot with plotly
Change-Id: I68030283668b3df0bce0c95e8f28a48db8cf7d77
diff --git a/demo/frequenciesOverTime.R b/demo/frequenciesOverTime.R
index 475d2eb..56b576f 100644
--- a/demo/frequenciesOverTime.R
+++ b/demo/frequenciesOverTime.R
@@ -4,29 +4,19 @@
#
library(RKorAPClient)
library(ggplot2)
+library(plotly)
-freqPerYear <- function(query, con = new("KorAPConnection", verbose = TRUE)) {
- vc <- "pubDate since 2000 & pubDate until 2018 & textType = /Zeit.*/"
- q <- corpusQuery(con, query = query, vc=vc)
- q <- fetchAll(q)
- tokensPerYear <- function(year) {
- return(corpusStats(con, sprintf("%s & pubDate in %s", vc, year))@tokens)
- }
- df <- as.data.frame(table(as.numeric(format(q@collectedMatches$pubDate,"%Y")), dnn="year"),
- stringsAsFactors = FALSE)
- df <- merge(data.frame(year=min(df$year):max(df$year)), df, all = TRUE)
- df[is.na(df$Freq),]$Freq <- 0
- df$total <- sapply(df$year, tokensPerYear)
- df$freq <- df$Freq / df$total
- df$ci <- t(sapply(Map(prop.test, df$Freq, df$total), "[[","conf.int"))
- g <- ggplot(data = df, aes(x = year, y = freq, group=1)) +
- geom_ribbon(aes(ymin=ci[, 1], ymax=ci[, 2]), alpha=.3) +
- geom_point() +
- geom_line() +
+freqPerYear <- function(query, kco = new("KorAPConnection", verbose = TRUE)) {
+ g <- data.frame(year = 2000:2018) %>%
+ cbind(frequencyQuery(kco, query, sprintf("pubDate in %d", .$year))) %>%
+ { . ->> df } %>%
+ ipm() %>%
+ ggplot(aes(year, ipm)) +
+ geom_freq_by_year_ci() +
xlab("TIME") +
- ylab(sprintf("Observed frequency of \u201c%s\u201d", query)) +
- theme(axis.text.x = element_text(angle = 45, hjust = 1))
- print(g)
+ ylab(sprintf("Observed frequency/million of \u201c%s\u201d", query))
+ p <- RKorAPClient::ggplotly(g)
+ print(p)
df
}
#df <- freqPerYear("Car-Bikini")