Adapt demos to vectorized methods
Change-Id: Ib7e8f24dd207e7464e08f03d3da702812235ff0a
diff --git a/demo/alternativesOverTime.R b/demo/alternativesOverTime.R
index 443fbe4..63a058c 100755
--- a/demo/alternativesOverTime.R
+++ b/demo/alternativesOverTime.R
@@ -15,12 +15,12 @@
setNames(alternatives) %>%
mutate(year = years) %>%
pivot_longer(cols = alternatives) %>%
- rowwise %>% mutate(value = corpusQuery(kco, query=name, vc=paste(vc, year))@totalResults) %>%
+ mutate(value = corpusQuery(kco, query=name, vc=paste(vc, year))$totalResults) %>%
pivot_wider(id_cols= year, names_from = name) %>%
mutate(total = rowSums(.[alternatives])) %>%
pivot_longer(cols = alternatives) %>%
mutate(share = value / total) %>%
- rowwise %>% mutate(url = corpusQuery(kco, query=name, vc=paste(vc, year))@webUIRequestUrl) %>%
+ mutate(url = corpusQuery(kco, query=name, vc=paste(vc, year))$webUIRequestUrl) %>%
rename(Variant = name)
df$ci <- t(sapply(Map(prop.test, df$value, df$total), "[[","conf.int"))
g <- ggplot(data = df, mapping = aes(x = year, y = share, color=Variant, fill=Variant)) +
diff --git a/demo/conditionsOverTime.R b/demo/conditionsOverTime.R
index ff832e0..c06a056 100644
--- a/demo/conditionsOverTime.R
+++ b/demo/conditionsOverTime.R
@@ -10,29 +10,26 @@
#library(plotly)
+# Plot, per year and per virtual-corpus condition, the frequency of `query`
+# as reported by frequencyQuery() (columns f, conf.low and conf.high are used).
+# query: query string passed to frequencyQuery() and shown in the y-axis label;
+# conditions: virtual-corpus definitions, each combined with "& pubDate in <year>";
+# years: publication years (formatted with %d); kco: connection used for the requests.
conditionsOverTime <- function(query, conditions, years, kco = new("KorAPConnection", verbose = TRUE)) {
- df = data.frame(year=years)
- for (c in conditions) {
- df[c] <- sapply(df$year, function(y)
- corpusQuery(kco, query, vc=paste(c, "& pubDate in", y))@totalResults)
-
- }
- df <- melt(df, measure.vars = conditions, value.name = "afreq", variable.name = "condition")
- df$total <- apply(df[,c('year','condition')], 1, function(x) corpusStats(kco, vc=paste(x[2], "& pubDate in", x[1]))@tokens )
- df$ci <- t(sapply(Map(prop.test, df$afreq, df$total), "[[","conf.int"))
- df$freq <- df$afreq / df$total
- g <- ggplot(data = df, mapping = aes(x = year, y = freq, fill=condition, color=condition)) +
+ g <- expand_grid(condition = conditions, year = years) %>%
+ # Use the `query` argument (not a fixed literal) so the data match the y-axis label.
+ cbind(frequencyQuery(kco, query, sprintf("%s & pubDate in %d", .$condition, .$year))) %>%
+ ggplot(aes(x = year, y = f, fill=condition, color=condition)) +
geom_point() +
geom_line() +
- geom_ribbon(aes(ymin=ci[, 1], ymax=ci[, 2], fill=condition, color=condition), alpha=.3, linetype=0) +
+ geom_ribbon(aes(ymin=conf.low, ymax=conf.high, fill=condition, color=condition), alpha=.3, linetype=0) +
xlab("TIME") +
labs(color="Virtual Corpus", fill="Virtual Corpus") +
ylab(sprintf("Observed frequency of \u201c%s\u201d", query)) +
- theme(axis.text.x = element_text(angle = 45, hjust = 1)) + scale_x_continuous(breaks=unique(df$year))
+ # No local `df` exists any more; the plotted years are exactly `years`, so take the breaks from there.
+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + scale_x_continuous(breaks=unique(years))
print(g)
# print(ggplotly(g, tooltip = c("x", "y")))
-
- df
}
-
-df <- conditionsOverTime("[tt/l=Heuschrecke]", c("textClass = /natur.*/", "textClass=/politik.*/", "textClass=/wirtschaft.*/"), (2002:2018))
#df <- conditionsOverTime("wegen dem [tt/p=NN]", c("textClass = /sport.*/", "textClass=/politik.*/", "textClass=/kultur.*/"), (1995:2005))
+
+conditionsOverTime("[tt/l=Heuschrecke]", c("textClass = /natur.*/", "textClass=/politik.*/", "textClass=/wirtschaft.*/"), (2002:2018))