Make separate examples proper R-style demos
Change-Id: I0ac284cfc1d0c508030c91189f260299680c1485
diff --git a/.Rbuildignore b/.Rbuildignore
index 4ca26d3..a918770 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1,4 +1,3 @@
^.*\.Rproj$
^\.Rproj\.user$
^Readme.md
-^examples/
diff --git a/Readme.md b/Readme.md
index be4a30b..aeef143 100644
--- a/Readme.md
+++ b/Readme.md
@@ -27,9 +27,9 @@
kqo <- corpusQuery(new("KorAPConnection", verbose=TRUE), "Hello world")
fetchAll(kqo)
```
-## Examples
+## Demos
-More elaborate R scripts demonstrating the use of the package can be found in the [examples](examples) folder.
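+More elaborate R scripts demonstrating the use of the package can be found in the [demo](demo) folder. Once the package is installed, they can be run from R with, e.g., `demo("frequenciesOverTime", package = "RKorAPClient")`.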
## Development and License
diff --git a/demo/00Index b/demo/00Index
new file mode 100644
index 0000000..7fe8aff
--- /dev/null
+++ b/demo/00Index
@@ -0,0 +1,5 @@
+frequenciesOverTime Plot frequency of query expressions over time
+frequenciesOverDomains Bar plot (with error bars) of the frequency of query expressions per topic domain
+conditionsOverTime Plot frequency of query expressions over time under different conditions
+alternativesOverTime Plot proportion of alternative spellings/variants over time
+regional Map plot regional frequencies of query expression
diff --git a/examples/simple/alternativesOverTime.R b/demo/alternativesOverTime.R
similarity index 100%
rename from examples/simple/alternativesOverTime.R
rename to demo/alternativesOverTime.R
diff --git a/examples/simple/conditionsOverTime.R b/demo/conditionsOverTime.R
similarity index 95%
rename from examples/simple/conditionsOverTime.R
rename to demo/conditionsOverTime.R
index 12083d9..ff832e0 100644
--- a/examples/simple/conditionsOverTime.R
+++ b/demo/conditionsOverTime.R
@@ -26,7 +26,7 @@
geom_ribbon(aes(ymin=ci[, 1], ymax=ci[, 2], fill=condition, color=condition), alpha=.3, linetype=0) +
xlab("TIME") +
labs(color="Virtual Corpus", fill="Virtual Corpus") +
- ylab(sprintf("Observed frequency of “%s”", query)) +
+ ylab(sprintf("Observed frequency of \u201c%s\u201d", query)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) + scale_x_continuous(breaks=unique(df$year))
print(g)
# print(ggplotly(g, tooltip = c("x", "y")))
diff --git a/examples/geo/data/regions.rds b/demo/data/regions.rds
similarity index 100%
rename from examples/geo/data/regions.rds
rename to demo/data/regions.rds
Binary files differ
diff --git a/examples/simple/frequenciesOverDomains.R b/demo/frequenciesOverDomains.R
old mode 100644
new mode 100755
similarity index 93%
rename from examples/simple/frequenciesOverDomains.R
rename to demo/frequenciesOverDomains.R
index c87a086..4c5c529
--- a/examples/simple/frequenciesOverDomains.R
+++ b/demo/frequenciesOverDomains.R
@@ -21,7 +21,7 @@
g <- ggplot(data = df, mapping = aes(x = Domain, y = freq)) +
geom_col() +
geom_errorbar(aes(ymin=ci[, 1], ymax=ci[, 2]), width=.5, alpha=.5) +
- ylab(sprintf("Observed frequency of “%s”", query)) +
+ ylab(sprintf("Observed frequency of \u201c%s\u201d", query)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(g)
df
diff --git a/demo/frequenciesOverTime.R b/demo/frequenciesOverTime.R
new file mode 100644
index 0000000..475d2eb
--- /dev/null
+++ b/demo/frequenciesOverTime.R
@@ -0,0 +1,47 @@
+#!/usr/bin/env Rscript
+#
+# Plot frequency of query expressions over time
+#
+library(RKorAPClient)
+library(ggplot2)
+
+# Query KorAP for `query`, plot its relative frequency per publication year
+# (with a confidence-interval ribbon) and return the underlying data frame.
+#
+# query: query expression passed to corpusQuery()
+# con:   KorAPConnection used for the query and for the per-year corpus sizes
+#
+# Returns a data frame with the columns year, Freq (number of hits),
+# total (tokens in that year), freq (Freq / total) and ci (two-column matrix
+# holding the prop.test confidence interval bounds).
+freqPerYear <- function(query, con = new("KorAPConnection", verbose = TRUE)) {
+  vc <- "pubDate since 2000 & pubDate until 2018 & textType = /Zeit.*/"
+  q <- corpusQuery(con, query = query, vc=vc)
+  q <- fetchAll(q)
+  # Size (in tokens) of the virtual corpus restricted to a single year.
+  tokensPerYear <- function(year) {
+    return(corpusStats(con, sprintf("%s & pubDate in %s", vc, year))@tokens)
+  }
+  df <- as.data.frame(table(as.numeric(format(q@collectedMatches$pubDate,"%Y")), dnn="year"),
+                      stringsAsFactors = FALSE)
+  # Add rows for years without any hit so that the time axis has no gaps.
+  df <- merge(data.frame(year=min(df$year):max(df$year)), df, all = TRUE)
+  # Index the column, not the rows: `df[rows, ]$Freq <- 0` stops with
+  # "replacement has 1 row, data has 0" when no year is missing.
+  df$Freq[is.na(df$Freq)] <- 0
+  df$total <- sapply(df$year, tokensPerYear)
+  df$freq <- df$Freq / df$total
+  df$ci <- t(sapply(Map(prop.test, df$Freq, df$total), "[[","conf.int"))
+  g <- ggplot(data = df, aes(x = year, y = freq, group=1)) +
+    geom_ribbon(aes(ymin=ci[, 1], ymax=ci[, 2]), alpha=.3) +
+    geom_point() +
+    geom_line() +
+    xlab("TIME") +
+    ylab(sprintf("Observed frequency of \u201c%s\u201d", query)) +
+    theme(axis.text.x = element_text(angle = 45, hjust = 1))
+  print(g)
+  df
+}
+#df <- freqPerYear("Car-Bikini")
+#df <- freqPerYear("[tt/p=ART & opennlp/p=ART] [tt/l=teilweise] [tt/p=NN]")
+df <- freqPerYear("Buschzulage")
+
diff --git a/examples/geo/regional.R b/demo/regional.R
old mode 100644
new mode 100755
similarity index 94%
rename from examples/geo/regional.R
rename to demo/regional.R
index 5eb091a..067e3e2
--- a/examples/geo/regional.R
+++ b/demo/regional.R
@@ -6,7 +6,8 @@
library(plotly)
library(htmlwidgets)
-mapfile <- "examples/geo/data/cache/map-v2.rds"
+devAskNewPage(ask = FALSE)
+mapfile <- "demo/data/cache/map-v2.rds"
fetchAndPrepareMap <- function(map, pick) {
cat("Downloading GADM map data for ", map, "\n")
@@ -34,7 +35,7 @@
map <- fetchMaps(c("DEU_1", "AUT_0", "CHE_0", "LUX_0", "BEL_3", "ITA_1", "LIE_0"), c(0, 0, 0, 0, 34, 17, 0))
geoDistrib <- function(query, kco = new("KorAPConnection", verbose=TRUE)) {
- regions <- readRDS("examples/geo/data/regions.rds")
+ regions <- readRDS("demo/data/regions.rds")
regions$freq <- NA
regions$url <- NA
plot <- NULL
@@ -84,7 +85,7 @@
axis.title.x=element_blank(),
axis.title.y=element_blank()) +
coord_equal(ratio=1.5) +
- labs(title = sprintf("Regional distribution of “%s”", query))
+ labs(title = sprintf("Regional distribution of \u201c%s\u201d", query))
print(regionsPlot)
regionsPlot
}