Add some demos for comparisons by country of publication
Change-Id: I609285947eead8b85877e0f507c7efe00791508b
diff --git a/demo/00Index b/demo/00Index
index ad41293..d2148bb 100644
--- a/demo/00Index
+++ b/demo/00Index
@@ -1,12 +1,15 @@
-frequenciesOverTime Plot frequency of query expressions over time
-frequenciesOverDomains Box plot frequency of query expressions per topic domain
-conditionsOverTime Plot frequency of query expressions over time under different conditions
-alternativesOverTime Plot proportion of alternative spellings/variants over time
-regional Map plot regional frequencies of query expression
-mosaicplot Visualize frequencies of alternative query terms in relation to other variables
-shiny-frequency-curves Web application that plots frequency curves with highcharts and shiny
-writtenVsSpoken Compare frequencies in written vs. spoken corpora
-displayKwics Display query results as KWICs via html
-light-verb-construction-ca Collocation analysis to identify light verb constructions matching the pattern "in NN setzen", with result rendered as HTML DataTable
-highcharter-example Visualize frequencies of optionally alternative terms over time with interactive HTML and JavaScript elements using the package highcharter as wrapper for Highcharts
-recursiveCA Show result dataframe of recursvie collocation analysis as pretty HTML tableb y Knitr via Markdown.
+frequenciesOverTime Plot frequency of query expressions over time
+frequenciesOverDomains Box plot frequency of query expressions per topic domain
+conditionsOverTime Plot frequency of query expressions over time under different conditions
+alternativesOverTime Plot proportion of alternative spellings/variants over time
+regional Map plot regional frequencies of query expression
+mosaicplot Visualize frequencies of alternative query terms in relation to other variables
+shiny-frequency-curves Web application that plots frequency curves with highcharts and shiny
+writtenVsSpoken Compare frequencies in written vs. spoken corpora
+displayKwics Display query results as KWICs via html
+light-verb-construction-ca Collocation analysis to identify light verb constructions matching the pattern "in NN setzen", with result rendered as HTML DataTable
+highcharter-example Visualize frequencies of optionally alternative terms over time with interactive HTML and JavaScript elements using the package highcharter as wrapper for Highcharts
+recursiveCA Show result dataframe of recursive collocation analysis as pretty HTML table by Knitr via Markdown.
+frequency_by_country_ggplot Plot frequencies depending on country of publication using ggplot2.
+frequency_by_country_highcharts Plot frequencies depending on country of publication using highcharter.
+collocation_score_by_country Plot collocation scores depending on country of publication using ggplot2.
diff --git a/demo/collocation_score_by_country.R b/demo/collocation_score_by_country.R
new file mode 100644
index 0000000..9d6dc69
--- /dev/null
+++ b/demo/collocation_score_by_country.R
@@ -0,0 +1,21 @@
+library(RKorAPClient)
+library(tidyverse)
+# Demo: dodged bar chart of logDice collocation scores of NODE with each collocate, per country of publication.
+NODE <- '[tt/l=Ei]' # see https://github.com/KorAP/RKorAPClient/issues/8
+COLLOCATES <- c('[tt/l=pellen]', '[tt/l=sch\u00e4len]') # Demos may only contain ASCII characters
+COUNTRIES <- c("AT", "BE", "CH", "DE", "IT", "LU") # country codes substituted into VC below
+VC <- "textType=/Zeit.*/ & pubPlaceKey=%s" # virtual corpus template; %s is filled with a country code
+# Build one row per (node, collocate, country) combination and derive its virtual corpus string.
+df <- expand_grid(node = NODE, collocate = COLLOCATES, country = COUNTRIES) %>%
+ mutate(vc = sprintf(VC, country))
+
+g <- new("KorAPConnection", verbose=TRUE) %>%
+ collocationScoreQuery(df$node, df$collocate, df$vc, smoothingConstant = 0) %>%
+ bind_cols(df %>% select(country)) %>% # re-attach country; assumes result rows are in the same order as df -- TODO confirm
+ ggplot(aes(x = country, y = logDice, label = sprintf("(%d)", O), fill = collocate)) + # column O is printed as the bar label
+ geom_col(position="dodge") + # one bar per collocate within each country
+ geom_text(position = position_dodge(width = 0.9), vjust=1.5) + # dodge the labels alongside the bars
+ labs(title = sprintf("Collocates of '%s' by country of publication.", NODE),
+ caption = "(absolute cooccurrence frequencies in parentheses)")
+
+print(g)
diff --git a/demo/frequency_by_country_ggplot.R b/demo/frequency_by_country_ggplot.R
new file mode 100644
index 0000000..514e8cf
--- /dev/null
+++ b/demo/frequency_by_country_ggplot.R
@@ -0,0 +1,18 @@
+library(RKorAPClient)
+library(ggplot2)
+# Demo: bar chart with error bars of the relative frequency (ipm) of one query per country of publication.
+query <- "[tt/l=verunfallen]" # search TreeTagger lemma annotations
+countries <- c("AT", "BE", "CH", "DE", "IT", "LU") # country codes substituted into the virtual corpus definitions
+
+vcs <- sprintf("textType=/Zeit.*/ & pubPlaceKey=%s", countries) # limit virtual corpus to newspapers and magazines
+
+g <- new("KorAPConnection", verbose=TRUE) %>%
+ frequencyQuery(query, vc=vcs) %>% # same query against every virtual corpus in vcs
+ ipm() %>%
+ mutate(Land = countries) %>% # Land (German for country); assumes result rows follow the order of vcs -- TODO confirm
+ ggplot(aes(x = Land, y = ipm, ymin = conf.low, ymax = conf.high)) + # conf.low and conf.high give the error bar limits
+ geom_col() +
+ geom_errorbar(width = .3, alpha = .3) +
+ ggtitle(sprintf("Relative frequency of '%s'", query))
+
+print(g)
diff --git a/demo/frequency_by_country_highcharts.R b/demo/frequency_by_country_highcharts.R
new file mode 100644
index 0000000..c04ea7d
--- /dev/null
+++ b/demo/frequency_by_country_highcharts.R
@@ -0,0 +1,46 @@
+library(RKorAPClient)
+library(highcharter)
+library(tidyverse)
+# Demo: interactive column chart with error bars of relative frequencies (ipm) per query and country of publication.
+QUERIES <- c("[tt/l=verunfallen]", "Sonnabend") # the first query searches TreeTagger lemma annotations; the second has no annotation prefix
+COUNTRIES <- c("AT", "BE", "CH", "DE", "IT", "LU") # country codes substituted into the virtual corpus definitions
+
+VCS <- sprintf("textType=/Zeit.*/ & pubPlaceKey=%s", COUNTRIES) # limit virtual corpus to newspapers and magazines
+
+
+df <- new("KorAPConnection", verbose=TRUE) %>%
+ frequencyQuery(QUERIES, vc=VCS) %>%
+ ipm() %>%
+ mutate(country = rep(COUNTRIES, length(QUERIES))) # assumes rows come query by query, countries in COUNTRIES order within each -- TODO confirm
+
+# Add one column series plus one errorbar series per distinct country in data to the chart hc; returns the extended chart.
+hc_add_series_with_errorbar <- function(hc, data) {
+ for (g in unique(data$country)) { # g takes each distinct country value in turn
+ df <- data %>% filter(country == g) # rows of the current country only; this df is local to the function
+ hc <-
+ hc %>%
+ hc_add_series(type = "column", data = df, hcaes(group = country, x = query, y = ipm)) %>% # columns: ipm per query
+ hc_add_series(type = "errorbar",
+ data = df,
+ stemWidth = 1,
+ whiskerWidth = 1,
+ whiskerLength = 10, hcaes(low = conf.low, high = conf.high) # error bar limits come from conf.low and conf.high
+ )
+ }
+ hc # the chart with all series added is the return value
+}
+
+hc <- highchart() %>% # assemble the chart: theme, axes, data series, click handler, legend and texts
+ hc_add_theme(hc_theme_smpl()) %>%
+ hc_yAxis(title=list(text = "IPM"), type = "logarithmic") %>% # y axis uses a logarithmic scale
+ hc_add_series_with_errorbar(data = df) %>% # column and errorbar series for every country in df
+ hc_add_onclick_korap_search() %>% # presumably wires the click-to-search behaviour described in the caption -- TODO confirm
+ hc_xAxis(categories = QUERIES) %>% # the query strings are the x axis categories
+ hc_legend(enabled = TRUE) %>%
+ hc_title(text = "Relative frequency by country of publication") %>%
+ hc_caption(text= "Click on a bar to launch the respectively corresponding KorAP query.")
+
+
+print(hc)
+
+