Add some demos for comparisons by country of publication Change-Id: I609285947eead8b85877e0f507c7efe00791508b

commit: 2819fc7ce7204b05fca74b492a05c78ae78a60e2 [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Thu Jun 30 12:26:27 2022 +0200
committer: Marc Kupietz <kupietz@ids-mannheim.de> Thu Jun 30 16:23:01 2022 +0200
tree: 2116c1f6707411518f9ad51bbeffa324fd3737aa
parent: 12d1074110dccd90401017160597561ba03b4cfa [diff]
diff --git a/demo/00Index b/demo/00Index
index ad41293..d2148bb 100644
--- a/demo/00Index
+++ b/demo/00Index

@@ -1,12 +1,15 @@
-frequenciesOverTime         Plot frequency of query expressions over time
-frequenciesOverDomains      Box plot frequency of query expressions per topic domain
-conditionsOverTime          Plot frequency of query expressions over time under different conditions
-alternativesOverTime        Plot proportion of alternative spellings/variants over time
-regional                    Map plot regional frequencies of query expression
-mosaicplot                  Visualize frequencies of alternative query terms in relation to other variables
-shiny-frequency-curves      Web application that plots frequency curves with highcharts and shiny
-writtenVsSpoken             Compare frequencies in written vs. spoken corpora
-displayKwics                Display query results as KWICs via html
-light-verb-construction-ca  Collocation analysis to identify light verb constructions matching the pattern "in NN setzen", with result rendered as HTML DataTable
-highcharter-example         Visualize frequencies of optionally alternative terms over time with interactive HTML and JavaScript elements using the package highcharter as wrapper for Highcharts
-recursiveCA                 Show result dataframe of recursvie collocation analysis as pretty HTML tableb y Knitr via Markdown.
+frequenciesOverTime           Plot frequency of query expressions over time
+frequenciesOverDomains          Box plot frequency of query expressions per topic domain
+conditionsOverTime              Plot frequency of query expressions over time under different conditions
+alternativesOverTime            Plot proportion of alternative spellings/variants over time
+regional                        Map plot regional frequencies of query expression
+mosaicplot                      Visualize frequencies of alternative query terms in relation to other variables
+shiny-frequency-curves          Web application that plots frequency curves with highcharts and shiny
+writtenVsSpoken                 Compare frequencies in written vs. spoken corpora
+displayKwics                    Display query results as KWICs via html
+light-verb-construction-ca      Collocation analysis to identify light verb constructions matching the pattern "in NN setzen", with result rendered as HTML DataTable
+highcharter-example             Visualize frequencies of optionally alternative terms over time with interactive HTML and JavaScript elements using the package highcharter as wrapper for Highcharts
+recursiveCA                     Show result dataframe of recursive collocation analysis as pretty HTML table by Knitr via Markdown.
+frequency_by_country_ggplot     Plot frequencies depending on country of publication using ggplot2.
+frequency_by_country_highcharts Plot frequencies depending on country of publication using highcharter.
+collocation_score_by_country    Plot collocation scores depending on country of publication using ggplot2.

diff --git a/demo/collocation_score_by_country.R b/demo/collocation_score_by_country.R
new file mode 100644
index 0000000..9d6dc69
--- /dev/null
+++ b/demo/collocation_score_by_country.R

@@ -0,0 +1,21 @@
+library(RKorAPClient)
+library(tidyverse)
+
+# Node and collocate queries; [tt/l=...] addresses TreeTagger lemma annotations.
+NODE <- '[tt/l=Ei]' # see https://github.com/KorAP/RKorAPClient/issues/8
+COLLOCATES <- c('[tt/l=pellen]', '[tt/l=sch\u00e4len]') # Demos may only contain ASCII characters
+COUNTRIES <- c("AT", "BE", "CH", "DE", "IT", "LU")
+# Virtual corpus template; %s is replaced by one country code per row below.
+VC <- "textType=/Zeit.*/ & pubPlaceKey=%s"
+
+# One row per node/collocate/country combination, each with its virtual corpus.
+df <- mutate(
+  expand_grid(node = NODE, collocate = COLLOCATES, country = COUNTRIES),
+  vc = sprintf(VC, country)
+)
+
+# Query the scores, re-attach the country column and draw dodged bars of
+# logDice per country; each bar is labelled with the value of column O.
+g <- ggplot(
+  data = new("KorAPConnection", verbose = TRUE) %>%
+    collocationScoreQuery(df$node, df$collocate, df$vc, smoothingConstant = 0) %>%
+    bind_cols(select(df, country)),
+  mapping = aes(x = country, y = logDice, label = sprintf("(%d)", O), fill = collocate)
+) +
+  geom_col(position = "dodge") +
+  geom_text(position = position_dodge(width = 0.9), vjust = 1.5) +
+  labs(
+    title = sprintf("Collocates of '%s' by country of publication.", NODE),
+    caption = "(absolute cooccurrence frequencies in parentheses)"
+  )
+
+print(g)

diff --git a/demo/frequency_by_country_ggplot.R b/demo/frequency_by_country_ggplot.R
new file mode 100644
index 0000000..514e8cf
--- /dev/null
+++ b/demo/frequency_by_country_ggplot.R

@@ -0,0 +1,18 @@
+library(RKorAPClient)
+library(ggplot2)
+
+# Lemma query against the TreeTagger annotation layer.
+query <- "[tt/l=verunfallen]"
+countries <- c("AT", "BE", "CH", "DE", "IT", "LU")
+
+# One virtual corpus per country, limited to newspapers and magazines.
+vcs <- sprintf("textType=/Zeit.*/ & pubPlaceKey=%s", countries)
+
+# Run one frequency query per virtual corpus, convert to ipm, tag each row
+# with its country code and plot bars with confidence-interval error bars.
+g <- ggplot(
+  data = new("KorAPConnection", verbose = TRUE) %>%
+    frequencyQuery(query, vc = vcs) %>%
+    ipm() %>%
+    mutate(Land = countries),
+  mapping = aes(x = Land, y = ipm, ymin = conf.low, ymax = conf.high)
+) +
+  geom_col() +
+  geom_errorbar(width = .3, alpha = .3) +
+  ggtitle(sprintf("Relative frequency of '%s'", query))
+
+print(g)

diff --git a/demo/frequency_by_country_highcharts.R b/demo/frequency_by_country_highcharts.R
new file mode 100644
index 0000000..c04ea7d
--- /dev/null
+++ b/demo/frequency_by_country_highcharts.R

@@ -0,0 +1,46 @@
+library(RKorAPClient)
+library(highcharter)
+library(tidyverse)
+
+# Query expressions: a TreeTagger lemma query and a plain term.
+QUERIES <- c("[tt/l=verunfallen]", "Sonnabend")
+COUNTRIES <- c("AT", "BE", "CH", "DE", "IT", "LU")
+
+# One virtual corpus per country, limited to newspapers and magazines.
+VCS <- sprintf("textType=/Zeit.*/ & pubPlaceKey=%s", COUNTRIES)
+
+
+# Frequencies (as ipm) for every query in every virtual corpus.
+# NOTE(review): the country column assumes the result rows are grouped by
+# query, with the virtual corpora in VCS order inside each group - confirm
+# against frequencyQuery's row ordering.
+df <- mutate(
+  new("KorAPConnection", verbose = TRUE) %>%
+    frequencyQuery(QUERIES, vc = VCS) %>%
+    ipm(),
+  country = rep(COUNTRIES, times = length(QUERIES))
+)
+
+
+# Add, for every distinct value of data$country, a column series (x = query,
+# y = ipm) followed by an error bar series (conf.low to conf.high) to a chart.
+#
+# hc:   chart object to extend.
+# data: data frame with the columns country, query, ipm, conf.low, conf.high.
+#
+# Returns the chart with all series added; unchanged if data has no rows.
+hc_add_series_with_errorbar <- function(hc, data) {
+  add_country_series <- function(chart, country_key) {
+    rows <- filter(data, country == country_key)
+    with_columns <- hc_add_series(
+      chart,
+      type = "column",
+      data = rows,
+      hcaes(group = country, x = query, y = ipm)
+    )
+    hc_add_series(
+      with_columns,
+      type = "errorbar",
+      data = rows,
+      stemWidth = 1,
+      whiskerWidth = 1,
+      whiskerLength = 10,
+      hcaes(low = conf.low, high = conf.high)
+    )
+  }
+  # Fold over the countries in order of first appearance, starting from hc.
+  Reduce(add_country_series, unique(data$country), hc)
+}
+
+# Assemble the chart step by step: theme, logarithmic IPM axis, one column and
+# error bar series per country, click handler, categories, legend and texts.
+hc <- highchart()
+hc <- hc_add_theme(hc, hc_theme_smpl())
+hc <- hc_yAxis(hc, title = list(text = "IPM"), type = "logarithmic")
+hc <- hc_add_series_with_errorbar(hc, data = df)
+hc <- hc_add_onclick_korap_search(hc)
+hc <- hc_xAxis(hc, categories = QUERIES)
+hc <- hc_legend(hc, enabled = TRUE)
+hc <- hc_title(hc, text = "Relative frequency by country of publication")
+hc <- hc_caption(hc, text = "Click on a bar to launch the respectively corresponding KorAP query.")
+
+
+print(hc)
+
+
commit	2819fc7ce7204b05fca74b492a05c78ae78a60e2	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Thu Jun 30 12:26:27 2022 +0200
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Thu Jun 30 16:23:01 2022 +0200
tree	2116c1f6707411518f9ad51bbeffa324fd3737aa
parent	12d1074110dccd90401017160597561ba03b4cfa [diff]