Add mosaicplot demo
Change-Id: I43ae73b97ac2f6989ec1111a53302878aca72045
diff --git a/demo/00Index b/demo/00Index
index 7fe8aff..b954748 100644
--- a/demo/00Index
+++ b/demo/00Index
@@ -3,3 +3,4 @@
conditionsOverTime Plot frequency of query expressions over time under different conditions
alternativesOverTime Plot proportion of alternative spellings/variants over time
regional Map plot regional frequencies of query expression
+mosaicplot Visualize frequencies of alternative query terms in relation to other variables
diff --git a/demo/mosaicplot.R b/demo/mosaicplot.R
new file mode 100644
index 0000000..d0d16ae
--- /dev/null
+++ b/demo/mosaicplot.R
@@ -0,0 +1,85 @@
+#!/usr/bin/env Rscript
+#
+# Visualize frequencies of alternative query terms in relation to other variables
+# specified in virtual corpus definitions.
+#
+library(RKorAPClient)
+library(vcd)
+library(tibble)
+library(dplyr)
+library(PTXQC)
+
+# Turn a character vector of query or virtual corpus strings into short labels
+# by removing the prefix and the suffix that all elements have in common.
+#
+# The common prefix/suffix lengths come from PTXQC's lcpCount()/lcsCount().
+# Each length is then shrunk until the affix is bounded by a non-alphabetic
+# character, so that words are never cut in half: "textDomain=Sport" and
+# "textDomain=Kultur" become "Sport" and "Kultur", while "Asylbewerber" and
+# "Asylwerber" are kept in full.
+#
+# data: character vector. Returns a character vector of the same length.
+queryStringToLabel <- function(data) {
+  # With fewer than two distinct strings everything counts as "common" and the
+  # labels would come out empty, so keep the input as it is.
+  if (length(unique(data)) < 2) {
+    return(as.character(data))
+  }
+
+  first <- data[1]
+  isLetterAt <- function(pos) grepl("[[:alpha:]]", substring(first, pos, pos))
+
+  # Back off while the last character of the shared prefix is a letter.
+  leftCommon <- lcpCount(data)
+  while (leftCommon > 0 && isLetterAt(leftCommon)) {
+    leftCommon <- leftCommon - 1
+  }
+
+  # Back off while the first character of the shared suffix is a letter. The
+  # suffix is counted from the end of the string, so that character sits at
+  # position nchar(first) - rightCommon + 1.
+  rightCommon <- lcsCount(data)
+  while (rightCommon > 0 && isLetterAt(nchar(first) - rightCommon + 1)) {
+    rightCommon <- rightCommon - 1
+  }
+
+  substring(data, leftCommon + 1, nchar(data) - rightCommon)
+}
+
+# Query the frequencies of alternative expressions under different conditions
+# and draw them as a shaded mosaic plot.
+#
+# query: character vector of alternative query expressions
+# vc:    character vector of virtual corpus definitions (the conditions)
+# kco:   KorAPConnection used for the frequency query
+#
+# Returns, invisibly, the frequencyQuery() result extended by the label columns
+# `alternative` and `condition`. Nothing is assigned in the global environment.
+#
+# NOTE: once defined, this function masks graphics::mosaicplot().
+mosaicplot <- function(query, vc, kco = new("KorAPConnection", verbose = TRUE)) {
+  # Inside mutate(), `query` and `vc` are expected to resolve to the columns of
+  # the frequencyQuery() result rather than to the function arguments.
+  queryResult <- frequencyQuery(
+    kco,
+    query = query,
+    vc = vc,
+    expand = TRUE,
+    as.alternatives = TRUE
+  ) %>%
+    mutate(alternative = queryStringToLabel(query), condition = queryStringToLabel(vc))
+
+  # For long labels, consider passing e.g.
+  # labeling = labeling_border(rot_labels = c(45,0,0,0), just_labels = c("left", "center", "center", "right"))
+  xtabs(totalResults ~ condition + alternative, queryResult) %>%
+    vcd::mosaic(shade = TRUE)
+
+  invisible(queryResult)
+}
+queryResult <- mosaicplot(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]"), c("textDomain=Wirtschaft", "textDomain=Kultur", "textDomain=Sport"))
+#mosaicplot(c("Asylbewerber", "Asylwerber"), c("pubPlaceKey=DE", "pubPlaceKey=AT"))
+#mosaicplot(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]"), c("pubPlaceKey=DE", "pubPlaceKey=AT"))
+#mosaicplot(c("Samstag", "Sonnabend"), c("pubPlace=Hamburg", "pubPlace=Berlin"))
+#mosaicplot(c("Tomaten", "Paradeiser"), c("pubPlaceKey=DE", "pubPlaceKey=AT"))
+#mosaicplot(c("Samstag", "Sonnabend"), c("pubPlace=Hamburg", "pubPlace=Berlin", 'pubPlaceKey=AT'))
+