#!/usr/bin/env Rscript
#
# Visualize frequencies of alternative query terms in relation to other variables
# specified in virtual corpus definitions.
#
6library(RKorAPClient)
7library(vcd)
8library(tibble)
9library(dplyr)
10library(PTXQC)
11
# Derive short display labels from a vector of related strings by removing
# the leading and trailing parts that all of them have in common.
#
# The common prefix is shortened until it ends in a non-alphabetic character,
# and the common suffix is shortened until it starts with one, so that a label
# is never cut in the middle of a word.
#
# data: character vector of strings (e.g. queries or virtual corpus
#       definitions).
# Returns a character vector of the same length as `data`.
queryStringToLabel <- function(data) {
  # Length of the prefix shared by all strings, backed off to the last
  # non-alphabetic character.
  leftCommon <- lcpCount(data)
  while (leftCommon > 0 &&
         grepl("[[:alpha:]]", substring(data[1], leftCommon, leftCommon))) {
    leftCommon <- leftCommon - 1
  }

  # Length of the suffix shared by all strings. The character to inspect is
  # the FIRST character of that suffix, i.e. it has to be located relative to
  # the end of the string, not relative to its start.
  rightCommon <- lcsCount(data)
  firstLength <- nchar(data[1])
  while (rightCommon > 0) {
    suffixStart <- firstLength - rightCommon + 1
    if (!grepl("[[:alpha:]]", substring(data[1], suffixStart, suffixStart))) {
      break
    }
    rightCommon <- rightCommon - 1
  }

  substring(data, leftCommon + 1, nchar(data) - rightCommon)
}
23
# Query the frequencies of alternative query terms under several virtual
# corpus conditions and draw them as a shaded mosaic plot.
#
# NOTE: the name shadows graphics::mosaicplot while this script is loaded.
#
# query: character vector of alternative queries.
# vc:    character vector of virtual corpus definitions (the conditions).
# kco:   KorAPConnection object used to run the queries.
# Returns (invisibly) the frequency query result, extended by the label
# columns `alternative` and `condition`.
mosaicplot <- function(query, vc, kco = new("KorAPConnection", verbose = TRUE)) {
  # Inside mutate(), `query` and `vc` are expected to resolve to the columns
  # of the frequencyQuery() result (one row per query/vc combination because
  # of expand = TRUE), not to the function arguments -- TODO confirm against
  # the RKorAPClient version in use.
  queryResult <- frequencyQuery(
    kco,
    query = query,
    vc = vc,
    expand = TRUE,
    as.alternatives = TRUE
  ) %>%
    mutate(
      alternative = queryStringToLabel(query),
      condition = queryStringToLabel(vc)
    )

  # Contingency table: conditions in rows, alternatives in columns.
  contingency <- xtabs(totalResults ~ condition + alternative, data = queryResult)

  # Optional label rotation, kept from the original for reference:
  # labeling = labeling_border(rot_labels = c(45,0,0,0),
  #                            just_labels = c("left", "center", "center", "right"))
  vcd::mosaic(contingency, shade = TRUE)

  # Hand the data back to the caller instead of leaking it into the global
  # environment; the previous trailing `df` returned stats::df (a function).
  invisible(queryResult)
}
# Demo run: contrast the two mood tags (subj vs. ind, presumably subjunctive
# vs. indicative in the MarMoT morphology layer) across three text domains.
queryResult <- mosaicplot(
  query = c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]"),
  vc = c("textDomain=Wirtschaft", "textDomain=Kultur", "textDomain=Sport")
)
#mosaicplot(c("Asylbewerber", "Asylwerber"), c("pubPlaceKey=DE", "pubPlaceKey=AT"))
#mosaicplot(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]"), c("pubPlaceKey=DE", "pubPlaceKey=AT"))
#mosaicplot(c("Samstag", "Sonnabend"), c("pubPlace=Hamburg", "pubPlace=Berlin"))
#mosaicplot(c("Tomaten", "Paradeiser"), c("pubPlaceKey=DE", "pubPlaceKey=AT"))
#mosaicplot(c("Samstag", "Sonnabend"), c("pubPlace=Hamburg", "pubPlace=Berlin", 'pubPlaceKey=AT'))