blob: 94541f95d108f9ea1b8c1a72857db9d4f58e7f84 [file] [log] [blame]
Marc Kupietz6ac65072023-11-17 20:31:39 +01001library(shiny)
2library(shinythemes)
3library(highcharter)
4library(RKorAPClient)
5library(tidyverse)
6library(idsThemeR)
7
Marc Kupietzf6bb6cb2023-11-18 17:17:50 +01008corpus=c("", "referTo ratskorpus-2023-1", "referTo drukola.20180909.1b_words")
9
10vcFromString <- function(string) {
11 string %>%
12 str_split_1(" *[;] *") %>%
13 str_replace("^(.+)$", "(\\1) & ") %>%
14 str_replace_all(" +", " ")
15}
16
Marc Kupietze52fef32023-11-22 20:33:24 +010017hc_add_log_linear_toggle <- function(hc, index=50) {
18 hc_add_series(hc, name="[toggle log/linear]", legendIndex=index, visible=TRUE, type="spline", color="white") %>%
19
20 hc_plotOptions(spline = list(
21 events = list(legendItemClick = JS("
22 function() {
23 var conall = $(this.chart.container).parents('.hc-link-legend').find('div.highchart');
24 for(var i = 0; i < conall.length; i++) {
25 var hc = $(conall[i]).highcharts();
26 hc.yAxis[0].update({type: hc.yAxis[0].options['type']=='logarithmic' ? 'linear' : 'logarithmic'});
27 }
28 }
29 "))
30 ))
31}
32
33
Marc Kupietz6ac65072023-11-17 20:31:39 +010034ui <- fluidPage(
35
36 theme = shinytheme("paper"),
Marc Kupietzf6bb6cb2023-11-18 17:17:50 +010037 fluidRow(column(width = 12, textInput("cq", "Corpus definitions", paste0(corpus, collapse = ";"), width="100%"))),
Marc Kupietz6ac65072023-11-17 20:31:39 +010038 fluidRow(
39 column(width = 6, highchartOutput("country")),
40 column(width = 6, highchartOutput("domain")),
41 column(width = 6, highchartOutput("decade")),
42 column(width = 6, highchartOutput("texttype")),
43 ) %>% tagAppendAttributes(class="hc-link-legend")
44
45)
46
47server <- function(input, output, session) {
48
Marc Kupietzf6bb6cb2023-11-18 17:17:50 +010049# corpus <- str_split(input$corpus, ",")
50# corpus <- corpus %>% str_replace("^(.+)$", "\\1 & ")
51
Marc Kupietz6ac65072023-11-17 20:31:39 +010052 observe({
53 query <- parseQueryString(session$clientData$url_search)
54 if (!is.null(query[['cq']])) {
Marc Kupietzf6bb6cb2023-11-18 17:17:50 +010055 corpus = as.vector(unlist(query))
56 updateTextInput(session, "cq", value = corpus)
Marc Kupietz6ac65072023-11-17 20:31:39 +010057 }
58 })
59
60 sharelegend = JS('function(event){
61 var vis = this.visible;
62 var conall = $(this.chart.container).parents(".hc-link-legend").find("div.highchart");
63 for(var i = 0; i < conall.length; i++){
64 var hc = $(conall[i]).highcharts();
65 var series = hc.series[this.index];
66 if(series){
67 if(vis){
68 series.hide();
69 } else{
70 series.show();
71 }
72 }
73 }
74 return false;
75 }')
76
Marc Kupietz6ac65072023-11-17 20:31:39 +010077 kco <- new("KorAPConnection", verbose=TRUE)
78 highchart <- function(...) {
79 highcharter::highchart() %>%
80 hc_add_theme(hc_theme_ids_light()) %>%
81 hc_add_onclick_korap_search() %>%
Marc Kupietzf6bb6cb2023-11-18 17:17:50 +010082 hc_yAxis(type = "logarithmic") %>%
83 hc_legend(enabled=F) %>%
Marc Kupietze52fef32023-11-22 20:33:24 +010084 hc_plotOptions(series = list(events = list(legendItemClick = sharelegend))) %>%
85 hc_add_log_linear_toggle()
Marc Kupietz6ac65072023-11-17 20:31:39 +010086 }
87
88 prettifyCorpusNames <- function(df) {
89 rownames(df) = NULL
Marc Kupietzf6bb6cb2023-11-18 17:17:50 +010090
Marc Kupietz6ac65072023-11-17 20:31:39 +010091 df %>%
Marc Kupietzf6bb6cb2023-11-18 17:17:50 +010092 mutate(corpus = corpus %>% str_replace("referTo *", "") %>% str_replace(" *& *$", "") %>%
93 str_replace_all("[)()]", "") %>%
94 str_replace("^ *$", "DeReKo-KorAP"))
Marc Kupietz6ac65072023-11-17 20:31:39 +010095
96 }
97
Rainer Perkuhn91313852023-12-19 10:52:37 +010098 pureCorpus <- corpus %>% str_replace(" *& *$", "")
99 dfPureCorpus <- corpusStats(kco, pureCorpus)
100 dfPureCorpus$corpus <- dfPureCorpus$vc
101 dfPureCorpus <- prettifyCorpusNames(dfPureCorpus)
Marc Kupietz6ac65072023-11-17 20:31:39 +0100102
103 output$country <- renderHighchart({
Marc Kupietzf6bb6cb2023-11-18 17:17:50 +0100104 corpus <- vcFromString(input$cq)
Marc Kupietz6ac65072023-11-17 20:31:39 +0100105
106 countries <- c("DE", "AT", "CH", "IT", "BE", "LU") %>% sort()
107
108 df <- expand_grid(corpus=corpus, country=countries) %>%
109 mutate(vc = sprintf("%spubPlaceKey=%s", corpus, country)) %>%
110 prettifyCorpusNames() %>%
111 bind_cols(corpusStats(kco, .$vc) %>% select(-vc))
112
Rainer Perkuhn91313852023-12-19 10:52:37 +0100113 dfAssigned <- aggregate(tokens ~ corpus, data=df, sum)
114 dfAssigned$country <- "NE/NA"
115 dfAssigned <- merge(dfAssigned, dfPureCorpus, by="corpus")
116 dfAssigned$tokens <- dfAssigned$tokens.y - dfAssigned$tokens.x
117 df <- bind_rows(df, dfAssigned)
118 df <- df[order(df$corpus),]
119
Marc Kupietz6ac65072023-11-17 20:31:39 +0100120 highchart() %>%
121 hc_add_series(type = "column", data = df, hcaes(x=country, y=tokens, group=corpus)) %>%
122 hc_xAxis(categories = df$country) %>%
Marc Kupietz6ac65072023-11-17 20:31:39 +0100123 hc_legend(enabled=T) %>%
124 hc_title(text="Land")
Marc Kupietz6ac65072023-11-17 20:31:39 +0100125 })
126
127 output$domain <- renderHighchart({
Marc Kupietzf6bb6cb2023-11-18 17:17:50 +0100128 corpus <- vcFromString(input$cq)
129
Marc Kupietz6ac65072023-11-17 20:31:39 +0100130 topics <-
131 c(
132 "freizeit-unterhaltung",
133 "gesundheit-ernaehrung",
134 "kultur",
135 "politik",
136 "sport",
137 "staat-gesellschaft",
138 "technik-industrie",
139 "wissenschaft",
140 "wirtschaft-finanzen",
141 "natur-umwelt",
142 "fiktion"
143 )
144
Rainer Perkuhn91313852023-12-19 10:52:37 +0100145 index <- 1
146 NASTRING <- paste0("textClass != ", topics[index])
147 while (index < length(topics)) {
148 index <-index+1
149 NASTRING <- paste0(NASTRING, " & textClass != ", topics[index])
150 }
151
Marc Kupietz6ac65072023-11-17 20:31:39 +0100152 df <- expand_grid(corpus=corpus, domain=topics) %>%
153 mutate(vc = sprintf("%stextClass=%s", corpus, domain)) %>%
154 bind_cols(corpusStats(kco, .$vc)%>% select(-vc)) %>%
155 prettifyCorpusNames()
156
Rainer Perkuhn91313852023-12-19 10:52:37 +0100157 dfNotAssigned <-
158 corpusStats(kco, vc=sprintf("%s%s", corpus, NASTRING)) %>%
159 prettifyCorpusNames()
160
161 dfNotAssigned$domain <- "NE/NA"
162
163 df <- bind_rows(df, dfNotAssigned)
164 df <- df[order(df$corpus),]
165
Marc Kupietz6ac65072023-11-17 20:31:39 +0100166 highchart() %>%
167 hc_add_series(type = "bar", data = df, hcaes(domain, tokens, group=corpus)) %>%
Rainer Perkuhn9b9b2002023-12-19 10:56:43 +0100168 hc_xAxis(categories = df$domain %>% tools::toTitleCase() %>% str_replace_all("ae", "ä") )%>%
Marc Kupietz6ac65072023-11-17 20:31:39 +0100169 hc_title(text="Thema")
170
171 })
172
173 output$decade <- renderHighchart({
Marc Kupietzf6bb6cb2023-11-18 17:17:50 +0100174 corpus <- vcFromString(input$cq)
Marc Kupietz6ac65072023-11-17 20:31:39 +0100175 decades <-
176 c(1951, 1961, 1971, 1981, 1991, 2001, 2011, 2021)
177 decade_labels <- function(start_year) {
178 sprintf("%d-%d", start_year, start_year+9)
179 }
180
181 df <- expand_grid(corpus=corpus, decade=decades) %>%
182 mutate(vc = sprintf("%spubDate since %d & pubDate until %d", corpus, decade, decade+9)) %>%
183 bind_cols(corpusStats(kco, .$vc) %>% select(-vc)) %>%
184 mutate(decade = decade_labels(decade)) %>%
185 prettifyCorpusNames()
186
Rainer Perkuhn91313852023-12-19 10:52:37 +0100187 dfAssigned <- aggregate(tokens ~ corpus, data=df, sum)
188 dfAssigned$decade <- "NE/NA"
189 dfAssigned <- merge(dfAssigned, dfPureCorpus, by="corpus")
190 dfAssigned$tokens <- dfAssigned$tokens.y - dfAssigned$tokens.x
191 df <- bind_rows(df, dfAssigned)
192 df <- df[order(df$corpus),]
193
Marc Kupietz6ac65072023-11-17 20:31:39 +0100194 highchart() %>%
195 hc_add_series(type = "bar", data = df, hcaes(decade, tokens, group=corpus)) %>%
196 hc_xAxis(categories = df$decade )%>%
Marc Kupietz6ac65072023-11-17 20:31:39 +0100197 hc_title(text="Dekade")
198 })
199
200 output$texttype <- renderHighchart({
Marc Kupietzf6bb6cb2023-11-18 17:17:50 +0100201 corpus <- vcFromString(input$cq)
202
Marc Kupietz6ac65072023-11-17 20:31:39 +0100203 texttypes <-
Rainer Perkuhn89f71382023-12-06 15:19:15 +0100204 c("/[^:]*[Zz]eitung.*/", "/(Zeitschrift|Magazin).*/", "/Agenturmeldung.*/", "/Enzyklopädie.*/", "/.*Diskussion.*/",
Rainer Perkuhn9dd0da82023-12-19 10:36:14 +0100205 "/.*[Rr]oman([^z].*)?/", "/Tagebuch.*/", "/.*Sachbuch.*/", "/Protokoll.*/", "/Chat/",
Rainer Perkuhnd143cd02023-12-13 16:19:27 +0100206 "/.*[Bb]ericht.*/", "/.*Abhandlung.*/")
Marc Kupietz6ac65072023-11-17 20:31:39 +0100207
208 df <- expand_grid(corpus=corpus, texttype=texttypes) %>%
209 mutate(vc = sprintf("%stextType=%s", corpus, texttype)) %>%
210 bind_cols(corpusStats(kco, .$vc) %>% select(-vc)) %>%
211 prettifyCorpusNames()
212
Rainer Perkuhn91313852023-12-19 10:52:37 +0100213 dfAssigned <- aggregate(tokens ~ corpus, data=df, sum)
214 dfAssigned$texttype <- "NE|NA"
215 dfAssigned <- merge(dfAssigned, dfPureCorpus, by="corpus")
216 print (dfAssigned)
217
218 dfAssigned$tokens <- dfAssigned$tokens.y - dfAssigned$tokens.x
219 print (dfAssigned)
220 df <- bind_rows(df, dfAssigned)
221 df <- df[order(df$corpus),]
222
Marc Kupietz6ac65072023-11-17 20:31:39 +0100223 hc <- highchart() %>%
224 hc_add_series(type = "bar", data = df, hcaes(texttype, tokens, group=corpus)) %>%
Marc Kupietz660bff82023-12-05 14:45:53 +0100225 hc_xAxis(categories = df$texttype %>%
226 str_replace_all("Zz", "Z") %>%
Rainer Perkuhn28bbc1c2023-12-06 11:28:15 +0100227 str_replace_all("Rr", "R") %>%
Rainer Perkuhn89f71382023-12-06 15:19:15 +0100228 str_replace_all("Bb", "B") %>%
Rainer Perkuhn28bbc1c2023-12-06 11:28:15 +0100229 str_replace_all("z]", "") %>%
Rainer Perkuhn89f71382023-12-06 15:19:15 +0100230 str_replace_all("[/.*)():^\\[\\]\\?]", "") %>%
Rainer Perkuhn28bbc1c2023-12-06 11:28:15 +0100231 str_replace_all("\\|", "/")) %>%
Marc Kupietz6ac65072023-11-17 20:31:39 +0100232 hc_title(text="Texttyp")
233 hc
234 })
235
236}
237
238shinyApp(ui, server)