Marc Kupietz | 6ac6507 | 2023-11-17 20:31:39 +0100 | [diff] [blame] | 1 | library(shiny) |
| 2 | library(shinythemes) |
| 3 | library(highcharter) |
| 4 | library(RKorAPClient) |
| 5 | library(tidyverse) |
| 6 | library(idsThemeR) |
| 7 | |
# Default virtual corpus definitions offered in the input field.
# The empty string is the entry without any restriction.
corpus <- c(
  "",
  "referTo ratskorpus-2023-1",
  "referTo drukola.20180909.1b_words"
)
| 9 | |
# Turn a semicolon-separated list of virtual corpus definitions into a
# character vector of query prefixes.
#
# string: the definitions, separated by ";". NULL or a vector with several
#         elements is accepted and treated as if joined with ";".
#
# Returns one element per definition. A non-empty definition `x` becomes
# "(x) & ", so that a further constraint can be appended directly; an empty
# or whitespace-only definition becomes "" (no restriction).
vcFromString <- function(string) {
  # str_split_1() requires exactly one string, so normalise the input first
  # (paste() turns NULL / character(0) into "").
  definitions <- paste(string, collapse = ";")

  definitions %>%
    str_split_1(" *; *") %>%
    # Trim so that whitespace-only entries count as empty instead of
    # producing the invalid clause "( ) & ".
    str_trim() %>%
    str_replace("^(.+)$", "(\\1) & ") %>%
    str_replace_all(" +", " ")
}
| 16 | |
# Add a legend entry that toggles the y-axis scale between logarithmic and
# linear.
#
# The entry is a spline series without data, drawn in white. Clicking it in
# the legend flips the type of the first y-axis of every chart found below
# the enclosing ".hc-link-legend" element.
#
# hc:    highchart object to extend.
# index: legendIndex given to the toggle entry.
#
# Returns the extended highchart object.
hc_add_log_linear_toggle <- function(hc, index = 50) {
  toggle_y_axis_scale <- JS("
    function() {
      var conall = $(this.chart.container).parents('.hc-link-legend').find('div.highchart');
      for(var i = 0; i < conall.length; i++) {
        var hc = $(conall[i]).highcharts();
        hc.yAxis[0].update({type: hc.yAxis[0].options['type']=='logarithmic' ? 'linear' : 'logarithmic'});
      }
    }
  ")

  with_toggle_entry <- hc_add_series(
    hc,
    name = "[toggle log/linear]",
    legendIndex = index,
    visible = TRUE,
    type = "spline",
    color = "white"
  )

  # The handler is registered for spline series only.
  hc_plotOptions(
    with_toggle_entry,
    spline = list(events = list(legendItemClick = toggle_y_axis_scale))
  )
}
| 32 | |
| 33 | |
# Page layout: one full-width text field for the corpus definitions and,
# below it, the four charts in half-width columns. The row holding the charts
# carries the class "hc-link-legend", which the legend click handlers use to
# find all charts that belong together.
ui <- fluidPage(
  theme = shinytheme("paper"),
  fluidRow(
    column(
      width = 12,
      textInput(
        inputId = "cq",
        label = "Corpus definitions",
        value = paste0(corpus, collapse = ";"),
        width = "100%"
      )
    )
  ),
  tagAppendAttributes(
    do.call(
      fluidRow,
      lapply(
        c("country", "domain", "decade", "texttype"),
        function(chart_id) column(width = 6, highchartOutput(chart_id))
      )
    ),
    class = "hc-link-legend"
  )
)
| 46 | |
# Server logic: for every corpus definition entered in the "cq" field, query
# token counts by country, topic domain, decade and text type, and render
# them as four charts.
#
# input, output, session: the usual Shiny server arguments.
server <- function(input, output, session) {

  # Pre-fill the corpus definition field from the `cq` URL query parameter.
  # Only `cq` is used; other query parameters are ignored.
  observe({
    query <- parseQueryString(session$clientData$url_search)
    cq <- query[["cq"]]
    if (!is.null(cq)) {
      updateTextInput(session, "cq", value = cq)
    }
  })

  # Legend click handler: mirror the show/hide state of the clicked series
  # onto the series with the same index in every chart below the enclosing
  # ".hc-link-legend" element. Returning false suppresses the default toggle.
  sharelegend <- JS('function(event){
    var vis = this.visible;
    var conall = $(this.chart.container).parents(".hc-link-legend").find("div.highchart");
    for(var i = 0; i < conall.length; i++){
      var hc = $(conall[i]).highcharts();
      var series = hc.series[this.index];
      if(series){
        if(vis){
          series.hide();
        } else{
          series.show();
        }
      }
    }
    return false;
  }')

  kco <- new("KorAPConnection", verbose = TRUE)

  # Query prefixes for the current input, parsed once per change and shared
  # by all four charts.
  corpora <- reactive(vcFromString(input$cq))

  # Chart skeleton shared by all panels: IDS theme, KorAP search on click,
  # logarithmic y-axis, hidden legend, linked legend clicks and the
  # log/linear toggle entry.
  base_chart <- function() {
    highcharter::highchart() %>%
      hc_add_theme(hc_theme_ids_light()) %>%
      hc_add_onclick_korap_search() %>%
      hc_yAxis(type = "logarithmic") %>%
      hc_legend(enabled = FALSE) %>%
      hc_plotOptions(series = list(events = list(legendItemClick = sharelegend))) %>%
      hc_add_log_linear_toggle()
  }

  # Turn the query prefixes in column `corpus` into display names: drop
  # "referTo", the trailing "&" and parentheses; an empty name becomes
  # "DeReKo-KorAP".
  prettifyCorpusNames <- function(df) {
    rownames(df) <- NULL

    df %>%
      mutate(
        corpus = corpus %>%
          str_replace("referTo *", "") %>%
          str_replace(" *& *$", "") %>%
          str_replace_all("[)()]", "") %>%
          str_replace("^ *$", "DeReKo-KorAP")
      )
  }

  # Fetch the corpus statistics for every virtual corpus in `df$vc`, append
  # them as columns and prettify the corpus names.
  add_corpus_stats <- function(df) {
    stats <- corpusStats(kco, df$vc) %>% select(-vc)

    df %>%
      bind_cols(stats) %>%
      prettifyCorpusNames()
  }

  output$country <- renderHighchart({
    countries <- sort(c("DE", "AT", "CH", "IT", "BE", "LU"))

    df <- expand_grid(corpus = corpora(), country = countries) %>%
      mutate(vc = sprintf("%spubPlaceKey=%s", corpus, country)) %>%
      add_corpus_stats()

    # Only this chart shows the legend.
    base_chart() %>%
      hc_add_series(
        type = "column",
        data = df,
        mapping = hcaes(x = country, y = tokens, group = corpus)
      ) %>%
      hc_xAxis(categories = countries) %>%
      hc_legend(enabled = TRUE) %>%
      hc_title(text = "Land")
  })

  output$domain <- renderHighchart({
    topics <- c(
      "freizeit-unterhaltung",
      "gesundheit-ernaehrung",
      "kultur",
      "politik",
      "sport",
      "staat-gesellschaft",
      "technik-industrie",
      "wissenschaft",
      "wirtschaft-finanzen",
      "natur-umwelt",
      "fiktion"
    )

    df <- expand_grid(corpus = corpora(), domain = topics) %>%
      mutate(vc = sprintf("%stextClass=%s", corpus, domain)) %>%
      add_corpus_stats()

    base_chart() %>%
      hc_add_series(
        type = "bar",
        data = df,
        mapping = hcaes(x = domain, y = tokens, group = corpus)
      ) %>%
      hc_xAxis(categories = str_to_title(topics, locale = "en")) %>%
      hc_title(text = "Thema")
  })

  output$decade <- renderHighchart({
    decades <- c(1951, 1961, 1971, 1981, 1991, 2001, 2011, 2021)
    decade_labels <- sprintf("%d-%d", decades, decades + 9)

    df <- expand_grid(corpus = corpora(), decade = decades) %>%
      mutate(
        vc = sprintf(
          "%spubDate since %d & pubDate until %d",
          corpus, decade, decade + 9
        )
      ) %>%
      add_corpus_stats() %>%
      mutate(decade = sprintf("%d-%d", decade, decade + 9))

    base_chart() %>%
      hc_add_series(
        type = "bar",
        data = df,
        mapping = hcaes(x = decade, y = tokens, group = corpus)
      ) %>%
      hc_xAxis(categories = decade_labels) %>%
      hc_title(text = "Dekade")
  })

  output$texttype <- renderHighchart({
    # Axis label -> textType regular expression. The labels are spelled out
    # because deriving them by stripping regex characters from the pattern
    # garbles the "Roman" entry.
    texttypes <- c(
      "Zeitung" = "/[^:]*[Zz]eitung.*/",
      "Zeitschrift/Magazin" = "/(Zeitschrift|Magazin).*/",
      "Agenturmeldung" = "/Agenturmeldung.*/",
      "Enzyklopädie" = "/Enzyklopädie.*/",
      "Diskussion" = "/.*Diskussion.*/",
      "Roman" = "/.*[Rr]oman([^z].*|$)/",
      "Newsgroup" = "/Newsgroup.*/",
      "Tagebuch" = "/Tagebuch.*/",
      "Sachbuch" = "/.*Sachbuch.*/"
    )

    df <- expand_grid(corpus = corpora(), texttype = unname(texttypes)) %>%
      mutate(vc = sprintf("%stextType=%s", corpus, texttype)) %>%
      add_corpus_stats()

    base_chart() %>%
      hc_add_series(
        type = "bar",
        data = df,
        mapping = hcaes(x = texttype, y = tokens, group = corpus)
      ) %>%
      hc_xAxis(categories = names(texttypes)) %>%
      hc_title(text = "Texttyp")
  })

}
| 187 | |
# Build the Shiny app object from the UI definition and the server function.
shinyApp(ui = ui, server = server)