Marc Kupietz | 6ac6507 | 2023-11-17 20:31:39 +0100 | [diff] [blame] | 1 | library(shiny) |
| 2 | library(shinythemes) |
| 3 | library(highcharter) |
| 4 | library(RKorAPClient) |
| 5 | library(tidyverse) |
Rainer Perkuhn | 0f5d195 | 2023-12-06 10:59:12 +0100 | [diff] [blame^] | 6 | |
# idsThemeR is installed from the IDS Mannheim Gerrit repository.
# To install it, run once:
#   devtools::install_git("https://korap.ids-mannheim.de/gerrit/IDS-Mannheim/idsThemeR")
| 9 | |
Marc Kupietz | 6ac6507 | 2023-11-17 20:31:39 +0100 | [diff] [blame] | 10 | library(idsThemeR) |
| 11 | |
# Default corpus definitions, joined with ";" to pre-fill the "cq" text input.
# The empty first entry adds no restriction to the generated queries and is
# labelled "DeReKo-KorAP" in the charts.
corpus <- c(
  "",
  "referTo ratskorpus-2023-1",
  "referTo drukola.20180909.1b_words"
)
| 13 | |
#' Split the "Corpus definitions" text into virtual-corpus query prefixes
#'
#' @param string A single string with corpus definitions separated by `;`.
#' @return A character vector with one element per definition. A non-empty
#'   definition `d` becomes `"(d) & "`, ready to be prepended to a further
#'   constraint; an empty definition stays `""`. Runs of spaces are collapsed
#'   to a single space.
vcFromString <- function(string) {
  string %>%
    # Trim first: the split pattern only swallows spaces next to a ";", so
    # leading/trailing whitespace would otherwise end up inside the
    # parentheses (e.g. " " would become the invalid prefix "( ) & ").
    str_trim() %>%
    str_split_1(" *[;] *") %>%
    str_replace("^(.+)$", "(\\1) & ") %>%
    str_replace_all(" +", " ")
}
| 20 | |
#' Add a legend entry that toggles the y-axis between log and linear scale
#'
#' Adds a data-less, white spline series named "[toggle log/linear]" and
#' installs a legend click handler for spline series. The handler walks up
#' from the clicked chart to its `.hc-link-legend` ancestors, finds every
#' `div.highchart` below them and flips the type of each chart's first
#' y-axis between 'logarithmic' and 'linear'.
#'
#' @param hc A highchart object.
#' @param index Legend index of the toggle entry.
#' @return The modified highchart object.
hc_add_log_linear_toggle <- function(hc, index=50) {
  toggle_axis_type <- JS("
    function() {
      var conall = $(this.chart.container).parents('.hc-link-legend').find('div.highchart');
      for(var i = 0; i < conall.length; i++) {
         var hc = $(conall[i]).highcharts();
         hc.yAxis[0].update({type: hc.yAxis[0].options['type']=='logarithmic' ? 'linear' : 'logarithmic'});
      }
    }
  ")

  with_toggle_entry <- hc_add_series(
    hc,
    name = "[toggle log/linear]",
    legendIndex = index,
    visible = TRUE,
    type = "spline",
    color = "white"
  )

  hc_plotOptions(
    with_toggle_entry,
    spline = list(events = list(legendItemClick = toggle_axis_type))
  )
}
| 36 | |
| 37 | |
# Page layout: a full-width text input for the corpus definitions, followed by
# a row with the four charts. The chart row carries the "hc-link-legend" class,
# which the legend click handlers use to find all charts that belong together.
ui <- fluidPage(
  theme = shinytheme("paper"),
  fluidRow(
    column(
      width = 12,
      textInput(
        inputId = "cq",
        label = "Corpus definitions",
        value = paste0(corpus, collapse = ";"),
        width = "100%"
      )
    )
  ),
  tagAppendAttributes(
    fluidRow(
      column(width = 6, highchartOutput("country")),
      column(width = 6, highchartOutput("domain")),
      column(width = 6, highchartOutput("decade")),
      column(width = 6, highchartOutput("texttype"))
    ),
    class = "hc-link-legend"
  )
)
| 50 | |
#' Shiny server function
#'
#' Pre-fills the "cq" input from the `cq` URL parameter (if present) and
#' renders four charts (country, topic domain, decade, text type). Each chart
#' shows the token counts returned by `corpusStats()` for every corpus
#' definition in `input$cq`, combined with one metadata constraint per
#' category.
#'
#' @param input,output,session The standard Shiny server arguments.
server <- function(input, output, session) {

  # Take the corpus definitions from the URL (?cq=...) when given. Only the
  # "cq" parameter is used, so unrelated URL parameters cannot leak into the
  # text input.
  observe({
    query <- parseQueryString(session$clientData$url_search)
    cq <- query[["cq"]]
    if (!is.null(cq)) {
      updateTextInput(session, "cq", value = cq)
    }
  })

  # Legend click handler shared by all charts: shows/hides the series with the
  # same index in every chart below the common ".hc-link-legend" ancestor and
  # returns false to suppress the default per-chart toggle.
  sharelegend <- JS('function(event){
    var vis = this.visible;
    var conall = $(this.chart.container).parents(".hc-link-legend").find("div.highchart");
    for(var i = 0; i < conall.length; i++){
      var hc = $(conall[i]).highcharts();
      var series = hc.series[this.index];
      if(series){
        if(vis){
          series.hide();
        } else{
          series.show();
        }
      }
    }
    return false;
  }')

  kco <- new("KorAPConnection", verbose = TRUE)

  # Local chart factory that shadows highcharter::highchart(): IDS theme,
  # click-through to KorAP, logarithmic y-axis, legend off by default, linked
  # legend handling and the log/linear toggle entry.
  highchart <- function(...) {
    highcharter::highchart() %>%
      hc_add_theme(hc_theme_ids_light()) %>%
      hc_add_onclick_korap_search() %>%
      hc_yAxis(type = "logarithmic") %>%
      hc_legend(enabled = FALSE) %>%
      hc_plotOptions(series = list(events = list(legendItemClick = sharelegend))) %>%
      hc_add_log_linear_toggle()
  }

  # Turn the query prefixes in the `corpus` column into display names: strip
  # "referTo", the trailing "&" and parentheses; an empty name becomes
  # "DeReKo-KorAP".
  prettifyCorpusNames <- function(df) {
    rownames(df) <- NULL

    df %>%
      mutate(
        corpus = corpus %>%
          str_replace("referTo *", "") %>%
          str_replace(" *& *$", "") %>%
          str_replace_all("[)()]", "") %>%
          str_replace("^ *$", "DeReKo-KorAP")
      )
  }

  output$country <- renderHighchart({
    corpus <- vcFromString(input$cq)

    countries <- sort(c("DE", "AT", "CH", "IT", "BE", "LU"))

    df <- expand_grid(corpus = corpus, country = countries) %>%
      mutate(vc = sprintf("%spubPlaceKey=%s", corpus, country)) %>%
      prettifyCorpusNames() %>%
      bind_cols(corpusStats(kco, .$vc) %>% select(-vc))

    highchart() %>%
      hc_add_series(type = "column", data = df, hcaes(x = country, y = tokens, group = corpus)) %>%
      # One category per country; df$country repeats them once per corpus.
      hc_xAxis(categories = countries) %>%
      hc_legend(enabled = TRUE) %>%
      hc_title(text = "Land")
  })

  output$domain <- renderHighchart({
    corpus <- vcFromString(input$cq)

    topics <- c(
      "freizeit-unterhaltung",
      "gesundheit-ernaehrung",
      "kultur",
      "politik",
      "sport",
      "staat-gesellschaft",
      "technik-industrie",
      "wissenschaft",
      "wirtschaft-finanzen",
      "natur-umwelt",
      "fiktion"
    )

    df <- expand_grid(corpus = corpus, domain = topics) %>%
      mutate(vc = sprintf("%stextClass=%s", corpus, domain)) %>%
      bind_cols(corpusStats(kco, .$vc) %>% select(-vc)) %>%
      prettifyCorpusNames()

    highchart() %>%
      hc_add_series(type = "bar", data = df, hcaes(domain, tokens, group = corpus)) %>%
      hc_xAxis(categories = str_to_title(topics, locale = "en")) %>%
      hc_title(text = "Thema")
  })

  output$decade <- renderHighchart({
    corpus <- vcFromString(input$cq)

    decades <- c(1951, 1961, 1971, 1981, 1991, 2001, 2011, 2021)
    decade_labels <- function(start_year) {
      sprintf("%d-%d", start_year, start_year + 9)
    }

    df <- expand_grid(corpus = corpus, decade = decades) %>%
      mutate(vc = sprintf("%spubDate since %d & pubDate until %d", corpus, decade, decade + 9)) %>%
      bind_cols(corpusStats(kco, .$vc) %>% select(-vc)) %>%
      mutate(decade = decade_labels(decade)) %>%
      prettifyCorpusNames()

    highchart() %>%
      hc_add_series(type = "bar", data = df, hcaes(decade, tokens, group = corpus)) %>%
      hc_xAxis(categories = decade_labels(decades)) %>%
      hc_title(text = "Dekade")
  })

  output$texttype <- renderHighchart({
    corpus <- vcFromString(input$cq)

    # Axis label -> textType regex. The labels are spelled out because
    # deriving them by stripping regex characters from the pattern garbles
    # the more complex patterns (the Roman pattern came out as "Rromanz/$").
    texttypes <- c(
      "Zeitung" = "/[^:]*[Zz]eitung.*/",
      "Zeitschrift/Magazin" = "/(Zeitschrift|Magazin).*/",
      "Agenturmeldung" = "/Agenturmeldung.*/",
      "Enzyklopädie" = "/Enzyklopädie.*/",
      "Diskussion" = "/.*Diskussion.*/",
      "Roman" = "/.*[Rr]oman([^z].*|$)/",
      "Newsgroup" = "/Newsgroup.*/",
      "Tagebuch" = "/Tagebuch.*/",
      "Sachbuch" = "/.*Sachbuch.*/"
    )

    df <- expand_grid(corpus = corpus, texttype = unname(texttypes)) %>%
      mutate(vc = sprintf("%stextType=%s", corpus, texttype)) %>%
      bind_cols(corpusStats(kco, .$vc) %>% select(-vc)) %>%
      prettifyCorpusNames()

    highchart() %>%
      hc_add_series(type = "bar", data = df, hcaes(texttype, tokens, group = corpus)) %>%
      hc_xAxis(categories = names(texttypes)) %>%
      hc_title(text = "Texttyp")
  })

}
| 191 | |
# Create the Shiny app object from the UI definition and the server function.
shinyApp(ui = ui, server = server)