Marc Kupietz | 6ac6507 | 2023-11-17 20:31:39 +0100 | [diff] [blame] | 1 | library(shiny) |
| 2 | library(shinythemes) |
| 3 | library(highcharter) |
| 4 | library(RKorAPClient) |
| 5 | library(tidyverse) |
| 6 | library(idsThemeR) |
| 7 | |
# Default corpus definitions offered in the "cq" text field. The empty string
# adds no restriction of its own; the other entries refer to named virtual
# corpora via KorAP's `referTo` operator.
corpus <- c(
  "",
  "referTo ratskorpus-2023-1",
  "referTo drukola.20180909.1b_words"
)
| 9 | |
# Turn the semicolon-separated corpus definitions typed by the user into
# virtual-corpus prefixes.
#
# Each non-empty definition `x` becomes "(x) & ", ready to be prepended to a
# further condition such as "pubPlaceKey=DE". Empty (or blank) definitions
# become "", i.e. they add no restriction.
#
# string: a single character string; NULL, NA or a non-scalar is treated
#         like "".
# Returns a character vector with one element per definition.
vcFromString <- function(string) {
  if (length(string) != 1L || is.na(string)) {
    string <- ""
  }

  # strsplit() silently drops one trailing empty field; appending a separator
  # first keeps "a;" as c("a", "") and "" as "".
  definitions <- strsplit(paste0(string, ";"), ";", fixed = TRUE)[[1]]

  # Trim each definition and collapse runs of blanks so that stray spaces
  # neither produce an invalid "( ) & " condition nor leak into chart labels.
  definitions <- gsub(" +", " ", trimws(definitions))

  ifelse(nzchar(definitions), paste0("(", definitions, ") & "), "")
}
| 16 | |
# Page layout: a full-width text field holding the semicolon-separated corpus
# definitions, followed by the four charts. The chart row carries the
# "hc-link-legend" class, which the legend click handler in the server uses to
# find all charts that should toggle their series together.
ui <- fluidPage(
  theme = shinytheme("paper"),
  fluidRow(
    column(
      width = 12,
      textInput(
        "cq",
        "Corpus definitions",
        paste0(corpus, collapse = ";"),
        width = "100%"
      )
    )
  ),
  # No trailing comma after the last column: an empty argument is only
  # tolerated by some versions of the tag functions.
  fluidRow(
    column(width = 6, highchartOutput("country")),
    column(width = 6, highchartOutput("domain")),
    column(width = 6, highchartOutput("decade")),
    column(width = 6, highchartOutput("texttype"))
  ) %>% tagAppendAttributes(class = "hc-link-legend")
)
| 29 | |
# Shiny server function. For every corpus definition entered in the "cq" field
# it requests corpus statistics from KorAP, broken down by country, topic
# domain, decade and text type, and renders one chart of token counts per
# breakdown.
server <- function(input, output, session) {

  # Allow the corpus definitions to be preset through the URL (?cq=...).
  observe({
    query <- parseQueryString(session$clientData$url_search)
    if (!is.null(query[["cq"]])) {
      # Only the cq parameter belongs in the text field; any other query
      # parameters present in the URL must not be mixed into its value.
      updateTextInput(session, "cq", value = query[["cq"]])
    }
  })

  # Corpus prefixes derived from the text field, parsed once and shared by
  # all charts.
  corpora <- reactive(vcFromString(input$cq))

  # Legend click handler: hides/shows the series with the same index in every
  # chart below the ".hc-link-legend" container, so one legend drives all
  # charts. Returning false suppresses the default toggle of the clicked chart
  # (it is toggled by the loop like all others).
  sharelegend <- JS('function(event){
    var vis = this.visible;
    var conall = $(this.chart.container).parents(".hc-link-legend").find("div.highchart");
    for(var i = 0; i < conall.length; i++){
      var hc = $(conall[i]).highcharts();
      var series = hc.series[this.index];
      if(series){
        if(vis){
          series.hide();
        } else{
          series.show();
        }
      }
    }
    return false;
  }')

  kco <- new("KorAPConnection", verbose = TRUE)

  # Base chart shared by all outputs: IDS theme, click-through to KorAP,
  # logarithmic value axis, legend off by default, linked legend handler.
  # Arguments are forwarded to highcharter::highchart().
  # NOTE(review): a logarithmic axis cannot display zero counts - confirm how
  # corpus/value combinations without any tokens should be shown.
  themedChart <- function(...) {
    highcharter::highchart(...) %>%
      hc_add_theme(hc_theme_ids_light()) %>%
      hc_add_onclick_korap_search() %>%
      hc_yAxis(type = "logarithmic") %>%
      hc_legend(enabled = FALSE) %>%
      hc_plotOptions(
        series = list(events = list(legendItemClick = sharelegend))
      )
  }

  # Replace the technical corpus prefix in column `corpus` by a display name:
  # strips "referTo", the trailing " & " and all parentheses; an empty result
  # is shown as "DeReKo-KorAP".
  prettifyCorpusNames <- function(df) {
    rownames(df) <- NULL

    df %>%
      mutate(
        corpus = corpus %>%
          str_replace("referTo *", "") %>%
          str_replace(" *& *$", "") %>%
          str_replace_all("[)()]", "") %>%
          str_replace("^ *$", "DeReKo-KorAP")
      )
  }

  # Query KorAP for every virtual corpus in `df$vc`, append the returned
  # statistics columns (without their duplicate `vc` column) and prettify the
  # corpus names.
  addCorpusStats <- function(df) {
    stats <- corpusStats(kco, df$vc) %>% select(-vc)

    df %>%
      bind_cols(stats) %>%
      prettifyCorpusNames()
  }

  output$country <- renderHighchart({
    countries <- sort(c("DE", "AT", "CH", "IT", "BE", "LU"))

    df <- expand_grid(corpus = corpora(), country = countries) %>%
      mutate(vc = sprintf("%spubPlaceKey=%s", corpus, country)) %>%
      addCorpusStats()

    # Only this chart shows the legend; it controls the other charts through
    # the shared legend handler.
    themedChart() %>%
      hc_add_series(
        type = "column", data = df,
        hcaes(x = country, y = tokens, group = corpus)
      ) %>%
      # One category per value; the rows repeat the values for every corpus.
      hc_xAxis(categories = unique(df$country)) %>%
      hc_legend(enabled = TRUE) %>%
      hc_title(text = "Land")
  })

  output$domain <- renderHighchart({
    topics <- c(
      "freizeit-unterhaltung",
      "gesundheit-ernaehrung",
      "kultur",
      "politik",
      "sport",
      "staat-gesellschaft",
      "technik-industrie",
      "wissenschaft",
      "wirtschaft-finanzen",
      "natur-umwelt",
      "fiktion"
    )

    df <- expand_grid(corpus = corpora(), domain = topics) %>%
      mutate(vc = sprintf("%stextClass=%s", corpus, domain)) %>%
      addCorpusStats()

    themedChart() %>%
      hc_add_series(
        type = "bar", data = df,
        hcaes(domain, tokens, group = corpus)
      ) %>%
      hc_xAxis(
        categories = unique(df$domain) %>% str_to_title(locale = "en")
      ) %>%
      hc_title(text = "Thema")
  })

  output$decade <- renderHighchart({
    decades <- c(1951, 1961, 1971, 1981, 1991, 2001, 2011, 2021)

    # Label of the ten-year span starting at `start_year`, e.g. "1951-1960".
    decade_labels <- function(start_year) {
      sprintf("%d-%d", start_year, start_year + 9)
    }

    df <- expand_grid(corpus = corpora(), decade = decades) %>%
      mutate(
        vc = sprintf(
          "%spubDate since %d & pubDate until %d",
          corpus, decade, decade + 9
        )
      ) %>%
      addCorpusStats() %>%
      mutate(decade = decade_labels(decade))

    themedChart() %>%
      hc_add_series(
        type = "bar", data = df,
        hcaes(decade, tokens, group = corpus)
      ) %>%
      hc_xAxis(categories = unique(df$decade)) %>%
      hc_title(text = "Dekade")
  })

  output$texttype <- renderHighchart({
    # Regular expressions matched against the textType metadata field.
    texttypes <- c(
      "/Zeitung.*/",
      "/(Zeitschrift|Magazin).*/",
      "/Agenturmeldung.*/",
      "/Enzyklopädie.*/",
      "/.*Diskussion.*/",
      "/Roman.*/",
      "/Newsgroup.*/",
      "/Tagebuch.*/",
      "/Sachbuch.*/"
    )

    df <- expand_grid(corpus = corpora(), texttype = texttypes) %>%
      mutate(vc = sprintf("%stextType=%s", corpus, texttype)) %>%
      addCorpusStats()

    # Axis labels: drop the regex syntax and show alternatives as "A/B".
    labels <- unique(df$texttype) %>%
      str_replace_all("[/.*)()]", "") %>%
      str_replace_all("\\|", "/")

    themedChart() %>%
      hc_add_series(
        type = "bar", data = df,
        hcaes(texttype, tokens, group = corpus)
      ) %>%
      hc_xAxis(categories = labels) %>%
      hc_title(text = "Texttyp")
  })

}
| 167 | |
# Assemble the app from the UI definition and the server function.
shinyApp(ui = ui, server = server)