Build as docker container
Change-Id: Ied30167fbaf02fb7744e6295168c8c450687a4c6
diff --git a/shiny/app.R b/shiny/app.R
new file mode 100644
index 0000000..1755347
--- /dev/null
+++ b/shiny/app.R
@@ -0,0 +1,168 @@
+library(shiny)
+library(shinythemes)
+library(highcharter)
+library(RKorAPClient)
+library(tidyverse)
+library(idsThemeR)
+
+corpus <- c("", "referTo ratskorpus-2023-1", "referTo drukola.20180909.1b_words") # defaults for the "cq" box; "" adds no corpus restriction
+
+# Split "vc1; vc2" into one query prefix per corpus: "(vc1) & ", "(vc2) & ".
+vcFromString <- function(string) {
+  parts <- str_split_1(string, " *[;] *")
+  prefixes <- str_replace(parts, "^(.+)$", "(\\1) & ") # an empty part stays ""
+  str_replace_all(prefixes, " +", " ")
+}
+
+# Page layout: the corpus-definition text box above four width-6 chart columns.
+ui <- fluidPage(
+  theme = shinytheme("paper"),
+  fluidRow(column(width = 12, textInput("cq", "Corpus definitions", paste0(corpus, collapse = ";"), width = "100%"))),
+  tagAppendAttributes( # this class is what the legend-click handler searches for
+    fluidRow(
+      column(width = 6, highchartOutput("country")),
+      column(width = 6, highchartOutput("domain")),
+      column(width = 6, highchartOutput("decade")),
+      column(width = 6, highchartOutput("texttype"))
+    ), class = "hc-link-legend")
+)
+
+server <- function(input, output, session) {
+
+# corpus <- str_split(input$corpus, ",")
+# corpus <- corpus %>% str_replace("^(.+)$", "\\1 & ")
+
+ # Pre-fill the corpus box from the "cq" URL parameter; other parameters are ignored.
+ observe({
+   cq <- parseQueryString(session$clientData$url_search)[["cq"]]
+   if (!is.null(cq)) {
+     updateTextInput(session, "cq", value = cq)
+   }
+ })
+ # JS legendItemClick handler: hides/shows the same-index series in every chart under ".hc-link-legend".
+ sharelegend = JS('function(event){
+ var vis = this.visible;
+ var conall = $(this.chart.container).parents(".hc-link-legend").find("div.highchart");
+ for(var i = 0; i < conall.length; i++){
+ var hc = $(conall[i]).highcharts();
+ var series = hc.series[this.index];
+ if(series){
+ if(vis){
+ series.hide();
+ } else{
+ series.show();
+ }
+ }
+ }
+ return false;
+ }') # NOTE(review): "return false" presumably cancels Highcharts' own toggle - confirm
+
+ kco <- new("KorAPConnection", verbose = TRUE) # created inside server(); passed to every corpusStats() call
+ highchart <- function(...) { # base chart; shadows highcharter::highchart() and forwards `...` to it
+   highcharter::highchart(...) %>%
+     hc_add_theme(hc_theme_ids_light()) %>%
+     hc_add_onclick_korap_search() %>%
+     hc_yAxis(type = "logarithmic") %>%
+     hc_legend(enabled = FALSE) %>% # off by default; a chart can switch it back on
+     hc_plotOptions(series = list(events = list(legendItemClick = sharelegend)))
+ }
+
+ # Turn the query prefix in `corpus` into a label: "(referTo x) & " -> "x", "" -> "DeReKo-KorAP".
+ prettifyCorpusNames <- function(df) {
+   rownames(df) <- NULL
+   to_label <- function(prefix) {
+     prefix %>% str_replace("referTo *", "") %>% str_replace(" *& *$", "") %>%
+       str_replace_all("[)()]", "") %>% str_replace("^ *$", "DeReKo-KorAP")
+   }
+   mutate(df, corpus = to_label(corpus))
+ }
+
+
+ # Tokens per publication country ("Land"), one column series per corpus.
+ output$country <- renderHighchart({
+   corpus <- vcFromString(input$cq)
+   countries <- sort(c("DE", "AT", "CH", "IT", "BE", "LU"))
+   # One row per corpus/country pair; `vc` is the query handed to corpusStats().
+   df <- expand_grid(corpus = corpus, country = countries) %>%
+     mutate(vc = sprintf("%spubPlaceKey=%s", corpus, country)) %>%
+     prettifyCorpusNames() %>%
+     bind_cols(corpusStats(kco, .$vc) %>% select(-vc))
+
+   highchart() %>%
+     hc_add_series(type = "column", data = df, hcaes(x = country, y = tokens, group = corpus)) %>%
+     hc_xAxis(categories = df$country) %>%
+     hc_legend(enabled = TRUE) %>% # the base chart disables the legend; show it here
+     hc_title(text = "Land")
+ })
+
+ # Tokens per topic domain ("Thema"), one bar series per corpus.
+ output$domain <- renderHighchart({
+   prefixes <- vcFromString(input$cq)
+   text_classes <- c(
+     "freizeit-unterhaltung",
+     "gesundheit-ernaehrung",
+     "kultur",
+     "politik",
+     "sport",
+     "staat-gesellschaft",
+     "technik-industrie",
+     "wissenschaft",
+     "wirtschaft-finanzen",
+     "natur-umwelt",
+     "fiktion"
+   )
+
+   # Build one textClass query per corpus/topic pair, then attach its statistics.
+   queries <- expand_grid(corpus = prefixes, domain = text_classes) %>%
+     mutate(vc = sprintf("%stextClass=%s", corpus, domain))
+   stats <- corpusStats(kco, queries$vc) %>% select(-vc)
+   df <- prettifyCorpusNames(bind_cols(queries, stats))
+   axis_labels <- str_to_title(df$domain, locale = "en")
+
+   highchart() %>%
+     hc_add_series(type = "bar", data = df, hcaes(x = domain, y = tokens, group = corpus)) %>%
+     hc_xAxis(categories = axis_labels) %>%
+     hc_title(text = "Thema")
+ })
+
+ # Tokens per publication decade ("Dekade"), one bar series per corpus.
+ output$decade <- renderHighchart({
+   prefixes <- vcFromString(input$cq)
+   # First year of each ten-year span shown on the axis.
+   first_years <- c(1951, 1961, 1971, 1981, 1991, 2001, 2011, 2021)
+
+   # Each span is queried as the pubDate range first_year .. first_year + 9.
+   queries <- expand_grid(corpus = prefixes, decade = first_years) %>%
+     mutate(vc = sprintf("%spubDate since %d & pubDate until %d", corpus, decade, decade + 9))
+   stats <- corpusStats(kco, queries$vc) %>% select(-vc)
+   df <- bind_cols(queries, stats) %>%
+     mutate(decade = sprintf("%d-%d", decade, decade + 9)) %>% # axis label, e.g. "1951-1960"
+     prettifyCorpusNames()
+
+   highchart() %>%
+     hc_add_series(type = "bar", data = df, hcaes(x = decade, y = tokens, group = corpus)) %>%
+     hc_xAxis(categories = df$decade) %>%
+     hc_title(text = "Dekade")
+ })
+
+ # Tokens per text type ("Texttyp"), one bar series per corpus.
+ output$texttype <- renderHighchart({
+   prefixes <- vcFromString(input$cq)
+   # textType values written in /pattern/ form.
+   type_patterns <- c("/Zeitung.*/", "/(Zeitschrift|Magazin).*/", "/Agenturmeldung.*/", "/Enzyklopädie.*/",
+     "/.*Diskussion.*/", "/Roman.*/", "/Newsgroup.*/", "/Tagebuch.*/", "/Sachbuch.*/")
+   queries <- expand_grid(corpus = prefixes, texttype = type_patterns) %>%
+     mutate(vc = sprintf("%stextType=%s", corpus, texttype))
+   df <- prettifyCorpusNames(bind_cols(queries, corpusStats(kco, queries$vc) %>% select(-vc)))
+   # Axis labels: strip the pattern punctuation and show alternatives as "A/B".
+   axis_labels <- df$texttype %>% str_replace_all("[/.*)()]", "") %>% str_replace_all("\\|", "/")
+
+   highchart() %>%
+     hc_add_series(type = "bar", data = df, hcaes(x = texttype, y = tokens, group = corpus)) %>%
+     hc_xAxis(categories = axis_labels) %>%
+     hc_title(text = "Texttyp")
+ })
+
+}
+
+shinyApp(ui, server) # combine the UI definition and the server function into the app object