blob: 94541f95d108f9ea1b8c1a72857db9d4f58e7f84 [file] [log] [blame]
library(shiny)
library(shinythemes)
library(highcharter)
library(RKorAPClient)
library(tidyverse)
library(idsThemeR)
corpus=c("", "referTo ratskorpus-2023-1", "referTo drukola.20180909.1b_words")
vcFromString <- function(string) {
string %>%
str_split_1(" *[;] *") %>%
str_replace("^(.+)$", "(\\1) & ") %>%
str_replace_all(" +", " ")
}
hc_add_log_linear_toggle <- function(hc, index=50) {
hc_add_series(hc, name="[toggle log/linear]", legendIndex=index, visible=TRUE, type="spline", color="white") %>%
hc_plotOptions(spline = list(
events = list(legendItemClick = JS("
function() {
var conall = $(this.chart.container).parents('.hc-link-legend').find('div.highchart');
for(var i = 0; i < conall.length; i++) {
var hc = $(conall[i]).highcharts();
hc.yAxis[0].update({type: hc.yAxis[0].options['type']=='logarithmic' ? 'linear' : 'logarithmic'});
}
}
"))
))
}
ui <- fluidPage(
theme = shinytheme("paper"),
fluidRow(column(width = 12, textInput("cq", "Corpus definitions", paste0(corpus, collapse = ";"), width="100%"))),
fluidRow(
column(width = 6, highchartOutput("country")),
column(width = 6, highchartOutput("domain")),
column(width = 6, highchartOutput("decade")),
column(width = 6, highchartOutput("texttype")),
) %>% tagAppendAttributes(class="hc-link-legend")
)
server <- function(input, output, session) {
# corpus <- str_split(input$corpus, ",")
# corpus <- corpus %>% str_replace("^(.+)$", "\\1 & ")
observe({
query <- parseQueryString(session$clientData$url_search)
if (!is.null(query[['cq']])) {
corpus = as.vector(unlist(query))
updateTextInput(session, "cq", value = corpus)
}
})
sharelegend = JS('function(event){
var vis = this.visible;
var conall = $(this.chart.container).parents(".hc-link-legend").find("div.highchart");
for(var i = 0; i < conall.length; i++){
var hc = $(conall[i]).highcharts();
var series = hc.series[this.index];
if(series){
if(vis){
series.hide();
} else{
series.show();
}
}
}
return false;
}')
kco <- new("KorAPConnection", verbose=TRUE)
highchart <- function(...) {
highcharter::highchart() %>%
hc_add_theme(hc_theme_ids_light()) %>%
hc_add_onclick_korap_search() %>%
hc_yAxis(type = "logarithmic") %>%
hc_legend(enabled=F) %>%
hc_plotOptions(series = list(events = list(legendItemClick = sharelegend))) %>%
hc_add_log_linear_toggle()
}
prettifyCorpusNames <- function(df) {
rownames(df) = NULL
df %>%
mutate(corpus = corpus %>% str_replace("referTo *", "") %>% str_replace(" *& *$", "") %>%
str_replace_all("[)()]", "") %>%
str_replace("^ *$", "DeReKo-KorAP"))
}
pureCorpus <- corpus %>% str_replace(" *& *$", "")
dfPureCorpus <- corpusStats(kco, pureCorpus)
dfPureCorpus$corpus <- dfPureCorpus$vc
dfPureCorpus <- prettifyCorpusNames(dfPureCorpus)
output$country <- renderHighchart({
corpus <- vcFromString(input$cq)
countries <- c("DE", "AT", "CH", "IT", "BE", "LU") %>% sort()
df <- expand_grid(corpus=corpus, country=countries) %>%
mutate(vc = sprintf("%spubPlaceKey=%s", corpus, country)) %>%
prettifyCorpusNames() %>%
bind_cols(corpusStats(kco, .$vc) %>% select(-vc))
dfAssigned <- aggregate(tokens ~ corpus, data=df, sum)
dfAssigned$country <- "NE/NA"
dfAssigned <- merge(dfAssigned, dfPureCorpus, by="corpus")
dfAssigned$tokens <- dfAssigned$tokens.y - dfAssigned$tokens.x
df <- bind_rows(df, dfAssigned)
df <- df[order(df$corpus),]
highchart() %>%
hc_add_series(type = "column", data = df, hcaes(x=country, y=tokens, group=corpus)) %>%
hc_xAxis(categories = df$country) %>%
hc_legend(enabled=T) %>%
hc_title(text="Land")
})
output$domain <- renderHighchart({
corpus <- vcFromString(input$cq)
topics <-
c(
"freizeit-unterhaltung",
"gesundheit-ernaehrung",
"kultur",
"politik",
"sport",
"staat-gesellschaft",
"technik-industrie",
"wissenschaft",
"wirtschaft-finanzen",
"natur-umwelt",
"fiktion"
)
index <- 1
NASTRING <- paste0("textClass != ", topics[index])
while (index < length(topics)) {
index <-index+1
NASTRING <- paste0(NASTRING, " & textClass != ", topics[index])
}
df <- expand_grid(corpus=corpus, domain=topics) %>%
mutate(vc = sprintf("%stextClass=%s", corpus, domain)) %>%
bind_cols(corpusStats(kco, .$vc)%>% select(-vc)) %>%
prettifyCorpusNames()
dfNotAssigned <-
corpusStats(kco, vc=sprintf("%s%s", corpus, NASTRING)) %>%
prettifyCorpusNames()
dfNotAssigned$domain <- "NE/NA"
df <- bind_rows(df, dfNotAssigned)
df <- df[order(df$corpus),]
highchart() %>%
hc_add_series(type = "bar", data = df, hcaes(domain, tokens, group=corpus)) %>%
hc_xAxis(categories = df$domain %>% tools::toTitleCase() %>% str_replace_all("ae", "ä") )%>%
hc_title(text="Thema")
})
output$decade <- renderHighchart({
corpus <- vcFromString(input$cq)
decades <-
c(1951, 1961, 1971, 1981, 1991, 2001, 2011, 2021)
decade_labels <- function(start_year) {
sprintf("%d-%d", start_year, start_year+9)
}
df <- expand_grid(corpus=corpus, decade=decades) %>%
mutate(vc = sprintf("%spubDate since %d & pubDate until %d", corpus, decade, decade+9)) %>%
bind_cols(corpusStats(kco, .$vc) %>% select(-vc)) %>%
mutate(decade = decade_labels(decade)) %>%
prettifyCorpusNames()
dfAssigned <- aggregate(tokens ~ corpus, data=df, sum)
dfAssigned$decade <- "NE/NA"
dfAssigned <- merge(dfAssigned, dfPureCorpus, by="corpus")
dfAssigned$tokens <- dfAssigned$tokens.y - dfAssigned$tokens.x
df <- bind_rows(df, dfAssigned)
df <- df[order(df$corpus),]
highchart() %>%
hc_add_series(type = "bar", data = df, hcaes(decade, tokens, group=corpus)) %>%
hc_xAxis(categories = df$decade )%>%
hc_title(text="Dekade")
})
output$texttype <- renderHighchart({
corpus <- vcFromString(input$cq)
texttypes <-
c("/[^:]*[Zz]eitung.*/", "/(Zeitschrift|Magazin).*/", "/Agenturmeldung.*/", "/Enzyklopädie.*/", "/.*Diskussion.*/",
"/.*[Rr]oman([^z].*)?/", "/Tagebuch.*/", "/.*Sachbuch.*/", "/Protokoll.*/", "/Chat/",
"/.*[Bb]ericht.*/", "/.*Abhandlung.*/")
df <- expand_grid(corpus=corpus, texttype=texttypes) %>%
mutate(vc = sprintf("%stextType=%s", corpus, texttype)) %>%
bind_cols(corpusStats(kco, .$vc) %>% select(-vc)) %>%
prettifyCorpusNames()
dfAssigned <- aggregate(tokens ~ corpus, data=df, sum)
dfAssigned$texttype <- "NE|NA"
dfAssigned <- merge(dfAssigned, dfPureCorpus, by="corpus")
print (dfAssigned)
dfAssigned$tokens <- dfAssigned$tokens.y - dfAssigned$tokens.x
print (dfAssigned)
df <- bind_rows(df, dfAssigned)
df <- df[order(df$corpus),]
hc <- highchart() %>%
hc_add_series(type = "bar", data = df, hcaes(texttype, tokens, group=corpus)) %>%
hc_xAxis(categories = df$texttype %>%
str_replace_all("Zz", "Z") %>%
str_replace_all("Rr", "R") %>%
str_replace_all("Bb", "B") %>%
str_replace_all("z]", "") %>%
str_replace_all("[/.*)():^\\[\\]\\?]", "") %>%
str_replace_all("\\|", "/")) %>%
hc_title(text="Texttyp")
hc
})
}
shinyApp(ui, server)