Merge branch 'rainer' into 'master'
Added computation of "others", displayed as NE/NA (for not existent/not...
See merge request KorAP/CorpusCompositionAnalyzer!5
diff --git a/shiny/app.R b/shiny/app.R
index f807658..94541f9 100644
--- a/shiny/app.R
+++ b/shiny/app.R
@@ -95,6 +95,10 @@
}
+ pureCorpus <- corpus %>% str_replace(" *& *$", "")
+ dfPureCorpus <- corpusStats(kco, pureCorpus)
+ dfPureCorpus$corpus <- dfPureCorpus$vc
+ dfPureCorpus <- prettifyCorpusNames(dfPureCorpus)
output$country <- renderHighchart({
corpus <- vcFromString(input$cq)
@@ -106,6 +110,13 @@
prettifyCorpusNames() %>%
bind_cols(corpusStats(kco, .$vc) %>% select(-vc))
+ dfAssigned <- aggregate(tokens ~ corpus, data=df, sum)
+ dfAssigned$country <- "NE/NA"
+ dfAssigned <- merge(dfAssigned, dfPureCorpus, by="corpus")
+ dfAssigned$tokens <- dfAssigned$tokens.y - dfAssigned$tokens.x
+ df <- bind_rows(df, dfAssigned)
+ df <- df[order(df$corpus),]
+
highchart() %>%
hc_add_series(type = "column", data = df, hcaes(x=country, y=tokens, group=corpus)) %>%
hc_xAxis(categories = df$country) %>%
@@ -131,14 +142,30 @@
"fiktion"
)
+ index <- 1
+ NASTRING <- paste0("textClass != ", topics[index])
+ while (index < length(topics)) {
+ index <-index+1
+ NASTRING <- paste0(NASTRING, " & textClass != ", topics[index])
+ }
+
df <- expand_grid(corpus=corpus, domain=topics) %>%
mutate(vc = sprintf("%stextClass=%s", corpus, domain)) %>%
bind_cols(corpusStats(kco, .$vc)%>% select(-vc)) %>%
prettifyCorpusNames()
+ dfNotAssigned <-
+ corpusStats(kco, vc=sprintf("%s%s", corpus, NASTRING)) %>%
+ prettifyCorpusNames()
+
+ dfNotAssigned$domain <- "NE/NA"
+
+ df <- bind_rows(df, dfNotAssigned)
+ df <- df[order(df$corpus),]
+
highchart() %>%
hc_add_series(type = "bar", data = df, hcaes(domain, tokens, group=corpus)) %>%
- hc_xAxis(categories = df$domain %>% str_to_title(locale = "en") )%>%
+ hc_xAxis(categories = df$domain %>% tools::toTitleCase() %>% str_replace_all("ae", "ä") )%>%
hc_title(text="Thema")
})
@@ -157,6 +184,13 @@
mutate(decade = decade_labels(decade)) %>%
prettifyCorpusNames()
+ dfAssigned <- aggregate(tokens ~ corpus, data=df, sum)
+ dfAssigned$decade <- "NE/NA"
+ dfAssigned <- merge(dfAssigned, dfPureCorpus, by="corpus")
+ dfAssigned$tokens <- dfAssigned$tokens.y - dfAssigned$tokens.x
+ df <- bind_rows(df, dfAssigned)
+ df <- df[order(df$corpus),]
+
highchart() %>%
hc_add_series(type = "bar", data = df, hcaes(decade, tokens, group=corpus)) %>%
hc_xAxis(categories = df$decade )%>%
@@ -168,7 +202,7 @@
texttypes <-
c("/[^:]*[Zz]eitung.*/", "/(Zeitschrift|Magazin).*/", "/Agenturmeldung.*/", "/Enzyklopädie.*/", "/.*Diskussion.*/",
- "/.*[Rr]oman([^z].*)?/", "/Newsgroup.*/", "/Tagebuch.*/", "/.*Sachbuch.*/", "/Protokoll.*/", "/Chat/",
+ "/.*[Rr]oman([^z].*)?/", "/Tagebuch.*/", "/.*Sachbuch.*/", "/Protokoll.*/", "/Chat/",
"/.*[Bb]ericht.*/", "/.*Abhandlung.*/")
df <- expand_grid(corpus=corpus, texttype=texttypes) %>%
@@ -176,6 +210,16 @@
bind_cols(corpusStats(kco, .$vc) %>% select(-vc)) %>%
prettifyCorpusNames()
+ dfAssigned <- aggregate(tokens ~ corpus, data=df, sum)
+ dfAssigned$texttype <- "NE|NA"
+ dfAssigned <- merge(dfAssigned, dfPureCorpus, by="corpus")
+ print (dfAssigned)
+
+ dfAssigned$tokens <- dfAssigned$tokens.y - dfAssigned$tokens.x
+ print (dfAssigned)
+ df <- bind_rows(df, dfAssigned)
+ df <- df[order(df$corpus),]
+
hc <- highchart() %>%
hc_add_series(type = "bar", data = df, hcaes(texttype, tokens, group=corpus)) %>%
hc_xAxis(categories = df$texttype %>%