Merge branch 'rainer' into 'master'

Added computation of "others", displayed as NE/NA (for not existent/not...

See merge request KorAP/CorpusCompositionAnalyzer!5
diff --git a/shiny/app.R b/shiny/app.R
index f807658..94541f9 100644
--- a/shiny/app.R
+++ b/shiny/app.R
@@ -95,6 +95,10 @@
 
   }
 
+  pureCorpus <- corpus %>% str_replace(" *& *$", "")  # strip a trailing "&" (with surrounding spaces) from the corpus query
+  dfPureCorpus <- corpusStats(kco, pureCorpus)  # NOTE(review): relies on `corpus` already existing in this scope -- confirm
+  dfPureCorpus$corpus <- dfPureCorpus$vc  # `corpus` is the key the charts below merge on
+  dfPureCorpus <- prettifyCorpusNames(dfPureCorpus)  # same helper the chart data frames pass through; presumably keeps the merge keys aligned -- verify
 
   output$country <- renderHighchart({
     corpus <- vcFromString(input$cq)
@@ -106,6 +110,13 @@
       prettifyCorpusNames() %>%
       bind_cols(corpusStats(kco, .$vc) %>% select(-vc))
 
+    dfAssigned <- aggregate(tokens ~ corpus, data=df, sum)  # sum of tokens per corpus over all rows of df
+    dfAssigned$country <- "NE/NA"  # label for the remainder category
+    dfAssigned <- merge(dfAssigned, dfPureCorpus, by="corpus")  # both inputs carry `tokens`: tokens.x is the sum above, tokens.y comes from dfPureCorpus
+    dfAssigned$tokens <- dfAssigned$tokens.y - dfAssigned$tokens.x  # remainder = dfPureCorpus tokens minus the summed tokens
+    df <- bind_rows(df, dfAssigned)  # append the remainder rows to the chart data
+    df <- df[order(df$corpus),]  # sort rows by corpus
+
     highchart() %>%
       hc_add_series(type = "column", data = df, hcaes(x=country, y=tokens, group=corpus)) %>%
       hc_xAxis(categories = df$country) %>%
@@ -131,14 +142,30 @@
         "fiktion"
       )
 
+    # Query fragment selecting texts that belong to none of the listed topics:
+    # "textClass != t1 & textClass != t2 & ...". paste0() is vectorised over
+    # `topics`; `collapse` joins the per-topic conditions with " & ".
+    NASTRING <- paste0(
+      "textClass != ", topics, collapse = " & "
+    )
+
     df <- expand_grid(corpus=corpus, domain=topics) %>%
       mutate(vc = sprintf("%stextClass=%s", corpus, domain)) %>%
       bind_cols(corpusStats(kco, .$vc)%>% select(-vc)) %>%
       prettifyCorpusNames()
 
+    dfNotAssigned <-
+      corpusStats(kco, vc=sprintf("%s%s", corpus, NASTRING)) %>%  # query = corpus prefix followed by the "none of the topics" condition
+      prettifyCorpusNames()  # NOTE(review): unlike df, no `corpus` column is set before this call -- confirm the helper provides one, since rows are ordered by df$corpus below
+
+    dfNotAssigned$domain <- "NE/NA"  # label for texts matching none of the listed topics
+
+    df <- bind_rows(df, dfNotAssigned)  # append the remainder rows to the chart data
+    df <- df[order(df$corpus),]  # sort rows by corpus
+
     highchart() %>%
       hc_add_series(type = "bar", data = df, hcaes(domain, tokens, group=corpus)) %>%
-      hc_xAxis(categories = df$domain %>% str_to_title(locale = "en") )%>%
+      hc_xAxis(categories = df$domain %>% tools::toTitleCase() %>% str_replace_all("ae", "ä") )%>%  # title-case the labels, then turn every "ae" into "ä"; NOTE(review): this hits any label containing "ae" -- confirm intended
       hc_title(text="Thema")
 
   })
@@ -157,6 +184,13 @@
       mutate(decade = decade_labels(decade)) %>%
       prettifyCorpusNames()
 
+    dfAssigned <- aggregate(tokens ~ corpus, data=df, sum)  # sum of tokens per corpus over all rows of df
+    dfAssigned$decade <- "NE/NA"  # label for the remainder category
+    dfAssigned <- merge(dfAssigned, dfPureCorpus, by="corpus")  # both inputs carry `tokens`: tokens.x is the sum above, tokens.y comes from dfPureCorpus
+    dfAssigned$tokens <- dfAssigned$tokens.y - dfAssigned$tokens.x  # remainder = dfPureCorpus tokens minus the summed tokens
+    df <- bind_rows(df, dfAssigned)  # append the remainder rows to the chart data
+    df <- df[order(df$corpus),]  # sort rows by corpus
+
     highchart() %>%
       hc_add_series(type = "bar", data = df, hcaes(decade, tokens, group=corpus)) %>%
       hc_xAxis(categories = df$decade )%>%
@@ -168,7 +202,7 @@
 
     texttypes <-
       c("/[^:]*[Zz]eitung.*/", "/(Zeitschrift|Magazin).*/", "/Agenturmeldung.*/", "/Enzyklopädie.*/", "/.*Diskussion.*/",
-        "/.*[Rr]oman([^z].*)?/", "/Newsgroup.*/", "/Tagebuch.*/", "/.*Sachbuch.*/", "/Protokoll.*/", "/Chat/",
+        "/.*[Rr]oman([^z].*)?/", "/Tagebuch.*/", "/.*Sachbuch.*/", "/Protokoll.*/", "/Chat/",
         "/.*[Bb]ericht.*/", "/.*Abhandlung.*/")
 
     df <- expand_grid(corpus=corpus, texttype=texttypes) %>%
@@ -176,6 +210,16 @@
       bind_cols(corpusStats(kco, .$vc) %>% select(-vc)) %>%
       prettifyCorpusNames()
 
+    # Remainder per corpus: tokens of the whole corpus (dfPureCorpus) minus
+    # the tokens matched by the text type patterns above; appended to df as
+    # an extra "NE|NA" category so the chart also shows unmatched texts.
+    dfAssigned <- aggregate(tokens ~ corpus, data=df, sum)  # sum of tokens per corpus over all rows of df
+    dfAssigned$texttype <- "NE|NA"  # NOTE(review): "|" here vs "/" in the other charts -- confirm deliberate
+    dfAssigned <- merge(dfAssigned, dfPureCorpus, by="corpus")  # tokens.x is the sum above, tokens.y comes from dfPureCorpus
+    dfAssigned$tokens <- dfAssigned$tokens.y - dfAssigned$tokens.x  # NOTE(review): presumably can go negative if one text matches several patterns -- verify
+    df <- bind_rows(df, dfAssigned)  # append the remainder rows to the chart data
+    df <- df[order(df$corpus),]  # sort rows by corpus
+
     hc <- highchart() %>%
       hc_add_series(type = "bar", data = df, hcaes(texttype, tokens, group=corpus)) %>%
       hc_xAxis(categories = df$texttype %>%