Build as docker container

Change-Id: Ied30167fbaf02fb7744e6295168c8c450687a4c6
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..305f4b5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
+cache/
+docs
+*.log
+*_files/
+*.bak
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 92f7eec..77120cb 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,38 +1,31 @@
-# use the verse rocker image, as it contains tidyverse, devtools and some texlive
-image: rocker/tidyverse
+# GitLab CI pipeline that builds the corpuscomposition Docker image.
+# Triggered automatically on tag pushes; can also be run manually.
+#
+# Download the compressed image from the job artifacts and load it,
+# e.g. with
+# unxz -c < corpuscomposition-master-fffe0d98.xz | docker load
+#
+image: docker:latest
 
-# define stages of runner. at the moment,
-# just build (no test or deploy).
-stages:
-  - build
+services:
+  - docker:dind
 
-build-job:
+build:
+  rules:
+    - if: $CI_COMMIT_TAG =~ /.+/
+      variables:
+        VID: $CI_COMMIT_TAG
+    - when: manual
+      variables:
+        VID: $CI_COMMIT_REF_SLUG-$CI_COMMIT_SHORT_SHA  # slug keeps VID usable as Docker tag and file name
   stage: build
-
-  cache:
-    key: korap
-    paths:
-      - ./cache
-
   before_script:
-    - source `find .. -name section_helper.sh`
-
-    - start_section install_linux_packages "Installing missing Linux packages"
-    - apt-get update
-    - apt-get install -y libvulkan1 libu2f-udev build-essential libglpk40 libcurl4-gnutls-dev libxml2-dev libsodium-dev libsecret-1-dev libfontconfig1-dev libssl-dev libxt6 libpq-dev curl
-    - end_section install_linux_packages
-
-    - start_section install_r_packages "Installing missing R packages"
-    - R -e "install.packages(c('devtools', 'RKorAPClient', 'httr', 'shiny', 'shinythemes', 'highcharter'))"
-    - R -e 'devtools::install_git("https://korap.ids-mannheim.de/gerrit/IDS-Mannheim/idsThemeR")'
-    - end_section install_r_packages
-
+    # --no-cache fetches the index itself; xz is installed explicitly for the compress step below
+    - apk add --no-cache git xz
   script:
-    - start_section render "Running scripts"
-    - echo 'options(shiny.port=18000)' >> ~/.Rprofile
-    - R_CACHE_ROOTPATH=./cache Rscript shinyCorpusComposition.R &
-    - PID=$!
-    - sleep 10
-    - curl http://127.0.0.1:18000/
-    - kill $PID
-    - end_section render
+    - docker build -f Dockerfile -t korap/corpuscomposition:$VID .
+    - docker save korap/corpuscomposition:$VID | xz -T0 -M16G -9 > corpuscomposition-$VID.xz
+  artifacts:
+    paths:
+      - corpuscomposition-$VID.xz
+
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..19f8a12
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+FROM rocker/tidyverse
+
+RUN apt-get update -qq && apt-get -y --no-install-recommends install \
+      libu2f-udev build-essential libglpk40 libcurl4-gnutls-dev libxml2-dev libsodium-dev libsecret-1-dev libfontconfig1-dev libssl-dev libxt6 libpq-dev && rm -rf /var/lib/apt/lists/*
+
+# Install R dependencies before copying the app so that app changes keep these layers cached.
+RUN R -e "install.packages(c('devtools', 'shiny', 'shinythemes', 'highcharter'))"
+RUN R -e 'devtools::install_git("https://korap.ids-mannheim.de/gerrit/IDS-Mannheim/idsThemeR")'
+RUN R -e 'devtools::install_git("https://korap.ids-mannheim.de/gerrit/KorAP/RKorAPClient")'
+COPY shiny /app
+# Populate the R cache at build time by running the server once with the default corpus definitions.
+ENV R_CACHE_ROOTPATH=/cache
+RUN mkdir -p ${R_CACHE_ROOTPATH}
+RUN R -e 'input=list(cq=";referTo ratskorpus-2023-1;referTo drukola.20180909.1b_words"); source("/app/app.R"); renderHighchart<-function(x) { eval(x) }; server(input,"","");'
+
+EXPOSE 3838
+CMD ["R", "-e", "shiny::runApp('/app', host = '0.0.0.0', port = 3838)"]
diff --git a/shinyCorpusComposition.R b/shiny/app.R
similarity index 83%
rename from shinyCorpusComposition.R
rename to shiny/app.R
index 3c8bccb..1755347 100644
--- a/shinyCorpusComposition.R
+++ b/shiny/app.R
@@ -5,9 +5,19 @@
 library(tidyverse)
 library(idsThemeR)
 
+corpus=c("", "referTo ratskorpus-2023-1", "referTo drukola.20180909.1b_words")
+
+vcFromString <- function(string) {
+  # Split on ";", wrap each non-empty part as "(part) & ", then collapse runs of spaces.
+  parts <- str_split_1(string, " *[;] *")
+  wrapped <- str_replace(parts, "^(.+)$", "(\\1) & ")
+  str_replace_all(wrapped, "  +", " ")
+}
+
 ui <- fluidPage(
 
   theme = shinytheme("paper"),
+  fluidRow(column(width = 12, textInput("cq", "Corpus definitions", paste0(corpus, collapse = ";"), width="100%"))),
   fluidRow(
     column(width = 6, highchartOutput("country")),
     column(width = 6, highchartOutput("domain")),
@@ -19,10 +29,14 @@
 
 server <- function(input, output, session) {
 
+#  corpus <- str_split(input$corpus, ",")
+#  corpus <- corpus %>% str_replace("^(.+)$", "\\1 & ")
+
   observe({
     query <- parseQueryString(session$clientData$url_search)
     if (!is.null(query[['cq']])) {
-      message(query[['cq']])
+      # Take only the "cq" URL parameter; unlisting the whole query would mix in any other parameters.
+      updateTextInput(session, "cq", value = query[['cq']])
     }
   })
 
@@ -43,24 +57,29 @@
     return false;
   }')
 
-  corpus=c("", "referTo ratskorpus-2023-1 & ", "referTo drukola.20180909.1b_words & ")
   kco <- new("KorAPConnection", verbose=TRUE)
   highchart <- function(...) {
     highcharter::highchart() %>%
       hc_add_theme(hc_theme_ids_light()) %>%
       hc_add_onclick_korap_search() %>%
+      hc_yAxis(type = "logarithmic") %>%  # shared default for all charts built by this helper
+      hc_legend(enabled = FALSE) %>%  # FALSE rather than F, which is an ordinary reassignable variable
       hc_plotOptions(series = list(events = list(legendItemClick = sharelegend)))
   }
 
   prettifyCorpusNames <- function(df) {
     rownames(df) = NULL
+    # Strip query syntax ("referTo", trailing "&", parentheses); an empty name becomes "DeReKo-KorAP".
     df %>%
-      mutate(corpus = corpus %>% str_replace("referTo *", "") %>% str_replace(" *& *$", "") |> str_replace("^ *$", "DeReKo-KorAP"))
+      mutate(corpus = corpus %>% str_replace("referTo *", "") %>%
+               str_replace(" *& *$", "") %>% str_replace_all("[)()]", "") %>%
+               str_replace("^ *$", "DeReKo-KorAP"))
 
   }
 
 
   output$country <- renderHighchart({
+    corpus <- vcFromString(input$cq)
 
     countries <- c("DE", "AT", "CH", "IT", "BE", "LU") %>% sort()
 
@@ -72,13 +91,13 @@
     highchart() %>%
       hc_add_series(type = "column", data = df, hcaes(x=country, y=tokens, group=corpus)) %>%
       hc_xAxis(categories = df$country) %>%
-      hc_yAxis(type = "logarithmic") %>%
       hc_legend(enabled=T) %>%
       hc_title(text="Land")
-
   })
 
   output$domain <- renderHighchart({
+    corpus <- vcFromString(input$cq)
+
     topics <-
       c(
         "freizeit-unterhaltung",
@@ -102,13 +121,12 @@
     highchart() %>%
       hc_add_series(type = "bar", data = df, hcaes(domain, tokens, group=corpus)) %>%
       hc_xAxis(categories = df$domain %>% str_to_title(locale = "en") )%>%
-      hc_yAxis(type = "logarithmic") %>%
-      hc_legend(enabled=F) %>%
       hc_title(text="Thema")
 
   })
 
   output$decade <- renderHighchart({
+    corpus <- vcFromString(input$cq)
     decades <-
       c(1951, 1961, 1971, 1981, 1991, 2001, 2011, 2021)
     decade_labels <- function(start_year) {
@@ -124,12 +142,12 @@
     highchart() %>%
       hc_add_series(type = "bar", data = df, hcaes(decade, tokens, group=corpus)) %>%
       hc_xAxis(categories = df$decade )%>%
-      hc_yAxis(type = "logarithmic") %>%
-      hc_legend(enabled=F) %>%
       hc_title(text="Dekade")
   })
 
   output$texttype <- renderHighchart({
+    corpus <- vcFromString(input$cq)
+
     texttypes <-
       c("/Zeitung.*/", "/(Zeitschrift|Magazin).*/", "/Agenturmeldung.*/", "/Enzyklopädie.*/", "/.*Diskussion.*/", "/Roman.*/", "/Newsgroup.*/", "/Tagebuch.*/", "/Sachbuch.*/")
 
@@ -141,8 +159,6 @@
     hc <- highchart() %>%
       hc_add_series(type = "bar", data = df, hcaes(texttype, tokens, group=corpus)) %>%
       hc_xAxis(categories = df$texttype %>% str_replace_all("[/.*)()]", "") %>% str_replace_all("\\|", "/")) %>%
-      hc_yAxis(type = "logarithmic") %>%
-      hc_legend(enabled=F) %>%
       hc_title(text="Texttyp")
     hc
   })