Build as docker container
Change-Id: Ied30167fbaf02fb7744e6295168c8c450687a4c6
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..305f4b5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
+cache/
+docs
+*.log
+*_files/
+*.bak
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 92f7eec..77120cb 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,38 +1,31 @@
-# use the verse rocker image, as it contains tidyverse, devtools and some texlive
-image: rocker/tidyverse
+# GitLab CI pipeline that builds the corpuscomposition Docker image.
+# Triggered automatically on tag pushes; otherwise it has to be started manually.
+#
+# Download the compressed image from the job artifacts and import it,
+# e.g. with
+# unxz -c < corpuscomposition-master-fffe0d98.xz | docker load
+#
+image: docker:latest
-# define stages of runner. at the moment,
-# just build (no test or deploy).
-stages:
- - build
+services:
+ - docker:dind
-build-job:
+build:
+ rules:
+ - if: $CI_COMMIT_TAG =~ /.+/
+ variables:
+ VID: $CI_COMMIT_TAG
+ - when: manual
+ variables:
+ VID: $CI_COMMIT_BRANCH-$CI_COMMIT_SHORT_SHA
stage: build
-
- cache:
- key: korap
- paths:
- - ./cache
-
before_script:
- - source `find .. -name section_helper.sh`
-
- - start_section install_linux_packages "Installing missing Linux packages"
- - apt-get update
- - apt-get install -y libvulkan1 libu2f-udev build-essential libglpk40 libcurl4-gnutls-dev libxml2-dev libsodium-dev libsecret-1-dev libfontconfig1-dev libssl-dev libxt6 libpq-dev curl
- - end_section install_linux_packages
-
- - start_section install_r_packages "Installing missing R packages"
- - R -e "install.packages(c('devtools', 'RKorAPClient', 'httr', 'shiny', 'shinythemes', 'highcharter'))"
- - R -e 'devtools::install_git("https://korap.ids-mannheim.de/gerrit/IDS-Mannheim/idsThemeR")'
- - end_section install_r_packages
-
+ - apk update
+ - apk add --no-cache git
script:
- - start_section render "Running scripts"
- - echo 'options(shiny.port=18000)' >> ~/.Rprofile
- - R_CACHE_ROOTPATH=./cache Rscript shinyCorpusComposition.R &
- - PID=$!
- - sleep 10
- - curl http://127.0.0.1:18000/
- - kill $PID
- - end_section render
+ - docker build -f Dockerfile -t korap/corpuscomposition:$VID .
+ - docker save korap/corpuscomposition:$VID | xz -T0 -M16G -9 > corpuscomposition-$VID.xz
+ artifacts:
+ paths:
+ - corpuscomposition-$VID.xz
+
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..19f8a12
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+FROM rocker/tidyverse
+
+RUN apt-get update -qq && apt-get -y --no-install-recommends install \
+    libu2f-udev build-essential libglpk40 libcurl4-gnutls-dev libxml2-dev libsodium-dev libsecret-1-dev libfontconfig1-dev libssl-dev libxt6 libpq-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# R dependencies are installed before the app is copied so these layers stay cached when only app code changes.
+RUN R -e "install.packages(c('devtools', 'shiny', 'shinythemes', 'highcharter'))"
+RUN R -e 'devtools::install_git("https://korap.ids-mannheim.de/gerrit/IDS-Mannheim/idsThemeR")'
+RUN R -e 'devtools::install_git("https://korap.ids-mannheim.de/gerrit/KorAP/RKorAPClient")'
+# Copy the app to the absolute path used below, then call its server function once to populate the cache at build time.
+COPY shiny /app
+ENV R_CACHE_ROOTPATH=/cache
+RUN mkdir -p ${R_CACHE_ROOTPATH} && R -e 'input=list(cq=";referTo ratskorpus-2023-1;referTo drukola.20180909.1b_words"); source("/app/app.R"); renderHighchart<-function(x) { eval(x) }; server(input,"","");'
+
+EXPOSE 3838
+CMD ["R", "-e", "shiny::runApp('/app', host = '0.0.0.0', port = 3838)"]
diff --git a/shinyCorpusComposition.R b/shiny/app.R
similarity index 83%
rename from shinyCorpusComposition.R
rename to shiny/app.R
index 3c8bccb..1755347 100644
--- a/shinyCorpusComposition.R
+++ b/shiny/app.R
@@ -5,9 +5,19 @@
library(tidyverse)
library(idsThemeR)
+corpus=c("", "referTo ratskorpus-2023-1", "referTo drukola.20180909.1b_words")
+# Turn a ";"-separated string of corpus definitions into a character vector of query prefixes, one per definition.
+vcFromString <- function(string) {
+  str_trim(string) %>%  # a whitespace-only input would otherwise become the malformed prefix "( ) & "
+  str_split_1(" *[;] *") %>%  # one element per definition; spaces around ";" are dropped
+  str_replace("^(.+)$", "(\\1) & ") %>%  # non-empty definitions become "(definition) & ", empty ones stay ""
+  str_replace_all(" +", " ")  # collapse runs of spaces
+}
+
ui <- fluidPage(
theme = shinytheme("paper"),
+ fluidRow(column(width = 12, textInput("cq", "Corpus definitions", paste0(corpus, collapse = ";"), width="100%"))),
fluidRow(
column(width = 6, highchartOutput("country")),
column(width = 6, highchartOutput("domain")),
@@ -19,10 +29,14 @@
server <- function(input, output, session) {
+# corpus <- str_split(input$corpus, ",")
+# corpus <- corpus %>% str_replace("^(.+)$", "\\1 & ")
+
observe({
query <- parseQueryString(session$clientData$url_search)
if (!is.null(query[['cq']])) {
- message(query[['cq']])
+ corpus = as.vector(unlist(query))
+ updateTextInput(session, "cq", value = corpus)
}
})
@@ -43,24 +57,29 @@
return false;
}')
- corpus=c("", "referTo ratskorpus-2023-1 & ", "referTo drukola.20180909.1b_words & ")
kco <- new("KorAPConnection", verbose=TRUE)
highchart <- function(...) {
highcharter::highchart() %>%
hc_add_theme(hc_theme_ids_light()) %>%
hc_add_onclick_korap_search() %>%
+ hc_yAxis(type = "logarithmic") %>%
+ hc_legend(enabled=F) %>%
hc_plotOptions(series = list(events = list(legendItemClick = sharelegend)))
}
prettifyCorpusNames <- function(df) {
rownames(df) = NULL
+
df %>%
- mutate(corpus = corpus %>% str_replace("referTo *", "") %>% str_replace(" *& *$", "") |> str_replace("^ *$", "DeReKo-KorAP"))
+ mutate(corpus = corpus %>% str_replace("referTo *", "") %>% str_replace(" *& *$", "") %>%
+ str_replace_all("[)()]", "") %>%
+ str_replace("^ *$", "DeReKo-KorAP"))
}
output$country <- renderHighchart({
+ corpus <- vcFromString(input$cq)
countries <- c("DE", "AT", "CH", "IT", "BE", "LU") %>% sort()
@@ -72,13 +91,13 @@
highchart() %>%
hc_add_series(type = "column", data = df, hcaes(x=country, y=tokens, group=corpus)) %>%
hc_xAxis(categories = df$country) %>%
- hc_yAxis(type = "logarithmic") %>%
hc_legend(enabled=T) %>%
hc_title(text="Land")
-
})
output$domain <- renderHighchart({
+ corpus <- vcFromString(input$cq)
+
topics <-
c(
"freizeit-unterhaltung",
@@ -102,13 +121,12 @@
highchart() %>%
hc_add_series(type = "bar", data = df, hcaes(domain, tokens, group=corpus)) %>%
hc_xAxis(categories = df$domain %>% str_to_title(locale = "en") )%>%
- hc_yAxis(type = "logarithmic") %>%
- hc_legend(enabled=F) %>%
hc_title(text="Thema")
})
output$decade <- renderHighchart({
+ corpus <- vcFromString(input$cq)
decades <-
c(1951, 1961, 1971, 1981, 1991, 2001, 2011, 2021)
decade_labels <- function(start_year) {
@@ -124,12 +142,12 @@
highchart() %>%
hc_add_series(type = "bar", data = df, hcaes(decade, tokens, group=corpus)) %>%
hc_xAxis(categories = df$decade )%>%
- hc_yAxis(type = "logarithmic") %>%
- hc_legend(enabled=F) %>%
hc_title(text="Dekade")
})
output$texttype <- renderHighchart({
+ corpus <- vcFromString(input$cq)
+
texttypes <-
c("/Zeitung.*/", "/(Zeitschrift|Magazin).*/", "/Agenturmeldung.*/", "/Enzyklopädie.*/", "/.*Diskussion.*/", "/Roman.*/", "/Newsgroup.*/", "/Tagebuch.*/", "/Sachbuch.*/")
@@ -141,8 +159,6 @@
hc <- highchart() %>%
hc_add_series(type = "bar", data = df, hcaes(texttype, tokens, group=corpus)) %>%
hc_xAxis(categories = df$texttype %>% str_replace_all("[/.*)()]", "") %>% str_replace_all("\\|", "/")) %>%
- hc_yAxis(type = "logarithmic") %>%
- hc_legend(enabled=F) %>%
hc_title(text="Texttyp")
hc
})