Add poster
Change-Id: I1a9f7335b376ef72c65f0647287adc894de9aa17
diff --git a/R/poster.Rmd b/R/poster.Rmd
new file mode 100644
index 0000000..677a042
--- /dev/null
+++ b/R/poster.Rmd
@@ -0,0 +1,196 @@
+---
+title: "News from the International Comparable Corpus"
+subtitle: "First launch of ICC written"
+date: "`r Sys.Date()`"
+author:
+  - name: Marc Kupietz
+    affil: 1
+  - name: Adrien Barbaresi
+    affil: 2
+  - name: Anna Cermakova
+    affil: 3
+  - name: Małgorzata Czachor
+    affil: 4
+  - name: Nils Diewald
+    affil: 1
+  - name: Jarle Ebeling
+    affil: 5
+  - name: Rafał L. Górski
+    affil: 4
+  - name: John Kirk
+    affil: 6
+  - name: Michal Křen
+    affil: 3
+  - name: Harald Lüngen
+    affil: 1
+  - name: Eliza Margaretha
+    affil: 1
+  - name: Signe Oksefjell Ebeling
+    affil: 5
+  - name: Mícheál Ó Meachair
+    affil: 7
+  - name: Ines Pisetta
+    affil: 1
+  - name: Elaine Uí Dhonnchadha
+    affil: 8
+  - name: Friedemann Vogel
+    affil: 9
+  - name: Rebecca Wilm
+    affil: 1
+  - name: Jiajin Xu
+    affil: 10
+  - name: Rameela Yaddehige
+    affil: 1
+affiliation:
+  - num: 1
+    address: IDS Mannheim
+  - num: 2
+    address: BBAW Berlin
+  - num: 3
+    address: Charles University
+  - num: 4
+    address: Polish Academy of Sciences
+  - num: 5
+    address: University of Oslo
+  - num: 6
+    address: University of Vienna
+  - num: 7
+    address: Dublin City University
+  - num: 8
+    address: Trinity College Dublin
+  - num: 9
+    address: University of Siegen
+  - num: 10
+    address: Beijing Foreign Studies University
+
+
+logoleft_name: "../Figures/ICC_COL.svg"
+author_textsize: "32pt"
+
+output:
+ posterdown::posterdown_html
+---
+
+```{r setup, include=FALSE, echo=FALSE, warning=FALSE}
+knitr::opts_chunk$set(echo = FALSE, warning = FALSE) # the knitr chunk option is `warning`; `warnings` has no effect
+source("common.R") # project script, not shown here; presumably defines icc, icc_con(), theme_ids() etc. - TODO confirm
+```
+# ICC aims & characteristics
+
+* open initiative
+* to improve the empirical basis for contrastive linguistics
+* by compiling comparable corpora for many languages
+* and making them as freely available as possible
+* also by providing tools to query and analyse them
+* mostly based on existing corpora
+* mimics the composition of ICE
+
+# Current alpha launch
+
+## Composition of parts
+### By ICC genre
+
+```{r composition_by_genre, message = FALSE, fig.width=14, fig.height=10, out.width = "100%"}
+# Token count per language and ICC genre, one corpusStats() call per (lang, genre) row.
+icc_genre <- icc %>%
+  expand_grid(genre) %>%
+  mutate(vc = paste0("iccGenre=", genre)) %>%
+  rowwise() %>%
+  mutate(tokens = corpusStats(icc_con(lang, token), vc = vc)@tokens)
+# Stacked bars per language; every non-empty genre segment is labelled with its token count.
+icc_genre %>% ggplot(aes(x = lang, fill = genre, y = tokens)) +
+  geom_col() + scale_y_continuous(labels = label_number(scale_cut = cut_short_scale())) +
+  theme_ids(base_size = 24) +
+  theme(
+    axis.title.x = element_text(size = rel(1.5), face = "bold"),
+    axis.title.y = element_text(size = rel(1.5), face = "bold"),
+    axis.text = element_text(size = rel(0.70)),
+    legend.title = element_text(size = rel(0.85), face = "bold"),
+    legend.text = element_text(size = rel(1))) +
+  scale_fill_ids() +
+  geom_text(aes(label = if_else(tokens > 0, as.character(tokens), ""), y = tokens), position = position_stack(reverse = FALSE, vjust = 0.5), color = "black", size = 6.2, family = "Fira Sans Condensed")
+```
+
+### By date of publication
+
+
+```{r composition_by_pubdate, message=FALSE, warning=FALSE, fig.width=14, fig.height=7, out.width = "100%"}
+year <- 1986:2023 # NOTE(review): years before 1990 are queried but then dropped by xlim() below
+# Token count per language and publication year, one corpusStats() call per (lang, year) row.
+icc_year <- icc %>%
+  expand_grid(year) %>%
+  mutate(vc = paste0("pubDate in ", year)) %>%
+  rowwise() %>%
+  mutate(tokens = corpusStats(icc_con(lang, token), vc = vc)@tokens)
+# Loess-smoothed token counts over publication year, one translucent area per language.
+icc_year %>% ggplot(aes(x = year, fill = lang, color = lang, y = tokens)) +
+  # geom_smooth(se=F, span=0.25) +
+  xlim(1990, 2023) +
+  # The lower y limit lives in scale_y_continuous() below: a separate ylim() would be replaced by that scale.
+  stat_smooth(
+    geom = "area", method = "loess", span = 1 / 4,
+    alpha = 0.1) +
+  # geom_area(alpha=0.1, position = "identity") +
+  scale_fill_ids() + scale_colour_ids() +
+  scale_y_continuous(limits = c(0, NA), labels = label_number(scale_cut = cut_short_scale())) +
+  theme_ids(base_size = 24) +
+  theme(
+    axis.title.x = element_text(size = rel(1.5), face = "bold"),
+    axis.title.y = element_text(size = rel(1.5), face = "bold"),
+    axis.text = element_text(size = rel(1)),
+    legend.title = element_text(size = rel(1), face = "bold"),
+    legend.text = element_text(size = rel(1)))
+```
+
+### Part-of-Speech proportions
+
+```{r pos_proportions, fig.width=14, fig.height=10, out.width = "100%"}
+POS_tag <- c( # tags queried below as [ud/p=TAG]; commented-out tags are left out of the plot
+  "ADJ", "ADP",# "PUNCT",
+  "ADV", "AUX", # "SYM",
+  # "INTJ",
+  "CCONJ", # "X",
+  "NOUN", "DET",
+  "PROPN", #"NUM",
+  "VERB", #"PART",
+  "PRON",
+  "SCONJ"
+  )
+# One frequencyQuery() call per (lang, POS) row; its `f` value becomes the bar height.
+icc_by_pos_tag <- icc %>% expand_grid(POS = POS_tag) %>%
+  rowwise() %>%
+  mutate(f = frequencyQuery(icc_con(lang), sprintf("[ud/p=%s]", POS))$f)
+# Stacked bars per language; each segment is labelled with 100 * f formatted as a percentage.
+icc_by_pos_tag %>% ggplot(aes(x = lang, fill = POS, y = f)) +
+  geom_col() + scale_y_continuous(labels = label_number(scale_cut = cut_short_scale())) +
+  scale_fill_ids() + scale_color_ids() +
+  theme_ids(base_size = 24) +
+  theme(
+    axis.title.x = element_text(size = rel(1.5), face = "bold"),
+    axis.title.y = element_text(size = rel(1.5), face = "bold"),
+    axis.text = element_text(size = rel(1)),
+    legend.title = element_text(size = rel(1), face = "bold"),
+    legend.text = element_text(size = rel(1))) +
+  geom_text(aes(label = sprintf("%.2f%%", 100 * f), y = f), position = position_stack(reverse = FALSE, vjust = 0.5), color = "black", size = 6.2, family = "Fira Sans Condensed")
+```
+
+# Identification of Light Verb Constructions with *take*
+
+
+## English: *take*
+
+```{r take_icc, echo=TRUE, message=FALSE}
+take_ca_icc <-
+  collocationAnalysis(
+    icc_con("eng"),
+    "focus({[ud/l=take]} [ud/p=NOUN])",
+    leftContextSize = 0,
+    rightContextSize = 1,
+    minOccur = 2,
+    addExamples = TRUE
+  )
+
+take_ca_icc %>% show_table()
+```
+
+