Add POS proportions chart
Change-Id: I33eb45dbac2b915bca3c99eb5f0670554bf6f9b4
diff --git a/R/icc_stats.R b/R/icc_stats.R
index a1d8374..9d8f4a1 100644
--- a/R/icc_stats.R
+++ b/R/icc_stats.R
@@ -104,3 +104,30 @@
print(plot)
}
+# Part-of-speech tags included in the chart, in the order they are queried.
+# Excluded (kept here for reference): PUNCT, SYM, X, NUM, PART.
+POS_tag <- c(
+  "ADJ", "ADP",
+  "ADV", "AUX",
+  "INTJ", "CCONJ",
+  "NOUN", "DET",
+  "PROPN", "VERB",
+  "PRON", "SCONJ"
+)
+# Pair every row of icc with every POS tag, then look up f one row at a time
+# (rowwise), querying "[ud/p=<tag>]" against icc_con(lang); ungroup afterwards.
+icc_by_pos_tag <- icc %>% expand_grid(POS = POS_tag) %>% rowwise() %>%
+  mutate(f = frequencyQuery(icc_con(lang), sprintf("[ud/p=%s]", POS))$f) %>% ungroup()
+# Stacked columns of f per lang, coloured by POS; every segment carries a centred "100 * f" percent label.
+plot <- ggplot(icc_by_pos_tag, aes(x = lang, y = f, fill = POS)) + geom_col() +
+  scale_y_continuous(labels = label_number(scale_cut = cut_short_scale())) + theme_ids(base_size = 12) +
+  geom_text(aes(label = sprintf("%.2f%%", 100 * f)), color = "white", size = 3.2, family = "Fira Sans Condensed",
+    position = position_stack(reverse = FALSE, vjust = 0.5)) # FALSE spelled out: F is an ordinary, reassignable variable
+# Write the chart to target/ as PNG, SVG and PDF; plot is passed explicitly instead of relying on last_plot().
+ggsave("target/pos_proportions.png", plot = plot, width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800)
+ggsave("target/pos_proportions.svg", plot = plot, width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800)
+ggsave("target/pos_proportions.pdf", plot = plot, device = cairo_pdf, width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800)
+# Display the chart interactively when the RStudio API is available.
+if (rstudioapi::isAvailable()) {
+  print(plot)
+}
diff --git a/Readme.md b/Readme.md
index 3aa05cc..a4c02df 100644
--- a/Readme.md
+++ b/Readme.md
@@ -20,3 +20,6 @@
![tokens per year](https://gitlab.ids-mannheim.de/ICC/2023-07-20-ICC-ICLC10/-/jobs/artifacts/master/raw/target/tokens_per_year.png?job=build-job)
+### POS proportions
+
+![POS proportions](https://gitlab.ids-mannheim.de/ICC/2023-07-20-ICC-ICLC10/-/jobs/artifacts/master/raw/target/pos_proportions.svg?job=build-job)