Add POS proportions chart
Change-Id: I33eb45dbac2b915bca3c99eb5f0670554bf6f9b4
diff --git a/R/icc_stats.R b/R/icc_stats.R
index a1d8374..9d8f4a1 100644
--- a/R/icc_stats.R
+++ b/R/icc_stats.R
@@ -104,3 +104,30 @@
print(plot)
}
+# POS tags to chart; each one is substituted into the `[ud/p=...]` query below.
+# Disabled (commented out) in this list: PUNCT, SYM, X, NUM, PART.
+POS_tag <- c(
+  "ADJ", "ADP",
+  "ADV", "AUX",
+  "INTJ", "CCONJ",
+  "NOUN", "DET",
+  "PROPN", "VERB",
+  "PRON", "SCONJ"
+)
+
+icc_by_pos_tag <- expand_grid(icc, POS = POS_tag) %>% # pair every row of `icc` with every tag in POS_tag
+  rowwise() %>% # evaluate icc_con(lang) and the query string one row at a time
+  mutate(f = frequencyQuery(icc_con(lang), sprintf("[ud/p=%s]", POS))$f) %>% ungroup() # keep only `f` of the result; drop the rowwise state so later verbs act on whole columns
+
+plot <- ggplot(icc_by_pos_tag, aes(x = lang, y = f, fill = POS)) + # one stacked bar per `lang`, one segment per POS tag
+  geom_col() + scale_y_continuous(labels = label_number(scale_cut = cut_short_scale())) +
+  theme_ids(base_size = 12) + # next layer: white label centred in each segment, showing 100 * f with two decimals and a % sign
+  geom_text(aes(label = sprintf("%.2f%%", 100 * f)), position = position_stack(reverse = FALSE, vjust = 0.5), color = "white", size = 3.2, family = "Fira Sans Condensed")
+
+ggsave("target/pos_proportions.png", plot = plot, width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800) # pass the chart explicitly instead of relying on last_plot()
+ggsave("target/pos_proportions.svg", plot = plot, width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800) # NOTE(review): sizes are mm values multiplied by ggplot2's `.pt` factor - confirm this scaling is intended
+ggsave("target/pos_proportions.pdf", plot = plot, device = cairo_pdf, width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800) # PDF is written through the cairo_pdf device
+
+if (rstudioapi::isAvailable()) { # on-screen preview only when rstudioapi reports RStudio is available
+  print(plot)
+}