|  | library(RKorAPClient) | 
|  | library(httr) | 
|  | library(httpuv) | 
|  | library(tidyverse) | 
|  | library(scales) | 
|  | library(idsThemeR) | 
|  |  | 
# Root URL of the ICC KorAP instances; a language code is appended as a
# further path segment to address one instance.
icc_base_url <- "https://korap.ids-mannheim.de/instance/icc"

# Connections that have already been opened, keyed by language code.
connections <- list()

# One row per ICC instance: its language code and the OAuth client id that
# is handed to the authorization step for that instance.
icc <- tribble(
  ~lang, ~app_id,
  "eng", "mTTTnJ6f6hGrPh6dRhbJhJ",
  "ger", "TMLPTJfP7rHb93bpFp39mL",
  "nor", "TMFtPJnbb7f4MRmd76Rb34"
)
|  |  | 
|  |  | 
#' Obtain an access token for one ICC KorAP instance
#'
#' The environment variable `KORAP_ICC_TOKEN_<lang>` is consulted first. Only
#' when it is empty is an OAuth 2.0 authorization started through httr; the
#' token obtained that way is written back to the same environment variable,
#' so later calls in this session return it directly.
#'
#' @param lang Language code of the instance; also the suffix of the
#'   environment variable name.
#' @param app_id OAuth client id used as the `key` of the httr app.
#' @param url Base URL of the instance. Defaults to `icc_base_url` followed by
#'   `/` and `lang`; it is only evaluated when no token is cached.
#' @return The access token as a character string.
icc_token <- function(lang, app_id, url = paste0(icc_base_url, "/", lang)) {
  token_key <- paste0("KORAP_ICC_TOKEN_", lang)
  token <- Sys.getenv(token_key)
  if (nzchar(token)) {
    return(token)
  }

  korap_app <- oauth_app("icc-iclc10-contribution", key = app_id, secret = NULL)
  korap_endpoint <- oauth_endpoint(
    NULL,
    "settings/oauth/authorize",
    "api/v1.0/oauth2/token",
    base_url = url
  )
  token_bundle <- oauth2.0_token(
    korap_endpoint,
    korap_app,
    scope = "search match_info",
    cache = FALSE
  )

  token <- token_bundle[["credentials"]][["access_token"]]
  # Fail with a readable message instead of the attribute error that
  # setNames(NULL, ...) would raise when the response carries no token.
  if (!is.character(token) || length(token) != 1L ||
      is.na(token) || !nzchar(token)) {
    stop(
      "No access token received from ", url, " (language '", lang, "')",
      call. = FALSE
    )
  }

  # Sys.setenv() takes the variable name as the argument name, so a computed
  # name has to go through do.call().
  do.call(Sys.setenv, setNames(list(token), token_key))
  token
}
|  |  | 
#' Return the KorAP connection for one ICC instance, creating it on demand
#'
#' Connections are kept in the script-level list `connections`, keyed by
#' language code. A new `KorAPConnection` is created only when no non-empty
#' entry exists for `lang`; in that case `token` is used as its access token.
#' For an already cached language the `token` argument is not used.
#'
#' @param lang Language code of the instance.
#' @param token Access token; defaults to the value of the environment
#'   variable `KORAP_ICC_TOKEN_<lang>`.
#' @return The entry stored in `connections` for `lang`.
icc_con <- function(lang, token = Sys.getenv(paste0("KORAP_ICC_TOKEN_", lang))) {
  # `[[` on a list yields NULL for an unknown name, so one length check covers
  # both "never created" and "empty entry".
  if (length(connections[[lang]]) == 0L) {
    url <- paste0(icc_base_url, "/", lang)
    # `<<-` on purpose: the cache lives outside this function.
    connections[[lang]] <<- new(
      "KorAPConnection",
      KorAPUrl = url,
      accessToken = token,
      cache = FALSE
    )
  }
  connections[[lang]]
}
|  |  | 
# Add an access token to every instance. icc_token() is called once per row,
# hence rowwise(); the row-wise grouping is removed again so that it does not
# leak into later uses of `icc`.
icc <- icc %>%
  rowwise() %>%
  mutate(token = icc_token(lang, app_id)) %>%
  ungroup()
|  |  | 
# Genre labels; each one is used below as the value of `iccGenre` in a
# virtual corpus definition.
genre <- c(
  "Blog",
  "Creative:Novels_ShortStories",
  "Informational:Learned:Humanities",
  "Informational:Learned:NaturalSciences",
  "Informational:Learned:SocialSciences",
  "Informational:Learned:Technology",
  "Informational:Popular:Humanities",
  "Informational:Popular:NaturalSciences",
  "Informational:Popular:SocialSciences",
  "Informational:Popular:Technology",
  "Informational:Reportage",
  "Instructional:AdministrativeRegulatoryProse",
  "Instructional:Skills_Hobbies",
  "Persuasive"
)

# Token count for every instance/genre combination: one corpusStats() call
# per row, then drop the row-wise grouping again.
icc_genre <- icc %>%
  expand_grid(genre) %>%
  mutate(vc = paste0("iccGenre=", genre)) %>%
  rowwise() %>%
  mutate(tokens = corpusStats(icc_con(lang, token), vc = vc)@tokens) %>%
  ungroup()

# Stacked bars per language, one segment per genre; segments with a token
# count of zero get an empty label.
plot <- icc_genre %>%
  ggplot(aes(x = lang, fill = genre, y = tokens)) +
  geom_col() +
  scale_y_continuous(labels = label_number(scale_cut = cut_short_scale())) +
  theme_ids() +
  geom_text(
    aes(label = if_else(tokens > 0, as.character(tokens), ""), y = tokens),
    position = position_stack(reverse = FALSE, vjust = 0.5),
    color = "white",
    size = 3.2,
    family = "Fira Sans Condensed"
  )

# Make sure the output directory exists, and hand the plot to ggsave()
# explicitly instead of relying on last_plot().
# NOTE(review): width/height are multiplied by .pt although units = "mm" --
# confirm that the resulting size is the intended one.
dir.create("target", showWarnings = FALSE, recursive = TRUE)
ggsave("target/tokens_per_genre.png", plot = plot,
       width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800)
ggsave("target/tokens_per_genre.svg", plot = plot,
       width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800)
ggsave("target/tokens_per_genre.pdf", plot = plot, device = cairo_pdf,
       width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800)

if (rstudioapi::isAvailable()) {
  print(plot)
}
|  |  | 
# Publication years to query, one virtual corpus per year.
year <- 1988:2022

# Token count for every instance/year combination: one corpusStats() call
# per row, then drop the row-wise grouping again.
icc_year <- icc %>%
  expand_grid(year) %>%
  mutate(vc = paste0("pubDate in ", year)) %>%
  rowwise() %>%
  mutate(tokens = corpusStats(icc_con(lang, token), vc = vc)@tokens) %>%
  ungroup()

# One line (with points) per language over the years.
plot <- icc_year %>%
  ggplot(aes(x = year, fill = lang, color = lang, y = tokens)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = label_number(scale_cut = cut_short_scale())) +
  theme_ids()

# Make sure the output directory exists, and hand the plot to ggsave()
# explicitly instead of relying on last_plot().
# NOTE(review): width/height are multiplied by .pt although units = "mm" --
# confirm that the resulting size is the intended one.
dir.create("target", showWarnings = FALSE, recursive = TRUE)
ggsave("target/tokens_per_year.png", plot = plot,
       width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800)
ggsave("target/tokens_per_year.svg", plot = plot,
       width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800)
ggsave("target/tokens_per_year.pdf", plot = plot, device = cairo_pdf,
       width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800)

if (rstudioapi::isAvailable()) {
  print(plot)
}
|  |  | 
# Part-of-speech tags queried on the `ud/p` layer. The tags listed in the
# trailing comments are deliberately disabled.
POS_tag <- c(
  "ADJ", "ADP",    # "PUNCT",
  "ADV", "AUX",    # "SYM",
  "INTJ", "CCONJ", # "X",
  "NOUN", "DET",
  "PROPN",         # "NUM",
  "VERB",          # "PART",
  "PRON",
  "SCONJ"
)

# `f` as returned by frequencyQuery() for every instance/tag combination.
# The token is passed to icc_con() explicitly, as in the corpusStats() steps.
icc_by_pos_tag <- icc %>%
  expand_grid(POS = POS_tag) %>%
  rowwise() %>%
  mutate(
    f = frequencyQuery(icc_con(lang, token), sprintf("[ud/p=%s]", POS))$f
  ) %>%
  ungroup()

# Stacked bars per language, one segment per tag, labelled with 100 * f
# formatted as a percentage.
plot <- icc_by_pos_tag %>%
  ggplot(aes(x = lang, fill = POS, y = f)) +
  geom_col() +
  scale_y_continuous(labels = label_number(scale_cut = cut_short_scale())) +
  theme_ids(base_size = 12) +
  geom_text(
    aes(label = sprintf("%.2f%%", 100 * f), y = f),
    position = position_stack(reverse = FALSE, vjust = 0.5),
    color = "white",
    size = 3.2,
    family = "Fira Sans Condensed"
  )

# Make sure the output directory exists, and hand the plot to ggsave()
# explicitly instead of relying on last_plot().
# NOTE(review): width/height are multiplied by .pt although units = "mm" --
# confirm that the resulting size is the intended one.
dir.create("target", showWarnings = FALSE, recursive = TRUE)
ggsave("target/pos_proportions.png", plot = plot,
       width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800)
ggsave("target/pos_proportions.svg", plot = plot,
       width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800)
ggsave("target/pos_proportions.pdf", plot = plot, device = cairo_pdf,
       width = 70 * .pt, height = 45 * .pt, units = "mm", dpi = 800)

if (rstudioapi::isAvailable()) {
  print(plot)
}