# blob: 02823da628957ce463b91893e590913b92e2965a [file] [log] [blame]
library(RKorAPClient)
library(highcharter)
library(tidyverse)
# Connection object that is passed to every corpus query and corpus-statistics
# call below; verbose = TRUE is requested at construction time.
kco <- new(Class = "KorAPConnection", verbose = TRUE)

# Fixed RNG seed. Presumably this makes the randomized page order used when
# sampling matches reproducible between runs -- TODO confirm.
set.seed(seed = 7)
# Draw a sample of matches for `query` and compute, for every match, where in
# its text it occurs (0 = start of the text, 1 = end of the text).
#
# Arguments:
#   kco        KorAPConnection used for the query and the corpus statistics.
#   query      Query string handed to corpusQuery().
#   sampleSize Upper bound on fetched matches (passed as `maxFetch`).
#
# Returns the collected matches with four added columns:
#   query                 the query string,
#   vc                    virtual corpus definition selecting the match's text,
#   textSize              token count reported by corpusStats() for that text,
#   relativeTextPosition  matchStart / textSize.
# When nothing was collected, a warning is raised and an empty tibble is
# returned, so that callers can still bind_rows() the results of many queries.
#
# Side effect: prints a summary() of the relative positions to the console.
get_relative_positions_sample <- function(kco, query, sampleSize = 400) {
  res <- corpusQuery(kco, query)
  res <- fetchNext(res, maxFetch = sampleSize, randomizePageOrder = TRUE)
  matches <- res@collectedMatches

  # A query without hits must not abort the whole run.
  # NOTE(review): assumes `collectedMatches` is NULL (or has zero rows) when no
  # match was fetched -- confirm against the RKorAPClient documentation.
  if (is.null(matches) || nrow(matches) == 0) {
    warning("No matches collected for query: ", query, call. = FALSE)
    return(tibble())
  }

  # Several matches usually come from the same text, so look up the size of
  # each distinct text only once and map the sizes back onto the matches.
  distinct_sigles <- unique(matches$textSigle)
  distinct_sizes <- corpusStats(
    kco,
    paste0('textSigle="', distinct_sigles, '"'),
    as.df = TRUE
  )$tokens

  matches <- matches %>%
    mutate(
      query = query,
      vc = paste0('textSigle="', textSigle, '"'),
      textSize = distinct_sizes[match(textSigle, distinct_sigles)],
      relativeTextPosition = matchStart / textSize
    )

  cat("\n\n", query, ":\n")
  print(summary(matches$relativeTextPosition))
  cat("\n\n")

  matches
}
# Sample the match positions of each adverb query and stack the per-query
# samples into a single data frame.
# Non-ASCII characters are written as \u escapes because R package demos still
# require ASCII-only sources.
df <- bind_rows(
  lapply(
    c(
      "anfangs/i",
      "zuguterletzt/i",
      "zun\u00e4chst/i",
      "zuerst/i",
      "zuletzt/i",
      "schlie\u00dflich/i"
    ),
    function(adverb_query) get_relative_positions_sample(kco, adverb_query)
  )
)
# One row per query with a list column `data` holding the boxplot values in
# the order min, first quartile, median, third quartile, max.
hc_data <- df %>%
  group_by(query) %>%
  summarise(
    data = list(c(
      min(relativeTextPosition),
      # names = FALSE keeps the vector unnamed
      quantile(relativeTextPosition, 0.25, names = FALSE),
      median(relativeTextPosition),
      quantile(relativeTextPosition, 0.75, names = FALSE),
      max(relativeTextPosition)
    ))
  )
# Build the chart step by step: an inverted boxplot with one category per
# query and a single series holding the per-query boxplot values.
hc <- highchart()
hc <- hc_chart(hc, type = "boxplot", inverted = TRUE)
hc <- hc_xAxis(hc, categories = hc_data$query)
hc <- hc_yAxis(
  hc,
  ceiling = 1,
  title = list(text = "Relative position in text")
)
hc <- hc_add_series(hc, data = hc_data$data)
hc <- hc_title(
  hc,
  text = "Relative positions of some adverbs in DeReKo texts"
)
hc <- hc_legend(hc, enabled = FALSE)

# Render the finished chart.
print(hc)