blob: 02823da628957ce463b91893e590913b92e2965a [file] [log] [blame]
Marc Kupietzf4881122024-12-17 14:55:39 +01001library(RKorAPClient)
2library(highcharter)
3library(tidyverse)
4
# Connection object handed to every KorAP request issued further down;
# created with verbose = TRUE.
kco <- new(Class = "KorAPConnection", verbose = TRUE)

# Fix the RNG seed -- presumably so that the randomized page order used when
# sampling matches is reproducible between runs.
set.seed(seed = 7)
8
# Fetch a sample of matches for one query and compute where each match sits
# inside its text.
#
# Arguments:
#   kco        connection object passed on to corpusQuery() and corpusStats()
#   query      query string; also stored in a `query` column of the result
#   sampleSize maximum number of matches to fetch (passed as maxFetch)
#
# Returns the collected matches with four added columns:
#   query                 the query string
#   vc                    a 'textSigle="..."' expression built per match
#   textSize              the `tokens` value corpusStats() reports for `vc`
#   relativeTextPosition  matchStart divided by textSize
#
# Side effect: prints the query and a summary() of the relative positions.
get_relative_positions_sample <- function(kco, query, sampleSize = 400) {
  # Pages are fetched in randomized order (randomizePageOrder = TRUE).
  fetched <- fetchNext(
    corpusQuery(kco, query),
    maxFetch = sampleSize,
    randomizePageOrder = TRUE
  )

  matches <- mutate(
    fetched@collectedMatches,
    query = query,
    vc = paste0('textSigle="', textSigle, '"'),
    textSize = corpusStats(kco, vc, as.df = TRUE)$tokens,
    relativeTextPosition = matchStart / textSize
  )

  cat("\n\n", query, ":\n")
  print(summary(matches$relativeTextPosition))
  cat("\n\n")

  matches
}
25
# Sample match positions for each adverb query and stack the per-query
# results into one data frame (the `query` column tells them apart).
df <- bind_rows(
  map(
    c(
      "anfangs/i",
      "zuguterletzt/i",
      # non-ASCII characters still have to be written as \u escapes in
      # R package demos
      "zun\u00e4chst/i",
      "zuerst/i",
      "zuletzt/i",
      "schlie\u00dflich/i"
    ),
    function(adverb_query) get_relative_positions_sample(kco, adverb_query)
  )
)
36
# Step 1: per query, compute the five values of a box plot
# (minimum, lower quartile, median, upper quartile, maximum).
hc_data <- summarise(
  group_by(df, query),
  min = min(relativeTextPosition),
  q1 = quantile(relativeTextPosition, probs = 0.25),
  median = median(relativeTextPosition),
  q3 = quantile(relativeTextPosition, probs = 0.75),
  max = max(relativeTextPosition)
)

# Step 2: pack the five values of each row into one vector (list column
# `data`) and keep only the query label next to it.
hc_data <- select(
  mutate(hc_data, data = pmap(list(min, q1, median, q3, max), c)),
  query, data
)
48
# Assemble the box plot step by step: one category per query on the x axis,
# one five-value vector per query as the series data.
hc <- highchart()
hc <- hc_chart(hc, type = "boxplot", inverted = TRUE)
hc <- hc_xAxis(hc, categories = hc_data$query)
hc <- hc_yAxis(
  hc,
  ceiling = 1,
  title = list(text = "Relative position in text")
)
hc <- hc_add_series(hc, data = hc_data$data)
hc <- hc_title(
  hc,
  text = "Relative positions of some adverbs in DeReKo texts"
)
hc <- hc_legend(hc, enabled = FALSE)

# Explicit print so the chart is rendered when the script is run
# non-interactively.
print(hc)