blob: af68bb8037a2413e8f528e9af756c5d7a7ab4a49 [file] [log] [blame]
Marc Kupietz686c4312023-06-23 15:41:44 +02001library(RKorAPClient)
2library(httr)
3library(httpuv)
4library(tidyverse)
5library(scales)
6library(idsThemeR)
7library(kableExtra)
8library(DT)
9
# KorAPConnection object pointing at the nkjp1m-sgjp KorAP instance.
nkjp <- new(
  "KorAPConnection",
  KorAPUrl = "https://korap.ids-mannheim.de/instance/nkjp1m-sgjp"
)
#' Extract the distinct word tokens of a query string
#'
#' Splits `query` at runs of whitespace, punctuation, brackets and quote
#' characters (or at the HTML entity `&quot;`), then keeps only the pieces that
#' consist of alphanumerics with at most one hyphen after the first run.
#'
#' @param query Character vector holding the query text.
#' @return Character vector of unique tokens, ordered by decreasing length and,
#'   among tokens of equal length, in decreasing sort order. Missing (`NA`)
#'   input contributes no tokens.
wordsFromQuery <- function(query) {
  tokens <- str_split(query, "([! )(\uc2\uab,.:?\u201e\u201c\'\"]+|&quot;)") %>%
    unlist() %>%
    unique()
  # The word filter also discards the empty strings left behind by splitting.
  is_word <- str_detect(tokens, '^[:alnum:]+-?[:alnum:]*$')
  # str_detect() yields NA for NA tokens; indexing with NA would keep an NA
  # entry, so such tokens are excluded explicitly.
  tokens <- tokens[!is.na(is_word) & is_word]
  tokens[order(nchar(tokens), tokens, decreasing = TRUE)]
}
16
#' Wrap every occurrence of the given substrings in `<b>...</b>`
#'
#' The substrings are joined with `|` into a single capture group, so each one
#' is interpreted as a regular-expression fragment, not as literal text.
#'
#' @param string Character vector to mark up.
#' @param substrings Character vector of alternatives to highlight. Empty and
#'   `NA` entries are ignored.
#' @return `string` with each match wrapped in `<b>` tags; returned unchanged
#'   (as character) when no usable substring is left.
highliteSubstrings <- function(string, substrings) {
  substrings <- as.character(substrings)
  # An empty alternative makes the group match the empty string, which would
  # insert "<b></b>" at every position; NA would be pasted in as literal "NA".
  substrings <- substrings[!is.na(substrings) & nzchar(substrings)]
  if (length(substrings) == 0) {
    return(as.character(string))
  }
  pattern <- paste0("(", paste0(substrings, collapse = "|"), ")")
  str_replace_all(string, pattern, "<b>\\1</b>")
}
22
#' Reduce a `<b>`-highlighted string to its highlighted parts
#'
#' Applies four regex replacements in sequence; see the step comments below.
#'
#' @param string Character vector containing `<b>...</b>` markup.
#' @return Character vector with the replacements applied.
deleteFillers <- function(string) {
  # Highlights separated only by spaces are joined by a single space.
  joined <- str_replace_all(string, '</b> +<b>', ' ')
  # Any other tag-free text between two highlights becomes an ellipsis.
  elided <- str_replace_all(joined, '</b>[^<]+<b>', ' ... ')
  # Drop tag-free leading text together with the opening tag that follows it.
  without_head <- str_replace_all(elided, '^[^<]*<b>', '')
  # Drop a closing tag together with the tag-free text running to the end.
  str_replace_all(without_head, '</b>[^<]*$', '')
}
31
#' Render the first 50 rows of a collocation result with examples
#'
#' @param df Data frame; the code reads the columns `webUIRequestUrl`,
#'   `collocate`, `example`, `logDice`, `pmi` and `ll`.
#' @return The object produced by `datatable()` after `formatRound()`, with the
#'   columns Collocate (HTML link), Example, logDice, pmi and ll.
show_table <- function(df) {
  df %>%
    # Link each collocate to its webUIRequestUrl.
    mutate(Collocate = sprintf('<a href="%s">%s</a>', webUIRequestUrl, collocate)) %>%
    # Trim the example so that only about two words remain before the
    # <mark>...</mark> span ...
    mutate(example = str_replace(example, ".*(\\W+\\w+\\W+\\w+\\W+<mark.*/mark>.*)", "\\1")) %>%
    # ... and about two words after it.
    mutate(example = str_replace(example, "(.*<mark.*/mark>\\W+\\w+\\W+\\w+).*", "\\1")) %>%
    rowwise() %>%
    # mutate(Example=highliteSubstrings(example, wordsFromQuery(query))) %>%
    mutate(Example = example) %>%
    select(Collocate, Example, logDice, pmi, ll) %>%
    head(50) %>%
    # escape = FALSE lets the generated HTML render as markup; spelled out as
    # FALSE because `F` is an ordinary variable that can be reassigned.
    datatable(escape = FALSE, rownames = FALSE) %>%
    formatRound(columns = ~logDice + pmi + ll, digits = 2)
}
45
#' Render a collocation result without example sentences
#'
#' @param df Data frame; the code reads the columns `webUIRequestUrl`,
#'   `collocate`, `logDice`, `pmi` and `ll`.
#' @param pageLength Value passed to `datatable()` as the `pageLength` option
#'   (default 20).
#' @return The object produced by `datatable()` after `formatRound()`, with the
#'   columns Collocate (HTML link), logDice, pmi and ll.
show_simple_table <- function(df, pageLength = 20) {
  df %>%
    # Link each collocate to its webUIRequestUrl.
    mutate(Collocate = sprintf('<a href="%s">%s</a>', webUIRequestUrl, collocate)) %>%
    select(Collocate, logDice, pmi, ll) %>%
    # escape = FALSE lets the generated links render as markup; spelled out as
    # FALSE because `F` is an ordinary variable that can be reassigned.
    datatable(options = list(pageLength = pageLength), escape = FALSE) %>%
    formatRound(columns = ~logDice + pmi + ll, digits = 2)
}