---
title: "Applying the newly extended European Reference Corpus EuReCo"
subtitle: "Pilot studies of light-verb constructions in German, Romanian, Hungarian and Polish"
author:
  - name: Piotr Bański
  - name: Nils Diewald
  - name: Marc Kupietz
  - name: Beata Trawiński
affiliation:
  address: IDS Mannheim
column_numbers: 2
contact:
  name: Piotr Bański
  department: Digital Linguistics / Grammar
  email: banski@ids-mannheim.de
  website: "https://www.ids-mannheim.de/"
  qrlink: >
    `r posterdown::qrlink("https://korap.ids-mannheim.de/instance/nkjp1m-sgjp", logo="kalamar_wbg.svg")`
output:
  posterdown::posterdown_ids:
    self_contained: false
    keep_md: true

bibliography: references.bib
csl: "https://raw.githubusercontent.com/ICLC-10/Zotero/master/styles/ICLC-10.csl"
---

```{r setup, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
# Chunk defaults for the rest of the document: SVG graphics device, and no
# echoed code, warnings or messages unless a chunk overrides these options.
knitr::opts_chunk$set(
  dev = "svg",
  echo = FALSE,
  warning = FALSE,
  message = FALSE
)

# Evaluate common.R in this session before the chunks below run.
# NOTE(review): show_lvc_table() and show_table() are presumably defined
# there -- confirm.
source("common.R")
```
# Pilot study: LVC detection

Identification of light-verb constructions (LVCs) using collocation analysis.

## German

```{r setzen-in, fig.cap='Collocation analysis for »in … setzen« (= to put in NN) in DeReKo'}
# The live KorAP query is kept here, commented out, for reference only:
#
# setzen_ca <- new("KorAPConnection", verbose = TRUE) %>%
#   collocationAnalysis(
#     "focus(in [tt/p=NN] {[tt/l=setzen]})",
#     leftContextSize = 1,
#     rightContextSize = 0,
#     addExamples = TRUE
#   )
#
# NOTE(review): in_NN_setzen.rds is presumably the saved result of that
# query -- confirm.
setzen_ca <- readRDS("in_NN_setzen.rds")

# Render the stored result with the project helper (no extra arguments).
show_lvc_table(setzen_ca)
```

## Romanian

```{r pune-in, fig.cap='Collocation analysis for »pune în NN« (= to put in NN) in CoRoLa [@kupietz_neue_2022].'}
# Load the stored object from pune_in_CA_de.rds and render it with the
# project helper show_lvc_table().
# NOTE(review): the second argument (10) is presumably the number of rows
# to display -- confirm against the helper's definition.
pune_in_ca_de <- readRDS("pune_in_CA_de.rds")
pune_in_ca_de %>% show_lvc_table(10)
```

## Hungarian

```{r hoz, fig.cap='Collocation analysis for lemma hoz (=bring) with noun in sublative or illative – focus([hnc/p="FN.(SUB|ILL)"] {[hnc/l=hoz]})'}
# Load the stored table and render it as an interactive DT table, sorted by
# descending logDice, with each example linked to its web UI request URL.
# NOTE(review): readRDS() reads files written by saveRDS(); the ".Rda"
# extension is conventionally used for save() output -- confirm the format.
hoz1 <- readRDS("hoz.Rda")
hoz1 %>%
  # Wrap every example in an HTML link pointing at webUIRequestUrl.
  dplyr::mutate(
    collocation = sprintf('<a href="%s">%s</a>', webUIRequestUrl, example)
  ) %>%
  dplyr::select(collocation, EN, logDice, pmi, ll) %>%
  dplyr::arrange(dplyr::desc(logDice)) %>%
  dplyr::rename("LVC example" = "collocation", "en" = "EN") %>%
  # head(50) %>%
  datatable(
    escape = FALSE, # cells contain the HTML links built above
    extensions = c("Buttons"),
    rownames = FALSE,
    options = list(
      buttons = c("copy", "csv", "excel", "pdf", "print"),
      pageLength = 10,
      # NOTE(review): with an empty `dom` there is no "B" element, so the
      # buttons configured above are presumably not rendered -- confirm
      # that this is intended.
      dom = ""
    )
  ) %>%
  formatRound(columns = ~ logDice + pmi + ll, digits = 1)
```

## Polish

<!--
The chunk below is displayed for illustration only (echo=TRUE, eval=FALSE):
its result was never assigned or shown, so the live KorAP query is no longer
run on every knit. Comments inside the chunk are echoed together with the code.
-->

```{r dac-ca-code, echo=TRUE, eval=FALSE}
new("KorAPConnection",
    KorAPUrl = "https://korap.ids-mannheim.de/instance/nkjp1m-sgjp") %>%
collocationAnalysis(
  'focus({[nkjp/l="da(wa)?ć"] []{,5}} [ud/p=NOUN])',
  leftContextSize = 0,
  rightContextSize = 1, # relative to { ... } in focus()
  addExamples = TRUE
)
```
```{r dac-ca-result, echo=FALSE, fig.cap = "Collocation analysis of da(wa)?ć (=give) + NOUN in NKJP1M-SGJP using UDPipe2 annotations [@straka_udpipe_2018] and RKorAPClient [@kupietz_rkorapclient_2020]"}
# Stored object read from dac.rds.
dac <- readRDS("dac.rds")

# In each example, move the first closing </mark> tag behind the word that
# follows it (optionally preceded by one non-word character), then render the
# table with the project helper show_table().
# NOTE(review): presumably this extends the highlight to the noun -- confirm.
show_table(
  mutate(
    dac,
    example = str_replace(example, "(</mark>)(\\W?\\w+)", "\\2\\1")
  )
)
```

# References
