blob: 6aaeeb5c9d666f6b1a45d8a4e26f8525966b2420 [file] [log] [blame]
Marc Kupietz686c4312023-06-23 15:41:44 +02001---
2title: "Assembling EuReCo for Contrastive Research"
3subtitle: "The Polish Piece"
4author:
5 - name: Piotr Bański
6 - name: Nils Diewald
7 - name: Marc Kupietz
8 - name: Beata Trawiński
9affiliation:
10 address: IDS Mannheim
11column_numbers: 2
12contact:
13 name: Piotr Bański
Marc K4853d6a2023-06-24 18:27:47 +020014 department: Digital Linguistics / Grammar
Marc Kupietz686c4312023-06-23 15:41:44 +020015 email: banski@ids-mannheim.de
Marc K4853d6a2023-06-24 18:27:47 +020016 website: "https://www.ids-mannheim.de/"
17 qrlink: >
Marc Kf8c3ccd2023-06-27 13:30:08 +020018 `r posterdown::qrlink("https://korap.ids-mannheim.de/instance/nkjp1m-sgjp", logo="kalamar_wbg.svg")`
Marc Kupietz686c4312023-06-23 15:41:44 +020019output:
20 posterdown::posterdown_ids:
21 self_contained: false
22 keep_md: true
23
24bibliography: references.bib
25csl: "https://raw.githubusercontent.com/ICLC-10/Zotero/master/styles/ICLC-10.csl"
26---
27
28```{r setup, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
# Chunk defaults for the whole document: SVG graphics device, and no echoed
# code, warnings, or messages unless a chunk overrides these options.
Marc Kupietz4e33d112023-06-27 18:11:46 +020029knitr::opts_chunk$set(dev = 'svg', echo = FALSE, warning = FALSE, message = FALSE)
# Run the shared script common.R (its contents are not visible in this file).
Marc Kupietz686c4312023-06-23 15:41:44 +020030source("common.R")
31```
Marc Kupietz9087c772023-06-27 18:11:26 +020032# Pilot study: LVC detection
Marc K66264002023-06-27 13:29:38 +020033
Marc Kupietz9087c772023-06-27 18:11:26 +020034Identification of Light Verb Constructions using collocation analysis.
Marc K66264002023-06-27 13:29:38 +020035
Marc Kupietz9087c772023-06-27 18:11:26 +020036## Romanian
37
38```{r pune-in, fig.cap='Collocation analysis for »pune în NN« (= to put in NN) in CoRoLa.'}
# Read the serialized object stored in pune_in_CA_de.rds (per the chunk
# caption, a collocation analysis result) and hand it to
# show_simple_table_tr() with 10 as the second argument.
# NOTE(review): 10 is presumably a row limit -- confirm against the helper.
Marc K66264002023-06-27 13:29:38 +020039pune_in_ca_de <- readRDS("pune_in_CA_de.rds")
Marc Kupietz9087c772023-06-27 18:11:26 +020040pune_in_ca_de %>% show_simple_table_tr(10)
Marc K66264002023-06-27 13:29:38 +020041```
42
Marc Kupietz4e33d112023-06-27 18:11:46 +020043## Hungarian
Marc K66264002023-06-27 13:29:38 +020044
Marc Kupietzdf5fda12023-06-27 15:27:41 +020045```{r hoz, fig.cap='Collocation analysis for lemma hoz (=bring) with noun in sublative or illative – focus([hnc/p="FN.(SUB|ILL)"] {[hnc/l=hoz]})'}
# Read the serialized object stored in hoz.Rda and render it as an
# interactive DT table:
#   * each example becomes an HTML link to its webUIRequestUrl,
#   * only the collocation, EN, logDice, pmi and ll columns are kept,
#   * rows are sorted by descending logDice,
#   * the three score columns are rounded to one decimal place.
# escape = FALSE is needed so the generated <a> markup is rendered as a
# link instead of being shown as escaped text.
46hoz1 <- readRDS("hoz.Rda")
47hoz1 %>%
48 mutate(collocation=sprintf('<a href="%s">%s</a>', webUIRequestUrl, example)) %>%
49 select(collocation, EN, logDice, pmi, ll) %>%
50 dplyr::arrange(desc(logDice)) %>%
51 dplyr::rename("LVC example" = "collocation") %>%
Marc Kupietz4e33d112023-06-27 18:11:46 +020052 dplyr::rename("en" = "EN") %>%
Marc Kupietzdf5fda12023-06-27 15:27:41 +020053# head(50) %>%
54 datatable(escape = FALSE,
55 extensions = c('Buttons'),
56 rownames = FALSE,
57 options = list(
58 buttons = c('copy', 'csv', 'excel', 'pdf', 'print'),
59 pageLength = 10,
60 dom = ''
61 )) %>%
62 formatRound(columns=~logDice + pmi + ll, digits=1)
63```
Marc Kupietz686c4312023-06-23 15:41:44 +020064
Marc Kupietz064d46a2023-06-27 18:10:41 +020065## Polish
Marc Kupietz686c4312023-06-23 15:41:44 +020066
Marc Kupietz686c4312023-06-23 15:41:44 +020067
Marc Kupietz064d46a2023-06-27 18:10:41 +020068### da(wa)?ć + NOUN
69
70```{r dac_ca_code, echo=TRUE, results = 'hide'}
# Create a KorAPConnection to the given KorAP instance URL and pipe it into
# collocationAnalysis() for the focus() query below, with a left context of
# 0, a right context of 1, and examples requested.
71new("KorAPConnection",
72 KorAPUrl = "https://korap.ids-mannheim.de/instance/nkjp1m-sgjp") %>%
Marc Kupietz686c4312023-06-23 15:41:44 +020073collocationAnalysis(
Marc Kupietz064d46a2023-06-27 18:10:41 +020074 'focus({[nkjp/l="da(wa)?ć"] []{,5}} [ud/p=NOUN])',
75 leftContextSize = 0,
76 rightContextSize = 1, # relative to { ... } in focus(),
77 addExamples = TRUE
78)
Marc Kupietz686c4312023-06-23 15:41:44 +020079```
Marc Kupietz064d46a2023-06-27 18:10:41 +020080```{r dac_ca_result, echo=FALSE}
Marc Kupietz686c4312023-06-23 15:41:44 +020081collocationAnalysis(
82 nkjp,
Marc Kupietz064d46a2023-06-27 18:10:41 +020083 'focus({[nkjp/l="da(wa)?ć"] []{,5}} [ud/p=NOUN])',
Marc Kupietz686c4312023-06-23 15:41:44 +020084 leftContextSize = 0,
85 rightContextSize = 1, # relative to { ... } in focus(),
Marc Kupietz686c4312023-06-23 15:41:44 +020086 addExamples = TRUE
87 ) %>%
Marc Kupietz3a91b3b2023-06-27 18:09:35 +020088 mutate(example=str_replace(example, "(</mark>)(\\W?\\w+)", "\\2\\1")) %>%
Marc Kupietz686c4312023-06-23 15:41:44 +020089 show_table()
Marc Kupietz064d46a2023-06-27 18:10:41 +020090```