blob: 8d390a6e6a247823d21b85d243c865c640952411 [file] [log] [blame]
Marc Kupietz686c4312023-06-23 15:41:44 +02001---
2title: "Assembling EuReCo for Contrastive Research"
3subtitle: "The Polish Piece"
4author:
5 - name: Piotr Bański
6 - name: Nils Diewald
7 - name: Marc Kupietz
8 - name: Beata Trawiński
9affiliation:
10 address: IDS Mannheim
11column_numbers: 2
12contact:
13 name: Piotr Bański
Marc K4853d6a2023-06-24 18:27:47 +020014 department: Digital Linguistics / Grammar
Marc Kupietz686c4312023-06-23 15:41:44 +020015 email: banski@ids-mannheim.de
Marc K4853d6a2023-06-24 18:27:47 +020016 website: "https://www.ids-mannheim.de/"
17 qrlink: >
Marc Kf8c3ccd2023-06-27 13:30:08 +020018 `r posterdown::qrlink("https://korap.ids-mannheim.de/instance/nkjp1m-sgjp", logo="kalamar_wbg.svg")`
Marc Kupietz686c4312023-06-23 15:41:44 +020019output:
20 posterdown::posterdown_ids:
21 self_contained: false
22 keep_md: true
23
24bibliography: references.bib
25csl: "https://raw.githubusercontent.com/ICLC-10/Zotero/master/styles/ICLC-10.csl"
26---
27
28```{r setup, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
# Chunk defaults for the whole document: SVG graphics device, and no echoed
# code, warnings or messages unless a chunk overrides these options.
knitr::opts_chunk$set(
  dev = 'svg',
  echo = FALSE,
  warning = FALSE,
  message = FALSE
)

# Run the shared project script.
# NOTE(review): presumably provides show_table(), show_simple_table_tr() and
# the package attachments used by the chunks below — confirm in common.R.
source("common.R")
31```
# Pilot study: LVC detection
Marc K66264002023-06-27 13:29:38 +020033
Marc Kupietz9087c772023-06-27 18:11:26 +020034Identification of Light Verb Constructions using collocation analysis.
Marc K66264002023-06-27 13:29:38 +020035
Marc Kupietz7c544c22023-06-28 06:35:12 +020036## German
37
38```{r setzen-in, fig.cap='Collocation analysis for »in … setzen« (= to put in NN) in DeReKo'}
# The collocation table is read from a stored RDS file; the live query is
# commented out.
# NOTE(review): the disabled call below is presumably how in_NN_setzen.rds
# was produced — confirm before regenerating the file.
# setzen_ca <- new("KorAPConnection", verbose = TRUE) %>%
#   collocationAnalysis(
#     "focus(in [tt/p=NN] {[tt/l=setzen]})",
#     leftContextSize = 1,
#     rightContextSize = 0,
#     addExamples = TRUE
#   )
setzen_ca <- readRDS("in_NN_setzen.rds")
show_table(setzen_ca)
48```
49
Marc Kupietz9087c772023-06-27 18:11:26 +020050## Romanian
51
52```{r pune-in, fig.cap='Collocation analysis for »pune în NN« (= to put in NN) in CoRoLa.'}
# Read the stored collocation results and hand them to show_simple_table_tr().
# NOTE(review): the argument 10 is presumably a row limit — confirm against
# the helper's definition, which is not visible here.
pune_in_ca_de <- readRDS("pune_in_CA_de.rds")
pune_in_ca_de %>% show_simple_table_tr(10)
Marc K66264002023-06-27 13:29:38 +020055```
56
Marc Kupietz4e33d112023-06-27 18:11:46 +020057## Hungarian
Marc K66264002023-06-27 13:29:38 +020058
Marc Kupietzdf5fda12023-06-27 15:27:41 +020059```{r hoz, fig.cap='Collocation analysis for lemma hoz (=bring) with noun in sublative or illative – focus([hnc/p="FN.(SUB|ILL)"] {[hnc/l=hoz]})'}
# Read the stored results for the lemma "hoz" and render them with datatable().
hoz1 <- readRDS("hoz.Rda")
hoz1 %>%
  # Wrap each example in an HTML link built from webUIRequestUrl.
  # NOTE(review): webUIRequestUrl and example are assumed to be columns of
  # hoz1 — confirm against the stored object.
  mutate(
    collocation = sprintf('<a href="%s">%s</a>', webUIRequestUrl, example)
  ) %>%
  select(collocation, EN, logDice, pmi, ll) %>%
  dplyr::arrange(desc(logDice)) %>%
  dplyr::rename("LVC example" = "collocation", "en" = "EN") %>%
  # head(50) %>%
  datatable(
    escape = FALSE, # the link column contains raw HTML that must not be escaped
    extensions = c('Buttons'),
    rownames = FALSE,
    options = list(
      buttons = c('copy', 'csv', 'excel', 'pdf', 'print'),
      pageLength = 10,
      # NOTE(review): an empty dom string presumably suppresses the table
      # controls, including the buttons configured above — confirm.
      dom = ''
    )
  ) %>%
  formatRound(columns = ~logDice + pmi + ll, digits = 1)
77```
Marc Kupietz686c4312023-06-23 15:41:44 +020078
Marc Kupietz064d46a2023-06-27 18:10:41 +020079## Polish
Marc Kupietz686c4312023-06-23 15:41:44 +020080
Marc Kupietz686c4312023-06-23 15:41:44 +020081
Marc Kupietz064d46a2023-06-27 18:10:41 +020082### da(wa)?ć + NOUN
83
84```{r dac_ca_code, echo=TRUE, results = 'hide'}
# Query: lemma da(wa)?ć, up to five arbitrary tokens, then a noun (ud/p=NOUN).
new("KorAPConnection",
    KorAPUrl = "https://korap.ids-mannheim.de/instance/nkjp1m-sgjp") %>%
collocationAnalysis(
  'focus({[nkjp/l="da(wa)?ć"] []{,5}} [ud/p=NOUN])',
  leftContextSize = 0,
  rightContextSize = 1, # relative to { ... } in focus()
  addExamples = TRUE
)
Marc Kupietz686c4312023-06-23 15:41:44 +020093```
Marc Kupietz064d46a2023-06-27 18:10:41 +020094```{r dac_ca_result, echo=FALSE}
# Run the da(wa)?ć + NOUN collocation analysis on the `nkjp` connection and
# show the result table.
# NOTE(review): `nkjp` is not defined in the visible part of this file;
# presumably it is created in common.R — confirm.
nkjp %>%
  collocationAnalysis(
    'focus({[nkjp/l="da(wa)?ć"] []{,5}} [ud/p=NOUN])',
    leftContextSize = 0,
    rightContextSize = 1, # relative to { ... } in focus()
    addExamples = TRUE
  ) %>%
  # Move the first closing </mark> tag behind the word that follows it
  # (optionally preceded by one non-word character), so that word is
  # highlighted as well.
  mutate(example = str_replace(example, "(</mark>)(\\W?\\w+)", "\\2\\1")) %>%
  show_table()
Marc Kupietz686c4312023-06-23 15:41:44 +0200103 show_table()
Marc Kupietz064d46a2023-06-27 18:10:41 +0200104```