---
title: "Assembling EuReCo for Contrastive Research"
subtitle: "The Polish Piece"
author:
  - name: Piotr Bański
  - name: Nils Diewald
  - name: Marc Kupietz
  - name: Beata Trawiński
affiliation:
  address: IDS Mannheim
column_numbers: 2
contact:
  name: Piotr Bański
  department: Digital Linguistics / Grammar
  email: banski@ids-mannheim.de
  website: "https://www.ids-mannheim.de/"
  qrlink: >
    `r posterdown::qrlink("https://korap.ids-mannheim.de/instance/nkjp1m-sgjp", logo="kalamar_wbg.svg")`
output:
  posterdown::posterdown_ids:
    self_contained: false
    keep_md: true

bibliography: references.bib
csl: "https://raw.githubusercontent.com/ICLC-10/Zotero/master/styles/ICLC-10.csl"
---

```{r setup, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
# Document-wide chunk defaults: SVG graphics device; code and warnings are
# hidden unless an individual chunk overrides these options.
knitr::opts_chunk$set(
  dev = "svg",
  echo = FALSE,
  warning = FALSE
)
# Load shared project code; the table helpers used in later chunks are
# presumably defined there (TODO confirm).
source("common.R")
```
# Romanian

## CA in CoRoLa for »pune în NN« (= to put in NN)

```{r pune_in}
# Read the saved object from pune_in_CA_de.rds (per the section heading, the
# CA result for »pune în NN«). Assigned with `<-`, like the other chunks.
pune_in_ca_de <- readRDS("pune_in_CA_de.rds")
# The argument 10 is presumably a row limit — confirm in show_simple_table().
pune_in_ca_de %>% show_simple_table(10)
```

# Hungarian

```{r hoz, fig.cap='Collocation analysis for lemma hoz (=bring) with noun in sublative or illative – focus([hnc/p="FN.(SUB|ILL)"] {[hnc/l=hoz]})'}
# Load the saved table for this chunk (the chunk caption names the query
# behind it).
# NOTE(review): the .Rda extension usually goes with save()/load(); readRDS()
# only works if the file was written with saveRDS() — confirm.
hoz1 <- readRDS("hoz.Rda")

hoz1 %>%
  # Wrap each example in a hyperlink that points at its webUIRequestUrl.
  mutate(
    collocation = sprintf('<a href="%s">%s</a>', webUIRequestUrl, example)
  ) %>%
  select(collocation, EN, logDice, pmi, ll) %>%
  # Highest logDice first.
  dplyr::arrange(desc(logDice)) %>%
  # Display names for the table header.
  dplyr::rename(
    "LVC example" = "collocation",
    "EN (DeepL)" = "EN"
  ) %>%
  # head(50) %>%
  datatable(
    # The "LVC example" column holds raw <a> tags, so HTML escaping is turned
    # off. FALSE is spelled out because F is an ordinary variable that can be
    # reassigned.
    escape = FALSE,
    extensions = c("Buttons"),
    rownames = FALSE,
    options = list(
      buttons = c("copy", "csv", "excel", "pdf", "print"),
      pageLength = 10,
      # NOTE(review): an empty dom string requests no "B" (Buttons) control,
      # so the buttons configured above are presumably never shown — confirm
      # that this is intended.
      dom = ""
    )
  ) %>%
  # Show the three association scores with one decimal place.
  formatRound(columns = ~ logDice + pmi + ll, digits = 1)
```

# Polish

## Plain collocation analysis without restriction to NN/subst for da(wa)?ć

```{r dac_simple, echo=TRUE}
# Collocation analysis for the lemma pattern da(wa)?ć with a context size of
# 5 on each side and minOccur = 5; the result is shown as a simple table.
nkjp %>%
  collocationAnalysis(
    '[nkjp/l="da(wa)?ć"]',
    minOccur = 5,
    leftContextSize = 5,
    rightContextSize = 5
  ) %>%
  show_simple_table()
```

# Identification of Light Verb Constructions
using collocation analysis

## da(wa)?ć

```{r dac, echo=TRUE}
# The query matches a form of da(wa)?ć, up to five arbitrary tokens, and then
# a token tagged subst; the { ... } group covers the verb and the gap.
nkjp %>%
  collocationAnalysis(
    'focus({[nkjp/l="da(wa)?ć"] []{,5}} [nkjp/p=subst])',
    minOccur = 5,
    addExamples = TRUE,
    leftContextSize = 0,
    rightContextSize = 1 # counted relative to the { ... } span in focus()
  ) %>%
  show_table()
```

### (Z)robić

```{r robic, echo=TRUE}
# The query matches a form of z?robić, up to five arbitrary tokens, and then
# a token tagged subst; the { ... } group covers the verb and the gap.
nkjp %>%
  collocationAnalysis(
    'focus({[nkjp/l="z?robić"] []{,5}} [nkjp/p=subst])',
    minOccur = 5,
    addExamples = TRUE,
    leftContextSize = 0,
    rightContextSize = 1 # counted relative to the { ... } span in focus()
  ) %>%
  show_table()
```

```{r brac, echo=TRUE}
# The query matches a form of brać or wziąć, up to five arbitrary tokens, and
# then a token tagged subst; the { ... } group covers the verb and the gap.
nkjp %>%
  collocationAnalysis(
    'focus({[nkjp/l="brać" | nkjp/l="wziąć"] []{,5}} [nkjp/p=subst])',
    minOccur = 5,
    addExamples = TRUE,
    leftContextSize = 0,
    rightContextSize = 1 # counted relative to the { ... } span in focus()
  ) %>%
  # Move the first </mark> tag of each example behind the word that follows
  # it (plus at most one non-word character in front of that word).
  mutate(
    example = str_replace(example, "(</mark>)(\\W?\\w+)", "\\2\\1")
  ) %>%
  show_table()