blob: e9db38567dbe512ebaafd0ace80ef6a561d20909 [file] [log] [blame]
---
title: "Assembling EuReCo for Contrastive Research"
subtitle: "The Polish Piece"
author:
- name: Piotr Bański
- name: Nils Diewald
- name: Marc Kupietz
- name: Beata Trawiński
affiliation:
address: IDS Mannheim
column_numbers: 2
contact:
name: Piotr Bański
department: Digital Linguistics / Grammar
email: banski@ids-mannheim.de
website: "https://www.ids-mannheim.de/"
qrlink: >
`r posterdown::qrlink("https://korap.ids-mannheim.de/instance/nkjp1m-sgjp", logo="kalamar_wbg.svg")`
output:
posterdown::posterdown_ids:
self_contained: false
keep_md: true
bibliography: references.bib
csl: "https://raw.githubusercontent.com/ICLC-10/Zotero/master/styles/ICLC-10.csl"
---
```{r setup, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
# Document-wide chunk defaults: render figures as SVG and keep source code,
# warnings and messages out of the output unless a chunk overrides this.
knitr::opts_chunk$set(
  dev = "svg",
  echo = FALSE,
  warning = FALSE,
  message = FALSE
)

# Shared code for the chunks below (presumably defines the show_*table()
# helpers and attaches the required packages -- verify in common.R).
source("common.R")
```
# Pilot study: LVC detection
Identification of Light Verb Constructions using collocation analysis.
## German
```{r setzen-in, fig.cap='Collocation analysis for »in … setzen« (= to put in NN) in DeReKo'}
# Live query kept for reference (presumably the call that produced the stored
# result loaded below -- TODO confirm):
#
# setzen_ca <- new("KorAPConnection", verbose = TRUE) %>%
#   collocationAnalysis(
#     "focus(in [tt/p=NN] {[tt/l=setzen]})",
#     leftContextSize = 1,
#     rightContextSize = 0,
#     addExamples = TRUE
#   )

# Read the stored collocation-analysis result and render it as a table.
setzen_ca <- readRDS("in_NN_setzen.rds")
show_lvc_table(setzen_ca)
```
## Romanian
```{r pune-in, fig.cap='Collocation analysis for »pune în NN« (= to put in NN) in CoRoLa [@kupietz_neue_2022].'}
# Read the stored collocation-analysis result for the Romanian query and
# render it as a table. The second argument is passed positionally to
# show_lvc_table() (presumably a row limit -- verify against its definition).
pune_in_ca_de <- readRDS("pune_in_CA_de.rds")
pune_in_ca_de %>% show_lvc_table(10)
```
## Hungarian
```{r hoz, fig.cap='Collocation analysis for lemma hoz (=bring) with noun in sublative or illative – focus([hnc/p="FN.(SUB|ILL)"] {[hnc/l=hoz]})'}
# Read the stored result; note the file is read with readRDS() even though it
# carries an .Rda extension.
hoz1 <- readRDS("hoz.Rda")

hoz1 %>%
  # Wrap each example in a link to its webUIRequestUrl.
  mutate(
    collocation = sprintf('<a href="%s">%s</a>', webUIRequestUrl, example)
  ) %>%
  select(collocation, EN, logDice, pmi, ll) %>%
  dplyr::arrange(desc(logDice)) %>%
  # Display names for the two text columns.
  dplyr::rename("LVC example" = "collocation", "en" = "EN") %>%
  # head(50) %>%
  datatable(
    escape = FALSE, # cells contain the HTML links built above
    extensions = c("Buttons"),
    rownames = FALSE,
    options = list(
      buttons = c("copy", "csv", "excel", "pdf", "print"),
      pageLength = 10,
      # NOTE(review): an empty `dom` presumably suppresses all table controls,
      # including the buttons configured above -- confirm this is intended.
      dom = ""
    )
  ) %>%
  # Show the association scores with one decimal place.
  formatRound(columns = ~ logDice + pmi + ll, digits = 1)
```
## Polish
```{r dac-ca-code, echo=TRUE, results = 'hide'}
# Collocation analysis against the NKJP1M-SGJP KorAP instance
new("KorAPConnection",
KorAPUrl = "https://korap.ids-mannheim.de/instance/nkjp1m-sgjp") %>%
collocationAnalysis(
'focus({[nkjp/l="da(wa)?ć"] []{,5}} [ud/p=NOUN])',
leftContextSize = 0,
rightContextSize = 1, # relative to { ... } in focus()
addExamples = TRUE
)
```
```{r dac-ca-result, echo=FALSE, fig.cap = "Collocation analysis of da(wa)?ć + NOUN (=give) in NKJP1M-SGJP using UDPipe2 annotations [@straka_udpipe_2018] and RKorAPClient [@kupietz_rkorapclient_2020]"}
# Read the stored collocation-analysis result for the Polish query.
dac <- readRDS("dac.rds")

# In each example, swap the first closing </mark> tag with the word that
# follows it (optionally preceded by one non-word character), so that word
# ends up inside the marked span; then render the table.
show_table(
  mutate(
    dac,
    example = str_replace(example, "(</mark>)(\\W?\\w+)", "\\2\\1")
  )
)
```
# References