| --- |
| title: "Applying the newly extended European Reference Corpus EuReCo" |
| subtitle: "Pilot studies of light-verb constructions in German, Romanian, Hungarian and Polish" |
| author: |
| - name: Piotr Bański |
| - name: Nils Diewald |
| - name: Marc Kupietz |
| - name: Beata Trawiński |
| affiliation: |
| address: IDS Mannheim |
| column_numbers: 2 |
| contact: |
| name: Piotr Bański |
| department: Digital Linguistics / Grammar |
| email: banski@ids-mannheim.de |
| website: "https://www.ids-mannheim.de/" |
| qrlink: > |
| `r posterdown::qrlink("https://korap.ids-mannheim.de/instance/nkjp1m-sgjp", logo="kalamar_wbg.svg")` |
| output: |
| posterdown::posterdown_ids: |
| self_contained: false |
| keep_md: true |
| |
| bibliography: references.bib |
| csl: "https://raw.githubusercontent.com/ICLC-10/Zotero/master/styles/ICLC-10.csl" |
| --- |
| |
| ```{r setup, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE} |
| # Document-wide chunk defaults: SVG graphics device, and no echoed code, |
| # warnings or messages unless a chunk overrides these options itself. |
| knitr::opts_chunk$set( |
|   dev = "svg", |
|   echo = FALSE, |
|   warning = FALSE, |
|   message = FALSE |
| ) |
| # Project-local script; presumably attaches the packages and defines the |
| # table helpers used by the chunks below (TODO confirm against common.R). |
| source("common.R") |
| ``` |
| # Pilot study: LVC detection |
| |
| Identification of Light Verb Constructions using collocation analysis. |
| |
| ## German |
| |
| ```{r setzen-in, fig.cap='Collocation analysis for »in … setzen« (= to put in NN) in DeReKo'} |
| # The table is rendered from a cached result so that knitting needs no |
| # live KorAP access. The cache was presumably produced with the query |
| # below (kept for reference; TODO confirm): |
| # |
| #   setzen_ca <- new("KorAPConnection", verbose = TRUE) %>% |
| #     collocationAnalysis( |
| #       "focus(in [tt/p=NN] {[tt/l=setzen]})", |
| #       leftContextSize = 1, |
| #       rightContextSize = 0, |
| #       addExamples = TRUE |
| #     ) |
| setzen_ca <- readRDS("in_NN_setzen.rds") |
| show_lvc_table(setzen_ca) |
| ``` |
| |
| ## Romanian |
| |
| ```{r pune-in, fig.cap='Collocation analysis for »pune în NN« (= to put in NN) in CoRoLa [@kupietz_neue_2022].'} |
| # Load the cached collocation analysis for the Romanian pattern and render |
| # it with the shared LVC table helper. The argument 10 is passed straight |
| # to show_lvc_table(); presumably a row limit (TODO confirm in common.R). |
| pune_in_ca_de <- readRDS("pune_in_CA_de.rds") |
| pune_in_ca_de %>% show_lvc_table(10) |
| ``` |
| |
| ## Hungarian |
| |
| ```{r hoz, fig.cap='Collocation analysis for lemma hoz (=bring) with noun in sublative or illative – focus([hnc/p="FN.(SUB|ILL)"] {[hnc/l=hoz]})'} |
| # Cached collocation analysis for the Hungarian pattern. The file carries |
| # an .Rda extension but is read with readRDS(), so it is presumably a |
| # single serialized object rather than a save()/load() image (TODO confirm). |
| hoz1 <- readRDS("hoz.Rda") |
| hoz1 %>% |
|   # Turn each example into a link to its KorAP web UI query. |
|   mutate(collocation = sprintf('<a href="%s">%s</a>', webUIRequestUrl, example)) %>% |
|   select(collocation, EN, logDice, pmi, ll) %>% |
|   dplyr::arrange(desc(logDice)) %>% |
|   dplyr::rename("LVC example" = "collocation", "en" = "EN") %>% |
|   # head(50) %>% |
|   datatable( |
|     # The first column holds raw HTML links, so escaping must stay off. |
|     escape = FALSE, |
|     extensions = "Buttons", |
|     rownames = FALSE, |
|     options = list( |
|       buttons = c("copy", "csv", "excel", "pdf", "print"), |
|       pageLength = 10, |
|       # NOTE(review): an empty dom string requests no control elements, so |
|       # the configured buttons are presumably not rendered; confirm that |
|       # this is intended for the poster layout. |
|       dom = "" |
|     ) |
|   ) %>% |
|   formatRound(columns = ~ logDice + pmi + ll, digits = 1) |
| ``` |
| |
| ## Polish |
| |
| <!-- Display-only chunk: the query is echoed on the poster but not evaluated (eval=FALSE). Its result was never assigned or shown, and the table in the next chunk is rendered from the cached dac.rds, so knitting needs no live KorAP access. --> |
| |
| ```{r dac-ca-code, echo=TRUE, eval=FALSE, results = 'hide'} |
| new("KorAPConnection", |
|     KorAPUrl = "https://korap.ids-mannheim.de/instance/nkjp1m-sgjp") %>% |
|   collocationAnalysis( |
|     'focus({[nkjp/l="da(wa)?ć"] []{,5}} [ud/p=NOUN])', |
|     leftContextSize = 0, |
|     rightContextSize = 1, # relative to { ... } in focus(), |
|     addExamples = TRUE |
|   ) |
| ``` |
| ```{r dac-ca-result, echo=FALSE, fig.cap = "Collocation analysis of da(wa)?ć (=give) + NOUN in NKJP1M-SGJP using UDPipe2 annotations [@straka_udpipe_2018] and RKorAPClient [@kupietz_rkorapclient_2020]"} |
| # Render the cached Polish collocation analysis. |
| dac <- readRDS("dac.rds") |
| # In each example, the first closing </mark> tag is moved behind the token |
| # that follows it (an optional non-word character plus a run of word |
| # characters), so the highlight also covers that token; presumably the |
| # noun collocate to the right of the focus (TODO confirm). |
| show_table( |
|   mutate(dac, example = str_replace(example, "(</mark>)(\\W?\\w+)", "\\2\\1")) |
| ) |
| ``` |
| |
| # References |
| |