Add Wiki-Corp reference Change-Id: I31ff16b018aa114e24c246b858addac26796ea43

commit: 48a4134c64faf5a73c0f8866695dc121e8f47d3f [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Wed Jun 28 18:16:54 2023 +0200
committer: Marc Kupietz <kupietz@ids-mannheim.de> Wed Jun 28 18:16:54 2023 +0200
tree: 6d6efd3bd773734e9201d78b9b1b2642d5dc5451
parent: 49a7c1807c76d6c889faefe90eab1b68ce8f340f [diff]
diff --git a/R/poster.Rmd b/R/poster.Rmd
index 7b1df91..b42cbcb 100644
--- a/R/poster.Rmd
+++ b/R/poster.Rmd

@@ -182,8 +182,7 @@
 ## Results
 
 * for English the query for *take* + NOUN (as direct right neighbour) yields 10 different pairs with a minimum frequency of 2 (see Figure \@ref(fig:take-icc))
-  * based English Wikipedia (2015) the query yields 139 pairs (log-dice-threshold: 2.0) with about 20 false positives
-* for ICC German with DeReKo as background corpus, the ratio of true positive LVCs is 10/80 
+  * based on English Wikipedia [2015 snapshot, see @MargarethaLuengen2014] the query yields 139 pairs (log-dice-threshold: 2.0) with 44 false positives
 
 # Summary & Outlook
 

diff --git a/tex/references.bib b/tex/references.bib
index 6019cd5..cae2002 100644
--- a/tex/references.bib
+++ b/tex/references.bib

@@ -59,7 +59,6 @@
 	booktitle = {Proceedings of the {Workshop} on {Challenges} in the {Management} of {Large} {Corpora} and {Big} {Data} and {Natural} {Language} {Processing} ({CMLC}-5+{BigNLP}) 2017},
 	publisher = {IDS},
 	author = {Kirk, John and Čermáková, Anna},
-	editor = {Bański, Piotr and Kupietz, Marc and Lüngen, Harald and Rayson, Paul and Biber, Hanno and Breiteneder, Evelyn and Clematide, Simon and Mariani, John and Stevenson, Mark and Sick, Theresa},
 	year = {2017},
 	pages = {7 -- 12},
 }
@@ -226,3 +225,16 @@
         pages = {7015--7021},
 }
 
+@article{MargarethaLuengen2014,
+  author    = {Eliza Margaretha and Harald Lüngen},
+  title     = {Building linguistic corpora from Wikipedia articles and discussions},
+  journal   = {Journal of Language Technology and Computational Linguistics. Special issue on building and annotating corpora of computer-mediated communication. Issues and challenges at the interface between computational and corpus linguistics},
+  volume    = {29},
+  number    = {2},
+  editor    = {Michael Beißwenger and Angelika Storrer and Nelleke Oostdijk and Henk van den Heuvel},
+  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-33306},
+  pages     = {59 -- 82},
+  year      = {2014},
+  abstract  = {Wikipedia is a valuable resource, useful as a linguistic corpus or a dataset for many kinds of research. We built corpora from Wikipedia articles and talk pages in the I5 format, a TEI customisation used in the German Reference Corpus (Deutsches Referenzkorpus - DeReKo). Our approach is a two-stage conversion combining parsing using the Sweble parser, and transformation using XSLT stylesheets. The conversion approach is able to successfully generate rich and valid corpora regardless of languages. We also introduce a method to segment user contributions in talk pages into postings.},
+  language  = {de}
+}
commit	48a4134c64faf5a73c0f8866695dc121e8f47d3f	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Wed Jun 28 18:16:54 2023 +0200
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Wed Jun 28 18:16:54 2023 +0200
tree	6d6efd3bd773734e9201d78b9b1b2642d5dc5451
parent	49a7c1807c76d6c889faefe90eab1b68ce8f340f [diff]