Blame - tex/references.bib - ICC/2023-07-20-ICC-ICLC10

blob: cae2002b28ccf7f6f6392bdfe2d3d35ca513faec [file] [log] [blame]

Marc Kupietz	bcde0b6	2023-06-14 14:22:35 +0200	[diff] [blame]	1
				2	@book{greenbaum_comparing_1996,
				3	editor    = {Greenbaum, Sidney},
				4	title     = {Comparing {English} {Worldwide}: {The} {International} {Corpus} of {English}},
				5	publisher = {Clarendon Press},
				6	address   = {Oxford},
				7	year      = {1996},
				8	}
				9
				10	@book{teich_cross-linguistic_2003,
				11	author    = {Teich, Elke},
				12	title     = {Cross-{Linguistic} {Variation} in {System} and {Text}: {A} {Methodology} for the {Investigation} of {Translations} and {Comparable} {Texts}},
				13	publisher = {Mouton de Gruyter},
				14	address   = {Berlin},
				15	year      = {2003},
				16	}
				17
				18	@inproceedings{diewald_korap_2016,
				19	address = {Portorož, Slovenia},
				20	title = {{KorAP} {Architecture} --- {Diving} in the {Deep} {Sea} of {Corpus} {Data}},
				21	url = {https://www.aclweb.org/anthology/L16-1569},
				22	booktitle = {Proceedings of the {Tenth} {International} {Conference} on {Language} {Resources} and {Evaluation} ({LREC}'16)},
				23	publisher = {European Language Resources Association (ELRA)},
				24	author = {Diewald, Nils and Hanl, Michael and Margaretha, Eliza and Bingel, Joachim and Kupietz, Marc and Bański, Piotr and Witt, Andreas},
				25	month = may,
				26	year = {2016},
				27	pages = {3586--3591},
				28	}
				29
				30	@inproceedings{borin_korp_2012,
				31	author    = {Borin, Lars and Forsberg, Markus and Roxendal, Johan},
				32	title     = {Korp — the corpus infrastructure of {Språkbanken}},
				33	booktitle = {Proceedings of the {Eighth} {International} {Conference} on {Language} {Resources} and {Evaluation} ({LREC}'12)},
				34	pages     = {474--478},
				35	publisher = {European Language Resources Association (ELRA)},
				36	address   = {Istanbul, Turkey},
				37	month     = may,
				38	year      = {2012},
				39	url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/248_Paper.pdf},
				40	}
				41
				42	@inproceedings{machalek_kontext_2020,
				43	author    = {Machálek, Tomáš},
				44	title     = {{KonText}: {Advanced} and {Flexible} {Corpus} {Query} {Interface}},
				45	booktitle = {Proceedings of the 12th {Language} {Resources} and {Evaluation} {Conference}},
				46	pages     = {7003--7008},
				47	publisher = {European Language Resources Association},
				48	address   = {Marseille, France},
				49	month     = may,
				50	year      = {2020},
				51	isbn      = {979-10-95546-34-4},
				52	url       = {https://www.aclweb.org/anthology/2020.lrec-1.865},
				53	language  = {English},
				54	}
				55
				56	@inproceedings{kirk_ice_2017,
				57	title = {From {ICE} to {ICC}: {The} new {International} {Comparable} {Corpus}},
				58	url = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-62490},
				59	booktitle = {Proceedings of the {Workshop} on {Challenges} in the {Management} of {Large} {Corpora} and {Big} {Data} and {Natural} {Language} {Processing} ({CMLC}-5+{BigNLP}) 2017},
				60	publisher = {IDS},
				61	author = {Kirk, John and Čermáková, Anna},
Marc Kupietz	bcde0b6	2023-06-14 14:22:35 +0200	[diff] [blame]	62	year = {2017},
				63	pages = {7--12},
				64	}
				65
				66	@incollection{kupietz_recent_2020,
				67	series = {Corpora and {Language} in {Use}},
				68	title = {Recent developments in the {European} {Reference} {Corpus} {EuReCo}},
				69	booktitle = {Translating and Comparing Languages: Corpus-based Insights. Selected Proceedings of the Fifth Using Corpora in Contrastive and Translation Studies Conference},
					address = {Louvain-la-Neuve},
					publisher = {Presses universitaires de Louvain},
				70	author = {Kupietz, Marc and Diewald, Nils and Trawiński, Beata and Cosma, Ruxandra and Cristea, Dan and Tufiş, Dan and Váradi, Tamás and Wöllstein, Angelika},
				71	year = {2020},
				72	pages = {257--273},
				73	}
				74
				75	@inproceedings{nivre_universal_2020,
				76	author    = {Nivre, Joakim and de Marneffe, Marie-Catherine and Ginter, Filip and Hajič, Jan and Manning, Christopher D. and Pyysalo, Sampo and Schuster, Sebastian and Tyers, Francis and Zeman, Daniel},
				77	title     = {Universal {Dependencies} v2: {An} {Evergrowing} {Multilingual} {Treebank} {Collection}},
				78	booktitle = {Proceedings of the 12th {Language} {Resources} and {Evaluation} {Conference}},
				79	pages     = {4034--4043},
				80	publisher = {European Language Resources Association},
				81	address   = {Marseille, France},
				82	month     = may,
				83	year      = {2020},
				84	isbn      = {979-10-95546-34-4},
				85	url       = {https://www.aclweb.org/anthology/2020.lrec-1.497},
				86	language  = {English},
				87	}
				88
				89	@article{cermakova_international_2021,
				90	title = {The {International} {Comparable} {Corpus}: {Challenges} in building multilingual spoken and written comparable corpora},
				91	volume = {9},
				92	issn = {2243-4712},
				93	url = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-105084},
				94	doi = {10.32714/ricl.09.01.06},
				95	abstract = {This paper reports on the efforts of twelve national teams in building the International Comparable Corpus (ICC; https://korpus.cz/icc) that will contain highly comparable datasets of spoken, written and electronic registers. The languages currently covered are Czech, Finnish, French, German, Irish, Italian, Norwegian, Polish, Slovak, Swedish and, more recently, Chinese, as well as English, which is considered to be the pivot language. The goal of the project is to provide much-needed data for contrastive corpus-based linguistics. The ICC corpus is committed to the idea of re-using existing multilingual resources as much as possible and the design is modelled, with various adjustments, on the International Corpus of English (ICE). As such, ICC will contain approximately the same balance of forty percent of written language and 60 percent of spoken language distributed across 27 different text types and contexts. A number of issues encountered by the project teams are discussed, ranging from copyright and data sustainability to technical advances in data distribution.},
				96	language = {en},
				97	number = {1},
				98	journal = {Research in Corpus Linguistics: Special issue ``Challenges of combining structured and unstructured data in corpus development''},
				99	author = {Čermáková, Anna and Jantunen, Jarmo and Jauhiainen, Tommi and Kirk, John and Křen, Michal and Kupietz, Marc and Uí Dhonnchadha, Elaine},
				100	editor = {Säily, Tanja and Tyrkkö, Jukka},
				101	year = {2021},
				102	address = {Murcia},
				103	publisher = {Spanish Association for Corpus Linguistics},
				104	pages = {89--103},
				105	}
				106
				107	@incollection{kupietz_building_2022,
				108	address = {Berlin},
				109	title = {Building paths to corpus data: {A} multi-level least effort and maximum return approach},
				110	url = {https://doi.org/10.1515/9783110767377-007},
				111	booktitle = {{CLARIN}. {The} {Infrastructure} for {Language} {Resources}},
				112	publisher = {De Gruyter},
				113	author = {Kupietz, Marc and Diewald, Nils and Margaretha, Eliza},
				114	editor = {Fišer, Darja and Witt, Andreas},
				115	year = {2022},
				116	doi = {10.1515/9783110767377-007},
				117	}
				118
				119	@article{cermakova_be_nodate,
				120	title = {‘{Be}’ verbs in a contrastive perspective: {The} case of být, be and være},
				121	journal = {Nordic Journal of English Studies},
				122	author = {Čermáková, Anna and Ebeling, Jarle and Ebeling, Signe Oksefjell},
				123	}
				124
				125	@incollection{kupietz_neue_2022,
				126	author    = {Kupietz, Marc and Trawiński, Beata},
				127	title     = {Neue {Perspektiven} für kontrastive {Korpuslinguistik}: {Das} {Europäische} {Referenzkorpus} {EuReCo}},
				128	editor    = {Auteri, Laura and Barrale, Natascia and Di Bella, Arianna and Hoffmann, Sabine},
				129	booktitle = {Wege der {Germanistik} in transkultureller {Perspektive}. {Akten} des {XIV}. {Kongresses} der {Internationalen} {Vereinigung} für {Germanistik} ({IVG}) ({Bd}. 6)},
				130	series    = {Jahrbuch für {Internationale} {Germanistik} - {Beihefte} - 6},
				131	pages     = {417--439},
				132	publisher = {Peter Lang},
				133	address   = {Bern},
				134	year      = {2022},
				135	isbn      = {978-3-0343-3660-4},
				136	keywords  = {Kontrastive Linguistik, Korpus, Deutsch, Funktionsverbgefüge, Kookkurrenzanalyse, Korpuslinguistik, Rumänisch, Vergleichbare Korpora},
				137	abstract  = {Dieser Beitrag beschreibt die Motivation und Ziele hinter der Initiative Europäisches Referenzkorpus EuReCo. Ausgehend von den Desiderata, die sich aufgrund der Defizite verfügbarer Forschungsdaten wie monolinguale Korpora, Parallelkorpora und Vergleichskorpora für den Sprachvergleich ergeben, werden die bisherigen und die laufenden Arbeiten im Rahmen von EuReCo präsentiert und anhand vergleichender deutsch-rumänischer Kookkurrenzanalysen neue Perspektiven für kontrastive Korpuslinguistik, die die EuReCo-Initiative öffnet, skizziert.},
				138	}
				139
				140	@incollection{hardy_multi-dimensional_2015,
				141	author    = {Hardy, Jack A.},
				142	title     = {Multi-{Dimensional} {Analysis} of {Academic} {Discourse}},
				143	editor    = {Baker, Paul and McEnery, Tony},
				144	booktitle = {Corpora and {Discourse} {Studies}: {Integrating} {Discourse} and {Corpora}},
				145	pages     = {155--174},
				146	publisher = {Palgrave Macmillan UK},
				147	address   = {London},
				148	year      = {2015},
				149	isbn      = {978-1-137-43173-8},
				150	doi       = {10.1057/9781137431738_8},
				151	url       = {https://doi.org/10.1057/9781137431738_8},
				152	abstract  = {This chapter provides an overview of multi-dimensional (MD) analysis and important findings in this area of research. This approach to the study of language variation and discourse communities is then exemplified through a case study of an MD analysis of student writing from the Michigan Corpus of Upper-level Student Papers (MICUSP), which includes four different levels of discourse community members: final-year undergraduate students, and first-, second-, and third-year graduate students. Although variation of MICUSP has been investigated according to discipline (Hardy and Römer, 2013) and paper type (Hardy and Friginal, 2014), it has not been investigated according to writer level.},
				153	}
				154
				155	@article{biber_spoken_1986,
				156	title = {Spoken and {Written} {Textual} {Dimensions} in {English}: {Resolving} the {Contradictory} {Findings}},
				157	volume = {62},
				158	issn = {0097-8507, 1535-0665},
				159	url = {http://www.jstor.org/stable/414678},
				160	doi = {10.2307/414678},
				161	abstract = {[Although similarities and differences between speech and writing have often been studied, contradictory claims concerning the linguistic relationship between the two modes are still common. These contradictions can arise from basing global conclusions on restricted methodologies-such as assigning undue weight to individual linguistic features, or to choice of particular text samples and text types. The present study uses a 'multi-feature/multi-dimension' approach, which includes a broad range of linguistic features and text types in a single quantitative analysis, to provide a global description of similarities and differences among spoken/written text types in English. The distribution of 41 linguistic features in 545 text samples of approximately 2000 words each is subjected to factor analysis (a multivariate statistical technique). Three underlying textual dimensions are identified: Interactive vs. Edited Text, Abstract vs. Situated Content, and Reported vs. Immediate Style. To demonstrate the value of the multi-feature/multi-dimension approach, the specific findings of earlier studies are reconciled within the model proposed here.]},
				162	number = {2},
				163	urldate = {2023-04-30},
				164	journal = {Language},
				165	author = {Biber, Douglas},
				166	year = {1986},
				167	publisher = {Linguistic Society of America},
				168	pages = {384--414},
				169	file = {Spoken and Written Textual Dimensions in English\: Resolving the Contradictory Findings:/home/kupietz/Zotero/storage/938FXDXC/biber1986.pdf.pdf:application/pdf},
				170	}
				171
				172	@inproceedings{straka_udpipe_2018,
				173	author    = {Straka, Milan},
				174	title     = {{UDPipe} 2.0 {Prototype} at {CoNLL} 2018 {UD} {Shared} {Task}},
				175	booktitle = {Proceedings of the {CoNLL} 2018 {Shared} {Task}: {Multilingual} {Parsing} from {Raw} {Text} to {Universal} {Dependencies}},
				176	pages     = {197--207},
				177	publisher = {Association for Computational Linguistics},
				178	address   = {Brussels, Belgium},
				179	month     = oct,
				180	year      = {2018},
				181	doi       = {10.18653/v1/K18-2020},
				182	url       = {https://www.aclweb.org/anthology/K18-2020},
				183	}
				184
				185	@inproceedings{Kupietz:Diewald:Hanl:Margaretha:2016,
				186	author    = {Kupietz, Marc and Diewald, Nils and Hanl, Michael and Margaretha, Eliza},
				187	title     = {Möglichkeiten der {Erforschung} grammatischer {Variation} mithilfe von {KorAP}, der neuen {Korpusanalyseplattform} des {IDS}},
				188	booktitle = {Grammatische {Variation}. {Empirische} {Zugänge} und theoretische {Modellierung}},
				189	series    = {Proceedings of the {Methodenmesse} im {Rahmen} der {Jahrestagung} des {Instituts} für {Deutsche} {Sprache}},
				190	pages     = {319--329},
				191	publisher = {De Gruyter},
				192	address   = {Mannheim, Germany},
				193	year      = {2016},
				194	copyright = {All rights reserved},
				195	file      = {Kupietz et al. - 2016 - Möglichkeiten der Erforschung grammatischer Variat.pdf:/home/kupietz/Zotero/storage/8K4AI4T9/Kupietz et al. - 2016 - Möglichkeiten der Erforschung grammatischer Variat.pdf:application/pdf},
				196	}
				197
				198	@inproceedings{Banski:Fischer:Frick:Ketzan:Kupietz:Schnober:Schonefeld:Witt:2012,
				199	author     = {Bański, Piotr and Fischer, Peter M. and Frick, Elena and Ketzan, Erik and Kupietz, Marc and Schnober, Carsten and Schonefeld, Oliver and Witt, Andreas},
				200	title      = {The {New} {IDS} {Corpus} {Analysis} {Platform}: {Challenges} and {Prospects}},
				201	shorttitle = {The {New} {IDS} {Corpus} {Analysis} {Platform}},
				202	booktitle  = {Proceedings of the {Eighth} {International} {Conference} on {Language} {Resources} and {Evaluation} ({LREC}'12)},
				203	pages      = {2905--2911},
				204	publisher  = {European Language Resources Association (ELRA)},
				205	address    = {Istanbul, Turkey},
				206	month      = may,
				207	year       = {2012},
				208	url        = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/789_Paper.pdf},
				209	urldate    = {2022-04-12},
				210	abstract   = {The present article describes the first stage of the KorAP project, launched recently at the Institut für Deutsche Sprache (IDS) in Mannheim, Germany. The aim of this project is to develop an innovative corpus analysis platform to tackle the increasing demands of modern linguistic research. The platform will facilitate new linguistic findings by making it possible to manage and analyse primary data and annotations in the petabyte range, while at the same time allowing an undistorted view of the primary linguistic data, and thus fully satisfying the demands of a scientific tool. An additional important aim of the project is to make corpus data as openly accessible as possible in light of unavoidable legal restrictions, for instance through support for distributed virtual corpora, user-defined annotations and adaptable user interfaces, as well as interfaces and sandboxes for user-supplied analysis applications. We discuss our motivation for undertaking this endeavour and the challenges that face it. Next, we outline our software implementation plan and describe development to-date.},
				211	file       = {Full Text PDF:/home/kupietz/Zotero/storage/IC9U5T6F/Bański et al. - 2012 - The New IDS Corpus Analysis Platform Challenges a.pdf:application/pdf},
				212	}
Marc Kupietz	f1dd910	2023-06-26 20:36:08 +0200	[diff] [blame]	213
				214	@inproceedings{kupietz_rkorapclient_2020,
				215	author    = {Kupietz, Marc and Diewald, Nils and Margaretha, Eliza},
				216	title     = {{RKorAPClient}: {An} {R} {Package} for {Accessing} the {German} {Reference} {Corpus} {DeReKo} via {KorAP}},
				217	booktitle = {Proceedings of the 12th {Language} {Resources} and {Evaluation} {Conference}},
				218	pages     = {7015--7021},
				219	publisher = {European Language Resources Association},
				220	address   = {Marseille, France},
				221	month     = may,
				222	year      = {2020},
				223	isbn      = {979-10-95546-34-4},
				224	url       = {https://www.aclweb.org/anthology/2020.lrec-1.867},
				225	language  = {English},
				226	}
				227
Marc Kupietz	48a4134	2023-06-28 18:16:54 +0200	[diff] [blame]	228	@article{MargarethaLuengen2014,
				229	author = {Margaretha, Eliza and Lüngen, Harald},
				230	title = {Building linguistic corpora from {Wikipedia} articles and discussions},
				231	journal = {Journal of Language Technology and Computational Linguistics. Special issue on building and annotating corpora of computer-mediated communication. Issues and challenges at the interface between computational and corpus linguistics},
				232	volume = {29},
				233	number = {2},
				234	editor = {Beißwenger, Michael and Storrer, Angelika and Oostdijk, Nelleke and van den Heuvel, Henk},
				235	url = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-33306},
				236	pages = {59--82},
				237	year = {2014},
				238	abstract = {Wikipedia is a valuable resource, useful as a linguistic corpus or a dataset for many kinds of research. We built corpora from Wikipedia articles and talk pages in the I5 format, a TEI customisation used in the German Reference Corpus (Deutsches Referenzkorpus - DeReKo). Our approach is a two-stage conversion combining parsing using the Sweble parser, and transformation using XSLT stylesheets. The conversion approach is able to successfully generate rich and valid corpora regardless of languages. We also introduce a method to segment user contributions in talk pages into postings.},
				239	language = {en},
				240	}
				237	year = {2014},
				238	abstract = {Wikipedia is a valuable resource, useful as a lingustic corpus or a dataset for many kinds of research. We built corpora from Wikipedia articles and talk pages in the I5 format, a TEI customisation used in the German Reference Corpus (Deutsches Referenzkorpus - DeReKo). Our approach is a two-stage conversion combining parsing using the Sweble parser, and transformation using XSLT stylesheets. The conversion approach is able to successfully generate rich and valid corpora regardless of languages. We also introduce a method to segment user contributions in talk pages into postings.},
				239	language = {de}
				240	}