Blame - tex/references.bib - ICC/2023-07-20-ICC-ICLC10

blob: b09be9be6635bc6aa076cedd99babe05d50bd3f0 [file] [log] [blame]

Marc Kupietz	bcde0b6	2023-06-14 14:22:35 +0200	[diff] [blame]	1
				2	@book{greenbaum_comparing_1996,
				3	address = {Oxford},
				4	title = {Comparing {English} {Worldwide}: {The} {International} {Corpus} of {English}},
				5	publisher = {Clarendon Press},
				6	editor = {Greenbaum, Sidney},
				7	year = {1996},
				8	}
				9
				10	@book{teich_cross-linguistic_2003,
				11	address = {Berlin},
				12	title = {Cross-{Linguistic} {Variation} in {System} and {Text}: {A} {Methodology} for the {Investigation} of {Translations} and {Comparable} {Texts}},
				13	publisher = {Mouton de Gruyter},
				14	author = {Teich, Elke},
				15	year = {2003},
				16	}
				17
				18	@inproceedings{diewald_korap_2016,
				19	venue = {Portorož, Slovenia},
				20	title = {{KorAP} {Architecture} -- {Diving} in the {Deep} {Sea} of {Corpus} {Data}},
Marc Kupietz	333f99e	2023-06-29 16:00:51 +0200	[diff] [blame^]	21	url = {https://aclanthology.org/L16-1569/},
Marc Kupietz	bcde0b6	2023-06-14 14:22:35 +0200	[diff] [blame]	22	booktitle = {Proceedings of the {Tenth} {International} {Conference} on {Language} {Resources} and {Evaluation} ({LREC}'16)},
Marc Kupietz	333f99e	2023-06-29 16:00:51 +0200	[diff] [blame^]	23	address = {Portorož / Paris},
				24	publisher = {ELRA},
Marc Kupietz	bcde0b6	2023-06-14 14:22:35 +0200	[diff] [blame]	25	author = {Diewald, Nils and Hanl, Michael and Margaretha, Eliza and Bingel, Joachim and Kupietz, Marc and Bański, Piotr and Witt, Andreas},
				26	month = may,
				27	year = {2016},
				28	pages = {3586--3591},
				29	}
				30
				31	@inproceedings{borin_korp_2012,
				32	address = {Istanbul, Turkey},
				33	title = {Korp — the corpus infrastructure of {Språkbanken}},
				34	url = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/248_Paper.pdf},
				35	booktitle = {Proceedings of the {Eighth} {International} {Conference} on {Language} {Resources} and {Evaluation} ({LREC}'12)},
				36	publisher = {European Language Resources Association (ELRA)},
				37	author = {Borin, Lars and Forsberg, Markus and Roxendal, Johan},
				38	month = may,
				39	year = {2012},
				40	pages = {474--478},
				41	}
				42
				43	@inproceedings{machalek_kontext_2020,
				44	address = {Marseille, France},
				45	title = {{KonText}: {Advanced} and {Flexible} {Corpus} {Query} {Interface}},
				46	isbn = {979-10-95546-34-4},
				47	url = {https://aclanthology.org/2020.lrec-1.865/},
				48	language = {English},
				49	booktitle = {Proceedings of the 12th {Language} {Resources} and {Evaluation} {Conference}},
				50	publisher = {European Language Resources Association},
				51	author = {Machálek, Tomáš},
				52	month = may,
				53	year = {2020},
				54	pages = {7003--7008},
				55	}
				56
				57	@inproceedings{kirk_ice_2017,
				58	title = {From {ICE} to {ICC}: {The} new {International} {Comparable} {Corpus}},
				59	url = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-62490},
				60	booktitle = {Proceedings of the {Workshop} on {Challenges} in the {Management} of {Large} {Corpora} and {Big} {Data} and {Natural} {Language} {Processing} ({CMLC}-5+{BigNLP}) 2017},
				61	publisher = {IDS},
				62	author = {Kirk, John and Čermáková, Anna},
Marc Kupietz	bcde0b6	2023-06-14 14:22:35 +0200	[diff] [blame]	63	year = {2017},
				64	pages = {7--12},
				65	}
				66
				67	@inproceedings{kupietz_recent_2020,
				68	series = {Corpora and {Language} in {Use}},
				69	title = {Recent developments in the {European} {Reference} {Corpus} {EuReCo}},
				70	booktitle = {Translating and Comparing Languages: Corpus-based Insights. Selected Proceedings of the Fifth Using Corpora in Contrastive and Translation Studies Conference}, address = {Louvain-la-Neuve}, publisher = {Presses universitaires de Louvain},
				71	author = {Kupietz, Marc and Diewald, Nils and Trawiński, Beata and Cosma, Ruxandra and Cristea, Dan and Tufiş, Dan and Váradi, Tamás and Wöllstein, Angelika},
				72	year = {2020},
				73	pages = {257--273},
				74	}
				75
				76	@inproceedings{nivre_universal_2020,
				77	address = {Marseille, France},
				78	title = {Universal {Dependencies} v2: {An} {Evergrowing} {Multilingual} {Treebank} {Collection}},
				79	isbn = {979-10-95546-34-4},
				80	url = {https://aclanthology.org/2020.lrec-1.497/},
				81	language = {English},
				82	booktitle = {Proceedings of the 12th {Language} {Resources} and {Evaluation} {Conference}},
				83	publisher = {European Language Resources Association},
				84	author = {Nivre, Joakim and de Marneffe, Marie-Catherine and Ginter, Filip and Hajič, Jan and Manning, Christopher D. and Pyysalo, Sampo and Schuster, Sebastian and Tyers, Francis and Zeman, Daniel},
				85	month = may,
				86	year = {2020},
				87	pages = {4034--4043},
				88	}
				89
				90	@article{cermakova_international_2021,
				91	title = {The {International} {Comparable} {Corpus}: {Challenges} in building multilingual spoken and written comparable corpora},
				92	volume = {9},
				93	issn = {2243-4712},
				94	url = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-105084},
				95	doi = {10.32714/ricl.09.01.06},
				96	abstract = {This paper reports on the efforts of twelve national teams in building the International Comparable Corpus (ICC; https://korpus.cz/icc) that will contain highly comparable datasets of spoken, written and electronic registers. The languages currently covered are Czech, Finnish, French, German, Irish, Italian, Norwegian, Polish, Slovak, Swedish and, more recently, Chinese, as well as English, which is considered to be the pivot language. The goal of the project is to provide much-needed data for contrastive corpus-based linguistics. The ICC corpus is committed to the idea of re-using existing multilingual resources as much as possible and the design is modelled, with various adjustments, on the International Corpus of English (ICE). As such, ICC will contain approximately the same balance of forty percent of written language and 60 percent of spoken language distributed across 27 different text types and contexts. A number of issues encountered by the project teams are discussed, ranging from copyright and data sustainability to technical advances in data distribution.},
				97	language = {en},
				98	number = {1},
				99	journal = {Research in Corpus Linguistics: Special issue ``Challenges of combining structured and unstructured data in corpus development''},
				100	author = {Čermáková, Anna and Jantunen, Jarmo and Jauhiainen, Tommi and Kirk, John and Křen, Michal and Kupietz, Marc and Uí Dhonnchadha, Elaine},
				101	editor = {Säily, Tanja and Tyrkkö, Jukka},
				102	year = {2021},
				103	address = {Murcia},
				104	publisher = {Spanish Association for Corpus Linguistics},
				105	pages = {89--103},
				106	}
				107
				108	@incollection{kupietz_building_2022,
				109	address = {Berlin},
				110	title = {Building paths to corpus data: {A} multi-level least effort and maximum return approach},
				111	url = {https://doi.org/10.1515/9783110767377-007},
				112	booktitle = {{CLARIN}. {The} {Infrastructure} for {Language} {Resources}},
				113	publisher = {De Gruyter},
				114	author = {Kupietz, Marc and Diewald, Nils and Margaretha, Eliza},
				115	editor = {Fišer, Darja and Witt, Andreas},
				116	year = {2022},
				117	doi = {10.1515/9783110767377-007},
				118	}
				119
				120	@article{cermakova_be_nodate,
				121	title = {‘{Be}’ verbs in a contrastive perspective: {The} case of být, be and være},
				122	journal = {Nordic Journal of English Studies},
				123	author = {Čermáková, Anna and Ebeling, Jarle and Ebeling, Signe Oksefjell},
				124	}
				125
				126	@incollection{kupietz_neue_2022,
				127	address = {Bern},
				128	series = {Jahrbuch für {Internationale} {Germanistik} - {Beihefte} - 6},
				129	title = {Neue {Perspektiven} für kontrastive {Korpuslinguistik}: {Das} {Europäische} {Referenzkorpus} {EuReCo}},
				130	isbn = {978-3-0343-3660-4},
				131	abstract = {Dieser Beitrag beschreibt die Motivation und Ziele hinter der Initiative Europäisches Referenzkorpus EuReCo. Ausgehend von den Desiderata, die sich aufgrund der Defizite verfügbarer Forschungsdaten wie monolinguale Korpora, Parallelkorpora und Vergleichskorpora für den Sprachvergleich ergeben, werden die bisherigen und die laufenden Arbeiten im Rahmen von EuReCo präsentiert und anhand vergleichender deutsch-rumänischer Kookkurrenzanalysen neue Perspektiven für kontrastive Korpuslinguistik, die die EuReCo-Initiative öffnet, skizziert.},
				132	booktitle = {Wege der {Germanistik} in transkultureller {Perspektive}. {Akten} des {XIV}. {Kongresses} der {Internationalen} {Vereinigung} für {Germanistik} ({IVG}) ({Bd}. 6)},
				133	publisher = {Peter Lang},
				134	author = {Kupietz, Marc and Trawiński, Beata},
				135	editor = {Auteri, Laura and Barrale, Natascia and Di Bella, Arianna and Hoffmann, Sabine},
				136	year = {2022},
				137	keywords = {Kontrastive Linguistik, Korpus, Deutsch, Funktionsverbgefüge, Kookkurrenzanalyse, Korpuslinguistik, Rumänisch, Vergleichbare Korpora},
				138	pages = {417--439},
				139	}
				140
				141	@incollection{hardy_multi-dimensional_2015,
				142	address = {London},
				143	title = {Multi-{Dimensional} {Analysis} of {Academic} {Discourse}},
				144	isbn = {978-1-137-43173-8},
				145	url = {https://doi.org/10.1057/9781137431738_8},
				146	abstract = {This chapter provides an overview of multi-dimensional (MD) analysis and important findings in this area of research. This approach to the study of language variation and discourse communities is then exemplified through a case study of an MD analysis of student writing from the Michigan Corpus of Upper-level Student Papers (MICUSP), which includes four different levels of discourse community members: final-year undergraduate students, and first-, second-, and third-year graduate students. Although variation of MICUSP has been investigated according to discipline (Hardy and Römer, 2013) and paper type (Hardy and Friginal, 2014), it has not been investigated according to writer level.},
				147	booktitle = {Corpora and {Discourse} {Studies}: {Integrating} {Discourse} and {Corpora}},
				148	publisher = {Palgrave Macmillan UK},
				149	author = {Hardy, Jack A.},
				150	editor = {Baker, Paul and McEnery, Tony},
				151	year = {2015},
				152	doi = {10.1057/9781137431738_8},
				153	pages = {155--174},
				154	}
				155
				156	@article{biber_spoken_1986,
				157	title = {Spoken and {Written} {Textual} {Dimensions} in {English}: {Resolving} the {Contradictory} {Findings}},
				158	volume = {62},
				159	issn = {0097-8507, 1535-0665},
				160	url = {http://www.jstor.org/stable/414678},
				161	doi = {10.2307/414678},
				162	abstract = {[Although similarities and differences between speech and writing have often been studied, contradictory claims concerning the linguistic relationship between the two modes are still common. These contradictions can arise from basing global conclusions on restricted methodologies-such as assigning undue weight to individual linguistic features, or to choice of particular text samples and text types. The present study uses a 'multi-feature/multi-dimension' approach, which includes a broad range of linguistic features and text types in a single quantitative analysis, to provide a global description of similarities and differences among spoken/written text types in English. The distribution of 41 linguistic features in 545 text samples of approximately 2000 words each is subjected to factor analysis (a multivariate statistical technique). Three underlying textual dimensions are identified: Interactive vs. Edited Text, Abstract vs. Situated Content, and Reported vs. Immediate Style. To demonstrate the value of the multi-feature/multi-dimension approach, the specific findings of earlier studies are reconciled within the model proposed here.]},
				163	number = {2},
				164	urldate = {2023-04-30},
				165	journal = {Language},
				166	author = {Biber, Douglas},
				167	year = {1986},
				168	note = {Publisher: Linguistic Society of America},
				169	pages = {384--414},
				170	file = {Spoken and Written Textual Dimensions in English\: Resolving the Contradictory Findings:/home/kupietz/Zotero/storage/938FXDXC/biber1986.pdf.pdf:application/pdf},
				171	}
				172
				173	@inproceedings{straka_udpipe_2018,
				174	address = {Brussels, Belgium},
				175	title = {{UDPipe} 2.0 {Prototype} at {CoNLL} 2018 {UD} {Shared} {Task}},
				176	url = {https://aclanthology.org/K18-2020/},
				177	doi = {10.18653/v1/K18-2020},
				178	booktitle = {Proceedings of the {CoNLL} 2018 {Shared} {Task}: {Multilingual} {Parsing} from {Raw} {Text} to {Universal} {Dependencies}},
				179	publisher = {Association for Computational Linguistics},
				180	author = {Straka, Milan},
				181	month = oct,
				182	year = {2018},
				183	pages = {197--207},
				184	}
				185
				186	@inproceedings{Kupietz:Diewald:Hanl:Margaretha:2016,
				187	address = {Mannheim, Germany},
				188	series = {Proceedings of the {Methodenmesse} im {Rahmen} der {Jahrestagung} des {Instituts} für {Deutsche} {Sprache}},
				189	title = {Möglichkeiten der {Erforschung} grammatischer {Variation} mithilfe von {KorAP}, der neuen {Korpusanalyseplattform} des {IDS}},
				190	copyright = {All rights reserved},
				191	booktitle = {Grammatische {Variation}. {Empirische} {Zugänge} und theoretische {Modellierung}},
				192	publisher = {De Gruyter},
				193	author = {Kupietz, Marc and Diewald, Nils and Hanl, Michael and Margaretha, Eliza},
				194	year = {2016},
				195	pages = {319--329},
				196	file = {Kupietz et al. - 2016 - Möglichkeiten der Erforschung grammatischer Variat.pdf:/home/kupietz/Zotero/storage/8K4AI4T9/Kupietz et al. - 2016 - Möglichkeiten der Erforschung grammatischer Variat.pdf:application/pdf},
				197	}
				198
				199	@inproceedings{Banski:Fischer:Frick:Ketzan:Kupietz:Schnober:Schonefeld:Witt:2012,
				200	address = {Istanbul, Turkey},
				201	title = {The {New} {IDS} {Corpus} {Analysis} {Platform}: {Challenges} and {Prospects}},
				202	shorttitle = {The {New} {IDS} {Corpus} {Analysis} {Platform}},
				203	url = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/789_Paper.pdf},
				204	abstract = {The present article describes the first stage of the KorAP project, launched recently at the Institut für Deutsche Sprache (IDS) in Mannheim, Germany. The aim of this project is to develop an innovative corpus analysis platform to tackle the increasing demands of modern linguistic research. The platform will facilitate new linguistic findings by making it possible to manage and analyse primary data and annotations in the petabyte range, while at the same time allowing an undistorted view of the primary linguistic data, and thus fully satisfying the demands of a scientific tool. An additional important aim of the project is to make corpus data as openly accessible as possible in light of unavoidable legal restrictions, for instance through support for distributed virtual corpora, user-defined annotations and adaptable user interfaces, as well as interfaces and sandboxes for user-supplied analysis applications. We discuss our motivation for undertaking this endeavour and the challenges that face it. Next, we outline our software implementation plan and describe development to-date.},
				205	urldate = {2022-04-12},
				206	booktitle = {Proceedings of the {Eighth} {International} {Conference} on {Language} {Resources} and {Evaluation} ({LREC}'12)},
				207	publisher = {European Language Resources Association (ELRA)},
				208	author = {Bański, Piotr and Fischer, Peter M. and Frick, Elena and Ketzan, Erik and Kupietz, Marc and Schnober, Carsten and Schonefeld, Oliver and Witt, Andreas},
				209	month = may,
				210	year = {2012},
				211	pages = {2905--2911},
				212	file = {Full Text PDF:/home/kupietz/Zotero/storage/IC9U5T6F/Bański et al. - 2012 - The New IDS Corpus Analysis Platform Challenges a.pdf:application/pdf},
				213	}
Marc Kupietz	f1dd910	2023-06-26 20:36:08 +0200	[diff] [blame]	214
				215	@inproceedings{kupietz_rkorapclient_2020,
Marc Kupietz	333f99e	2023-06-29 16:00:51 +0200	[diff] [blame^]	216	address = {Marseille / Paris},
Marc Kupietz	f1dd910	2023-06-26 20:36:08 +0200	[diff] [blame]	217	title = {{RKorAPClient}: {An} {R} {Package} for {Accessing} the {German} {Reference} {Corpus} {DeReKo} via {KorAP}},
				218	isbn = {979-10-95546-34-4},
Marc Kupietz	333f99e	2023-06-29 16:00:51 +0200	[diff] [blame^]	219	url = {https://aclanthology.org/2020.lrec-1.867/},
Marc Kupietz	f1dd910	2023-06-26 20:36:08 +0200	[diff] [blame]	220	language = {English},
				221	booktitle = {Proceedings of the 12th {Language} {Resources} and {Evaluation} {Conference}},
Marc Kupietz	333f99e	2023-06-29 16:00:51 +0200	[diff] [blame^]	222	publisher = {ELRA},
Marc Kupietz	f1dd910	2023-06-26 20:36:08 +0200	[diff] [blame]	223	author = {Kupietz, Marc and Diewald, Nils and Margaretha, Eliza},
				224	month = may,
				225	year = {2020},
				226	pages = {7015--7021},
				227	}
				228
Marc Kupietz	48a4134	2023-06-28 18:16:54 +0200	[diff] [blame]	229	@article{MargarethaLuengen2014,
				230	author = {Margaretha, Eliza and Lüngen, Harald},
				231	title = {Building linguistic corpora from Wikipedia articles and discussions},
				232	journal = {Journal of Language Technology and Computational Linguistics. Special issue on building and annotating corpora of computer-mediated communication. Issues and challenges at the interface between computational and corpus linguistics},
				233	volume = {29},
				234	number = {2},
				235	editor = {Beißwenger, Michael and Storrer, Angelika and Oostdijk, Nelleke and van den Heuvel, Henk},
				236	url = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-33306},
				237	pages = {59--82},
				238	year = {2014},
				239	abstract = {Wikipedia is a valuable resource, useful as a linguistic corpus or a dataset for many kinds of research. We built corpora from Wikipedia articles and talk pages in the I5 format, a TEI customisation used in the German Reference Corpus (Deutsches Referenzkorpus - DeReKo). Our approach is a two-stage conversion combining parsing using the Sweble parser, and transformation using XSLT stylesheets. The conversion approach is able to successfully generate rich and valid corpora regardless of languages. We also introduce a method to segment user contributions in talk pages into postings.},
				240	language = {en}
				241	}