blob: 07b624184e91c2988973bf8729a523a051206c55 [file] [log] [blame]
margaretha2c50c732018-10-17 18:48:52 +02001[{
margarethaf0c53b82019-04-05 10:03:48 +02002 "id": "WPD17",
margarethaa25e9622025-04-14 10:23:35 +02003 "pid": "http://hdl.handle.net/10932/00-03B6-558F-4E10-6201-1",
margarethaf0c53b82019-04-05 10:03:48 +02004 "de_title" : "Deutsche Wikipedia Artikel 2017",
5 "en_title" : "German Wikipedia Articles 2017",
6 "en_description" : "A collection of articles of German Wikipedia from July 1st, 2017.",
margaretha2c50c732018-10-17 18:48:52 +02007 "layers": [
margaretha0c186222018-10-22 17:48:33 +02008 "corenlp/p",
margaretha0c186222018-10-22 17:48:33 +02009 "marmot/m",
margarethaf0c53b82019-04-05 10:03:48 +020010 "marmot/p",
11 "opennlp/p",
12 "tt/l",
13 "tt/p"
margarethab4472102024-11-13 13:15:36 +010014 ],
15 "corpus_query":"corpusSigle=WPD17",
margaretha663752e2025-05-23 10:35:04 +020016 "required_access" : "FREE",
margarethaa25e9622025-04-14 10:23:35 +020017 "institution" : "IDS Mannheim"
margarethaf0c53b82019-04-05 10:03:48 +020018},
19{
20 "id": "WDD17",
margarethaa25e9622025-04-14 10:23:35 +020021 "pid" : "http://hdl.handle.net/10932/00-03B6-558F-5EA0-6301-B",
margarethaf0c53b82019-04-05 10:03:48 +020022 "de_title" : "Deutsche Wikipedia-Diskussionskorpus 2017",
23 "en_title" : "German Wikipedia talk corpus 2017",
24 "en_description" : "A collection of talk pages of German Wikipedia from July 1st, 2017.",
25 "layers": [
26 "corenlp/p",
27 "marmot/m",
28 "marmot/p",
29 "opennlp/p",
30 "tt/l",
31 "tt/p"
margarethab4472102024-11-13 13:15:36 +010032 ],
33 "corpus_query":"corpusSigle=WDD17",
margaretha663752e2025-05-23 10:35:04 +020034 "required_access" : "FREE",
margarethaa25e9622025-04-14 10:23:35 +020035 "institution" : "IDS Mannheim"
margarethaf0c53b82019-04-05 10:03:48 +020036},
37{
38 "id": "WUD17",
margarethaa25e9622025-04-14 10:23:35 +020039 "pid": "http://hdl.handle.net/10932/00-03B6-558F-6EF0-6401-F",
margarethaf0c53b82019-04-05 10:03:48 +020040 "de_title" : "Deutsche Wikipedia-Benutzerdiskussionskorpus 2017",
margarethaac4501a2019-12-17 11:27:30 +010041 "en_title" : "German Wikipedia user talk corpus 2017",
margarethaf0c53b82019-04-05 10:03:48 +020042 "en_description" : "A collection of user talk pages of German Wikipedia from July 1st, 2017.",
43 "layers": [
44 "corenlp/p",
45 "marmot/m",
46 "marmot/p",
47 "opennlp/p",
48 "tt/l",
49 "tt/p"
margarethab4472102024-11-13 13:15:36 +010050 ],
51 "corpus_query":"corpusSigle=WUD17",
margarethaa25e9622025-04-14 10:23:35 +020052 "institution" : "IDS Mannheim"
margaretha2dee9322025-05-27 10:21:07 +020053},
54
55 {
56 "id": "Romane",
57 "pid": "Romane",
58 "de_title": "Romane",
59 "en_title": "Novels",
60 "layers": [
61 "corenlp/p",
62 "marmot/m",
63 "marmot/p",
64 "opennlp/p",
65 "tt/l",
66 "tt/p"
67 ],
68 "corpus_query": "textType = /.*[Rr]oman/",
69 "institution": "IDS Mannheim"
70 },
71 {
72 "id": "Literatur",
73 "pid": "Literatur",
74 "de_title": "Literatur",
75 "en_title": "Literature",
76 "layers": [
77 "corenlp/p",
78 "marmot/m",
79 "marmot/p",
80 "opennlp/p",
81 "tt/l",
82 "tt/p"
83 ],
84 "corpus_query": "textType = /(.*[Rr]oman|[Bb]iographie|[Dd]rama|[Ss]chauspiel)/",
85 "institution": "IDS Mannheim"
86 },
87 {
88 "id": "Usenet-News",
89 "pid": "Usenet-News",
90 "de_title": "Usenet-News",
91 "en_title": "Usenet-News",
92 "layers": [
93 "corenlp/p",
94 "marmot/m",
95 "marmot/p",
96 "opennlp/p",
97 "tt/l",
98 "tt/p"
99 ],
100 "corpus_query": "corpusSigle = /NG[A-Z]+/",
101 "institution": "IDS Mannheim"
102 },
103 {
104 "id": "Wikipedia-Diskussionsseiten",
105 "pid": "Wikipedia-Diskussionsseiten",
106 "de_title": "Wikipedia-Diskussionsseiten",
107 "en_title": "Wikipedia Talk Pages",
108 "layers": [
109 "corenlp/p",
110 "marmot/m",
111 "marmot/p",
112 "opennlp/p",
113 "tt/l",
114 "tt/p"
115 ],
116 "corpus_query": "corpusSigle = /W[UD]D[0-9]+/",
117 "institution": "IDS Mannheim"
118 },
119 {
120 "id": "Zeitungen",
121 "pid": "Zeitungen",
122 "de_title": "Zeitungen",
123 "en_title": "Newspapers",
124 "layers": [
125 "corenlp/p",
126 "marmot/m",
127 "marmot/p",
128 "opennlp/p",
129 "tt/l",
130 "tt/p"
131 ],
132 "corpus_query": "textType = /[^:]*[Zz]eitung.*/",
133 "institution": "IDS Mannheim"
134 },
135 {
136 "id": "Zeitschriften",
137 "pid": "Zeitschriften",
138 "de_title": "Zeitschriften",
139 "en_title": "Magazines",
140 "layers": [
141 "corenlp/p",
142 "marmot/m",
143 "marmot/p",
144 "opennlp/p",
145 "tt/l",
146 "tt/p"
147 ],
148 "corpus_query": "textType = /(Zeitschrift|Magazin).*/",
149 "institution": "IDS Mannheim"
150 },
151 {
152 "id": "Zeitungen_und_Zeitschriften",
153 "pid": "Zeitungen_und_Zeitschriften",
154 "de_title": "Zeitungen und Zeitschriften",
155 "en_title": "Newspapers and Magazines",
156 "layers": [
157 "corenlp/p",
158 "marmot/m",
159 "marmot/p",
160 "opennlp/p",
161 "tt/l",
162 "tt/p"
163 ],
164 "corpus_query": "textType = /([^:]*[Zz]eitung|Zeitschrift|Magazin).*/",
165 "institution": "IDS Mannheim"
166 },
167 {
168 "id": "Agenturmeldungen",
169 "pid": "Agenturmeldungen",
170 "de_title": "Agenturmeldungen",
171 "en_title": "Agenturmeldungen",
172 "layers": [
173 "corenlp/p",
174 "marmot/m",
175 "marmot/p",
176 "opennlp/p",
177 "tt/l",
178 "tt/p"
179 ],
180 "corpus_query": "textType = /Agenturmeldung.*/",
181 "institution": "IDS Mannheim"
182 },
183 {
184 "id": "Plenarprotokolle",
185 "pid": "Plenarprotokolle",
186 "de_title": "Plenarprotokolle",
187 "en_title": "Plenarprotokolle",
188 "layers": [
189 "corenlp/p",
190 "marmot/m",
191 "marmot/p",
192 "opennlp/p",
193 "tt/l",
194 "tt/p"
195 ],
196 "corpus_query": "textTypeRef = /Plenarprotokoll/",
197 "institution": "IDS Mannheim"
198 },
199 {
200 "id": "Publikumszeitschriften",
201 "pid": "Publikumszeitschriften",
202 "de_title": "Publikumszeitschriften",
203 "en_title": "Publikumszeitschriften",
204 "layers": [
205 "corenlp/p",
206 "marmot/m",
207 "marmot/p",
208 "opennlp/p",
209 "tt/l",
210 "tt/p"
211 ],
212 "corpus_query": "textType = \"Zeitschrift: Publikumszeitschrift\"",
213 "institution": "IDS Mannheim"
214 },
215 {
216 "id": "Fachzeitschriften",
217 "pid": "Fachzeitschriften",
218 "de_title": "Fachzeitschriften",
219 "en_title": "Fachzeitschriften",
220 "layers": [
221 "corenlp/p",
222 "marmot/m",
223 "marmot/p",
224 "opennlp/p",
225 "tt/l",
226 "tt/p"
227 ],
228 "corpus_query": "textType = \"Zeitschrift: Fachzeitschrift\"",
229 "institution": "IDS Mannheim"
230 },
231 {
232 "id": "IT-Zeitschriften",
233 "pid": "IT-Zeitschriften",
234 "de_title": "IT-Zeitschriften",
235 "en_title": "IT-Zeitschriften",
236 "layers": [
237 "corenlp/p",
238 "marmot/m",
239 "marmot/p",
240 "opennlp/p",
241 "tt/l",
242 "tt/p"
243 ],
244 "corpus_query": "textType = \"Zeitschrift: Fachzeitschrift: IT-Magazin\"",
245 "institution": "IDS Mannheim"
246 },
247 {
248 "id": "Wirtschaftsressorts",
249 "pid": "Wirtschaftsressorts",
250 "de_title": "Wirtschaftsressorts",
251 "en_title": "Wirtschaftsressorts",
252 "layers": [
253 "corenlp/p",
254 "marmot/m",
255 "marmot/p",
256 "opennlp/p",
257 "tt/l",
258 "tt/p"
259 ],
260 "corpus_query": "textDomain = \"Wirtschaft\"",
261 "institution": "IDS Mannheim"
262 },
263 {
264 "id": "Kulturressorts",
265 "pid": "Kulturressorts",
266 "de_title": "Kulturressorts",
267 "en_title": "Kulturressorts",
268 "layers": [
269 "corenlp/p",
270 "marmot/m",
271 "marmot/p",
272 "opennlp/p",
273 "tt/l",
274 "tt/p"
275 ],
276 "corpus_query": "textDomain = \"Kultur\"",
277 "institution": "IDS Mannheim"
278 },
279 {
280 "id": "Sportressorts",
281 "pid": "Sportressorts",
282 "de_title": "Sportressorts",
283 "en_title": "Sportressorts",
284 "layers": [
285 "corenlp/p",
286 "marmot/m",
287 "marmot/p",
288 "opennlp/p",
289 "tt/l",
290 "tt/p"
291 ],
292 "corpus_query": "textDomain = \"Sport\"",
293 "institution": "IDS Mannheim"
294 },
295 {
296 "id": "Politikressorts",
297 "pid": "Politikressorts",
298 "de_title": "Politikressorts",
299 "en_title": "Politikressorts",
300 "layers": [
301 "corenlp/p",
302 "marmot/m",
303 "marmot/p",
304 "opennlp/p",
305 "tt/l",
306 "tt/p"
307 ],
308 "corpus_query": "textDomain = /Politik.*/",
309 "institution": "IDS Mannheim"
310 },
311 {
312 "id": "Stuttgart-21-Dialoge",
313 "pid": "Stuttgart-21-Dialoge",
314 "de_title": "Stuttgart-21-Dialoge (Transkripte aus FOLK)",
315 "en_title": "Stuttgart-21-Dialoge (Transcripts from FOLK)",
316 "layers": [
317 "corenlp/p",
318 "marmot/m",
319 "marmot/p",
320 "opennlp/p",
321 "tt/l",
322 "tt/p"
323 ],
324 "corpus_query": "corpusSigle=FOLK",
325 "institution": "IDS Mannheim"
326 },
327 {
328 "id": "Kinder-_und_Jugendliteratur",
329 "pid": "Kinder-_und_Jugendliteratur",
330 "de_title": "Kinder- und Jugendliteratur",
331 "en_title": "Kinder- und Jugendliteratur",
332 "layers": [
333 "corenlp/p",
334 "marmot/m",
335 "marmot/p",
336 "opennlp/p",
337 "tt/l",
338 "tt/p"
339 ],
340 "corpus_query": "corpusSigle=KJL",
341 "institution": "IDS Mannheim"
342 },
343 {
344 "id": "Fußballlinguistik-Korpora",
345 "pid": "Fußballlinguistik-Korpora",
346 "de_title": "Fußballlinguistik-Korpora",
347 "en_title": "Fußballlinguistik-Korpora",
348 "layers": [
349 "corenlp/p",
350 "marmot/m",
351 "marmot/p",
352 "opennlp/p",
353 "tt/l",
354 "tt/p"
355 ],
356 "corpus_query": "title=Fußballlinguistik-Korpora",
357 "institution": "IDS Mannheim"
358 },
359 {
360 "id": "Orthografisches_Kernkorpus_(OKK)",
361 "pid": "Orthografisches_Kernkorpus_(OKK)",
362 "de_title": "Orthografisches Kernkorpus (OKK)",
363 "en_title": "Orthografisches Kernkorpus (OKK)",
364 "layers": [
365 "corenlp/p",
366 "marmot/m",
367 "marmot/p",
368 "opennlp/p",
369 "tt/l",
370 "tt/p"
371 ],
372 "corpus_query": "referTo ratskorpus",
373 "institution": "IDS Mannheim"
374 },
375 {
376 "id": "Gingko-Korpus",
377 "pid": "Gingko-Korpus",
378 "de_title": "Gingko-Korpus",
379 "en_title": "Gingko-Korpus",
380 "layers": [
381 "corenlp/p",
382 "marmot/m",
383 "marmot/p",
384 "opennlp/p",
385 "tt/l",
386 "tt/p"
387 ],
388 "corpus_query": "corpusSigle=/[AM]TZ.*/",
389 "institution": "IDS Mannheim"
390 },
391 {
392 "id": "DE_Presse",
393 "pid": "DE_Presse",
394 "de_title": "DE Presse",
395 "en_title": "DE Presse",
396 "layers": [
397 "corenlp/p",
398 "marmot/m",
399 "marmot/p",
400 "opennlp/p",
401 "tt/l",
402 "tt/p"
403 ],
404 "corpus_query": "pubPlaceKey=DE and textType = /([^:]*[Zz]eitung|Zeitschrift|Magazin).*/",
405 "institution": "IDS Mannheim"
406 },
407 {
408 "id": "AT_Presse",
409 "pid": "AT_Presse",
410 "de_title": "AT Presse",
411 "en_title": "AT Presse",
412 "layers": [
413 "corenlp/p",
414 "marmot/m",
415 "marmot/p",
416 "opennlp/p",
417 "tt/l",
418 "tt/p"
419 ],
420 "corpus_query": "pubPlaceKey=AT and textType = /([^:]*[Zz]eitung|Zeitschrift|Magazin).*/",
421 "institution": "IDS Mannheim"
422 },
423 {
424 "id": "CH_Presse",
425 "pid": "CH_Presse",
426 "de_title": "CH Presse",
427 "en_title": "CH Presse",
428 "layers": [
429 "corenlp/p",
430 "marmot/m",
431 "marmot/p",
432 "opennlp/p",
433 "tt/l",
434 "tt/p"
435 ],
436 "corpus_query": "pubPlaceKey=CH and textType = /([^:]*[Zz]eitung|Zeitschrift|Magazin).*/",
437 "institution": "IDS Mannheim"
438 },
439 {
440 "id": "Fachzeitschriften_Ingenieur_und_Technik",
441 "pid": "Fachzeitschriften_Ingenieur_und_Technik",
442 "de_title": "Fachzeitschriften Ingenieur und Technik",
443 "en_title": "Fachzeitschriften Ingenieur und Technik",
444 "layers": [
445 "corenlp/p",
446 "marmot/m",
447 "marmot/p",
448 "opennlp/p",
449 "tt/l",
450 "tt/p"
451 ],
452 "corpus_query": "corpusSigle=/(ATZ|MTZ|KEM|CAV|MAV|ELI|TER|CHM|CHT|DNV|AUI|IEE|KE|EID|MET|EPP|TUE|ET|PRD|WWT|AUP|AUT|QE|VKU|AUE|EMA|FK|AIT|CHO|EMT).*/",
453 "institution": "IDS Mannheim"
454 },
455 {
456 "id": "Belletristik/Trivialliteratur",
457 "pid": "Belletristik/Trivialliteratur",
458 "de_title": "Belletristik/Trivialliteratur",
459 "en_title": "Belletristik/Trivialliteratur",
460 "layers": [
461 "corenlp/p",
462 "marmot/m",
463 "marmot/p",
464 "opennlp/p",
465 "tt/l",
466 "tt/p"
467 ],
468 "corpus_query": "corpusSigle=/(BIH|BIO|DIV|GOE|GR1|GRI|HES|KJL|LES|THM|WAM)/ or docSigle=/MK1\\/[LMTW].*/ or docSigle=/MK2\\/TRI/",
469 "institution": "IDS Mannheim"
470 }
margarethaf0c53b82019-04-05 10:03:48 +0200471]