blob: 5159889d011b3a77c67cca14e81207a713d604bb [file] [log] [blame]
Nils Diewald8e323ee2014-04-23 17:28:14 +00001#!/usr/bin/env perl
2# source ~/perl5/perlbrew/etc/bashrc
3# perlbrew switch perl-blead@korap
4use strict;
5use warnings;
6use utf8;
7use Test::More;
8use Benchmark ':hireswallclock';
9use lib 'lib', '../lib';
10
11use File::Basename 'dirname';
12use File::Spec::Functions 'catdir';
13
Nils Diewaldfeccbb12015-06-18 20:06:45 +000014
15# TODO: Make 'text' -> 'primaryText'
16
# Make sure the module under test can be loaded at all.
use_ok('KorAP::Document');

# WPD/00001
my $path = catdir(dirname(__FILE__), 'WPD/00001');

# The fixture path is matched literally (\Q...\E): the checkout location may
# contain regex metacharacters (dots, parentheses, Windows backslashes, ...)
# that would otherwise be interpreted as pattern syntax.
my $doc = KorAP::Document->new(path => "$path/");
ok $doc, 'Load Korap::Document';
like $doc->path, qr{\Q$path\E/}, 'Path';

# Constructed without a trailing slash, the reported path must still end in one.
$doc = KorAP::Document->new(path => $path);
ok $doc, 'Load Korap::Document';
like $doc->path, qr{\Q$path\E/\z}, 'Path';

ok $doc->parse, 'Parse document';

# Metadata
is $doc->text_sigle, 'WPD_AAA.00001', 'ID';

is $doc->title, 'A', 'title';
ok !$doc->sub_title, 'subTitle';
is $doc->corpus_sigle, 'WPD', 'corpusID';
is $doc->pub_date, '20050328', 'pubDate';
is $doc->pub_place, 'URL:http://de.wikipedia.org', 'pubPlace';
is $doc->text_class->[0], 'freizeit-unterhaltung', 'TextClass';
is $doc->text_class->[1], 'reisen', 'TextClass';
is $doc->text_class->[2], 'wissenschaft', 'TextClass';
is $doc->text_class->[3], 'populaerwissenschaft', 'TextClass';
ok !$doc->text_class->[4], 'TextClass';
is $doc->author, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author';

# Disabled assertions that index the author as a list:
#is($doc->author->[0], 'Ruru', 'author');
#is($doc->author->[1], 'Jens.Ol', 'author');
#is($doc->author->[2], 'Aglarech', 'author');
#ok(!$doc->author->[3], 'author');

# Additional information
ok !$doc->editor, 'Editor';
is $doc->publisher, 'Wikipedia', 'Publisher';
is $doc->creation_date, '20050000', 'Creation date';
ok !$doc->text_type, 'No text_type';
ok !$doc->text_type_art, 'no text_type art';
ok !$doc->text_type_ref, 'no text_type ref';
ok !$doc->text_domain, 'no text_domain';
ok !$doc->text_column, 'no text_column';
ok !$doc->keywords_string, 'no keywords';
# The test names below say what is actually asserted: the text classes are
# present, and it is the language (not the text column) that is absent.
is $doc->text_class_string, 'freizeit-unterhaltung reisen wissenschaft populaerwissenschaft', 'text class string';
ok !$doc->language, 'no language';

# Disabled collection-level assertions:
#is($doc->coll_title, 'Wikipedia', 'Collection title');
#is($doc->coll_sub_title, 'Die freie Enzyklopädie', 'Collection subtitle');
#is($doc->coll_editor, 'wikipedia.org', 'Collection editor');
#ok(!$doc->coll_author, 'Collection author');
Nils Diewald8e323ee2014-04-23 17:28:14 +000066
# BRZ13/00001
$path = catdir(dirname(__FILE__), 'BRZ13/00001');
$doc = KorAP::Document->new(path => "$path/");
ok $doc, 'Load Korap::Document';

ok $doc->parse, 'Parse document';

# Title and sigles
is $doc->title, 'Sexueller Missbrauch –„Das schreiende Kind steckt noch tief in mir“', 'title';
ok !$doc->sub_title, 'subTitle';
is $doc->text_sigle, 'BRZ13_APR.00001', 'ID';
is $doc->corpus_sigle, 'BRZ13', 'corpusID';

# Publication data
is $doc->pub_date, '20130402', 'pubDate';
is $doc->pub_place, 'Braunschweig', 'pubPlace';

# Exactly two text classes, no author
is $doc->text_class->[0], 'staat-gesellschaft', 'TextClass';
is $doc->text_class->[1], 'familie-geschlecht', 'TextClass';
ok !$doc->text_class->[2], 'TextClass';
ok !$doc->author, 'author';

# Additional information
ok !$doc->editor, 'Editor';
is $doc->publisher, 'Braunschweiger Zeitungsverlag, Druckhaus Albert Limbach GmbH & Co. KG', 'Publisher';
is $doc->creation_date, '20130402', 'Creation date';

# Disabled collection-level assertions:
#is($doc->coll_title, 'Braunschweiger Zeitung', 'Collection title');
#ok(!$doc->coll_sub_title, 'Collection subtitle');
#ok(!$doc->coll_editor, 'Collection editor');
#ok(!$doc->coll_author, 'Collection author');

is $doc->text_type, 'Zeitung: Tageszeitung', 'text_type';
ok !$doc->text_type_art, 'text_type art';
96
# A01/13047
$path = catdir(dirname(__FILE__), 'A01/13047');
$doc = KorAP::Document->new(path => "$path/");
ok $doc, 'Load Korap::Document';

ok $doc->parse, 'Parse document';

# Title, sigles and publication data
is $doc->title, 'Fischer und Kolp im Sonnenhügel', 'title';
ok !$doc->sub_title, 'subTitle';
is $doc->text_sigle, 'A01_APR.13047', 'ID';
is $doc->corpus_sigle, 'A01', 'corpusID';
is $doc->pub_date, '20010402', 'pubDate';
ok !$doc->pub_place, 'pubPlace';

# Exactly two text classes, no author
is $doc->text_class->[0], 'freizeit-unterhaltung', 'TextClass';
is $doc->text_class->[1], 'vereine-veranstaltungen', 'TextClass';
ok !$doc->text_class->[2], 'TextClass';
ok !$doc->author, 'author';

# Additional information
ok !$doc->editor, 'Editor';
ok !$doc->publisher, 'Publisher';
is $doc->creation_date, '20010402', 'Creation date';

# Disabled collection-level assertions:
#ok(!$doc->coll_title, 'Collection title');
#ok(!$doc->coll_sub_title, 'Collection subtitle');
#ok(!$doc->coll_editor, 'Collection editor');
#ok(!$doc->coll_author, 'Collection author');

ok !$doc->text_type, 'text_type';
is $doc->text_type_art, 'Bericht', 'text_type art';
123
124
# ERL/00001
$path = catdir(dirname(__FILE__), 'ERL/00001');
$doc = KorAP::Document->new(path => "$path/");
ok $doc, 'Load Korap::Document';

ok $doc->parse, 'Parse document';

# The title currently carries the sigle and the imprint; the bare title would be
# "Amtsblatt des Landesbezirks Baden [diverse Erlasse]".
is $doc->title, 'MK2/ERL.00001 Amtsblatt des Landesbezirks Baden [diverse Erlasse], Hrsg. und Schriftleitung: Präsidialstelle der Landesverwaltung Baden in Karlsruhe. - Karlsruhe, o.J.', 'title';

ok !$doc->sub_title, 'subTitle';
is $doc->text_sigle, 'MK2_ERL.00001', 'ID';
is $doc->corpus_sigle, 'MK2', 'corpusID';
is $doc->pub_date, '00000000', 'pubDate';
is $doc->pub_place, 'Karlsruhe', 'pubPlace';

# Exactly two text classes, no author
is $doc->text_class->[0], 'politik', 'TextClass';
is $doc->text_class->[1], 'kommunalpolitik', 'TextClass';
ok !$doc->text_class->[2], 'TextClass';
ok !$doc->author, 'author';

# Additional information
ok !$doc->editor, 'Editor';
is $doc->publisher, 'Badenia Verlag und Druckerei', 'Publisher';
is $doc->creation_date, '19600000', 'Creation date';
diag 'Non-acceptance of creation date ranges may be temporary';

# Disabled collection-level assertions:
#ok(!$doc->coll_title, 'Collection title');
#ok(!$doc->coll_sub_title, 'Collection subtitle');
#ok(!$doc->coll_editor, 'Collection editor');
#ok(!$doc->coll_author, 'Collection author');

is $doc->text_type, 'Erlass', 'text_type';
ok !$doc->text_type_art, 'text_type art';
153
# A01/02035-substring
$path = catdir(dirname(__FILE__), 'A01/02035-substring');
$doc = KorAP::Document->new(path => "$path/");
ok $doc, 'Load Korap::Document';

ok $doc->parse, 'Parse document';

# Title, sigles and publication data
is $doc->title, 'A00/JAN.02035 St. Galler Tagblatt, 11.01.2000, Ressort: TB-RSP (Abk.)', 'title';
ok !$doc->sub_title, 'subTitle';
is $doc->text_sigle, 'A00_JAN.02035', 'ID';
is $doc->corpus_sigle, 'A00', 'corpusID';
is $doc->pub_date, '20000111', 'pubDate';
ok !$doc->pub_place, 'pubPlace';

# Exactly two text classes, no author
is $doc->text_class->[0], 'sport', 'TextClass';
is $doc->text_class->[1], 'ballsport', 'TextClass';
ok !$doc->text_class->[2], 'TextClass';
ok !$doc->author, 'author';

# Additional information
ok !$doc->editor, 'Editor';
ok !$doc->publisher, 'Publisher';
is $doc->creation_date, '20000111', 'Creation date';

# Disabled collection-level assertions:
#ok(!$doc->coll_title, 'Collection title');
#ok(!$doc->coll_sub_title, 'Collection subtitle');
#ok(!$doc->coll_editor, 'Collection editor');
#ok(!$doc->coll_author, 'Collection author');

ok !$doc->text_type, 'text_type';
is $doc->text_type_art, 'Bericht', 'text_type art';
180
# A01/02873-meta
$path = catdir(dirname(__FILE__), 'A01/02873-meta');
$doc = KorAP::Document->new(path => "$path/");
ok $doc, 'Load Korap::Document';

ok $doc->parse, 'Parse document';

# Title, sigles and publication data
is $doc->title, 'Tradition und Moderne', 'title';
ok !$doc->sub_title, 'subTitle';
is $doc->text_sigle, 'A00_JAN.02873', 'ID';
is $doc->corpus_sigle, 'A00', 'corpusID';
is $doc->pub_date, '20000113', 'pubDate';
ok !$doc->pub_place, 'pubPlace';

# Exactly two text classes, no author
is $doc->text_class->[0], 'kultur', 'TextClass';
is $doc->text_class->[1], 'film', 'TextClass';
ok !$doc->text_class->[2], 'TextClass';
ok !$doc->author, 'author';

# Additional information
ok !$doc->editor, 'Editor';
ok !$doc->publisher, 'Publisher';
is $doc->creation_date, '20000113', 'Creation date';

# Disabled collection-level assertions:
#ok(!$doc->coll_title, 'Collection title');
#ok(!$doc->coll_sub_title, 'Collection subtitle');
#ok(!$doc->coll_editor, 'Collection editor');
#ok(!$doc->coll_author, 'Collection author');

ok !$doc->text_type, 'text_type';
is $doc->text_type_art, 'Bericht', 'text_type art';
207
208
# A01/05663-unbalanced
$path = catdir(dirname(__FILE__), 'A01/05663-unbalanced');
$doc = KorAP::Document->new(path => "$path/");
ok $doc, 'Load Korap::Document';

ok $doc->parse, 'Parse document';

# Title, sigles and publication data
is $doc->title, 'Mehr Arbeitslose im Dezember', 'title';
ok !$doc->sub_title, 'subTitle';
is $doc->text_sigle, 'A00_JAN.05663', 'ID';
is $doc->corpus_sigle, 'A00', 'corpusID';
is $doc->pub_date, '20000124', 'pubDate';
ok !$doc->pub_place, 'pubPlace';

# Exactly two text classes, no author
is $doc->text_class->[0], 'gesundheit-ernaehrung', 'TextClass';
is $doc->text_class->[1], 'gesundheit', 'TextClass';
ok !$doc->text_class->[2], 'TextClass';
ok !$doc->author, 'author';

# Additional information
ok !$doc->editor, 'Editor';
ok !$doc->publisher, 'Publisher';
is $doc->creation_date, '20000124', 'Creation date';

# Disabled collection-level assertions:
#ok(!$doc->coll_title, 'Collection title');
#ok(!$doc->coll_sub_title, 'Collection subtitle');
#ok(!$doc->coll_editor, 'Collection editor');
#ok(!$doc->coll_author, 'Collection author');

ok !$doc->text_type, 'text_type';
is $doc->text_type_art, 'Bericht', 'text_type art';
235
236
# A01/07452-deep
$path = catdir(dirname(__FILE__), 'A01/07452-deep');
$doc = KorAP::Document->new(path => "$path/");
ok $doc, 'Load Korap::Document';

ok $doc->parse, 'Parse document';

# Title, sigles and publication data
is $doc->title, 'Wil im Dezember 1999', 'title';
ok !$doc->sub_title, 'subTitle';
is $doc->text_sigle, 'A00_JAN.07452', 'ID';
is $doc->corpus_sigle, 'A00', 'corpusID';
is $doc->pub_date, '20000129', 'pubDate';
ok !$doc->pub_place, 'pubPlace';

# Exactly two text classes, no author
is $doc->text_class->[0], 'politik', 'TextClass';
is $doc->text_class->[1], 'kommunalpolitik', 'TextClass';
ok !$doc->text_class->[2], 'TextClass';
ok !$doc->author, 'author';

# Additional information
ok !$doc->editor, 'Editor';
ok !$doc->publisher, 'Publisher';
is $doc->creation_date, '20000129', 'Creation date';

# Disabled collection-level assertions:
#ok(!$doc->coll_title, 'Collection title');
#ok(!$doc->coll_sub_title, 'Collection subtitle');
#ok(!$doc->coll_editor, 'Collection editor');
#ok(!$doc->coll_author, 'Collection author');

ok !$doc->text_type, 'text_type';
is $doc->text_type_art, 'Bericht', 'text_type art';
263
Nils Diewaldfeccbb12015-06-18 20:06:45 +0000264
# ART (artificial fixture in which most metadata fields are populated)
$path = catdir(dirname(__FILE__), 'artificial');

# The document is loaded once with and once without a trailing slash.
$doc = KorAP::Document->new(path => "$path/");
ok $doc, 'Load Korap::Document';
#is($doc->path, $path . '/', 'Path');

$doc = KorAP::Document->new(path => $path);
ok $doc, 'Load Korap::Document';
#is($doc->path, $path . '/', 'Path');

ok $doc->parse, 'Parse document';

# Metadata
is $doc->title, 'Artificial Title', 'title';
is $doc->sub_title, 'Artificial Subtitle', 'subTitle';
is $doc->text_sigle, 'ART_ABC.00001', 'ID';
is $doc->corpus_sigle, 'ART', 'corpusID';
is $doc->pub_date, '20010402', 'pubDate';
is $doc->pub_place, 'Mannheim', 'pubPlace';
is $doc->text_class->[0], 'freizeit-unterhaltung', 'TextClass';
is $doc->text_class->[1], 'vereine-veranstaltungen', 'TextClass';
ok !$doc->text_class->[2], 'TextClass';

# Disabled assertions that index the author as a list:
#is($doc->author->[0], 'Ruru', 'author');
#is($doc->author->[1], 'Jens.Ol', 'author');
#is($doc->author->[2], 'Aglarech', 'author');
is $doc->author, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author';

# Additional information
is $doc->editor, 'Nils Diewald', 'Editor';
is $doc->publisher, 'Artificial articles Inc.', 'Publisher';
is $doc->creation_date, '19990601', 'Creation date';

# Disabled collection-level assertions:
#is($doc->coll_title, 'Artificial articles', 'Collection title');
#is($doc->coll_sub_title, 'Best of!', 'Collection subtitle');
#is($doc->coll_editor, 'Nils Diewald', 'Collection editor');
#is($doc->coll_author, 'Nils Diewald', 'Collection author');

# A text type IS expected here, so the test name must not claim its absence.
is $doc->text_type, 'Zeitung: Tageszeitung', 'text_type';
is $doc->text_type_art, 'Bericht', 'text_type art';
Nils Diewald8e323ee2014-04-23 17:28:14 +0000300
# Multipath headers
$path = catdir(dirname(__FILE__), 'VDI/JAN/00001');

# The fixture path is matched literally (\Q...\E): the checkout location may
# contain regex metacharacters (dots, parentheses, Windows backslashes, ...)
# that would otherwise be interpreted as pattern syntax.
$doc = KorAP::Document->new(path => "$path/");
ok $doc, 'Load Korap::Document';
like $doc->path, qr{\Q$path\E/}, 'Path';

# Constructed without a trailing slash, the reported path must still end in one.
$doc = KorAP::Document->new(path => $path);
ok $doc, 'Load Korap::Document';
like $doc->path, qr{\Q$path\E/\z}, 'Path';

ok $doc->parse, 'Parse document';

# Sigles on text, document and corpus level
is $doc->text_sigle, 'VDI_JAN.00001', 'text sigle';
is $doc->doc_sigle, 'VDI_JAN', 'doc sigle';
is $doc->corpus_sigle, 'VDI', 'corpus sigle';

# Text-level bibliographic data
is $doc->title, '10- Zz mit Zahl', 'title';
ok !$doc->sub_title, 'subtitle';
is $doc->pub_date, '20140117', 'pubdate';
is $doc->pub_place, 'Düsseldorf', 'pubplace';
is $doc->author, 'Windhövel, Kerstin', 'author';
is $doc->publisher, 'VDI Verlag GmbH', 'publisher';
ok !$doc->editor, 'editor';

# Fields expected to be empty for this text
ok !$doc->text_type, 'text type';
ok !$doc->text_type_art, 'text type art';
ok !$doc->text_type_ref, 'text type ref';
ok !$doc->text_column, 'text column';
ok !$doc->text_domain, 'text domain';
ok !$doc->creation_date, 'creation date';
ok !$doc->license, 'License';
ok !$doc->pages, 'Pages';
ok !$doc->file_edition_statement, 'file edition statement';
ok !$doc->bibl_edition_statement, 'bibl edition statement';
is $doc->reference, 'VDI nachrichten, 17.01.2014, S. 10; 10- Zz mit Zahl [Ausführliche Zitierung nicht verfügbar]', 'Reference';

ok !$doc->language, 'Language';
diag 'This may be "de" in the future';

# Document-level header
is $doc->doc_title, 'VDI nachrichten, Januar 2014', 'Doc title';
ok !$doc->doc_sub_title, 'Doc Sub title';
ok !$doc->doc_editor, 'Doc editor';
ok !$doc->doc_author, 'Doc author';

# Corpus-level header
is $doc->corpus_title, 'VDI nachrichten 2014', 'Corpus title';
ok !$doc->corpus_sub_title, 'Corpus Sub title';
ok !$doc->corpus_editor, 'Corpus editor';
ok !$doc->corpus_author, 'Corpus author';

is $doc->keywords_string, '', 'Keywords';
is $doc->text_class_string, 'Freizeit-Unterhaltung Reisen Politik Ausland', 'Text class';
348
349
# WDD
$path = catdir(dirname(__FILE__), 'WDD/G27/38989');

# The fixture path is matched literally (\Q...\E): the checkout location may
# contain regex metacharacters (dots, parentheses, Windows backslashes, ...)
# that would otherwise be interpreted as pattern syntax.
$doc = KorAP::Document->new(path => "$path/");
ok $doc, 'Load Korap::Document';
like $doc->path, qr{\Q$path\E/}, 'Path';
ok $doc->parse, 'Parse document';

# Sigles on text, document and corpus level
is $doc->text_sigle, 'WDD11_G27.38989', 'text sigle';
is $doc->doc_sigle, 'WDD11_G27', 'doc sigle';
is $doc->corpus_sigle, 'WDD11', 'corpus sigle';

# Text-level bibliographic data
is $doc->title, 'Diskussion:Gunter A. Pilz', 'title';
ok !$doc->sub_title, 'subtitle';
is $doc->pub_date, '20111029', 'pubdate';
is $doc->pub_place, 'URL:http://de.wikipedia.org', 'pubplace';

is $doc->author, '€pa, u.a.', 'author';
is $doc->publisher, 'Wikipedia', 'publisher';
ok !$doc->editor, 'editor';

is $doc->text_type, 'Diskussionen zu Enzyklopädie-Artikeln', 'text type';
ok !$doc->text_type_art, 'text type art';
ok !$doc->text_type_ref, 'text type ref';
ok !$doc->text_column, 'text column';
ok !$doc->text_domain, 'text domain';

is $doc->creation_date, '20070707', 'creation date';
is $doc->license, 'CC-BY-SA', 'License';
ok !$doc->pages, 'Pages';
ok !$doc->file_edition_statement, 'file edition statement';
ok !$doc->bibl_edition_statement, 'bibl edition statement';
is $doc->reference, 'Diskussion:Gunter A. Pilz, In: Wikipedia - URL:http://de.wikipedia.org/wiki/Diskussion:Gunter_A._Pilz: Wikipedia, 2007', 'Reference';

is $doc->language, 'de', 'Language';

# Document-level header
is $doc->doc_title, 'Wikipedia, Diskussionen zu Artikeln mit Anfangsbuchstabe G, Teil 27', 'Doc title';
ok !$doc->doc_sub_title, 'Doc Sub title';
ok !$doc->doc_editor, 'Doc editor';
ok !$doc->doc_author, 'Doc author';

# Corpus-level header
is $doc->corpus_title, 'Wikipedia.de 2011 Diskussionen', 'Corpus title';
ok !$doc->corpus_sub_title, 'Corpus Sub title';
ok !$doc->corpus_editor, 'Corpus editor';
ok !$doc->corpus_author, 'Corpus author';

is $doc->keywords_string, '', 'Keywords';
is $doc->text_class_string, '', 'Text class';

# No fixed plan: the number of tests is determined when the script finishes.
done_testing;
__END__
Nils Diewaldfeccbb12015-06-18 20:06:45 +0000399
400