blob: 4128f5d5b80356aaed0ce2c7559188f5818be3e0 [file] [log] [blame]
Akron8c84aa52016-02-13 21:26:54 +01001use strict;
2use warnings;
3use Test::More;
4use File::Basename 'dirname';
5use File::Spec::Functions 'catdir';
6use Data::Dumper;
7use KorAP::XML::Tokenizer;
8use KorAP::XML::Krill;
9use utf8;
10
# First document: CMC-TSK/2014-09/2843
#
# Builds the fixture path relative to this test file, parses the document
# and checks the metadata exposed by the resulting object.
my $path = catdir(dirname(__FILE__), 'CMC-TSK', '2014-09', '2843');

ok(my $doc = KorAP::XML::Krill->new(
  path => $path . '/'
), 'Create Document');

ok($doc->parse, 'Parse document');

# \Q...\E quotes the path so that regex metacharacters in the checkout
# directory (dots, plus signs, backslash separators, ...) are matched
# literally instead of being interpreted as pattern syntax.
like($doc->path, qr!\Q$path\E/!, 'Path');

# Metadata
is($doc->text_sigle, 'CMC-TSK_2014-09.2843', 'ID-text');

is($doc->doc_sigle, 'CMC-TSK_2014-09', 'ID-doc');
is($doc->corpus_sigle, 'CMC-TSK', 'ID-corpus');

is($doc->title, '@ Koelle_am_Rhing 10:18', 'title');

ok(!$doc->sub_title, 'no subtitle');

is($doc->publisher, 'tagesschau.de', 'Publisher');

is($doc->pub_date, '20140930', 'Publication date');

ok(!$doc->pub_place, 'No pub place');

is($doc->doc_title, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
is($doc->doc_sub_title, 'Subkorpus Internettexte, Subkorpus Leserkommentare Tagesschau, Subkorpus September 2014, Subkorpus Beispielauszug', 'Doc Sub title');

is($doc->store('funder'), 'Bundesministerium für Bildung und Forschung', 'Funder');

is($doc->author, 'privat23', 'Author');
ok(!$doc->store('sgbrAuthorSex'), 'No Sex');
ok(!$doc->store('sgbrKodex'), 'No kodex');
is($doc->reference, 'http://meta.tagesschau.de/node/090285#comment-1732187', 'Publace ref');

is($doc->keywords_string, '', 'No keywords');

is($doc->language, 'de', 'Language');

# Every accessor listed here is expected to return a false value for this
# fixture. Each entry is [ accessor name => test description ]; the order
# is the order in which the checks are reported.
for my $check (
  [editor                 => 'Editor'],
  [text_type              => 'Text Type'],
  [text_type_art          => 'Text Type Art'],
  [text_type_ref          => 'Text Type Ref'],
  [text_column            => 'Text Column'],
  [text_domain            => 'Text Domain'],
  [creation_date          => 'Creation Date'],
  [license                => 'License'],
  [pages                  => 'Pages'],
  [file_edition_statement => 'File Edition Statement'],
  [bibl_edition_statement => 'Bibl Edition Statement'],
  [doc_editor             => 'Doc: editor'],
  [doc_author             => 'Doc: author'],
  [corpus_title           => 'Corpus: title'],
  [corpus_sub_title       => 'Corpus: subtitle'],
  [corpus_editor          => 'Corpus: editor'],
  [corpus_author          => 'Corpus: author'],
) {
  my ($accessor, $description) = @$check;
  ok(!$doc->$accessor, $description);
}

# The hash representation must carry the same title as the accessor above.
my $hash = $doc->to_hash;
is($hash->{title}, '@ Koelle_am_Rhing 10:18', 'Corpus title');
74
75
# Second document: CMC-TSK/2014-09/3401
#
# Runs the same metadata checks against another fixture. $path, $doc and
# $hash are the lexicals declared for the first document and are reused.

$path = catdir(dirname(__FILE__), 'CMC-TSK', '2014-09', '3401');

ok($doc = KorAP::XML::Krill->new(
  path => $path . '/'
), 'Create Document');

ok($doc->parse, 'Parse document');

# \Q...\E quotes the path so that regex metacharacters in the checkout
# directory (dots, plus signs, backslash separators, ...) are matched
# literally instead of being interpreted as pattern syntax.
like($doc->path, qr!\Q$path\E/!, 'Path');

# Metadata
is($doc->text_sigle, 'CMC-TSK_2014-09.3401', 'ID-text');

is($doc->doc_sigle, 'CMC-TSK_2014-09', 'ID-doc');
is($doc->corpus_sigle, 'CMC-TSK', 'ID-corpus');

is($doc->title, '@fitnessfrosch', 'title');

ok(!$doc->sub_title, 'no subtitle');

is($doc->publisher, 'tagesschau.de', 'Publisher');

is($doc->pub_date, '20141001', 'Publication date');
is($doc->store('sgbrDate'), '2014-10-01 00:50:00', 'Sgbr date');

ok(!$doc->pub_place, 'No pub place');

is($doc->doc_title, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
is($doc->doc_sub_title, 'Subkorpus Internettexte, Subkorpus Leserkommentare Tagesschau, Subkorpus September 2014, Subkorpus Beispielauszug', 'Doc Sub title');

is($doc->store('funder'), 'Bundesministerium für Bildung und Forschung', 'Funder');

is($doc->author, 'weltoffen', 'Author');
ok(!$doc->store('sgbrAuthorSex'), 'No Sex');
ok(!$doc->store('sgbrKodex'), 'No kodex');
is($doc->reference, 'http://meta.tagesschau.de/node/090308#comment-1732754', 'Publace ref');

is($doc->keywords_string, '', 'No keywords');

is($doc->language, 'de', 'Language');

# Every accessor listed here is expected to return a false value for this
# fixture. Each entry is [ accessor name => test description ]; the order
# is the order in which the checks are reported.
for my $check (
  [editor                 => 'Editor'],
  [text_type              => 'Text Type'],
  [text_type_art          => 'Text Type Art'],
  [text_type_ref          => 'Text Type Ref'],
  [text_column            => 'Text Column'],
  [text_domain            => 'Text Domain'],
  [creation_date          => 'Creation Date'],
  [license                => 'License'],
  [pages                  => 'Pages'],
  [file_edition_statement => 'File Edition Statement'],
  [bibl_edition_statement => 'Bibl Edition Statement'],
  [doc_editor             => 'Doc: editor'],
  [doc_author             => 'Doc: author'],
  [corpus_title           => 'Corpus: title'],
  [corpus_sub_title       => 'Corpus: subtitle'],
  [corpus_editor          => 'Corpus: editor'],
  [corpus_author          => 'Corpus: author'],
) {
  my ($accessor, $description) = @$check;
  ok(!$doc->$accessor, $description);
}

# The hash representation must carry the same title as the accessor above.
$hash = $doc->to_hash;
is($hash->{title}, '@fitnessfrosch', 'Corpus title');

done_testing;
__END__
145