# Test script: builds a KorAP::XML::Krill document with meta_type 'Sgbr'
# from the fixture directory PRO-DUD/BSP-2013-01/32 (located next to this
# file), parses it, and checks the sigles, the metadata fields and the
# hash representation returned by to_hash.
use strict;
use warnings;
use Test::More;
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
use Data::Dumper;
use KorAP::XML::Tokenizer;
use KorAP::XML::Krill;
use utf8; # Expected values below contain non-ASCII literals (e.g. 'für')

# Fixture directory, resolved relative to this test file
my $path = catdir(dirname(__FILE__), 'PRO-DUD', 'BSP-2013-01', 32);

ok(my $doc = KorAP::XML::Krill->new(
  path => $path . '/',
  meta_type => 'Sgbr'
), 'Create Document');

ok($doc->parse, 'Parse document');

# \Q...\E keeps regex metacharacters in the file system path literal
like($doc->path, qr!\Q$path\E/!, 'Path');

# Metadata
is($doc->text_sigle, 'PRO-DUD/BSP-2013-01/32', 'ID-text');
is($doc->doc_sigle, 'PRO-DUD/BSP-2013-01', 'ID-doc');
is($doc->corpus_sigle, 'PRO-DUD', 'ID-corpus');

my $meta = $doc->meta;
is($meta->{T_title}, 'Nur Platt, kein Deutsch', 'title');
ok(!$meta->{T_sub_title}, 'no subtitle');

is($meta->{A_publisher}, 'Dorfblatt GmbH', 'Publisher');
is($meta->{D_pub_date}, '20130126', 'Publication date');
is($meta->{D_sgbr_date}, '2013-01-26', 'Sgbr date');
is($meta->{S_pub_place}, 'Stadtingen', 'Publication place');

is($meta->{T_doc_title}, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
is($meta->{T_doc_sub_title}, 'Subkorpus Ortsblatt, Jahrgang 2013, Monat Januar', 'Doc Sub title');

is($meta->{'A_funder'}, 'Bundesministerium für Bildung und Forschung', 'Funder');

is($meta->{T_author}, 'unbekannt', 'Author');
ok(!$meta->{'S_sgbr_author_sex'}, 'No Sex');
is($meta->{'S_sgbr_kodex'}, 'T', 'Sgbr kodex');

is($meta->keywords('K_keywords'), 'sgbrKodex:T', 'Keywords');

is($meta->{S_language}, 'de', 'Language');

# Fields that are expected to be unset for this document
ok(!$meta->{A_editor}, 'Editor');

ok(!$meta->{S_text_type}, 'Text Type');
ok(!$meta->{S_text_type_art}, 'Text Type Art');
ok(!$meta->{S_text_type_ref}, 'Text Type Ref');
ok(!$meta->{S_text_column}, 'Text Column');
ok(!$meta->{S_text_domain}, 'Text Domain');
ok(!$meta->{D_creation_date}, 'Creation Date');
ok(!$meta->{A_license}, 'License');
ok(!$meta->{A_pages}, 'Pages');
ok(!$meta->{A_file_edition_statement}, 'File Edition Statement');
ok(!$meta->{A_bibl_edition_statement}, 'Bibl Edition Statement');
ok(!$meta->{A_reference}, 'Reference');


ok(!$meta->{A_doc_editor}, 'Doc: editor');
ok(!$meta->{T_doc_author}, 'Doc: author');

ok(!$meta->{T_corpus_title}, 'Corpus: title');
ok(!$meta->{T_corpus_sub_title}, 'Corpus: subtitle');
ok(!$meta->{A_corpus_editor}, 'Corpus: editor');
ok(!$meta->{T_corpus_author}, 'Corpus: author');

# Hash representation of the document
my $hash = $doc->to_hash;
is($hash->{title}, 'Nur Platt, kein Deutsch', 'Title in hash');
is($hash->{sgbrKodex}, 'T', 'store');


done_testing;


__END__