| Akron | 8c84aa5 | 2016-02-13 21:26:54 +0100 | [diff] [blame] | 1 | use strict; | 
|  | 2 | use warnings; | 
|  | 3 | use Test::More; | 
|  | 4 | use File::Basename 'dirname'; | 
|  | 5 | use File::Spec::Functions 'catdir'; | 
|  | 6 | use Data::Dumper; | 
|  | 7 | use KorAP::XML::Tokenizer; | 
|  | 8 | use KorAP::XML::Krill; | 
|  | 9 | use utf8; | 
|  | 10 |  | 
# First document: fixture directory CMC-TSK/2014-09/2843, located next to
# this test file. Creates a KorAP::XML::Krill document from it, parses it,
# and checks the metadata accessors.
my $path = catdir(dirname(__FILE__), 'CMC-TSK', '2014-09', '2843');

ok(my $doc = KorAP::XML::Krill->new(
  path => $path . '/'
), 'Create Document');

ok($doc->parse, 'Parse document');

# \Q...\E makes the directory match literally; without it, regex
# metacharacters in the filesystem path (e.g. '.', '+', '(' or Windows
# backslashes) would be interpreted as pattern syntax.
like($doc->path, qr!\Q$path\E/!, 'Path');

# Metadata
is($doc->text_sigle, 'CMC-TSK_2014-09.2843', 'ID-text');

is($doc->doc_sigle, 'CMC-TSK_2014-09', 'ID-doc');
is($doc->corpus_sigle, 'CMC-TSK', 'ID-corpus');

is($doc->title, '@ Koelle_am_Rhing 10:18', 'title');

ok(!$doc->sub_title, 'no subtitle');

is($doc->publisher, 'tagesschau.de', 'Publisher');

is($doc->pub_date, '20140930', 'Publication date');

ok(!$doc->pub_place, 'No pub place');

is($doc->doc_title, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
is($doc->doc_sub_title, 'Subkorpus Internettexte, Subkorpus Leserkommentare Tagesschau, Subkorpus September 2014, Subkorpus Beispielauszug', 'Doc Sub title');

is($doc->store('funder'), 'Bundesministerium für Bildung und Forschung', 'Funder');

is($doc->author, 'privat23', 'Author');
ok(!$doc->store('sgbrAuthorSex'), 'No Sex');
ok(!$doc->store('sgbrKodex'), 'No kodex');
is($doc->reference, 'http://meta.tagesschau.de/node/090285#comment-1732187', 'Publace ref');

is($doc->keywords_string, '', 'No keywords');

is($doc->language, 'de', 'Language');

# The remaining text-, document- and corpus-level fields are expected
# to be false (unset or empty) for this fixture.
ok(!$doc->editor, 'Editor');

ok(!$doc->text_type, 'Text Type');
ok(!$doc->text_type_art, 'Text Type Art');
ok(!$doc->text_type_ref, 'Text Type Ref');
ok(!$doc->text_column, 'Text Column');
ok(!$doc->text_domain, 'Text Domain');
ok(!$doc->creation_date, 'Creation Date');
ok(!$doc->license, 'License');
ok(!$doc->pages, 'Pages');
ok(!$doc->file_edition_statement, 'File Edition Statement');
ok(!$doc->bibl_edition_statement, 'Bibl Edition Statement');

ok(!$doc->doc_editor, 'Doc: editor');
ok(!$doc->doc_author, 'Doc: author');

ok(!$doc->corpus_title, 'Corpus: title');
ok(!$doc->corpus_sub_title, 'Corpus: subtitle');
ok(!$doc->corpus_editor, 'Corpus: editor');
ok(!$doc->corpus_author, 'Corpus: author');

# The hash returned by to_hash must carry the same title as the accessor.
my $hash = $doc->to_hash;
is($hash->{title}, '@ Koelle_am_Rhing 10:18', 'Corpus title');
|  | 74 |  | 
|  | 75 |  | 
# Second document: fixture directory CMC-TSK/2014-09/3401.
# Reuses the $path, $doc and $hash lexicals declared earlier in this file
# and repeats the same metadata checks for a different text.

$path = catdir(dirname(__FILE__), 'CMC-TSK', '2014-09', '3401');

ok($doc = KorAP::XML::Krill->new(
  path => $path . '/'
), 'Create Document');

ok($doc->parse, 'Parse document');

# \Q...\E makes the directory match literally; without it, regex
# metacharacters in the filesystem path (e.g. '.', '+', '(' or Windows
# backslashes) would be interpreted as pattern syntax.
like($doc->path, qr!\Q$path\E/!, 'Path');

# Metadata
is($doc->text_sigle, 'CMC-TSK_2014-09.3401', 'ID-text');

is($doc->doc_sigle, 'CMC-TSK_2014-09', 'ID-doc');
is($doc->corpus_sigle, 'CMC-TSK', 'ID-corpus');

is($doc->title, '@fitnessfrosch', 'title');

ok(!$doc->sub_title, 'no subtitle');

is($doc->publisher, 'tagesschau.de', 'Publisher');

is($doc->pub_date, '20141001', 'Publication date');
is($doc->store('sgbrDate'), '2014-10-01 00:50:00', 'sgbrDate');

ok(!$doc->pub_place, 'No pub place');

is($doc->doc_title, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
is($doc->doc_sub_title, 'Subkorpus Internettexte, Subkorpus Leserkommentare Tagesschau, Subkorpus September 2014, Subkorpus Beispielauszug', 'Doc Sub title');

is($doc->store('funder'), 'Bundesministerium für Bildung und Forschung', 'Funder');

is($doc->author, 'weltoffen', 'Author');
ok(!$doc->store('sgbrAuthorSex'), 'No Sex');
ok(!$doc->store('sgbrKodex'), 'No kodex');
is($doc->reference, 'http://meta.tagesschau.de/node/090308#comment-1732754', 'Publace ref');

is($doc->keywords_string, '', 'No keywords');

is($doc->language, 'de', 'Language');

# The remaining text-, document- and corpus-level fields are expected
# to be false (unset or empty) for this fixture.
ok(!$doc->editor, 'Editor');

ok(!$doc->text_type, 'Text Type');
ok(!$doc->text_type_art, 'Text Type Art');
ok(!$doc->text_type_ref, 'Text Type Ref');
ok(!$doc->text_column, 'Text Column');
ok(!$doc->text_domain, 'Text Domain');
ok(!$doc->creation_date, 'Creation Date');
ok(!$doc->license, 'License');
ok(!$doc->pages, 'Pages');
ok(!$doc->file_edition_statement, 'File Edition Statement');
ok(!$doc->bibl_edition_statement, 'Bibl Edition Statement');

ok(!$doc->doc_editor, 'Doc: editor');
ok(!$doc->doc_author, 'Doc: author');

ok(!$doc->corpus_title, 'Corpus: title');
ok(!$doc->corpus_sub_title, 'Corpus: subtitle');
ok(!$doc->corpus_editor, 'Corpus: editor');
ok(!$doc->corpus_author, 'Corpus: author');

# The hash returned by to_hash must carry the same title as the accessor.
$hash = $doc->to_hash;
is($hash->{title}, '@fitnessfrosch', 'Corpus title');

done_testing;
|  | 144 | __END__ | 
|  | 145 |  |