blob: 9e24c9efa8f07167c2ad210e5a09020c47d67fe7 [file] [log] [blame]
Akron2532f1b2023-05-15 13:41:24 +02001use strict;
2use warnings;
3use Test::More;
4use Data::Dumper;
5use JSON::XS;
6
# Honour the SKIP_REAL environment variable: when it is set to a true
# value, declare the whole file as skipped instead of running any test.
plan skip_all => 'Skip real tests' if $ENV{SKIP_REAL};
10
11use Benchmark qw/:hireswallclock/;
12
# Benchmark object created at start-up; nothing further down in this
# file reads $t.
my $t = Benchmark->new();
14
15use utf8;
16use lib 'lib', '../lib';
17
18use File::Basename 'dirname';
19use File::Spec::Functions 'catdir';
20
use_ok('KorAP::XML::Krill');

# Check the metadata of documents loaded with meta_type 'ICC'.

# German document: corpus/ICCGER/CCBY-LTE/WMA-00005
# ($path, $doc and $meta are declared here and reused by the later
# sections of this file.)
my $path = catdir(
  dirname(__FILE__), 'corpus', 'ICCGER', 'CCBY-LTE', 'WMA-00005'
);

ok(my $doc = KorAP::XML::Krill->new(
  path      => $path . '/',
  meta_type => 'ICC'
), 'Load Korap::Document');
ok($doc->parse, 'Parse document');

# Sigles on text, document and corpus level
is($doc->text_sigle, 'ICCGER/CCBY-LTE/WMA-00005', 'Correct text sigle');
is($doc->doc_sigle, 'ICCGER/CCBY-LTE', 'Correct document sigle');
is($doc->corpus_sigle, 'ICCGER', 'Correct corpus sigle');

# Metadata fields
my $meta = $doc->meta;

# The '.' in the pattern stands in for the umlaut in the title.
like($meta->{T_title}, qr!Affinit.tschromatografie!, 'Title');
is($meta->{S_pub_place}, 'Zug', 'PubPlace');
is($meta->{T_author}, 'Wilke, Marco; Weller, Michael G.', 'Author');
is($meta->{D_pub_date}, '20190000', 'Publication date');

# No subtitle is expected for this text.
ok(!$meta->{T_sub_title}, 'SubTitle');

is($meta->{A_publisher}, 'Sigwerb Sigwerb', 'Publisher');
is(
  $meta->{S_license},
  'Lizenz (Deutsch): License LogoCreative Commons - CC BY - Namensnennung 4.0 International',
  'Licence'
);

# These two checks were both named 'Editor'; name them after the field
# they actually verify so a failure can be attributed correctly.
is($meta->{S_iccGenre}, 'Learned_Technology', 'Genre');
is($meta->{A_source}, 'German Reference Corpus DeReKo', 'Source');
55
56
# Norwegian document: corpus/ICCNOR/199/00002
# Reuses $path, $doc and $meta declared earlier in this file.
# Note that the expected sigles use the prefix 'NO', not the directory
# name 'ICCNOR'.
$path = catdir(dirname(__FILE__), 'corpus', 'ICCNOR', '199', '00002');

ok($doc = KorAP::XML::Krill->new(
  path      => $path . '/',
  meta_type => 'ICC'
), 'Load Korap::Document');
ok($doc->parse, 'Parse document');

# Sigles on text, document and corpus level
is($doc->text_sigle, 'NO/199/00002', 'Correct text sigle');
is($doc->doc_sigle, 'NO/199', 'Correct document sigle');
is($doc->corpus_sigle, 'NO', 'Correct corpus sigle');

# Metadata fields
$meta = $doc->meta;
like($meta->{T_title}, qr!Pengesnakk!, 'Title');
is($meta->{S_pub_place}, 'https://www.pengesnakk.no/', 'PubPlace');
is($meta->{T_author}, 'Kristoffersen, Lise Vermelid', 'Author');
is($meta->{D_pub_date}, '20190000', 'Publication date');

# Subtitle, publisher and licence are expected to be absent or empty.
ok(!$meta->{T_sub_title}, 'SubTitle');
ok(!$meta->{A_publisher}, 'Publisher');
ok(!$meta->{S_license}, 'Licence');

# These two checks were both named 'Editor'; name them after the field
# they actually verify so a failure can be attributed correctly.
is($meta->{S_iccGenre}, 'blog', 'Genre');
ok(!$meta->{A_source}, 'Source');
87
# English document: corpus/ICCENG/144/00005
# Reuses $path, $doc and $meta declared earlier in this file.
# Note that the expected sigles use the prefix 'EN', not the directory
# name 'ICCENG'.
$path = catdir(dirname(__FILE__), 'corpus', 'ICCENG', '144', '00005');

ok($doc = KorAP::XML::Krill->new(
  path      => $path . '/',
  meta_type => 'ICC'
), 'Load Korap::Document');
ok($doc->parse, 'Parse document');

# Sigles on text, document and corpus level
is($doc->text_sigle, 'EN/144/00005', 'Correct text sigle');
is($doc->doc_sigle, 'EN/144', 'Correct document sigle');
is($doc->corpus_sigle, 'EN', 'Correct corpus sigle');

# Metadata fields
$meta = $doc->meta;
like($meta->{T_title}, qr!Irish News!, 'Title');
is($meta->{D_pub_date}, '19940000', 'Publication date');

# Publication place, author, subtitle, publisher and licence are
# expected to be absent or empty.
ok(!$meta->{S_pub_place}, 'PubPlace');
ok(!$meta->{T_author}, 'Author');
ok(!$meta->{T_sub_title}, 'SubTitle');
ok(!$meta->{A_publisher}, 'Publisher');
ok(!$meta->{S_license}, 'Licence');

# These two checks were both named 'Editor'; name them after the field
# they actually verify so a failure can be attributed correctly.
is($meta->{S_iccGenre}, 'PreEdi', 'Genre');
ok(!$meta->{A_source}, 'Source');

# No fixed plan is declared, so signal the end of the test run here.
done_testing;
123__END__