# blob: 6bdbc9ef600ec893c6c6b86fc2e2828ed1934e87 [file] [log] [blame]
use strict;
use warnings;

use File::Basename 'dirname';
use File::Spec::Functions qw/catfile/;
use File::Temp qw(:POSIX tempfile);
use IO::Uncompress::Unzip qw(unzip $UnzipError);

use Test::More;
use Test::Output;
use Test::XML::Loy;

# Locate the converter script relative to this test file.
my $f = dirname(__FILE__);
my $script = catfile($f, '..', 'script', 'tei2korapxml');
ok(-f $script, 'Script found');

# Check the message written to STDERR for --help.
# The child is started with $^X, i.e. the very interpreter that executes
# this test, so it does not depend on which "perl" comes first in PATH.
stderr_is(
  sub { system($^X, $script, '--help') },
  "This program is called from inside another script.\n",
  'Help'
);

# Load example file
my $file = catfile($f, 'data', 'goe_sample.i5.xml');

# Create the target file up front and register it for removal when the
# test exits (UNLINK => 1). Only the name is needed here: the archive is
# written through the shell redirection below, not through the handle.
my (undef, $outzip) = tempfile(UNLINK => 1);

# Interpreter running this test; reused for the child process.
my $perl = $^X;

# Generate zip file (unportable!)
# The sample is piped into the converter, whose STDOUT is redirected to
# $outzip; the progress message on STDERR is what gets matched.
stderr_like(
  sub { `cat '$file' | '$perl' '$script' > '$outzip'` },
  qr!tei2korapxml: .*? text_id=GOE_AGA\.00000!,
  'Processing'
);

# Uncompress GOE/header.xml from zip file
my $zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/header.xml');

# Declared at file scope: the following sections reuse both variables.
my $header_xml = '';
my $t;

# Only read from the handle if it could be opened; calling methods on an
# undefined handle would abort the whole test run.
if (ok($zip, 'Zip-File is created')) {

  # Read GOE/header.xml
  $header_xml .= $zip->getline while !$zip->eof;
  ok($zip->close, 'Closed');

  $t = Test::XML::Loy->new($header_xml);

  $t->text_is('korpusSigle', 'GOE', 'korpusSigle')
    ->text_is('h\.title[type=main]', 'Goethes Werke', 'h.title')
    ->text_is('h\.author', 'Goethe, Johann Wolfgang von', 'h.author')
    ->text_is('pubDate[type=year]', '1982', 'pubDate');
}
else {
  diag("Unable to read GOE/header.xml from $outzip: $UnzipError");
}

# Uncompress GOE/AGA/header.xml from zip file
$zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/header.xml');

# Only read from the handle if it could be opened; calling methods on an
# undefined handle would abort the whole test run.
if (ok($zip, 'Zip-File is found')) {

  # Read GOE/AGA/header.xml
  $header_xml = '';
  $header_xml .= $zip->getline while !$zip->eof;
  ok($zip->close, 'Closed');

  $t = Test::XML::Loy->new($header_xml);

  $t->text_is('dokumentSigle', 'GOE/AGA', 'dokumentSigle')
    ->text_is('d\.title', 'Goethe: Autobiographische Schriften II, (1817-1825, 1832)', 'd.title')
    ->text_is('creatDate', '1820-1822', 'creatDate');
}
else {
  diag("Unable to read GOE/AGA/header.xml from $outzip: $UnzipError");
}

# Uncompress GOE/AGA/00000/header.xml from zip file
$zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/00000/header.xml');

# Only read from the handle if it could be opened; calling methods on an
# undefined handle would abort the whole test run.
if (ok($zip, 'Zip-File is found')) {

  # Read GOE/AGA/00000/header.xml
  $header_xml = '';
  $header_xml .= $zip->getline while !$zip->eof;
  ok($zip->close, 'Closed');

  $t = Test::XML::Loy->new($header_xml);
  $t->text_is('textSigle', 'GOE/AGA.00000', 'textSigle')
    ->text_is('analytic > h\.title[type=main]', 'Campagne in Frankreich', 'h.title');
}
else {
  diag("Unable to read GOE/AGA/00000/header.xml from $outzip: $UnzipError");
}

# Uncompress GOE/AGA/00000/data.xml from zip file
$zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/00000/data.xml');

# Only read from the handle if it could be opened; calling methods on an
# undefined handle would abort the whole test run.
if (ok($zip, 'Zip-File is found')) {

  # Read GOE/AGA/00000/data.xml
  my $data_xml = '';
  $data_xml .= $zip->getline while !$zip->eof;
  ok($zip->close, 'Closed');

  # Check the docid attribute and the beginning and end of the raw text.
  $t = Test::XML::Loy->new($data_xml);
  $t->attr_is('raw_text', 'docid', 'GOE_AGA.00000', 'text id')
    ->text_like('raw_text > text', qr!^Campagne in Frankreich 1792.*?uns allein begl.*cke\.$!, 'text content');
}
else {
  diag("Unable to read GOE/AGA/00000/data.xml from $outzip: $UnzipError");
}

# Uncompress GOE/AGA/00000/struct/structure.xml from zip file
$zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/00000/struct/structure.xml');

# Only read from the handle if it could be opened; calling methods on an
# undefined handle would abort the whole test run.
if (ok($zip, 'Zip-File is found')) {

  # Read GOE/AGA/00000/struct/structure.xml
  my $struct_xml = '';
  $struct_xml .= $zip->getline while !$zip->eof;
  ok($zip->close, 'Closed');

  $t = Test::XML::Loy->new($struct_xml);
  $t->text_is('span[id=s3] *[name=type]', 'Autobiographie', 'text content');
}
else {
  diag("Unable to read GOE/AGA/00000/struct/structure.xml from $outzip: $UnzipError");
}

done_testing;