# blob: 3f9f8e96c9fd77f5ff2b512f538f4c8c847f4cbb [file] [log] [blame]
Akron797e8072020-02-13 07:59:40 +01001use strict;
2use warnings;
3use File::Basename 'dirname';
4use File::Spec::Functions qw/catfile/;
Akron68966082020-02-13 15:52:18 +01005use File::Temp ':POSIX';
6use IO::Uncompress::Unzip qw(unzip $UnzipError);
Akron797e8072020-02-13 07:59:40 +01007
8use Test::More;
9use Test::Output;
10
# TODO:
# This is almost unusably slow and, due to namespaces, extremely verbose
# - it is probably better to switch to something based on Test::Mojo
14use Test::XML::Simple;
15
# Locate the tei2korapxml script relative to the directory of this test file
my $f = dirname(__FILE__);
my $script = catfile($f, '..', 'script', 'tei2korapxml');
ok(-f $script, 'Script found');

# Calling the script with --help must print exactly this notice on STDERR.
# $^X is used instead of a bare 'perl' so the child process runs under the
# same interpreter as this test, not whichever perl comes first in PATH.
stderr_is(
  sub { system($^X, $script, '--help') },
  "This program is called from inside another script.\n",
  'Help'
);

# Load example file
my $file = catfile($f, 'data', 'goe_sample.i5.xml');

# Name of the temporary zip archive the script output is redirected to
my $outzip = tmpnam();

# Remove the temporary archive once the test run ends, so repeated runs
# do not leave files behind in the temp directory
END { unlink $outzip if defined $outzip && -e $outzip }

# Generate zip file: the sample is fed to the script on STDIN and the
# archive is collected from STDOUT. This still goes through the shell with
# single-quoted paths (unportable!), but no longer needs an external `cat`
# and runs the script with the interpreter executing this test ($^X).
stderr_like(
  sub { `'$^X' '$script' < '$file' > '$outzip'` },
  qr!tei2korapxml: .*? text_id=GOE_AGA\.00000!,
  'Processing'
);

# Uncompress GOE/header.xml from zip file
my $zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/header.xml');

# On failure, show the unzip module's own error message
ok($zip, 'Zip-File is created') or diag("Unzip failed: $UnzipError");

# Read GOE/header.xml line by line. Looping on a defined line, rather than
# on !eof, never appends an undefined value to the buffer.
my $header_xml = '';
while (defined(my $line = $zip->getline)) {
  $header_xml .= $line;
}
ok($zip->close, 'Closed');

# Check the corpus header fields
xml_is($header_xml, '//korpusSigle', 'GOE', 'korpusSigle');
xml_is($header_xml, '//h.title[@type="main"]', 'Goethes Werke', 'h.title');
xml_is($header_xml, '//h.author', 'Goethe, Johann Wolfgang von', 'h.author');
xml_is($header_xml, '//pubDate[@type="year"]', '1982', 'pubDate');


# Uncompress GOE/AGA/header.xml from zip file
$zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/header.xml');

# On failure, show the unzip module's own error message
ok($zip, 'Zip-File is found') or diag("Unzip failed: $UnzipError");

# Read GOE/AGA/header.xml line by line. Looping on a defined line, rather
# than on !eof, never appends an undefined value to the buffer.
$header_xml = '';
while (defined(my $line = $zip->getline)) {
  $header_xml .= $line;
}
ok($zip->close, 'Closed');

# Check the document header fields
xml_is($header_xml, '//dokumentSigle', 'GOE/AGA', 'dokumentSigle');
xml_is($header_xml, '//d.title', 'Goethe: Autobiographische Schriften II, (1817-1825, 1832)', 'd.title');
xml_is($header_xml, '//creatDate', '1820-1822', 'creatDate');


# Uncompress GOE/AGA/00000/header.xml from zip file
$zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/00000/header.xml');

# On failure, show the unzip module's own error message
ok($zip, 'Zip-File is found') or diag("Unzip failed: $UnzipError");

# Read GOE/AGA/00000/header.xml line by line. Looping on a defined line,
# rather than on !eof, never appends an undefined value to the buffer.
$header_xml = '';
while (defined(my $line = $zip->getline)) {
  $header_xml .= $line;
}
ok($zip->close, 'Closed');

# This is slow - should be improved for more tests
xml_is($header_xml, '//textSigle', 'GOE/AGA.00000', 'textSigle');
xml_is($header_xml, '//analytic/h.title[@type="main"]', 'Campagne in Frankreich', 'h.title');


# Uncompress GOE/AGA/00000/data.xml from zip file
$zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/00000/data.xml');

# On failure, show the unzip module's own error message
ok($zip, 'Zip-File is found') or diag("Unzip failed: $UnzipError");

# Read GOE/AGA/00000/data.xml line by line. Looping on a defined line,
# rather than on !eof, never appends an undefined value to the buffer.
my $data_xml = '';
while (defined(my $line = $zip->getline)) {
  $data_xml .= $line;
}
ok($zip->close, 'Closed');

# The root element must be raw_text carrying the text id, and its text
# child must start and end with the expected passages
xml_node($data_xml, '/*[name()="raw_text" and @docid="GOE_AGA.00000"]', 'text id');
xml_like($data_xml, '/*[local-name()="raw_text"]/*[local-name()="text"]', qr!^Campagne in Frankreich 1792.*?uns allein begl.cke\.$!, 'text content');

# Uncompress GOE/AGA/00000/struct/structure.xml from zip file
$zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/00000/struct/structure.xml');

# On failure, show the unzip module's own error message
ok($zip, 'Zip-File is found') or diag("Unzip failed: $UnzipError");

# Read GOE/AGA/00000/struct/structure.xml line by line. Looping on a
# defined line, rather than on !eof, never appends an undefined value
# to the buffer.
my $struct_xml = '';
while (defined(my $line = $zip->getline)) {
  $struct_xml .= $line;
}
ok($zip->close, 'Closed');

# The span with id "s3" must carry a "type" feature with the expected value
xml_is($struct_xml, '//*[name()="span" and @id="s3"]//*[@name="type"]', 'Autobiographie', 'text content');



done_testing;