blob: fcdd1da250ec5d73d817c50f8afe9ae788b050cc [file] [log] [blame]
Akron797e8072020-02-13 07:59:40 +01001use strict;
2use warnings;
3use File::Basename 'dirname';
4use File::Spec::Functions qw/catfile/;
Akron68966082020-02-13 15:52:18 +01005use File::Temp ':POSIX';
6use IO::Uncompress::Unzip qw(unzip $UnzipError);
Akron797e8072020-02-13 07:59:40 +01007
8use Test::More;
9use Test::Output;
10
# TODO:
#   This is almost unusably slow and, due to namespaces, extremely verbose
#   - it is probably better to switch to something based on Test::Mojo
14use Test::XML::Simple;
15
# Resolve the converter script relative to the directory that contains this
# test file and assert that it exists before anything tries to run it.
my $f      = dirname(__FILE__);
my $script = catfile($f, qw(.. script tei2korapxml));
ok(-f $script, 'Script found');
19
# --help has to print the usage text on STDOUT.
# The script is started with $^X (the interpreter executing this test) rather
# than whichever "perl" happens to be first in PATH, so the child runs under
# the same Perl as the test itself.
stdout_like(
  sub { system($^X, $script, '--help') },
  qr!This\s*program\s*is\s*usually\s*called\s*from\s*inside\s*another\s*script\.!,
  'Help'
);
25
# --version has to print the program name followed by a version number.
# $^X is used instead of a bare "perl" so the child runs under the interpreter
# that executes this test, not whatever is first in PATH.
stdout_like(
  sub { system($^X, $script, '--version') },
  qr!tei2korapxml - v\d+?\.\d+?!,
  'Version'
);
31
32
# Load example file
my $file = catfile($f, 'data', 'goe_sample.i5.xml');

# tmpnam() only hands out a file name; the archive itself is created below
my $outzip = tmpnam();

# Do not leave the generated archive behind once the test script exits
END { unlink $outzip if defined $outzip && -e $outzip }

# Generate zip file: the script reads the sample from STDIN and writes the
# archive to STDOUT, while its progress messages on STDERR are checked.
stderr_like(
  sub {
    # Point our own STDIN/STDOUT at the input and output files for the
    # lifetime of the child process. The child inherits both handles, so
    # neither a shell, nor `cat`, nor any quoting of the paths is needed.
    open(my $saved_in,  '<&', \*STDIN)  or die "Unable to dup STDIN: $!";
    open(my $saved_out, '>&', \*STDOUT) or die "Unable to dup STDOUT: $!";
    open(STDIN,  '<', $file)   or die "Unable to read $file: $!";
    open(STDOUT, '>', $outzip) or die "Unable to write $outzip: $!";

    # $^X is the interpreter running this test (not "perl" from PATH)
    system($^X, $script);

    # Put the original handles back for the remaining tests
    open(STDIN,  '<&', $saved_in)  or die "Unable to restore STDIN: $!";
    open(STDOUT, '>&', $saved_out) or die "Unable to restore STDOUT: $!";
  },
  qr!tei2korapxml: .*? text_id=GOE_AGA\.00000!,
  'Processing'
);
43
# Uncompress GOE/header.xml from zip file
my $zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/header.xml');

# Without the member there is nothing to read: stop with the reason reported
# by the unzip layer instead of failing later on an undefined object.
ok($zip, 'Zip-File is created')
  or die "Unable to open GOE/header.xml in $outzip: $UnzipError";

# Read GOE/header.xml
my $header_xml = '';
until ($zip->eof) {
  $header_xml .= $zip->getline;
}
ok($zip->close, 'Closed');

# Corpus level metadata
xml_is($header_xml, '//korpusSigle', 'GOE', 'korpusSigle');
xml_is($header_xml, '//h.title[@type="main"]', 'Goethes Werke', 'h.title');
xml_is($header_xml, '//h.author', 'Goethe, Johann Wolfgang von', 'h.author');
xml_is($header_xml, '//pubDate[@type="year"]', '1982', 'pubDate');
58
59
# Uncompress GOE/AGA/header.xml from zip file
$zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/header.xml');

# Without the member there is nothing to read: stop with the reason reported
# by the unzip layer instead of failing later on an undefined object.
ok($zip, 'Zip-File is found')
  or die "Unable to open GOE/AGA/header.xml in $outzip: $UnzipError";

# Read GOE/AGA/header.xml
$header_xml = '';
until ($zip->eof) {
  $header_xml .= $zip->getline;
}
ok($zip->close, 'Closed');

# Document level metadata
xml_is($header_xml, '//dokumentSigle', 'GOE/AGA', 'dokumentSigle');
xml_is($header_xml, '//d.title', 'Goethe: Autobiographische Schriften II, (1817-1825, 1832)', 'd.title');
xml_is($header_xml, '//creatDate', '1820-1822', 'creatDate');
73
74
# Uncompress GOE/AGA/00000/header.xml from zip file
$zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/00000/header.xml');

# Without the member there is nothing to read: stop with the reason reported
# by the unzip layer instead of failing later on an undefined object.
ok($zip, 'Zip-File is found')
  or die "Unable to open GOE/AGA/00000/header.xml in $outzip: $UnzipError";

# Read GOE/AGA/00000/header.xml
$header_xml = '';
until ($zip->eof) {
  $header_xml .= $zip->getline;
}
ok($zip->close, 'Closed');

# Text level metadata
# This is slow - should be improved for more tests
xml_is($header_xml, '//textSigle', 'GOE/AGA.00000', 'textSigle');
xml_is($header_xml, '//analytic/h.title[@type="main"]', 'Campagne in Frankreich', 'h.title');
88
89
# Uncompress GOE/AGA/00000/data.xml from zip file
$zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/00000/data.xml');

# Without the member there is nothing to read: stop with the reason reported
# by the unzip layer instead of failing later on an undefined object.
ok($zip, 'Zip-File is found')
  or die "Unable to open GOE/AGA/00000/data.xml in $outzip: $UnzipError";

# Read GOE/AGA/00000/data.xml
my $data_xml = '';
until ($zip->eof) {
  $data_xml .= $zip->getline;
}
ok($zip->close, 'Closed');

# The root element carries the text id, its <text> child the raw text.
# The "." in "begl.cke" stands in for the non-ASCII character of the sample.
xml_node($data_xml, '/*[name()="raw_text" and @docid="GOE_AGA.00000"]', 'text id');
xml_like($data_xml, '/*[local-name()="raw_text"]/*[local-name()="text"]', qr!^Campagne in Frankreich 1792.*?uns allein begl.cke\.$!, 'text content');
102
# Uncompress GOE/AGA/00000/struct/structure.xml from zip file
$zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/00000/struct/structure.xml');

# Without the member there is nothing to read: stop with the reason reported
# by the unzip layer instead of failing later on an undefined object.
ok($zip, 'Zip-File is found')
  or die "Unable to open GOE/AGA/00000/struct/structure.xml in $outzip: $UnzipError";

# Read GOE/AGA/00000/struct/structure.xml
my $struct_xml = '';
until ($zip->eof) {
  $struct_xml .= $zip->getline;
}
ok($zip->close, 'Closed');

# The span with id "s3" has to contain an entry named "type" with this value
xml_is($struct_xml, '//*[name()="span" and @id="s3"]//*[@name="type"]', 'Autobiographie', 'text content');
114
# No fixed plan: tell the harness that all tests have been run
done_testing();