blob: 4e633e407997b2b7d17377294a4c244fde26ac5d [file] [log] [blame]
#!/usr/bin/env perl
2use strict;
3use warnings;
4use File::Basename 'dirname';
5use File::Spec::Functions qw/catdir catfile/;
6use File::Temp qw/tempdir/;
7use Mojo::Util qw/slurp/;
8use Mojo::JSON qw/decode_json/;
9use IO::Uncompress::Gunzip;
10use Test::More;
11use Test::Output;
12use Data::Dumper;
Nils Diewaldb3e9ccd2016-10-24 15:16:52 +020013use KorAP::XML::Archive;
Akron03b24db2016-08-16 20:54:32 +020014use utf8;
15
# Directory holding this test file; the script and the corpus fixtures
# are addressed relative to it
my $f = dirname(__FILE__);

# Path to the command line script under test, two directories up
my $script = catfile($f, '..', '..', 'script', 'korapxml2krill');

# Command string invoking the 'extract' command without further options
my $call = join(' ', 'perl', $script, 'extract');

# Skip every test in this file when KorAP::XML::Archive::test_unzip
# returns a false value
plan skip_all => 'unzip not found'
  unless KorAP::XML::Archive::test_unzip();
28
# Run 'extract' with no parameters at all: the captured STDOUT has to
# match the pattern below. The command string doubles as the test name.
stdout_like(
  sub { system($call) },
  qr!extract.+?\$ korapxml2krill!s,
  $call
);
37
# Archive fixture used for the unrestricted extraction run
my $input = catfile($f, '..', 'corpus', 'archive.zip');
ok(-f $input, 'Input archive found');

# Temporary target directory, registered for cleanup with File::Temp
my $output = tempdir(CLEANUP => 1);
ok(-d $output, 'Output directory exists');

# Extract the archive into the temporary directory
$call = join(
  ' ',
  'perl', $script, 'extract',
  '--input', $input,
  '--output', $output,
);

# The run has to report TEST/BSP/1 on STDOUT
stdout_like(
  sub { system($call) },
  qr!TEST/BSP/1 extracted.!s,
  $call
);

# TEST/BSP/1 is checked in detail: its directory, three
# subdirectories and two XML files
my @first_text = ($output, 'TEST', 'BSP', '1');
ok(-d catdir(@first_text), 'Directory created');
ok(-d catdir(@first_text, $_), 'Directory created') for qw/base sgbr struct/;
ok(-f catfile(@first_text, $_), 'File created') for qw/data.xml header.xml/;

# For the texts 2 and 3 only the existence of the directory is checked
ok(-d catdir($output, 'TEST', 'BSP', $_), 'Directory created') for qw/2 3/;
69
# Check sigles: extraction restricted to a single text sigle
my $output2 = tempdir(CLEANUP => 1);
ok(-d $output2, 'Output directory exists');

# Same archive as before, but only TEST/BSP/4 is requested via -sg
$call = join(
  ' ',
  'perl', $script,
  'extract',
  '--input' => $input,
  '--output' => $output2,
  '-sg' => 'TEST/BSP/4'
);

# The requested text has to be reported as extracted
stdout_like(
  sub {
    system($call);
  },
  qr!TEST/BSP/4 extracted.!s,
  $call
);

# A text that was not requested must not be reported
stdout_unlike(
  sub {
    system($call);
  },
  qr!TEST/BSP/5 extracted.!s,
  $call
);

# Only the requested text may exist in the output directory.
# The negated checks are labelled 'Directory not created', so that
# the test report states what is actually asserted.
ok(!-d catdir($output2, 'TEST', 'BSP', '1'), 'Directory not created');
ok(!-d catdir($output2, 'TEST', 'BSP', '2'), 'Directory not created');
ok(!-d catdir($output2, 'TEST', 'BSP', '3'), 'Directory not created');
ok(-d catdir($output2, 'TEST', 'BSP', '4'), 'Directory created');
ok(!-d catdir($output2, 'TEST', 'BSP', '5'), 'Directory not created');
106
Akron20807582016-10-26 17:11:34 +0200107
# Test with document sigle
# The archive is a regular file, so its path is built with catfile
# (catdir is meant for directory paths only)
my $input_rei = catfile($f, '..', 'corpus', 'archive_rei.zip');
ok(-f $input_rei, 'Input archive found');

# Request the document REI/BNG; the output goes to the directory
# already used by the preceding sigle test
$call = join(
  ' ',
  'perl', $script,
  'extract',
  '--input' => $input_rei,
  '--output' => $output2,
  '-sg' => 'REI/BNG'
);

# The requested document has to be reported as extracted
stdout_like(
  sub {
    system($call);
  },
  qr!REI/BNG extracted!s,
  $call
);

# A document that was not requested must not be reported
stdout_unlike(
  sub {
    system($call);
  },
  qr!REI/RBR extracted!s,
  $call
);

# Texts below REI/BNG exist, the text below REI/RBR does not
ok(-d catdir($output2, 'REI', 'BNG', '00071'), 'Directory created');
ok(-d catdir($output2, 'REI', 'BNG', '00128'), 'Directory created');
ok(!-d catdir($output2, 'REI', 'RBR', '00610'), 'Directory not created');
142
Akron2fd402b2016-10-27 21:26:48 +0200143
# Test with wildcard sigle
# tempdir() returns a plain path string, so requesting a new directory
# is all that is needed to start with an empty target
$output2 = tempdir(CLEANUP => 1);
ok(-d $output2, 'Output directory exists');

# The arguments are kept as a list and handed to system() in list form:
# a single command string containing '*' would be passed to the shell,
# where the sigle pattern could be expanded as a file glob before the
# script sees it.
my @wildcard_call = (
  'perl', $script,
  'extract',
  '--input' => $input_rei,
  '--output' => $output2,
  '-sg' => 'REI/BN*'
);

# The joined form is only used as the test description
$call = join(' ', @wildcard_call);

# The sigle pattern has to be reported as extracted
stdout_like(
  sub {
    system(@wildcard_call);
  },
  qr!REI/BN\* extracted!s,
  $call
);

# A document not covered by the pattern must not be reported
stdout_unlike(
  sub {
    system(@wildcard_call);
  },
  qr!REI/RBR extracted!s,
  $call
);

# Texts below REI/BNG exist, the text below REI/RBR does not
ok(-d catdir($output2, 'REI', 'BNG', '00071'), 'Directory created');
ok(-d catdir($output2, 'REI', 'BNG', '00128'), 'Directory created');
ok(!-d catdir($output2, 'REI', 'RBR', '00610'), 'Directory not created');
178
179
180
181
182
183
184
# Check multiple archives
$output = tempdir(CLEANUP => 1);
ok(-d $output, 'Output directory exists');

# Three archives from corpus/archives, each handed over with its own -i
my @archives = map { catfile($f, '..', 'corpus', 'archives', $_) } qw/
  wpd15-single.zip
  wpd15-single.tree_tagger.zip
  wpd15-single.opennlp.zip
/;

$call = join(
  ' ',
  'perl', $script, 'extract',
  (map { ('-i', $_) } @archives),
  '--output', $output
);

# The text has to be reported as extracted
stdout_like(
  sub { system($call) },
  qr!WPD15/A00/00081 extracted.!s,
  $call
);

# Text directory, document header and base directory
my @text_dir = ($output, 'WPD15', 'A00', '00081');
ok(-d catdir(@text_dir), 'Directory created');
ok(-f catfile($output, 'WPD15', 'A00', 'header.xml'), 'Header file created');
ok(-d catdir(@text_dir, 'base'), 'Directory created');

# morpho.xml files in the tree_tagger and opennlp subdirectories
ok(-f catfile(@text_dir, $_, 'morpho.xml'), 'New archive')
  for qw/tree_tagger opennlp/;


done_testing;
__END__