blob: 736647b0e0298e82743fd20e2ff5b196346783be [file] [log] [blame]
#!/usr/bin/env perl
2use strict;
3use warnings;
4use File::Basename 'dirname';
5use File::Spec::Functions qw/catdir catfile/;
6use File::Temp qw/tempdir/;
7use Mojo::Util qw/slurp/;
8use Mojo::JSON qw/decode_json/;
9use IO::Uncompress::Gunzip;
10use Test::More;
11use Test::Output;
12use Data::Dumper;
Nils Diewaldb3e9ccd2016-10-24 15:16:52 +020013use KorAP::XML::Archive;
Akron03b24db2016-08-16 20:54:32 +020014use utf8;
15
# Resolve the korapxml2krill script relative to the directory of this test file.
my $f      = dirname(__FILE__);
my $script = catfile($f, '..', '..', 'script', 'korapxml2krill');

# Shell command line for the "extract" subcommand without any options.
my $call = "perl $script extract";

# Skip the whole test file when KorAP::XML::Archive reports that
# unzip is not available.
plan(skip_all => 'unzip not found')
  unless KorAP::XML::Archive::test_unzip();
28
# Test without parameters: the output has to mention "extract" followed
# (possibly lines later) by a "$ korapxml2krill" prompt - presumably the
# help text of the subcommand.
my $usage_re = qr!extract.+?\$ korapxml2krill!s;
stdout_like(sub { system($call) }, $usage_re, $call);
37
# Extract a complete archive into a fresh temporary directory.
my $input = catfile($f, '..', 'corpus', 'archive.zip');
ok(-f $input, 'Input archive found');

my $output = tempdir(CLEANUP => 1);
ok(-d $output, 'Output directory exists');

$call = "perl $script extract --input $input --output $output";

# Matches three literal dots, at least one line break, and three more
# literal dots - used between a sigle and its status in the output.
my $sep = qr!\.\.\.[\n\r]+?\.\.\.!;

# Test without compression
stdout_like(sub { system($call) }, qr!TEST/BSP/1 $sep extracted!s, $call);

# The first text is checked in detail: its directory, the annotation
# subdirectories and the two XML files directly below it.
my @text = ($output, 'TEST', 'BSP', '1');
ok(-d catdir(@text), 'Directory created');
ok(-d catdir(@text, $_), 'Directory created') for qw/base sgbr struct/;
ok(-f catfile(@text, $_), 'File created') for qw/data.xml header.xml/;

# For the further texts only the existence of the directory is checked.
ok(-d catdir($output, 'TEST', 'BSP', $_), 'Directory created') for qw/2 3/;
72
# Check sigles: restrict the extraction to a single text sigle and
# write the result to a separate temporary directory.
my $output2 = tempdir(CLEANUP => 1);
ok(-d $output2, 'Output directory exists');

$call = join(
  ' ',
  'perl', $script,
  'extract',
  '--input' => $input,
  '--output' => $output2,
  '-sg' => 'TEST/BSP/4'
);

# The requested sigle has to be reported as extracted ...
stdout_like(
  sub {
    system($call);
  },
  qr!TEST/BSP/4 $sep extracted.!s,
  $call
);

# ... while a sigle that was not requested must not be reported.
# (The command is run a second time to capture its output again.)
stdout_unlike(
  sub {
    system($call);
  },
  qr!TEST/BSP/5 $sep extracted.!s,
  $call
);

# Only the directory of the requested sigle may exist. The negated
# checks are labelled accordingly, so the test output says what is
# actually asserted.
ok(!-d catdir($output2, 'TEST', 'BSP', '1'), 'Directory not created');
ok(!-d catdir($output2, 'TEST', 'BSP', '2'), 'Directory not created');
ok(!-d catdir($output2, 'TEST', 'BSP', '3'), 'Directory not created');
ok(-d catdir($output2, 'TEST', 'BSP', '4'), 'Directory created');
ok(!-d catdir($output2, 'TEST', 'BSP', '5'), 'Directory not created');
109
Akron20807582016-10-26 17:11:34 +0200110
# Test with document sigle
# The archive is a file, so its path is built with catfile (as for all
# other archives in this test) instead of catdir.
my $input_rei = catfile($f, '..', 'corpus', 'archive_rei.zip');
ok(-f $input_rei, 'Input archive found');

$call = join(
  ' ',
  'perl', $script,
  'extract',
  '--input' => $input_rei,
  '--output' => $output2,
  '-sg' => 'REI/BNG'
);

# The requested document sigle has to be mentioned in the output ...
stdout_like(
  sub {
    system($call);
  },
  qr!Extract .+? REI/BNG!s,
  $call
);

# ... while a document sigle that was not requested must not be.
# (The command is run a second time to capture its output again.)
stdout_unlike(
  sub {
    system($call);
  },
  qr!Extract .+? REI/RBR!s,
  $call
);

# Texts below REI/BNG exist, the text below REI/RBR does not.
ok(-d catdir($output2, 'REI', 'BNG', '00071'), 'Directory created');
ok(-d catdir($output2, 'REI', 'BNG', '00128'), 'Directory created');
ok(!-d catdir($output2, 'REI', 'RBR', '00610'), 'Directory not created');
145
Akron2fd402b2016-10-27 21:26:48 +0200146
# Test with a wildcard in the document sigle, using a fresh output directory.
$output2 = tempdir(CLEANUP => 1);

# NOTE(review): the "*" reaches the shell unquoted - confirm this is
# intended and cannot be expanded by the shell in the test environment.
$call = "perl $script extract --input $input_rei --output $output2 -sg REI/BN*";

# The wildcard sigle itself has to be mentioned in the output ...
stdout_like(sub { system($call) }, qr!Extract .+? REI/BN\*!s, $call);

# ... and nothing below REI/RBR may be reported as extracted.
stdout_unlike(sub { system($call) }, qr!REI/RBR $sep extracted!s, $call);

# Texts below REI/BNG exist, the text below REI/RBR does not.
ok(-d catdir($output2, 'REI', 'BNG', $_), 'Directory created')
  for qw/00071 00128/;
ok(!-d catdir($output2, 'REI', 'RBR', '00610'), 'Directory not created');
181
182
183
184
185
186
187
# Check multiple archives: several input archives are extracted into
# one fresh output directory.
$output = tempdir(CLEANUP => 1);
ok(-d $output, 'Output directory exists');

my @archives = map { catfile($f, '..', 'corpus', 'archives', $_) } qw/
  wpd15-single.zip
  wpd15-single.tree_tagger.zip
  wpd15-single.opennlp.zip
/;

# Every archive is passed with its own -i option.
$call = join(
  ' ',
  'perl', $script,
  'extract',
  (map { ('-i' => $_) } @archives),
  '--output' => $output
);

stdout_like(sub { system($call) }, qr!WPD15/A00/00081 $sep extracted!s, $call);

# Files of the first archive
my @doc = ($output, 'WPD15', 'A00', '00081');
ok(-d catdir(@doc), 'Directory created');
ok(-f catfile($output, 'WPD15', 'A00', 'header.xml'), 'Header file created');
ok(-d catdir(@doc, 'base'), 'Directory created');

# Files of the further archives end up below the same text directory
ok(-f catfile(@doc, $_, 'morpho.xml'), 'New archive')
  for qw/tree_tagger opennlp/;
217
Akron03b24db2016-08-16 20:54:32 +0200218
# With quotes:
# Test with a sigle that contains a space and double quotes
my $input_quotes = catfile($f, '..', 'corpus', 'archive_quotes.zip');

# Check the archive that is actually used below (not the unrelated $input)
ok(-f $input_quotes, 'Input archive found');
$output2 = tempdir(CLEANUP => 1);

# The sigle is wrapped in double quotes with the inner quotes escaped,
# so the shell passes it to the script as a single argument.
$call = join(
  ' ',
  'perl', $script,
  'extract',
  '--input' => $input_quotes,
  '--output' => $output2,
  '-sg' => '"TEST/BSP \"Example\"/1"'
);

# The sigle has to show up in the output with its quotes intact
stdout_like(
  sub {
    system($call);
  },
  qr!TEST/BSP "Example"\/1 $sep extracted!s,
  $call
);

done_testing;
__END__