blob: 884cdb5718e904b1bbeeaa294a0a579657e5b438 [file] [log] [blame]
#!/usr/bin/env perl
2use strict;
3use warnings;
4use File::Basename 'dirname';
5use File::Spec::Functions qw/catdir catfile/;
6use File::Temp qw/tempdir/;
7use Mojo::Util qw/slurp/;
8use Mojo::JSON qw/decode_json/;
9use IO::Uncompress::Gunzip;
10use Test::More;
11use Test::Output;
12use Data::Dumper;
Nils Diewaldb3e9ccd2016-10-24 15:16:52 +020013use KorAP::XML::Archive;
Akron03b24db2016-08-16 20:54:32 +020014use utf8;
15
# Resolve the korapxml2krill script relative to the directory of this test
my $f      = dirname(__FILE__);
my $script = catfile($f, '..', '..', 'script', 'korapxml2krill');

# Base command line: the "extract" subcommand without any options
my $call = "perl $script extract";
24
# Skip every test in this file when the archive module reports
# that unzip is not available
plan(skip_all => 'unzip not found')
  unless KorAP::XML::Archive::test_unzip();
28
# Call "extract" without parameters: the output has to mention the
# extract command followed by a "$ korapxml2krill" line
stdout_like(
  sub { system($call) },
  qr!extract.+?\$ korapxml2krill!s,
  $call
);
37
# Fixture archive and a temporary target directory for a full extraction
my $input = catfile($f, '..', 'corpus', 'archive.zip');
ok(-f $input, 'Input archive found');

my $output = tempdir(CLEANUP => 1);
ok(-d $output, 'Output directory exists');

$call = "perl $script extract --input $input --output $output";

# Matches an ellipsis, at least one line break, and another ellipsis
my $sep = qr!\.\.\.[\n\r]+?\.\.\.!;

# Extract the whole archive (no sigle restriction)
stdout_like(
  sub { system($call) },
  qr!TEST/BSP/1 $sep extracted.!s,
  $call
);

# Text 1: the text directory itself and its sub directories
for my $sub ([], ['base'], ['sgbr'], ['struct']) {
  ok(-d catdir($output, 'TEST', 'BSP', '1', @$sub), 'Directory created');
}

# Text 1: primary data and header
for my $file (qw/data.xml header.xml/) {
  ok(-f catfile($output, 'TEST', 'BSP', '1', $file), 'File created');
}

# Further texts of the same document
for my $text (qw/2 3/) {
  ok(-d catdir($output, 'TEST', 'BSP', $text), 'Directory created');
}
71
# Check sigles: restrict the extraction to a single text sigle
my $output2 = tempdir(CLEANUP => 1);
ok(-d $output2, 'Output directory exists');

$call = join(
  ' ',
  'perl', $script,
  'extract',
  '--input' => $input,
  '--output' => $output2,
  '-sg' => 'TEST/BSP/4'
);

# The requested text has to be reported as extracted
stdout_like(
  sub {
    system($call);
  },
  qr!TEST/BSP/4 $sep extracted.!s,
  $call
);

# A text that was not requested must not be reported as extracted
stdout_unlike(
  sub {
    system($call);
  },
  qr!TEST/BSP/5 $sep extracted.!s,
  $call
);

# Only the directory of the requested text may exist afterwards.
# The descriptions of the negated checks say "not created" so that
# the TAP output matches what is actually asserted.
ok(!-d catdir($output2, 'TEST', 'BSP', '1'), 'Directory not created');
ok(!-d catdir($output2, 'TEST', 'BSP', '2'), 'Directory not created');
ok(!-d catdir($output2, 'TEST', 'BSP', '3'), 'Directory not created');
ok(-d catdir($output2, 'TEST', 'BSP', '4'), 'Directory created');
ok(!-d catdir($output2, 'TEST', 'BSP', '5'), 'Directory not created');
108
Akron20807582016-10-26 17:11:34 +0200109
# Test with document sigle
# The archive is a file, so its path is built with catfile (not catdir),
# consistent with the other fixture archives in this test.
my $input_rei = catfile($f, '..', 'corpus', 'archive_rei.zip');
ok(-f $input_rei, 'Input archive found');

$call = join(
  ' ',
  'perl', $script,
  'extract',
  '--input' => $input_rei,
  '--output' => $output2,
  '-sg' => 'REI/BNG'
);

# The requested document has to be reported as extracted
stdout_like(
  sub {
    system($call);
  },
  qr!REI/BNG $sep extracted!s,
  $call
);

# A document that was not requested must not be reported
stdout_unlike(
  sub {
    system($call);
  },
  qr!REI/RBR $sep extracted!s,
  $call
);

# Texts below REI/BNG are expected, the REI/RBR text is not
ok(-d catdir($output2, 'REI', 'BNG', '00071'), 'Directory created');
ok(-d catdir($output2, 'REI', 'BNG', '00128'), 'Directory created');
ok(!-d catdir($output2, 'REI', 'RBR', '00610'), 'Directory not created');
144
Akron2fd402b2016-10-27 21:26:48 +0200145
# Test with a wildcard document sigle, extracting into a new
# temporary directory
$output2 = tempdir(CLEANUP => 1);

$call = "perl $script extract --input $input_rei --output $output2 -sg REI/BN*";

# The wildcard sigle itself is reported as extracted
stdout_like(
  sub { system($call) },
  qr!REI/BN\* $sep extracted!s,
  $call
);

# A document that does not match the wildcard must not be reported
stdout_unlike(
  sub { system($call) },
  qr!REI/RBR $sep extracted!s,
  $call
);

# Texts below REI/BNG are expected, the REI/RBR text is not
for my $text (qw/00071 00128/) {
  ok(-d catdir($output2, 'REI', 'BNG', $text), 'Directory created');
}
ok(!-d catdir($output2, 'REI', 'RBR', '00610'), 'Directory not created');
180
181
182
183
184
185
186
# Check multiple archives: several -i options are passed in one call
$output = tempdir(CLEANUP => 1);
ok(-d $output, 'Output directory exists');

$call = join(
  ' ',
  'perl', $script,
  'extract',
  (
    map { ('-i' => catfile($f, '..', 'corpus', 'archives', $_)) }
      qw/wpd15-single.zip
         wpd15-single.tree_tagger.zip
         wpd15-single.opennlp.zip/
  ),
  '--output' => $output
);

# The single text contained in the archives is reported as extracted
stdout_like(
  sub { system($call) },
  qr!WPD15/A00/00081 $sep extracted.!s,
  $call
);

# Content of the first archive
ok(-d catdir($output, 'WPD15', 'A00', '00081'), 'Directory created');
ok(-f catfile($output, 'WPD15', 'A00', 'header.xml'), 'Header file created');
ok(-d catdir($output, 'WPD15', 'A00', '00081', 'base'), 'Directory created');

# Content expected from the two additional archives
for my $annotation (qw/tree_tagger opennlp/) {
  ok(
    -f catfile($output, 'WPD15', 'A00', '00081', $annotation, 'morpho.xml'),
    'New archive'
  );
}
216
Akron03b24db2016-08-16 20:54:32 +0200217
# With quotes:
# Test with a sigle that contains double quotes
my $input_quotes = catfile($f, '..', 'corpus', 'archive_quotes.zip');

# Check the archive that is used below (not the first fixture archive)
ok(-f $input_quotes, 'Input archive found');

# New temporary output directory for this scenario
$output2 = tempdir(CLEANUP => 1);

# The sigle is wrapped in double quotes with the inner quotes escaped,
# because the command is passed to system() as a single string
$call = join(
  ' ',
  'perl', $script,
  'extract',
  '--input' => $input_quotes,
  '--output' => $output2,
  '-sg' => '"TEST/BSP \"Example\"/1"'
);

# The sigle is reported with its quotes intact
stdout_like(
  sub {
    system($call);
  },
  qr!TEST/BSP "Example"\/1 $sep extracted!s,
  $call
);

done_testing;
__END__