blob: 6c689f9f9d991da3a38090714927bde26bec642e [file] [log] [blame]
#!/usr/bin/env perl
2use strict;
3use warnings;
4use File::Basename 'dirname';
5use File::Spec::Functions qw/catdir catfile/;
6use File::Temp qw/tempdir/;
7use Mojo::Util qw/slurp/;
8use Mojo::JSON qw/decode_json/;
9use IO::Uncompress::Gunzip;
10use Test::More;
11use Test::Output;
12use Data::Dumper;
Nils Diewaldb3e9ccd2016-10-24 15:16:52 +020013use KorAP::XML::Archive;
Akron03b24db2016-08-16 20:54:32 +020014use utf8;
15
# Directory of this test file; all other paths are built relative to it
my $f = dirname(__FILE__);

# The korapxml2krill script sits in script/ two directory levels up
my $script = catfile($f, '..', '..', 'script', 'korapxml2krill');

# Shell command line for the bare "extract" subcommand
my $call = "perl $script extract";
24
# Skip every test in this file unless KorAP::XML::Archive::test_unzip
# returns a true value (the skip reason reports that unzip was not found)
plan(skip_all => 'unzip not found')
  unless KorAP::XML::Archive::test_unzip();
28
# Test without parameters: running the bare "extract" command must print
# output matching the pattern below; the command line doubles as test name
stdout_like { system($call); } qr!extract.+?Extracts KorAP-XML files!s, $call;
37
# Extract the test archive into a fresh temporary directory
my $input = catfile($f, '..', 'corpus', 'archive.zip');
ok(-f $input, 'Input archive found');

# The temporary directory is removed automatically (CLEANUP => 1)
my $output = tempdir(CLEANUP => 1);
ok(-d $output, 'Output directory exists');

$call = "perl $script extract --input $input --output $output";

# Run the extraction and expect a report for text TEST/BSP/1
stdout_like { system($call); } qr!TEST/BSP/1 extracted.!s, $call;

# Text 1 is checked in detail: its directory, subdirectories and files
my @text_one = ($output, 'TEST', 'BSP', '1');
ok(-d catdir(@text_one), 'Directory created');
ok(-d catdir(@text_one, $_), 'Directory created') for qw/base sgbr struct/;
ok(-f catfile(@text_one, $_), 'File created') for qw/data.xml header.xml/;

# For texts 2 and 3 only the existence of the directory is checked
ok(-d catdir($output, 'TEST', 'BSP', $_), 'Directory created') for qw/2 3/;
69
# Check sigles: restrict the extraction to the single text TEST/BSP/4
# and write the result into a second, fresh temporary directory
my $output2 = tempdir(CLEANUP => 1);
ok(-d $output2, 'Output directory exists');

$call = join(
  ' ',
  'perl', $script,
  'extract',
  '--input' => $input,
  '--output' => $output2,
  '-sg' => 'TEST/BSP/4'
);

# The requested text has to be reported as extracted
stdout_like(
  sub {
    system($call);
  },
  qr!TEST/BSP/4 extracted.!s,
  $call
);

# Running the same command again must not report a text
# that was not requested
stdout_unlike(
  sub {
    system($call);
  },
  qr!TEST/BSP/5 extracted.!s,
  $call
);

# Only the directory of text 4 may exist; the negated checks are
# labelled accordingly so the test output matches what is asserted
ok(!-d catdir($output2, 'TEST', 'BSP', '1'), 'Directory not created');
ok(!-d catdir($output2, 'TEST', 'BSP', '2'), 'Directory not created');
ok(!-d catdir($output2, 'TEST', 'BSP', '3'), 'Directory not created');
ok(-d catdir($output2, 'TEST', 'BSP', '4'), 'Directory created');
ok(!-d catdir($output2, 'TEST', 'BSP', '5'), 'Directory not created');
106
Akron20807582016-10-26 17:11:34 +0200107
# Test with document sigle: extract only the document REI/BNG.
# The archive path is built with catfile because its last component
# is a file, not a directory.
my $input_rei = catfile($f, '..', 'corpus', 'archive_rei.zip');
ok(-f $input_rei, 'Input archive found');

$call = join(
  ' ',
  'perl', $script,
  'extract',
  '--input' => $input_rei,
  '--output' => $output2,
  '-sg' => 'REI/BNG'
);

# The requested document has to be reported as extracted
stdout_like(
  sub {
    system($call);
  },
  qr!REI/BNG extracted!s,
  $call
);

# Running the same command again must not report a document
# that was not requested
stdout_unlike(
  sub {
    system($call);
  },
  qr!REI/RBR extracted!s,
  $call
);

# Texts of REI/BNG exist in the output directory, a text of REI/RBR does not
ok(-d catdir($output2, 'REI', 'BNG', '00071'), 'Directory created');
ok(-d catdir($output2, 'REI', 'BNG', '00128'), 'Directory created');
ok(!-d catdir($output2, 'REI', 'RBR', '00610'), 'Directory not created');
142
# Check multiple archives: several -i options are passed in one call
$output = tempdir(CLEANUP => 1);
ok(-d $output, 'Output directory exists');

# The three input archives, in the order they are passed to the script
my @archives = map { catfile($f, '..', 'corpus', 'archives', $_) } qw/
  wpd15-single.zip
  wpd15-single.tree_tagger.zip
  wpd15-single.opennlp.zip
/;

$call = join(
  ' ',
  'perl', $script, 'extract',
  (map { ('-i', $_) } @archives),
  '--output', $output
);

# Run the extraction and expect a report for text WPD15/A00/00081
stdout_like { system($call); } qr!WPD15/A00/00081 extracted.!s, $call;

my @doc = ($output, 'WPD15', 'A00');
ok(-d catdir(@doc, '00081'), 'Directory created');
ok(-f catfile(@doc, 'header.xml'), 'Header file created');
ok(-d catdir(@doc, '00081', 'base'), 'Directory created');

# Files expected from the tree_tagger and opennlp archives
ok(-f catfile(@doc, '00081', $_, 'morpho.xml'), 'New archive')
  for qw/tree_tagger opennlp/;


done_testing;
__END__