blob: e9f666f9dde9ebfc17cf4b31df6255d72b68207d [file] [log] [blame]
#/usr/bin/env perl
use strict;
use warnings;
use File::Basename 'dirname';
use File::Spec::Functions qw/catdir catfile/;
use File::Temp qw/:POSIX tempdir/;
use Mojo::JSON qw/decode_json/;
use IO::Uncompress::Gunzip;
use Test::More;
use Test::Output;
use Data::Dumper;
use KorAP::XML::Archive;
use utf8;
my $f = dirname(__FILE__);
my $script = catfile($f, '..', '..', 'script', 'korapxml2krill');
my $call = join(
' ',
'perl', $script,
'extract'
);
unless (KorAP::XML::Archive::test_unzip) {
plan skip_all => 'unzip not found';
};
# Test without parameters
stdout_like(
sub {
system($call);
},
qr!extract.+?\$ korapxml2krill!s,
$call
);
my $input = catfile($f, '..', 'corpus', 'archive.zip');
ok(-f $input, 'Input archive found');
my $output = tempdir(CLEANUP => 1);
ok(-d $output, 'Output directory exists');
my $cache = tmpnam();
$call = join(
' ',
'perl', $script,
'extract',
'--input' => $input,
'--output' => $output,
'--cache' => $cache
);
my $sep = qr!\.\.\.[\n\r]+?\.\.\.!;
# Test without compression
stdout_like(
sub {
system($call);
},
qr!TEST/BSP/1 $sep extracted!s,
# qr!TEST/BSP/1 $sep extracted.!s,
$call
);
ok(-d catdir($output, 'TEST', 'BSP', '1'), 'Directory created');
ok(-d catdir($output, 'TEST', 'BSP', '1', 'base'), 'Directory created');
ok(-d catdir($output, 'TEST', 'BSP', '1', 'sgbr'), 'Directory created');
ok(-d catdir($output, 'TEST', 'BSP', '1', 'struct'), 'Directory created');
ok(-f catfile($output, 'TEST', 'BSP', '1', 'data.xml'), 'File created');
ok(-f catfile($output, 'TEST', 'BSP', '1', 'header.xml'), 'File created');
ok(-d catdir($output, 'TEST', 'BSP', '2'), 'Directory created');
ok(-d catdir($output, 'TEST', 'BSP', '3'), 'Directory created');
# Check sigles
my $output2 = tempdir(CLEANUP => 1);
ok(-d $output2, 'Output directory exists');
$call = join(
' ',
'perl', $script,
'extract',
'--input' => $input,
'--output' => $output2,
'--cache' => $cache,
'-sg' => 'TEST/BSP/4'
);
# Test with sigle
stdout_like(
sub {
system($call);
},
qr!TEST/BSP/4 $sep extracted.!s,
$call
);
# Test with sigle
stdout_unlike(
sub {
system($call);
},
qr!TEST/BSP/5 $sep extracted.!s,
$call
);
ok(!-d catdir($output2, 'TEST', 'BSP', '1'), 'Directory created');
ok(!-d catdir($output2, 'TEST', 'BSP', '2'), 'Directory created');
ok(!-d catdir($output2, 'TEST', 'BSP', '3'), 'Directory created');
ok(-d catdir($output2, 'TEST', 'BSP', '4'), 'Directory created');
ok(!-d catdir($output2, 'TEST', 'BSP', '5'), 'Directory created');
# Test with document sigle
my $input_rei = catdir($f, '..', 'corpus', 'archive_rei.zip');
ok(-f $input_rei, 'Input archive found');
$call = join(
' ',
'perl', $script,
'extract',
'--input' => $input_rei,
'--output' => $output2,
'--cache' => $cache,
'-sg' => 'REI/BNG'
);
# Test with sigle
stdout_like(
sub {
system($call);
},
qr!Extract .+? REI/BNG!s,
$call
);
# Test with sigle
stdout_unlike(
sub {
system($call);
},
qr!Extract .+? REI/RBR!s,
$call
);
ok(-d catdir($output2, 'REI', 'BNG', '00071'), 'Directory created');
ok(-d catdir($output2, 'REI', 'BNG', '00128'), 'Directory created');
ok(!-d catdir($output2, 'REI', 'RBR', '00610'), 'Directory not created');
# Test with document sigle
$output2 = undef;
$output2 = tempdir(CLEANUP => 1);
$call = join(
' ',
'perl', $script,
'extract',
'--input' => $input_rei,
'--output' => $output2,
'--cache' => $cache,
'-sg' => 'REI/BN*'
);
# Test with sigle
stdout_like(
sub {
system($call);
},
qr!Extract .+? REI/BN\*!s,
$call
);
# Test with sigle
stdout_unlike(
sub {
system($call);
},
qr!REI/RBR $sep extracted!s,
$call
);
ok(-d catdir($output2, 'REI', 'BNG', '00071'), 'Directory created');
ok(-d catdir($output2, 'REI', 'BNG', '00128'), 'Directory created');
ok(!-d catdir($output2, 'REI', 'RBR', '00610'), 'Directory not created');
# Check multiple archives
$output = tempdir(CLEANUP => 1);
ok(-d $output, 'Output directory exists');
$call = join(
' ',
'perl', $script,
'extract',
'-i' => catfile($f, '..', 'corpus', 'archives', 'wpd15-single.zip'),
'-i' => catfile($f, '..', 'corpus', 'archives', 'wpd15-single.tree_tagger.zip'),
'-i' => catfile($f, '..', 'corpus', 'archives', 'wpd15-single.opennlp.zip'),
'--output' => $output,
'--cache' => $cache
);
# Test with sigle
stdout_like(
sub {
system($call);
},
qr!WPD15/A00/00081 $sep extracted!s,
$call
);
ok(-d catdir($output, 'WPD15', 'A00', '00081'), 'Directory created');
ok(-f catfile($output, 'WPD15', 'A00', 'header.xml'), 'Header file created');
ok(-d catdir($output, 'WPD15', 'A00', '00081', 'base'), 'Directory created');
ok(-f catfile($output, 'WPD15', 'A00', '00081', 'tree_tagger', 'morpho.xml'), 'New archive');
ok(-f catfile($output, 'WPD15', 'A00', '00081', 'opennlp', 'morpho.xml'), 'New archive');
# With quotes:
# Test with document sigle
my $input_quotes = catfile($f, '..', 'corpus', 'archive_quotes.zip');
ok(-f $input, 'Input archive found');
$output2 = undef;
$output2 = tempdir(CLEANUP => 1);
$call = join(
' ',
'perl', $script,
'extract',
'--input' => $input_quotes,
'--output' => $output2,
'--cache' => $cache,
'-sg' => '"TEST/BSP \"Example\"/1"'
);
# Test with sigle
stdout_like(
sub {
system($call);
},
qr!TEST/BSP "Example"\/1 $sep extracted!s,
# qr!Extract .+? TEST/BSP "Example"\/1!s,
$call
);
done_testing;
__END__