blob: adf75592fd9d4aea4fdf239c1ac9d26d19fe876e [file] [log] [blame]
Akron150b29e2016-02-14 23:06:48 +01001#!/usr/bin/env perl
2use strict;
3use warnings;
Akron31a08cb2019-02-20 20:43:26 +01004use Data::Dumper;
Akron150b29e2016-02-14 23:06:48 +01005use Test::More;
6use File::Basename 'dirname';
7use File::Spec::Functions qw/catfile catdir/;
8use File::Temp qw/tempdir/;
Akrona3518372024-01-22 23:29:00 +01009use Test::Output qw/:stdout :stderr :functions/;
Akron150b29e2016-02-14 23:06:48 +010010
Nils Diewaldb3e9ccd2016-10-24 15:16:52 +020011use KorAP::XML::Archive;
Akron150b29e2016-02-14 23:06:48 +010012
# Open the fixture archive that is stored next to this test file.
my $file = catfile(dirname(__FILE__), 'corpus', 'archive.zip');
my $archive = KorAP::XML::Archive->new($file);

# Skip the whole test file when the archive object reports that
# unzip is not available.
unless ($archive->test_unzip) {
  plan skip_all => 'unzip not found';
};

ok($archive->test, 'Test archive');
like($archive->path(0), qr/archive\.zip$/, 'Archive path');

ok($archive->check_prefix, 'Archive has dot prefix');

# The fixture is expected to list ten texts,
# './TEST/BSP/1' first and './TEST/BSP/10' last.
my @list = $archive->list_texts;
is(scalar @list, 10, 'Found all tests');
is($list[0], './TEST/BSP/1', 'First document');
is($list[-1], './TEST/BSP/10', 'Last document');

# Drain the iterator variant of the text listing and make sure
# it yields exactly the paths list_texts returned, in the same order.
my $next_text = $archive->list_texts_iterator;
ok($next_text, 'Iterator created');
my @seen;
while (1) {
  my $entry = $next_text->();

  # The iterator signals exhaustion with an undefined value
  last unless defined $entry;
  push @seen, $entry;
}
is_deeply(\@seen, \@list, 'Iterator returns same paths as list_texts');

# The plain counter is expected to report the same ten texts
is($archive->count_texts, 10, 'count_texts returns correct number');

# split_path is expected to break a text path into four parts:
# the prefix followed by the corpus, document and text components.
my @path = $archive->split_path('./TEST/BSP/9');
is($path[0], '.', 'Prefix');
is($path[1], 'TEST', 'Corpus part');
is($path[2], 'BSP', 'Document part');
is($path[3], '9', 'Text part');

# Scratch directory that receives the extracted files
# (File::Temp removes it again because of CLEANUP).
my $dir = tempdir(CLEANUP => 1);

{
  # Swallow warnings raised during extraction
  local $SIG{__WARN__} = sub {};

  # Capture STDOUT so the progress message can be inspected.
  # NOTE(review): the first argument (0 here, 1 in the later run)
  # presumably controls whether progress is printed - confirm
  # against KorAP::XML::Archive.
  my $stdout = stdout_from(
    sub {
      ok($archive->extract_sigle(0, ['TEST/BSP/8'], $dir), 'Wrong path');
    }
  );
  like($stdout, qr!Extract unzip!, 'Extraction is reported on STDOUT');
};

# Directories are built with catdir, header files with catfile
ok(-d catdir($dir, 'TEST'), 'Test corpus directory exists');
ok(-f catfile($dir, 'TEST', 'header.xml'), 'Test corpus header exists');
ok(-d catdir($dir, 'TEST', 'BSP'), 'Test doc directory exists');
ok(-f catfile($dir, 'TEST', 'BSP', 'header.xml'), 'Test doc header exists');

# Second fixture: an archive whose entries have no leading '.'
$file = catfile(dirname(__FILE__), 'corpus', 'archive_rei.zip');
$archive = KorAP::XML::Archive->new($file);
ok(!$archive->check_prefix, 'Archive has no dot prefix');

# Each generated command is dereferenced as an array;
# join its elements into one string for pattern matching.
my @cmd = map { join ' ', @{$_} } $archive->cmds_from_sigle(['REI/RB*', 'REI/BNG/00071']);

# NOTE(review): the literal 't/corpus/' path presumably requires the
# suite to be started from the distribution root - confirm.
like($cmd[0], qr!unzip -qo -uo t/corpus/archive_rei\.zip!, 'Command calls unzip on the archive');
like($cmd[0], qr!\QREI/header.xml REI/RB*/header.xml REI/RB* REI/BNG/header.xml REI/BNG/00071/*\E!, 'Command lists headers and data of the requested sigles');
ok(!$cmd[1], 'No second command');

# New temporary directory
$dir = tempdir(CLEANUP => 1);

{
  # Swallow warnings raised during extraction
  local $SIG{__WARN__} = sub {};

  # With 1 as first argument this run is expected to print
  # nothing to STDOUT, unlike the earlier run that passed 0.
  my $stdout = stdout_from(
    sub {
      ok($archive->extract_sigle(1, ['REI/RB*', 'REI/BNG/00071'], $dir), 'Fine');
    }
  );
  is($stdout, '', 'Nothing is written to STDOUT');
};

# Directories of the explicitly requested text REI/BNG/00071
ok(-d catdir($dir, 'REI'), 'Corpus directory exists');
ok(-d catdir($dir, 'REI', 'BNG'), 'Document directory exists');
ok(-d catdir($dir, 'REI', 'BNG', '00071'), 'Text directory exists');

# Headers on every level of the explicitly requested text
ok(-f catfile($dir, 'REI', 'header.xml'), 'Corpus header exists');
ok(-f catfile($dir, 'REI', 'BNG', 'header.xml'), 'Document header exists');
ok(-f catfile($dir, 'REI', 'BNG', '00071', 'header.xml'), 'Text header exists');

# Headers extracted through the 'REI/RB*' wildcard
ok(-f catfile($dir, 'REI', 'RBR', 'header.xml'), 'Wildcard document header exists');
ok(-f catfile($dir, 'REI', 'RBR', '00610', 'header.xml'), 'Wildcard text header exists');

# A text that was not requested must not show up in the target
ok(!-e catdir($dir, 'REI', 'BNG', '00128'), 'Unrequested text directory does not exist');


done_testing;

__END__