Akron | 150b29e | 2016-02-14 23:06:48 +0100 | [diff] [blame] | 1 | #!/usr/bin/env perl |
| 2 | use strict; |
| 3 | use warnings; |
Akron | 31a08cb | 2019-02-20 20:43:26 +0100 | [diff] [blame] | 4 | use Data::Dumper; |
Akron | 150b29e | 2016-02-14 23:06:48 +0100 | [diff] [blame] | 5 | use Test::More; |
| 6 | use File::Basename 'dirname'; |
| 7 | use File::Spec::Functions qw/catfile catdir/; |
| 8 | use File::Temp qw/tempdir/; |
Akron | a351837 | 2024-01-22 23:29:00 +0100 | [diff] [blame] | 9 | use Test::Output qw/:stdout :stderr :functions/; |
Akron | 150b29e | 2016-02-14 23:06:48 +0100 | [diff] [blame] | 10 | |
Nils Diewald | b3e9ccd | 2016-10-24 15:16:52 +0200 | [diff] [blame] | 11 | use KorAP::XML::Archive; |
Akron | 150b29e | 2016-02-14 23:06:48 +0100 | [diff] [blame] | 12 | |
| 13 | my $file = catfile(dirname(__FILE__), 'corpus','archive.zip'); |
| 14 | my $archive = KorAP::XML::Archive->new($file); |
| 15 | |
| 16 | unless ($archive->test_unzip) { |
| 17 | plan skip_all => 'unzip not found'; |
| 18 | }; |
| 19 | |
| 20 | ok($archive->test, 'Test archive'); |
Akron | 08385f6 | 2016-03-22 20:37:04 +0100 | [diff] [blame] | 21 | like($archive->path(0), qr/archive\.zip$/, 'Archive path'); |
Akron | 150b29e | 2016-02-14 23:06:48 +0100 | [diff] [blame] | 22 | |
Akron | 2080758 | 2016-10-26 17:11:34 +0200 | [diff] [blame] | 23 | ok($archive->check_prefix, 'Archive has dot prefix'); |
| 24 | |
Akron | 150b29e | 2016-02-14 23:06:48 +0100 | [diff] [blame] | 25 | my @list = $archive->list_texts; |
| 26 | is(scalar @list, 10, 'Found all tests'); |
| 27 | is($list[0], './TEST/BSP/1', 'First document'); |
Akron | e8adfcc | 2016-03-22 13:18:26 +0100 | [diff] [blame] | 28 | is($list[-1], './TEST/BSP/10', 'First document'); |
Akron | 150b29e | 2016-02-14 23:06:48 +0100 | [diff] [blame] | 29 | |
Akron | 8b03ba5 | 2025-07-15 09:16:18 +0200 | [diff] [blame] | 30 | # Test list_texts_iterator |
| 31 | my $iter = $archive->list_texts_iterator; |
| 32 | ok($iter, 'Iterator created'); |
| 33 | my @iter_list; |
| 34 | while (defined(my $path = $iter->())) { |
| 35 | push @iter_list, $path; |
| 36 | } |
| 37 | is_deeply(\@iter_list, \@list, 'Iterator returns same paths as list_texts'); |
| 38 | |
| 39 | # Test count_texts |
| 40 | is($archive->count_texts, 10, 'count_texts returns correct number'); |
| 41 | |
Akron | 150b29e | 2016-02-14 23:06:48 +0100 | [diff] [blame] | 42 | my @path = $archive->split_path('./TEST/BSP/9'); |
| 43 | is($path[0],'.', 'Prefix'); |
| 44 | is($path[1],'TEST', 'Prefix'); |
| 45 | is($path[2],'BSP', 'Prefix'); |
| 46 | is($path[3],'9', 'Prefix'); |
| 47 | |
| 48 | my $dir = tempdir(CLEANUP => 1); |
| 49 | |
| 50 | { |
| 51 | local $SIG{__WARN__} = sub {}; |
Akron | a351837 | 2024-01-22 23:29:00 +0100 | [diff] [blame] | 52 | my $stdout = stdout_from( |
| 53 | sub { |
| 54 | ok($archive->extract_sigle(0, ['TEST/BSP/8'], $dir), 'Wrong path'); |
| 55 | } |
| 56 | ); |
Marc Kupietz | cc95e1c | 2025-07-19 15:44:28 +0200 | [diff] [blame^] | 57 | like($stdout, qr!Extract (ripunzip|unzip)!); |
Akron | 150b29e | 2016-02-14 23:06:48 +0100 | [diff] [blame] | 58 | }; |
| 59 | |
| 60 | ok(-d catdir($dir, 'TEST'), 'Test corpus directory exists'); |
| 61 | ok(-f catdir($dir, 'TEST', 'header.xml'), 'Test corpus header exists'); |
| 62 | ok(-d catdir($dir, 'TEST', 'BSP'), 'Test doc directory exists'); |
| 63 | ok(-f catdir($dir, 'TEST', 'BSP', 'header.xml'), 'Test doc header exists'); |
| 64 | |
Akron | 2080758 | 2016-10-26 17:11:34 +0200 | [diff] [blame] | 65 | $file = catfile(dirname(__FILE__), 'corpus','archive_rei.zip'); |
| 66 | $archive = KorAP::XML::Archive->new($file); |
| 67 | ok(!$archive->check_prefix, 'Archive has no prefix'); |
| 68 | |
Akron | 60a8caa | 2017-02-17 21:51:27 +0100 | [diff] [blame] | 69 | # No leading '.' |
| 70 | $file = catfile(dirname(__FILE__), 'corpus','archive_rei.zip'); |
| 71 | $archive = KorAP::XML::Archive->new($file); |
| 72 | ok(!$archive->check_prefix, 'Archive has no dot prefix'); |
Akron | 08385f6 | 2016-03-22 20:37:04 +0100 | [diff] [blame] | 73 | |
Akron | 31a08cb | 2019-02-20 20:43:26 +0100 | [diff] [blame] | 74 | my @cmd = map { join ' ', @{$_} } $archive->cmds_from_sigle(['REI/RB*', 'REI/BNG/00071']); |
| 75 | |
Marc Kupietz | cc95e1c | 2025-07-19 15:44:28 +0200 | [diff] [blame^] | 76 | like($cmd[0], qr!(ripunzip unzip-file -q|unzip -qo -uo) t/corpus/archive_rei\.zip!); |
Akron | 31a08cb | 2019-02-20 20:43:26 +0100 | [diff] [blame] | 77 | like($cmd[0], qr!\QREI/header.xml REI/RB*/header.xml REI/RB* REI/BNG/header.xml REI/BNG/00071/*\E!); |
| 78 | ok(!$cmd[1]); |
| 79 | |
| 80 | # New temporary directory |
| 81 | $dir = tempdir(CLEANUP => 1); |
| 82 | |
| 83 | { |
| 84 | local $SIG{__WARN__} = sub {}; |
Akron | a351837 | 2024-01-22 23:29:00 +0100 | [diff] [blame] | 85 | my $stdout = stdout_from( |
| 86 | sub { |
| 87 | ok($archive->extract_sigle(1, ['REI/RB*', 'REI/BNG/00071'], $dir), 'Fine'); |
| 88 | } |
| 89 | ); |
| 90 | is($stdout, ''); |
Akron | 31a08cb | 2019-02-20 20:43:26 +0100 | [diff] [blame] | 91 | }; |
| 92 | |
Akron | a351837 | 2024-01-22 23:29:00 +0100 | [diff] [blame] | 93 | |
Akron | 31a08cb | 2019-02-20 20:43:26 +0100 | [diff] [blame] | 94 | ok(-d catdir($dir, 'REI'), 'Test corpus directory exists'); |
| 95 | ok(-d catdir($dir, 'REI','BNG'), 'Test corpus directory exists'); |
| 96 | ok(-d catdir($dir, 'REI','BNG','00071'), 'Test corpus directory exists'); |
| 97 | |
| 98 | ok(-f catdir($dir, 'REI', 'header.xml'), 'Test corpus directory exists'); |
| 99 | ok(-f catdir($dir, 'REI','BNG', 'header.xml'), 'Test corpus directory exists'); |
| 100 | ok(-f catdir($dir, 'REI','BNG','00071', 'header.xml'), 'Test corpus directory exists'); |
| 101 | |
| 102 | ok(-f catdir($dir, 'REI','RBR', 'header.xml'), 'Test corpus directory exists'); |
| 103 | ok(-f catdir($dir, 'REI','RBR','00610', 'header.xml'), 'Test corpus directory exists'); |
| 104 | ok(-f catdir($dir, 'REI','RBR','00610', 'header.xml'), 'Test corpus directory exists'); |
| 105 | |
| 106 | ok(!-e catdir($dir, 'REI','BNG','00128'), 'Test corpus directory does not exist'); |
| 107 | |
| 108 | |
Akron | 150b29e | 2016-02-14 23:06:48 +0100 | [diff] [blame] | 109 | done_testing; |
| 110 | |
| 111 | __END__ |