#!/usr/bin/env perl
| 2 | use strict; |
| 3 | use warnings; |
Akron | 636aa11 | 2017-04-07 18:48:56 +0200 | [diff] [blame] | 4 | use File::Basename 'dirname'; |
| 5 | use File::Spec::Functions qw/catdir catfile/; |
Akron | 5fd2d8e | 2017-06-19 15:29:39 +0200 | [diff] [blame] | 6 | use File::Temp qw/:POSIX tempfile/; |
Akron | 636aa11 | 2017-04-07 18:48:56 +0200 | [diff] [blame] | 7 | use Mojo::File; |
| 8 | use Test::More; |
| 9 | use Test::Output qw/combined_from/; |
| 10 | use Data::Dumper; |
Akron | 0c14f56 | 2021-03-17 12:19:23 +0100 | [diff] [blame] | 11 | use KorAP::XML::Archive; |
Akron | 636aa11 | 2017-04-07 18:48:56 +0200 | [diff] [blame] | 12 | |
# The script tests can be switched off as a whole by setting the
# SKIP_SCRIPT environment variable to a true value.
plan skip_all => 'Skip script tests' if $ENV{SKIP_SCRIPT};

# Skip everything when KorAP::XML::Archive::test_unzip returns a
# false value (reported to the user as a missing unzip).
plan skip_all => 'unzip not found'
  unless KorAP::XML::Archive::test_unzip();

# Directory containing this test file; fixture paths are built from it
my $f = dirname(__FILE__);

# Temporary file that receives the configuration for the script call
my ($fh, $cfg_file) = tempfile();

# Directory passed to the script as 'input-base'
my $input_base = catdir($f, '..', 'corpus', 'archives');

# Temporary extract
# (CLEANUP => 0: File::Temp does not remove the directory automatically)
my $temp_out = File::Temp->newdir(CLEANUP => 0);

# Configuration passed to the script with the --config option.
# The heredoc interpolates $temp_out and $input_base.
my $config = <<"CFG";
overwrite 0
token OpenNLP#tokens
base-sentences DeReKo#Structure
base-paragraphs DeReKo#Structure
base-pagebreaks DeReKo#Structure
jobs -1
meta I5
gzip 1
log DEBUG
temporary-extract $temp_out
sequential-extraction 1
input-base $input_base
CFG

# Buffered write errors may only surface on close, so check both calls:
# a truncated configuration would otherwise lead to misleading failures
# in the assertions on the script output.
print {$fh} $config
  or die "Unable to write config to $cfg_file: $!";

close($fh)
  or die "Unable to close config file $cfg_file: $!";

# Location of the korapxml2krill script, relative to this test file
my $script = catfile($f, qw/.. .. script korapxml2krill/);

# Wildcard pattern for the input archives, wrapped in single quotes
# so that it is part of the command string literally
my $input = sprintf q{'%s'}, catfile('wpd15*.zip');

# Directory for the output
# (CLEANUP => 0: File::Temp does not remove the directory automatically)
my $output = File::Temp->newdir(CLEANUP => 0);

# File name handed to the --cache option
# (scalar context: only the name is requested from tmpnam)
my $cache = tmpnam();

# Collect the command and its options first ...
my @command = (
  perl       => $script,
  'archive',
  '--config' => $cfg_file,
  '--input'  => $input,
  '--output' => $output,
  '--cache'  => $cache,
);

# ... and flatten them to the single string that is executed later
my $call = join ' ', @command;

# The command line has to contain the config option
like($call, qr!config!, 'Call string');

# Run the command and collect STDOUT and STDERR in a single string
my $stdout = combined_from(sub { system($call) });

# The configuration file was picked up
like($stdout, qr!Reading config from!, 'Config');

# Processed using gzip
like($stdout, qr!Processed .+?WPD15-A00-00081\.json\.gz!, 'Gzip');

# Check sequential extraction
# (named like every other assertion, so a failure can be identified
# in the test output)
like($stdout, qr!Extract sequentially to!, 'Sequential extraction');

# Check log level
like($stdout, qr!Unable to parse KorAP::XML::Annotation::Glemm::Morpho!, 'Check log level');

# Check wildcard input
like($stdout, qr!Input is .+?wpd15-single\.zip,.+?wpd15-single\.malt\.zip,.+?wpd15-single\.corenlp\.zip,.+?wpd15-single\.opennlp\.zip,.+?wpd15-single\.mdparser\.zip,.+?wpd15-single\.tree_tagger\.zip!is, 'Wildcards');

# The number of jobs and cores is reported
like($stdout, qr!Run using \d+ jobs on \d+ cores!, 'Jobs');

done_testing;
__END__