#!/usr/bin/env perl
# Tests the `archive` command of the korapxml2krill script when its
# options are read from a configuration file passed via --config.
use strict;
use warnings;
use File::Basename 'dirname';
use File::Spec::Functions qw/catdir catfile/;
use File::Temp qw/:POSIX tempfile/;
use Mojo::File;
use Test::More;
use Test::Output qw/combined_from/;
use Data::Dumper;
use KorAP::XML::Archive;

# Allow the (slow) script tests to be skipped from the environment
if ($ENV{SKIP_SCRIPT}) {
  plan skip_all => 'Skip script tests';
}

# Skip everything if the archive module reports that unzip is unavailable
unless (KorAP::XML::Archive::test_unzip) {
  plan skip_all => 'unzip not found';
}

# Directory of this test file; all other paths are relative to it
my $f = dirname(__FILE__);

# Temporary configuration file.
# UNLINK => 1 removes it at program exit, so repeated runs
# do not accumulate stale config files in the temp directory.
my ($fh, $cfg_file) = tempfile(UNLINK => 1);

# Base directory prepended to the wildcard given as --input
my $input_base = catdir($f, '..', 'corpus', 'archives');

# Write the configuration (interpolates $input_base)
print {$fh} <<"CFG" or die "Unable to write to config file $cfg_file: $!";
overwrite       0
token           OpenNLP#tokens
base-sentences  DeReKo#Structure
base-paragraphs DeReKo#Structure
base-pagebreaks DeReKo#Structure
jobs            -1
meta            I5
gzip            1
log             DEBUG
input-base      $input_base
CFG

# Buffered write errors only surface on close, so check it
close($fh) or die "Unable to close config file $cfg_file: $!";

# Path for script
my $script = catfile($f, '..', '..', 'script', 'korapxml2krill');

# Path for input.
# The wildcard is wrapped in single quotes so the shell started by
# system() passes it on literally instead of expanding it itself.
my $input = "'" . catfile('wpd15*.zip') . "'";

# Temporary output
# NOTE(review): CLEANUP => 0 keeps the directory after the run -
# presumably deliberate for inspection; confirm before changing.
my $output = File::Temp->newdir(CLEANUP => 0);

# Name of a temporary file, passed to the script as the cache location
my $cache = tmpnam();

# Command line as a single string (executed through the shell)
my $call = join(
  ' ',
  'perl', $script,
  'archive',
  '--config' => $cfg_file,
  '--input'  => $input,
  '--output' => $output,
  '--cache'  => $cache
);

like($call, qr!config!, 'Call string');

# Run the script and capture STDOUT and STDERR combined
my $stdout = combined_from(sub { system($call) });

like($stdout, qr!Reading config from!, 'Config');

# Processed using gzip
like($stdout, qr!Processed .+?WPD15-A00-00081\.json\.gz!, 'Gzip');

# Check log level
like($stdout, qr!Unable to parse KorAP::XML::Annotation::Glemm::Morpho!, 'Check log level');

# Check wildcard input
like($stdout, qr!Input is .+?wpd15-single\.zip,.+?wpd15-single\.malt\.zip,.+?wpd15-single\.corenlp\.zip,.+?wpd15-single\.opennlp\.zip,.+?wpd15-single\.mdparser\.zip,.+?wpd15-single\.tree_tagger\.zip!is, 'Wildcards');

like($stdout, qr!Run using \d+ jobs on \d+ cores!, 'Jobs');

done_testing;
__END__