blob: 5ec57cbf45bb4652617504fe43074bc273a6dad6 [file] [log] [blame]
#!/usr/bin/env perl
2use strict;
3use warnings;
Akron636aa112017-04-07 18:48:56 +02004use File::Basename 'dirname';
5use File::Spec::Functions qw/catdir catfile/;
Akron5fd2d8e2017-06-19 15:29:39 +02006use File::Temp qw/:POSIX tempfile/;
Akron636aa112017-04-07 18:48:56 +02007use Mojo::File;
8use Test::More;
9use Test::Output qw/combined_from/;
10use Data::Dumper;
Akron0c14f562021-03-17 12:19:23 +010011use KorAP::XML::Archive;
Akron636aa112017-04-07 18:48:56 +020012
# Script tests can be switched off wholesale through the environment.
plan skip_all => 'Skip script tests' if $ENV{SKIP_SCRIPT};

# Skip as well when KorAP::XML::Archive reports that unzip is unavailable.
plan skip_all => 'unzip not found'
  unless KorAP::XML::Archive::test_unzip();

# Directory containing this test file; fixture and script paths are
# built relative to it.
my $f = dirname(__FILE__);

# Temporary configuration file handed to the script via --config.
# UNLINK => 1 lets File::Temp remove it at program exit instead of
# leaving a stray file in the temp directory after every run.
my ($fh, $cfg_file) = tempfile(UNLINK => 1);

# Written into the configuration below as the input-base value
my $input_base = catdir($f, '..', 'corpus', 'archives');

# Only $input_base is interpolated; every other line is literal.
print {$fh} <<"CFG" or die "Unable to write config to $cfg_file: $!";
overwrite 0
token OpenNLP#tokens
base-sentences DeReKo#Structure
base-paragraphs DeReKo#Structure
base-pagebreaks DeReKo#Structure
jobs -1
meta I5
gzip 1
log DEBUG
input-base $input_base
CFG

# Buffered write errors only surface at close, so check it as well.
close($fh) or die "Unable to close config file $cfg_file: $!";

# Path for script
my $script = catfile($f, '..', '..', 'script', 'korapxml2krill');

# Path for input: a wildcard pattern wrapped in single quotes, so the
# shell hands it to the script verbatim instead of expanding it.
my $input = q{'wpd15*.zip'};

# Temporary output
# NOTE(review): CLEANUP => 0 leaves this directory behind after the
# run - presumably deliberate; confirm.
my $output = File::Temp->newdir(CLEANUP => 0);

# Name for the cache file; in scalar context tmpnam() only returns a
# file name.
my $cache = tmpnam();

# Assemble the command line. $^X is the interpreter running this test,
# so the script is executed by the same perl (and module set) rather
# than by whatever "perl" happens to come first in PATH.
my $call = join(
  ' ',
  $^X, $script,
  'archive',
  '--config' => $cfg_file,
  '--input'  => $input,
  '--output' => $output,
  '--cache'  => $cache
);

# Sanity check on the assembled command line
like($call, qr!config!, 'Call string');

# Run the script and collect its combined STDOUT and STDERR
my $captured = combined_from(sub { system($call) });

# Each entry pairs a pattern the captured output has to match with the
# description of the corresponding test.
my @expected = (
  [ qr!Reading config from!, 'Config' ],

  # Processed using gzip
  [ qr!Processed .+?WPD15-A00-00081\.json\.gz!, 'Gzip' ],

  # Check log level
  [ qr!Unable to parse KorAP::XML::Annotation::Glemm::Morpho!, 'Check log level' ],

  # Check wildcard input
  [ qr!Input is .+?wpd15-single\.zip,.+?wpd15-single\.malt\.zip,.+?wpd15-single\.corenlp\.zip,.+?wpd15-single\.opennlp\.zip,.+?wpd15-single\.mdparser\.zip,.+?wpd15-single\.tree_tagger\.zip!is, 'Wildcards' ],

  [ qr!Run using \d+ jobs on \d+ cores!, 'Jobs' ],
);

for my $check (@expected) {
  my ($pattern, $label) = @$check;
  like($captured, $pattern, $label);
}

done_testing;
__END__