#!/usr/bin/env perl
# Integration test for the `archive` command of script/korapxml2krill.
#
# The test writes a temporary configuration file, runs the script on all
# archives matching 'wpd15*.zip' below the configured input base, captures
# the combined STDOUT/STDERR of the run and checks the log output for the
# options given in the configuration file.
use strict;
use warnings;
use File::Basename 'dirname';
use File::Spec::Functions qw/catdir catfile/;
use File::Temp qw/:POSIX tempfile/;
use Mojo::File;
use Test::More;
use Test::Output qw/combined_from/;
use Data::Dumper;
use KorAP::XML::Archive;

# Allow script tests to be skipped via the environment
if ($ENV{SKIP_SCRIPT}) {
  plan skip_all => 'Skip script tests';
}

# Skip when KorAP::XML::Archive reports that unzip is not available
unless (KorAP::XML::Archive::test_unzip) {
  plan skip_all => 'unzip not found';
}

my $f = dirname(__FILE__);

# Configuration file - removed automatically when the test exits
my ($fh, $cfg_file) = tempfile(UNLINK => 1);

my $input_base = catdir($f, '..', 'corpus', 'archives');

# Temporary extract
# NOTE(review): CLEANUP => 0 leaves this directory behind after the run -
# presumably deliberate to allow inspection; confirm.
my $temp_out = File::Temp->newdir(CLEANUP => 0);

# All options checked below are passed via the configuration file
print {$fh} <<"CFG" or die "Unable to write to $cfg_file: $!";
overwrite 0
token OpenNLP#tokens
base-sentences DeReKo#Structure
base-paragraphs DeReKo#Structure
base-pagebreaks DeReKo#Structure
jobs -1
meta I5
gzip 1
log DEBUG
temporary-extract $temp_out
sequential-extraction 1
input-base $input_base
CFG

# Buffered write errors only surface on close
close($fh) or die "Unable to close $cfg_file: $!";

# Path for script
my $script = catfile($f, '..', '..', 'script', 'korapxml2krill');

# Path for input - the wildcard is passed to the script unexpanded.
# As the call below bypasses the shell, no quoting is required.
my $input = catfile('wpd15*.zip');

# Temporary output
# NOTE(review): not cleaned up either (CLEANUP => 0) - confirm intent.
my $output = File::Temp->newdir(CLEANUP => 0);

my $cache = tmpnam();

# Run the script with the interpreter executing this test and pass the
# arguments as a list, so paths containing whitespace or shell
# metacharacters are handed over verbatim.
my @call = (
  $^X, $script,
  'archive',
  '--config' => $cfg_file,
  '--input'  => $input,
  '--output' => "$output",
  '--cache'  => $cache
);

# String representation of the call, used for the sanity check only
my $call = join(' ', @call);

like($call, qr!config!, 'Call string');

# Capture STDOUT and STDERR of the script run
my $stdout = combined_from(sub { system(@call) });

like($stdout, qr!Reading config from!, 'Config');

# Processed using gzip
like($stdout, qr!Processed .+?WPD15-A00-00081\.json\.gz!, 'Gzip');

# Check sequential extraction
like($stdout, qr!Extract sequentially to!, 'Sequential extraction');

# Check log level
like($stdout, qr!Unable to parse KorAP::XML::Annotation::Glemm::Morpho!, 'Check log level');

# Check wildcard input
like($stdout, qr!Input is .+?wpd15-single\.zip,.+?wpd15-single\.malt\.zip,.+?wpd15-single\.corenlp\.zip,.+?wpd15-single\.opennlp\.zip,.+?wpd15-single\.mdparser\.zip,.+?wpd15-single\.tree_tagger\.zip!is, 'Wildcards');

like($stdout, qr!Run using \d+ jobs on \d+ cores!, 'Jobs');

done_testing;
__END__