#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
use File::Basename 'dirname';
use File::Path qw/remove_tree/;
use File::Spec::Functions qw/catdir catfile/;
use File::Temp qw/tempdir/;
use Mojo::Util qw/slurp/;
use Mojo::JSON qw/decode_json/;
use IO::Uncompress::Gunzip;
use Test::More;
use Test::Output qw/:stdout :stderr :functions/;
use Data::Dumper;

# Directory holding this test file; other paths are resolved relative to it.
my $f      = dirname(__FILE__);
my $script = catfile($f, qw/.. .. script korapxml2krill/);

# Invocation of the script's "archive" command with no further arguments.
my $call = join ' ', 'perl', $script, 'archive';

# Test without parameters: the captured STDOUT has to match the pattern.
stdout_like(sub { system $call }, qr!archive.+?Process an!s, $call);

# Zipped corpus used as input for the first archive run.
my $input = catfile($f, '..', 'corpus', 'archive.zip');
ok(-f $input, 'Input archive found');

# Temporary output directory. Automatic removal is switched off, so the
# directory is not deleted when this object goes out of scope.
my $output = File::Temp->newdir(CLEANUP => 0);
$output->unlink_on_destroy(0);

ok(-d $output, 'Output directory exists');

$call = join(
  ' ',
  'perl', $script,
  'archive',
  '--input' => $input,
  '--output' => $output,
  '-t' => 'Base#tokens_aggr',
  '-m' => 'Sgbr'
);

# Test without compression
my $json;
{
  # Silence Perl warnings raised in this process while the block runs.
  local $SIG{__WARN__} = sub {};
  my $out = stdout_from(sub { system($call); });

  like($out, qr!TEST-BSP-1\.json!s, $call);

  # Capture in list context: if the pattern does not match, $json stays
  # undefined instead of picking up a stale $1.
  ($json) = $out =~ m!Processed (.+?\.json)!;
};

# At this point $json holds the path reported by the script (if any);
# below it is replaced by the decoded document.
ok(defined $json && -f $json, 'Json file exists');
ok((my $file = slurp $json), 'Slurp data');
ok(($json = decode_json $file), 'decode json');

is($json->{data}->{tokenSource}, 'base#tokens_aggr', 'TokenSource');
is($json->{data}->{foundries}, 'base base/paragraphs base/sentences dereko dereko/structure sgbr sgbr/lemma sgbr/morpho', 'Foundries');
is($json->{sgbrKodex}, 'M', 'Kodex meta data');

# Use directory
$input = catdir($f, '..', 'annotation', 'corpus');

$call = join(
  ' ',
  'perl', $script,
  'archive',
  '--input' => $input,
  '--output' => $output,
  '-t' => 'Tree_Tagger#Tokens',
  '-j' => 4 # 4 jobs!
);

# Decoded JSON documents of the two converted texts.
my ($json_1, $json_2);

{
  # Silence Perl warnings raised in this process while the block runs.
  local $SIG{__WARN__} = sub {};

  # That's not really stable on slow machines!
  my $out = stdout_from(sub { system($call); });

  # Match in list context so that a failed match yields undef rather than
  # a stale $1 left over from an earlier successful match.
  my ($pid1) = $out =~ m!\[\$(\d+?):1\/2\]!s;
  ok(defined $pid1, $call . ' pid 1');
  my ($pid2) = $out =~ m!\[\$(\d+?):2\/2\]!s;
  ok(defined $pid2, $call . ' pid 2');

  isnt($pid1, $pid2, 'No PID match');

  like($out, qr!Processed .+?\/corpus-doc-0001\.json!s, $call);
  like($out, qr!Processed .+?\/corpus-doc-0002\.json!s, $call);

  ok(-d $output, 'Temporary directory still exists');

  # Paths are kept apart from the decoded documents so that neither
  # variable changes meaning halfway through the block.
  my $path_1 = catfile($output, 'corpus-doc-0001.json');
  ok(-f $path_1, 'Json file exists 1');
  my $path_2 = catfile($output, 'corpus-doc-0002.json');
  ok(-f $path_2, 'Json file exists 2');

  ok(($file = slurp $path_1), 'Slurp data');
  ok(($json_1 = decode_json $file), 'decode json');

  is($json_1->{data}->{tokenSource}, 'tree_tagger#tokens', 'TokenSource');
  is($json_1->{data}->{foundries}, 'base base/paragraphs base/sentences connexor connexor/morpho connexor/phrase connexor/sentences connexor/syntax corenlp corenlp/constituency corenlp/morpho corenlp/sentences dereko dereko/structure glemm glemm/morpho mate mate/dependency mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences xip xip/constituency xip/morpho xip/sentences', 'Foundries');
  is($json_1->{textSigle}, 'Corpus/Doc/0001', 'Sigle');

  ok(-f $path_2, 'Json file exists');
  ok(($file = slurp $path_2), 'Slurp data');
  ok(($json_2 = decode_json $file), 'decode json');

  is($json_2->{data}->{tokenSource}, 'tree_tagger#tokens', 'TokenSource');
  is($json_2->{data}->{foundries}, 'base base/paragraphs base/sentences dereko dereko/structure malt malt/dependency treetagger treetagger/morpho treetagger/sentences', 'Foundries');
  is($json_2->{textSigle}, 'Corpus/Doc/0002', 'Sigle');
};

ok(-d $output, 'Ouput directory exists');

# unlink() does not remove directories, and automatic cleanup was switched
# off when the directory was created, so delete the tree explicitly.
# Pass the plain path string rather than the object, so remove_tree()
# cannot mistake the object for its trailing options hash.
remove_tree($output->dirname);
ok(!-d $output->dirname, 'Output directory removed');

done_testing;
__END__