blob: 545846818353e19f204840dce88d84b69b592512 [file] [log] [blame]
#!/usr/bin/env perl
2use strict;
3use warnings;
4use File::Basename 'dirname';
5use File::Spec::Functions qw/catdir catfile/;
6use File::Temp qw/ :POSIX /;
7use Mojo::File;
8use Mojo::JSON qw/decode_json/;
9use IO::Uncompress::Gunzip;
10use Test::More;
11use Test::Output;
12use Data::Dumper;
13use utf8;
14
# Bail out of the whole file when either SKIP_SCRIPT or SKIP_REAL is set
# to a true value in the environment.
plan skip_all => 'Skip script/real tests'
  if $ENV{SKIP_SCRIPT} || $ENV{SKIP_REAL};
18
# Locate the converter script and the corpus fixture relative to this file.
my $f = dirname(__FILE__);
my $script = catfile($f, '..', '..', '..', 'script', 'korapxml2krill');

my $input = catdir($f, '..', 'corpus', 'WPD', '00001');
ok(-d $input, 'Input directory found');

# tmpnam() hands back file names only; nothing is created here. The
# script invoked below is handed both paths via --output and --cache.
my $output = tmpnam();
my $cache = tmpnam();

# Remove whatever the runs leave behind at those paths, so repeated test
# runs do not pile up files in the temp directory. The defined() check
# covers an early exit (e.g. skip_all) before the names were assigned.
END {
  unlink grep { defined($_) && -e $_ } $output, $cache;
}

ok(!-f $output, 'Output does not exist');
29
# First run: OpenNLP tokens as token source, INFO log level, no further
# flags. The command is assembled as a single string, so system() hands
# it to the shell.
my $call = join(
  ' ',
  'perl', $script,
  '--input' => $input,
  '--output' => $output,
  '--cache' => $cache,
  '-t' => 'OpenNLP#tokens',
  '-l' => 'INFO'
);

# Test without compression: the output is read back as plain JSON below.
# The script is expected to report its run time on STDERR.
stderr_like(
  sub {
    system($call);
  },
  qr!The code took!,
  $call
);

ok(-f $output, 'Output does exist');
ok((my $file = Mojo::File->new($output)->slurp), 'Slurp data');
ok((my $json = decode_json $file), 'decode json');
is($json->{textSigle}, 'WPD/AAA/00001', 'text sigle');
is($json->{title}, 'A', 'Title');
is($json->{data}->{tokenSource}, 'opennlp#tokens', 'Token source');
is($json->{data}->{foundries}, 'base base/paragraphs base/sentences connexor connexor/morpho connexor/phrase connexor/sentences connexor/syntax corenlp corenlp/sentences dereko dereko/structure mate mate/dependency mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences xip xip/constituency xip/morpho xip/sentences', 'Foundries');

# Spot-check two neighbouring stream entries; the second run below
# compares against these positions.
my $stream = $json->{data}->{stream};
my $token = $stream->[12];
is($token->[16], 's:Vokal', 'Stream entry 12 holds s:Vokal');
$token = $stream->[13];
is($token->[23], 's:Der', 'Stream entry 13 holds s:Der');
61
62
# Second run: same input and output paths as before, with the additional
# -w and -nwt switches (both passed without a value).
# NOTE(review): presumably -w is "wrap" and -nwt is "non-word tokens";
# confirm against the script's option documentation.
$call = join(
  ' ',
  'perl', $script,
  '--input' => $input,
  '--output' => $output,
  '--cache' => $cache,
  '-t' => 'OpenNLP#tokens',
  '-l' => 'INFO',
  '-w' => '',
  '-nwt' => ''
);

# Still uncompressed output; the script is again expected to report its
# run time on STDERR.
stderr_like(
  sub {
    system($call);
  },
  qr!The code took!,
  $call
);

ok(-f $output, 'Output does exist (second run)');
ok(($file = Mojo::File->new($output)->slurp), 'Slurp data (second run)');
ok(($json = decode_json $file), 'decode json (second run)');

# Compared with the first run, entry 13 is now expected to be the
# punctuation token, which moves s:Der from entry 13 to entry 14.
$stream = $json->{data}->{stream};
$token = $stream->[12];
is($token->[17], 's:Vokal', 'Stream entry 12 holds s:Vokal (second run)');
$token = $stream->[13];
is($token->[7], 's:.', 'Stream entry 13 holds punctuation s:.');
is($token->[11], 'xip/p:PUNCT', 'Stream entry 13 holds xip/p:PUNCT');
$token = $stream->[14];
is($token->[23], 's:Der', 'Stream entry 14 holds s:Der');


done_testing;
98
99__END__