blob: 62e9e1560b4a92b37850b803c6aa06876ba050be [file] [log] [blame]
#!/usr/bin/env perl
2use strict;
3use warnings;
4use File::Basename 'dirname';
5use File::Spec::Functions qw/catdir catfile/;
6use File::Temp qw/ :POSIX /;
7use Mojo::File;
8use Mojo::JSON qw/decode_json/;
9use IO::Uncompress::Gunzip;
10use Test::More;
11use Test::Output;
12use Data::Dumper;
13use utf8;
14
# Skip the whole file when SKIP_SCRIPT or SKIP_REAL is set in the environment.
plan skip_all => 'Skip script/real tests'
  if $ENV{SKIP_SCRIPT} || $ENV{SKIP_REAL};
18
# All paths are resolved relative to the directory holding this test file.
my $f = dirname(__FILE__);

# The script under test sits three directory levels above this file.
my $script = catfile($f, ('..') x 3, 'script', 'korapxml2krill');

# Input directory handed to the script via --input.
my $input = catdir($f, '..', 'corpus', 'GOE2', 'AGA', '03828');
ok(-d $input, 'Input directory found');

# tmpnam() has to stay in scalar context: in list context it returns
# a (filehandle, filename) pair instead of just a file name.
my $output = tmpnam();
my $cache  = tmpnam();

# Only a name was reserved, so nothing may exist at the output path yet.
ok(!-f $output, 'Output does not exist');
29
# Build the invocation as an argument list rather than a single string, so
# that system() bypasses the shell and paths containing spaces or shell
# metacharacters reach the script unchanged.
# $^X is the interpreter running this test; using it (instead of whatever
# 'perl' happens to be first in PATH) keeps the child on the same perl.
my @call = (
  $^X, $script,
  '--input'  => $input,
  '--output' => $output,
  '--cache'  => $cache,
  '-t'       => 'Base#tokens_aggr.xml',
  '-bs'      => 'DeReKo#Structure',
  '-bp'      => 'DeReKo#Structure',
  '-l'       => 'INFO'
);

# Human-readable form of the command, used as the test description.
my $call = join(' ', @call);

# Test without compression
stderr_like(
  sub {
    system(@call);
  },
  qr!The code took!,
  $call
);
50
# Read the generated output back and decode it as JSON.
ok(-f $output, 'Output does exist');
ok((my $file = Mojo::File->new($output)->slurp), 'Slurp data');
ok((my $json = decode_json $file), 'decode json');

# Top-level metadata fields.
is($json->{textType}, 'Autobiographie', 'text type');
is($json->{title}, 'Autobiographische Einzelheiten', 'Title');

# Fields of the "data" section.
is($json->{data}->{tokenSource}, 'base#tokens_aggr', 'TokenSource');
is($json->{data}->{foundries}, 'dereko dereko/structure dereko/structure/base-sentences-paragraphs marmot marmot/morpho', 'Foundries');

# First entry of the stream.
my $stream = $json->{data}->{stream};
my $token = $stream->[0];
is($token->[0], '-:base/paragraphs$<i>14', 'Paragraphs');
is($token->[1], '-:base/sentences$<i>215', 'Sentences');

is($token->[5], '<>:base/s:s$<b>64<i>0<i>30<i>2<b>2', 'struct');
is($token->[7], '<>:dereko/s:s$<b>64<i>0<i>30<i>2<b>4', 'struct');
is($token->[8], '<>:base/s:t$<b>64<i>0<i>35242<i>5239<b>0', 'struct');

# Fifth entry of the stream.
$token = $stream->[4];
is($token->[0], '<>:base/s:s$<b>64<i>53<i>254<i>32<b>2', 'struct');
is($token->[1], '<>:dereko/s:s$<b>64<i>53<i>254<i>32<b>5<s>1', 'struct');
is($token->[2], '<>:base/s:p$<b>64<i>53<i>3299<i>504<b>1', 'struct');
is($token->[3], '<>:dereko/s:p$<b>64<i>53<i>3299<i>504<b>4', 'struct');

# Do not leave the temporary files behind; unlink() silently ignores
# a path that no longer exists.
unlink($output, $cache);

done_testing;
74
75__END__