blob: 69fc90860fd842ecee30a61ace2618794225ec36 [file] [log] [blame]
Akron6e886f72020-02-19 07:42:32 +01001#!/usr/bin/env perl
2use strict;
3use warnings;
4use Dumbbench;
5use File::Basename 'dirname';
6use File::Spec::Functions qw/catfile catdir rel2abs/;
7use File::Temp ':POSIX';
8use FindBin;
9use Getopt::Long;
10
# Put the lib directory that sits beside this script's directory at the
# front of @INC. This happens at compile time, so `use` statements further
# down in the file already search it first.
BEGIN {
  my $local_lib = "$FindBin::Bin/../lib";
  unshift(@INC, $local_lib);
}
14
# Command line flags selecting the report format.
my $columns = 0;    # set by --columns|-c
my $no_header = 0;  # set by --no-header|-n

# Parse the command line. --help prints the usage text and exits at once.
# GetOptions returns false when it meets an unknown or malformed option;
# stop in that case instead of silently benchmarking with settings the
# caller did not ask for.
GetOptions(
  'columns|c' => \$columns,
  'no-header|n' => \$no_header,
  'help|h' => sub {
    print "--columns|-c Print instances in columns\n";
    print "--no-header|-n Dismiss benchmark names\n";
    print "--help|-h Print this page\n\n";
    exit(0);
  }
) or die "Invalid command line options, see --help\n";
27
# Name of the command line tool that gets benchmarked.
our $SCRIPT_NAME = 'korapxml2krill';

# $f is the directory holding this file; the tool is looked up in the
# sibling "script" directory and its path is made absolute.
my $f = dirname(__FILE__);
my $script = do {
  my $relative = catfile($f, qw/.. script/, $SCRIPT_NAME);
  rel2abs($relative);
};
32
33
# begin instance 1 setup
# Example document directory that is passed to the script via --input
my $input = rel2abs(catdir($f, '..', 't', 'annotation', 'corpus', 'doc', '0001'));

# tmpnam() in scalar context only hands out a currently unused file name;
# nothing is created here. The names are passed to the script as
# --output and --cache.
my $output = tmpnam();
my $cache = tmpnam();

# Remove whatever exists at these two paths when the benchmark ends, so
# that repeated runs do not pile up files in the temp directory. Both
# variables are still undefined if --help exits before they are assigned,
# hence the defined check.
END {
  unlink grep { defined $_ && -e $_ } $output, $cache;
}
# end instance 1
40
Akronb573e012020-08-04 09:40:34 +020041
# begin instance 2 setup
# Modules that the in-process conversion benchmark calls directly
use KorAP::XML::Krill;
use KorAP::XML::Tokenizer;

# Document directory that the conversion benchmark parses
my $path = catdir(dirname(__FILE__), qw/.. t real corpus GOE-TAGGED AGA 03828/);

# File-level slots that the conversion benchmark subs assign to
my $tokens;
my $result;
# end instance 2
49
50
# Create the Dumbbench object that collects and runs the instances
# (constructed with verbosity level 0)
my $bench = Dumbbench->new(verbosity => 0);
55
# Body of the in-process conversion benchmark: build a KorAP::XML::Krill
# document for $path, parse it, tokenize it on Base/Tokens_conservative,
# add the DeReKo structure and DRuKoLa morpho layers and serialize the
# outcome into $result. Shared by both conversion instances below, which
# were registered as two identical copies before.
my $convert_goe_tagged = sub {
  my $doc = KorAP::XML::Krill->new(path => $path . '/');
  $doc->parse;

  # The value is not used; the call is kept so the measured work stays
  # the same as before
  my $meta = $doc->meta;

  $tokens = KorAP::XML::Tokenizer->new(
    path => $doc->path,
    doc => $doc,
    foundry => 'Base',
    layer => 'Tokens_conservative',
    name => 'tokens'
  );
  $tokens->parse;
  $tokens->add('DeReKo', 'Structure', 'base_sentences_paragraphs');
  $tokens->add('DRuKoLa', 'Morpho');
  $result = $tokens->to_data;

  # Drop the tokenizer again before the next iteration
  $tokens = undef;
};

# Add benchmark instances
$bench->add_instances(
  Dumbbench::Instance::PerlSub->new(
    name => 'Script-ExampleRun-1',
    code => sub {
      # Run the script with the interpreter that executes this benchmark
      # ($^X) and pass the arguments as a list, so no shell is involved
      # and paths containing spaces or shell metacharacters stay intact.
      my @cmd = (
        $^X, $script,
        '--input' => $input,
        '--output' => $output,
        '--cache' => $cache,
        '-k' => '0.03',
        '-t' => 'OpenNLP#Tokens',
        '-l' => 'ERROR'
      );
      open(my $stdout, '-|', @cmd)
        or die "Unable to run $script: $!\n";

      # Discard everything the script writes to STDOUT
      while (defined(my $line = <$stdout>)) { }

      # close() waits for the script to finish. Its exit status is not
      # checked, as it was not checked before either.
      close($stdout);
    }
  ),
  Dumbbench::Instance::PerlSub->new(
    name => 'Conversion-GOE-Tagged-1',
    code => $convert_goe_tagged
  ),
  # NOTE(review): this instance used to carry the same name as the one
  # above, which made the two results indistinguishable in the report.
  # It is kept as a second measurement under its own name; confirm that
  # the repetition is wanted.
  Dumbbench::Instance::PerlSub->new(
    name => 'Conversion-GOE-Tagged-2',
    code => $convert_goe_tagged
  )
);
116
# Run benchmarks
$bench->run;

if ($columns) {
  # Output in a single row: tab separated raw numbers, preceded by a row
  # of instance names unless the header is suppressed
  print join("\t", map { $_->name } $bench->instances), "\n"
    unless $no_header;
  print join("\t", map { $_->result->raw_number } $bench->instances), "\n";
}
else {
  print "----------------------------------\n";

  # Output simple timings for comparison, one instance per line
  for my $instance ($bench->instances) {
    print $instance->name, ': ' unless $no_header;
    print $instance->result->raw_number, "\n";
  }
}

exit(0);