Akron | 6e886f7 | 2020-02-19 07:42:32 +0100 | [diff] [blame] | 1 | #!/usr/bin/env perl |
| 2 | use strict; |
| 3 | use warnings; |
| 4 | use Dumbbench; |
| 5 | use File::Basename 'dirname'; |
| 6 | use File::Spec::Functions qw/catfile catdir rel2abs/; |
| 7 | use File::Temp ':POSIX'; |
| 8 | use FindBin; |
| 9 | use Getopt::Long; |
| 10 | |
# Prepend the "lib" directory that sits next to this script's directory
# to the module search path. This happens at compile time, so "use"
# statements further down the file can load modules from there.
BEGIN {
  unshift(@INC, join('/', $FindBin::Bin, '..', 'lib'));
}
| 14 | |
# Command line switches
my $columns = 0;    # true: print all timings on one tab-separated row
my $no_header = 0;  # true: omit the benchmark names from the output

# Usage text, shared by --help and by the invalid-option error path
my $usage = join(
  '',
  "--columns|-c Print instances in columns\n",
  "--no-header|-n Dismiss benchmark names\n",
  "--help|-h Print this page\n\n"
);

# GetOptions returns false when it detected an error (e.g. an unknown
# option). Stop in that case instead of running the whole benchmark
# with settings the caller did not ask for.
GetOptions(
  'columns|c' => \$columns,
  'no-header|n' => \$no_header,
  'help|h' => sub {
    print $usage;
    exit(0);
  }
) or do {
  print STDERR $usage;
  exit(1);
};
| 27 | |
# Name of the script that is benchmarked
our $SCRIPT_NAME = 'korapxml2krill';

# Directory of this file; the paths below are resolved relative to it
my $f = dirname(__FILE__);
my $script = rel2abs(catfile($f, '..', 'script', $SCRIPT_NAME));


# begin instance 1 setup
# Load example file
my $input = rel2abs(catdir($f, '..', 't', 'annotation', 'corpus', 'doc', '0001'));

# In scalar context tmpnam() only returns a currently unused path name.
# These names are handed to the script as --output and --cache.
my $output = tmpnam();
my $cache = tmpnam();

# Remove whatever was written to the temporary paths when the benchmark
# exits, so repeated runs do not pile up files in the temp directory.
# The variables may still be undefined if the program exits early
# (e.g. via --help), hence the defined check.
END {
  unlink grep { defined $_ && -e $_ } ($output, $cache);
}
# end instance 1
| 40 | |
Akron | b573e01 | 2020-08-04 09:40:34 +0200 | [diff] [blame] | 41 | |
# begin instance 2 setup
# Modules that the in-process conversion benchmark calls directly
use KorAP::XML::Krill;
use KorAP::XML::Tokenizer;

# Sample document that the conversion benchmark reads
my $path = catdir(
  dirname(__FILE__),
  qw(.. t real corpus GOE-TAGGED AGA 03828)
);

# Declared at file scope; the benchmark closures assign to them
my $tokens;
my $result;
# end instance 2
| 49 | |
| 50 | |
# Create a new benchmark object
my $bench = Dumbbench->new(
  verbosity => 0
);

# Quote a single word for a POSIX shell: wrap it in single quotes and
# escape embedded single quotes, so paths containing whitespace or
# shell metacharacters reach the script unchanged.
sub _shell_quote {
  my ($word) = @_;
  $word =~ s/'/'\\''/g;
  return "'$word'";
}

# Command line for the script benchmark. It is built once, outside of
# the timed code, because it does not change between iterations.
# $^X is the interpreter running this benchmark, so the script is
# measured with the same perl instead of whichever "perl" is first
# in PATH. Standard output of the script is discarded.
my $script_cmd = join(
  ' ',
  (
    map { _shell_quote($_) } (
      $^X, $script,
      '--input' => $input,
      '--output' => $output,
      '--cache' => $cache,
      '-k' => '0.03',
      '-t' => 'OpenNLP#Tokens',
      '-l' => 'ERROR'
    )
  ),
  '>', '/dev/null'
);

# In-process conversion of the sample document at $path: parse the
# document, tokenize it, add two annotation layers and serialize the
# result. $tokens and $result are file-scoped variables.
my $convert_goe_tagged = sub {
  my $doc = KorAP::XML::Krill->new(path => $path . '/');
  $doc->parse;

  # The value is not used afterwards; the call is kept as part of the
  # measured work (presumably it triggers metadata loading - verify).
  my $meta = $doc->meta;

  $tokens = KorAP::XML::Tokenizer->new(
    path => $doc->path,
    doc => $doc,
    foundry => 'Base',
    layer => 'Tokens_conservative',
    name => 'tokens'
  );
  $tokens->parse;
  $tokens->add('DeReKo', 'Structure', 'base_sentences_paragraphs');
  $tokens->add('DRuKoLa', 'Morpho');
  $result = $tokens->to_data;
  $tokens = undef;
};

# Add benchmark instances
$bench->add_instances(

  # Run the script as an external process on the example document
  Dumbbench::Instance::PerlSub->new(
    name => 'Script-ExampleRun-1',
    code => sub {
      system($script_cmd);
    }
  ),

  # NOTE(review): despite the "Script-" prefix this instance runs the
  # same in-process conversion as 'Conversion-GOE-Tagged-1' below and
  # does not invoke the script - confirm whether that is intended.
  Dumbbench::Instance::PerlSub->new(
    name => 'Script-Conversion-GOE-Tagged-1',
    code => $convert_goe_tagged
  ),

  # In-process conversion through the library API
  Dumbbench::Instance::PerlSub->new(
    name => 'Conversion-GOE-Tagged-1',
    code => $convert_goe_tagged
  )
);
| 116 | |
# Execute all registered benchmark instances
$bench->run;

# The instance list is needed by both output formats
my @instances = $bench->instances;

# Column mode: one tab-separated row of timings, preceded by a row of
# benchmark names unless headers are suppressed
if ($columns) {
  print join("\t", map { $_->name } @instances), "\n" unless $no_header;
  print join("\t", map { $_->result->raw_number } @instances), "\n";
  exit(0);
}

print "----------------------------------\n";

# Default mode: one line per instance for simple comparison of timings,
# prefixed with the benchmark name unless headers are suppressed
for my $instance (@instances) {
  print $instance->name, ': ' unless $no_header;
  print $instance->result->raw_number, "\n";
}

exit(0);