Minor speedup in tokenization by merging array pushes
Change-Id: I138226acb2648cf606563c57b3783f011bab7795
diff --git a/xt/benchmark.pl b/xt/benchmark.pl
index ddd17a2..85effd8 100644
--- a/xt/benchmark.pl
+++ b/xt/benchmark.pl
@@ -13,6 +13,7 @@
};
use KorAP::XML::TEI;
+use KorAP::XML::TEI::Tokenization;
my $columns = 0;
my $no_header = 0;
@@ -41,6 +42,8 @@
);
my $result;
+
+# Data for delHTMLcom-long
my ($fh, $filename) = tempfile();
print $fh <<'HTML';
@@ -50,6 +53,20 @@
-->ist <!-- a --><!-- b --> ein Test
HTML
+# Data for Tokenization:
+# the text of t/data/wikipedia.txt, loaded into memory before benchmarking
+my $t_dataf = catfile(dirname(__FILE__), '..', 't', 'data', 'wikipedia.txt');
+my $t_data = '';
+# Three-argument open with a lexical handle (no bareword FH, no mode parsing of the path)
+if (open(my $t_fh, '<', $t_dataf)) {
+  $t_data = do { local $/; <$t_fh> };   # slurp: $/ is undef only inside this do-block
+  $t_data = '' unless defined $t_data;  # keep a defined string if the read yields nothing
+  close($t_fh);
+}
+else {
+  die "Unable to load $t_dataf: $!";    # include the OS error for diagnosis
+}
+
# Add benchmark instances
$bench->add_instances(
@@ -82,6 +99,20 @@
};
}
),
+ Dumbbench::Instance::PerlSub->new( # benchmark entry: conservative() applied to $t_data
+ name => 'Tokenization-conservative',
+ code => sub {
+ $result = KorAP::XML::TEI::Tokenization::conservative($t_data, 0); # NOTE(review): meaning of the 2nd argument (0) is not visible here
+ $result = 0; # overwrite the return value (presumably so it is not retained between runs - confirm)
+ }
+ ),
+ Dumbbench::Instance::PerlSub->new( # benchmark entry: aggressive() applied to the same $t_data
+ name => 'Tokenization-aggressive',
+ code => sub {
+ $result = KorAP::XML::TEI::Tokenization::aggressive($t_data, 0); # same arguments as the conservative entry
+ $result = 0; # overwrite the return value, as in the conservative entry
+ }
+ ),
);
# Run benchmarks