Add micro-optimizations based on profiling in Tokenizer::Units and Annotation::DRuKoLa::Morpho
Change-Id: Id0506e033c6ec13dcec989b0ca9ea52b30e013d6
diff --git a/lib/KorAP/XML/Annotation/DRuKoLa/Morpho.pm b/lib/KorAP/XML/Annotation/DRuKoLa/Morpho.pm
index 3cdfe8b..375ced7 100644
--- a/lib/KorAP/XML/Annotation/DRuKoLa/Morpho.pm
+++ b/lib/KorAP/XML/Annotation/DRuKoLa/Morpho.pm
@@ -15,16 +15,19 @@
my $found;
+ my $name;
foreach my $f (@{$content->{fs}->{f}}) {
+ $name = $f->{-name};
+
# pos tag
- if (($f->{-name} eq 'pos') &&
+ if (($name eq 'pos') &&
($found = $f->{'#text'})) {
$mtt->add_by_term('drukola/p:' . $found);
}
# ana tag
- elsif ($f->{-name} eq 'msd' &&
+ elsif ($name eq 'msd' &&
($found = $f->{'#text'})) {
my ($pos, $msd) = split(/ /, $found);
if ($msd) {
@@ -43,7 +46,7 @@
}
# lemma tag
- elsif (($f->{-name} eq 'lemma')
+ elsif (($name eq 'lemma')
&& ($found = $f->{'#text'})
&& $found ne '--') {
# b($found)->decode('latin-1')->encode->to_string
diff --git a/lib/KorAP/XML/Tokenizer/Units.pm b/lib/KorAP/XML/Tokenizer/Units.pm
index df22ab0..e2ec7cf 100644
--- a/lib/KorAP/XML/Tokenizer/Units.pm
+++ b/lib/KorAP/XML/Tokenizer/Units.pm
@@ -1,6 +1,9 @@
package KorAP::XML::Tokenizer::Units;
use KorAP::XML::Tokenizer::Span;
use KorAP::XML::Tokenizer::Token;
+
+# TODO:
+# Don't use Mojo::Base! - "encodings" is called too often
use Mojo::Base -base;
has [qw/path foundry layer match range primary stream/];
@@ -118,24 +121,25 @@
};
sub token {
- my $self = shift;
- my ($from, $to, $s) = @_;
+ my ($self, $from, $to, $s) = @_;
($from, $to) = $self->_offset($from, $to);
return if !$to;
- $from ||= 0;
return unless $to > $from;
+ $from ||= 0;
my $pos = $self->match->lookup($from, $to);
return unless defined $pos;
my $token = KorAP::XML::Tokenizer::Token->new;
- $token->set_id($s->{-id}) if $s && $s->{-id};
$token->set_pos($pos);
- $token->set_hash($s) if $s;
+ if ($s) {
+ $token->set_id($s->{-id}) if $s->{-id};
+ $token->set_hash($s);
+ };
$token;
};
diff --git a/xt/prof-conv-goe-tagged-1.pl b/xt/prof-conv-goe-tagged-1.pl
index 3cc54ad..37c023e 100644
--- a/xt/prof-conv-goe-tagged-1.pl
+++ b/xt/prof-conv-goe-tagged-1.pl
@@ -2,6 +2,9 @@
use strict;
use warnings;
use FindBin;
+BEGIN {
+ unshift @INC, "$FindBin::Bin/../lib";
+};
use File::Basename 'dirname';
use File::Spec::Functions qw/catfile catdir rel2abs/;
@@ -9,10 +12,6 @@
# $ perl -d:NYTProf xt/prof-conv-goe-tagged-1.pl
# $ nytprofhtml --open
-BEGIN {
- unshift @INC, "$FindBin::Bin/../lib";
-};
-
use KorAP::XML::Krill;
use KorAP::XML::Tokenizer;
my $path = catdir(dirname(__FILE__), '..','t','real', 'corpus','GOE-TAGGED','AGA','03828');