Optimize performance slightly by reducing calls to _offset()

Change-Id: Ieda9628b6903eea159216a7d6b0125b716ad2ab6
diff --git a/Changes b/Changes
index eee7df7..a28b5e6 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,4 @@
-0.41 2020-08-05
+0.41 2020-08-06
         - Added support for RWK annotations.
         - Improved DGD support.
         - Fixed bug in RWK support that broke on
@@ -8,6 +8,7 @@
         - Optimizations and cleanup based on profiling.
         - Remove MultiTerm->add() in favor of
           MultiTerm->add_by_term().
+        - Optimization by reducing calls to _offset().
 
 0.40 2020-03-03
         - Fixed XIP parser.
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index d548d37..2dfa32e 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -186,6 +186,7 @@
 };
 
 
+# Start token parsing
 sub tokenize {
   my $self = shift;
   my ($token_foundry, $token_layer) = @_;
@@ -247,10 +248,14 @@
   $_[0]->{pd};
 };
 
+
+# Get the object stored in the "meta" slot
 sub meta {
   return $_[0]->{meta};
 };
 
+
+# Serialize to hash
 sub to_hash {
   my $self = shift;
 
diff --git a/lib/KorAP/XML/Tokenizer/Units.pm b/lib/KorAP/XML/Tokenizer/Units.pm
index e2ec7cf..322df2c 100644
--- a/lib/KorAP/XML/Tokenizer/Units.pm
+++ b/lib/KorAP/XML/Tokenizer/Units.pm
@@ -1,23 +1,165 @@
 package KorAP::XML::Tokenizer::Units;
+use strict;
+use warnings;
 use KorAP::XML::Tokenizer::Span;
 use KorAP::XML::Tokenizer::Token;
 
-# TODO:
-#   Don't use Mojo::Base! - "encodings" is called too often
-use Mojo::Base -base;
-
-has [qw/path foundry layer match range primary stream/];
-has 'should' => 0;
-has 'have' => 0;
-has 'encoding' => 'utf-8';
-
 use constant DEBUG => 0;
 
+
+# Construct a new units object from a key/value list (NOTE(review): a lone hashref is not unpacked)
+sub new {
+  my $class = shift;
+  my $self = bless {@_}, $class;
+
+  $self->{should} //= 0;
+  $self->{have} //= 0;
+
+  # Run the encoding setter (default 'utf-8') so that {_offset} is initialised
+  $self->encoding(
+    $self->{encoding} // 'utf-8'
+  );
+  return $self;
+};
+
+
+# Get "should", or set it when a defined value is passed (the setter returns the object)
+sub should {
+  if (defined $_[1]) {
+    $_[0]->{should} = $_[1];
+    return $_[0];
+  };
+  $_[0]->{should};
+};
+
+
+# Get "have", or set it when a defined value is passed (the setter returns the object)
+sub have {
+  if (defined $_[1]) {
+    $_[0]->{have} = $_[1];
+    return $_[0];
+  };
+  $_[0]->{have};
+};
+
+
+# Get or set encoding; setting it also chooses the offset converter kept in {_offset}
+sub encoding {
+
+  # Setter: a defined argument was passed (after the shift below, $_[0] is the new encoding)
+  if (defined $_[1]) {
+    my $self = shift;
+    $self->{encoding} = $_[0];
+
+    # 'bytes': map both offsets through the primary's bytes2chars()
+    if ($_[0] eq 'bytes') {
+      $self->{_offset} = sub {
+        my ($self, $from, $to) = @_;
+        my $p = $self->primary;
+        $from = $p->bytes2chars($from);
+        $to = $p->bytes2chars($to);
+        return ($from, $to);
+      }
+    }
+
+    # 'xip': map both offsets through the primary's xip2chars()
+    elsif ($_[0] eq 'xip') {
+      $self->{_offset} = sub {
+        my ($self, $from, $to) = @_;
+        my $p = $self->primary;
+        $from = $p->xip2chars($from);
+        $to = $p->xip2chars($to);
+        return ($from, $to);
+      }
+    }
+
+    # Anything else (including 'utf-8'): no converter is installed
+    else {
+      $self->{_offset} = undef;
+    }
+    return $self;
+  };
+
+  # Getter: return the stored encoding
+  $_[0]->{encoding};
+};
+
+
+# Get path, or set it when any argument is passed (even undef); the setter returns the object
+sub path {
+  if (@_ == 1) {
+    return $_[0]->{path};
+  };
+  $_[0]->{path} = $_[1];
+  return $_[0];
+};
+
+# Get foundry, or set it when any argument is passed (even undef); the setter returns the object
+sub foundry {
+  if (@_ == 1) {
+    return $_[0]->{foundry};
+  };
+  $_[0]->{foundry} = $_[1];
+  return $_[0];
+};
+
+
+# Get layer, or set it when any argument is passed (even undef); the setter returns the object
+sub layer {
+  if (@_ == 1) {
+    return $_[0]->{layer};
+  };
+  $_[0]->{layer} = $_[1];
+  return $_[0];
+};
+
+
+# Get match, or set it when a defined value is passed (the setter returns the object)
+sub match {
+  if (defined $_[1]) {
+    $_[0]->{match} = $_[1];
+    return $_[0];
+  };
+  $_[0]->{match};
+};
+
+
+# Get range, or set it when a defined value is passed (the setter returns the object)
+sub range {
+  if (defined $_[1]) {
+    $_[0]->{range} = $_[1];
+    return $_[0];
+  };
+  $_[0]->{range};
+};
+
+
+# Get primary, or set it when a defined value is passed (the setter returns the object)
+sub primary {
+  if (defined $_[1]) {
+    $_[0]->{primary} = $_[1];
+    return $_[0];
+  };
+  $_[0]->{primary};
+};
+
+
+# Get stream, or set it when a defined value is passed (the setter returns the object)
+sub stream {
+  if (defined $_[1]) {
+    $_[0]->{stream} = $_[1];
+    return $_[0];
+  };
+  $_[0]->{stream};
+};
+
+
+# Create new span
 sub span {
   my $self = shift;
   my ($from, $to, $s) = @_;
 
-  ($from, $to) = $self->_offset($from, $to);
+  ($from, $to) = $self->{_offset}->($self, $from, $to) if $self->{_offset};
 
   # return if !$to;
   $to   //= 0;
@@ -120,10 +262,12 @@
   $span;
 };
 
+
+# Create new token
 sub token {
   my ($self, $from, $to, $s) = @_;
 
-  ($from, $to) = $self->_offset($from, $to);
+  ($from, $to) = $self->{_offset}->($self, $from, $to) if $self->{_offset};
 
   return if !$to;
   return unless $to > $from;
@@ -145,25 +289,4 @@
 };
 
 
-sub _offset {
-  my $self = shift;
-  return @_ if ($self->encoding eq 'utf-8' || !$self->encoding);
-
-  my ($from, $to) = @_;
-
-  my $p = $self->primary;
-  if ($self->encoding eq 'bytes') {
-    $from = $p->bytes2chars($from);
-    $to = $p->bytes2chars($to);
-  }
-
-  # This is legacy treating of bytes2chars
-  elsif ($self->encoding eq 'xip') {
-    $from = $p->xip2chars($from);
-    $to = $p->xip2chars($to);
-  };
-
-  ($from, $to);
-};
-
 1;