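Clean up primary data handling

Replace the numeric slot indices in KorAP::XML::Document::Primary with
named constants (DATA, BYTES, XIP). data_length() no longer caches its
result, so the offset caches used by bytes2chars() and xip2chars() move
from slots 2 and 3 to slots 1 and 2.

_calc_chars() is now called as a plain function and reads its argument
through $_[0] instead of unpacking ($self, $text). xip2chars() passes
it the result of a non-destructive s///r substitution rather than a
modified copy held in a temporary buffer.

Also drop the Carp imports from Primary.pm and Tokenizer/Tokens.pm,
remove a commented-out path assignment in Tokens.pm, and replace tab
and over-deep indentation with two-space indentation in Primary.pm,
Meta/Base.pm, Tokenizer/Spans.pm and Tokenizer/Tokens.pm.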

Change-Id: Id1188637806ba5fd29294b0aa01cbdefb7b3b62e
diff --git a/lib/KorAP/XML/Document/Primary.pm b/lib/KorAP/XML/Document/Primary.pm
index bb7e9f5..e760873 100644
--- a/lib/KorAP/XML/Document/Primary.pm
+++ b/lib/KorAP/XML/Document/Primary.pm
@@ -1,12 +1,17 @@
 package KorAP::XML::Document::Primary;
 use strict;
 use warnings;
-use Carp qw/croak carp/;
 use Mojo::ByteStream 'b';
 use feature 'state';
 use Packed::Array;
 use utf8;
 
+use constant {
+  DATA => 0,
+  BYTES => 1,
+  XIP => 2
+};
+
 # our $QUOT = b("„“”")->decode;
 our $QUOT_RE = qr/[„“”]/;
 
@@ -22,15 +27,15 @@
   my ($self, $from, $to) = @_;
 
   # Get range data from primary
-  return substr($self->[0], $from) if $from && !$to;
+  return substr($self->[DATA], $from) if $from && !$to;
 
   # Get full data
-  return $self->[0] unless $to;
+  return $self->[DATA] unless $to;
 
   return if $to > $self->data_length;
 
   # Return substring
-  return (substr($self->[0], $from, $to - $from) // undef);
+  return (substr($self->[DATA], $from, $to - $from) // undef);
 };
 
 
@@ -42,14 +47,14 @@
 
   # Only start offset defined
   if ($from && !$to) {
-    return b(substr($self->[0], $from))->decode;
+    return b(substr($self->[DATA], $from))->decode;
   };
 
   # No offset defined
-  return b($self->[0])->decode unless $to;
+  return b($self->[DATA])->decode unless $to;
 
   # Get the substring based on offsets
-  my $substr = substr($self->[0], $from, $to - $from);
+  my $substr = substr($self->[DATA], $from, $to - $from);
 
   # Decode
   return b($substr)->decode if defined $substr;
@@ -61,42 +66,34 @@
 
 # The length of the primary text in characters
 sub data_length {
-  my $self = shift;
-  return $self->[1] if $self->[1];
-  $self->[1] = length($self->[0]);
-  return $self->[1];
+  length($_[0]->[DATA]);
 };
 
 
 # Get correct offset
 sub bytes2chars {
   my $self = shift;
-  unless ($self->[2]) {
-    $self->[2] = $self->_calc_chars($self->[0]);
+  unless ($self->[BYTES]) {
+    $self->[BYTES] = _calc_chars($self->[DATA]);
   };
-  return $self->[2]->[shift];
+  return $self->[BYTES]->[shift];
 };
 
 
 # Get correct offset
 sub xip2chars {
   my $self = shift;
-  unless ($self->[3]) {
-    my $buffer = $self->[0];
-
+  unless ($self->[XIP]) {
     # Hacky work around: replace fancy quotation marks for XIP
-    $buffer =~ s{$QUOT_RE}{"}g;
-
-    $self->[3] = $self->_calc_chars($buffer);
+    $self->[XIP] = _calc_chars($self->[DATA] =~ s{$QUOT_RE}{"}gr);
   };
-  return $self->[3]->[shift];
+  return $self->[XIP]->[shift];
 };
 
 
 # Calculate character offsets
 sub _calc_chars {
   use bytes;
-  my ($self, $text) = @_;
 
   tie my @array, 'Packed::Array';
 
@@ -107,14 +104,14 @@
   my $c;
 
   # Init array
-  my $l = length($text);
+  my $l = length($_[0]);
   $array[$l-1] = 0;
 
   # Iterate over every character
   while ($i <= $l) {
 
     # Get actual character
-    $c = substr($text, $i, 1);
+    $c = substr($_[0], $i, 1);
 
     # store character position
     $array[$i++] = $j;
@@ -123,16 +120,16 @@
     if (ord($c & $leading) && ord($c & $start)) {
 
       # Get the next byte - expecting a following character
-      $c = substr($text, $i, 1);
+      $c = substr($_[0], $i, 1);
 
       # Character is part of a multibyte
       while (ord($c & $leading)) {
 
-	# Set count
-	$array[$i] = (ord($c & $start)) ? ++$j : $j;
+        # Set count
+        $array[$i] = (ord($c & $start)) ? ++$j : $j;
 
-	# Get next character
-	$c = substr($text, ++$i, 1);
+        # Get next character
+        $c = substr($_[0], ++$i, 1);
       };
     };
 
diff --git a/lib/KorAP/XML/Meta/Base.pm b/lib/KorAP/XML/Meta/Base.pm
index de9ad44..4a61d87 100644
--- a/lib/KorAP/XML/Meta/Base.pm
+++ b/lib/KorAP/XML/Meta/Base.pm
@@ -55,6 +55,7 @@
   return join(' ', @{$self->{$_[0]} // []});
 };
 
+
 # Check if cached
 # Cache differently!
 sub is_cached {
@@ -93,7 +94,7 @@
     if ($self->doc_sigle) {
       $new{doc_sigle} = $self->doc_sigle;
       if ($self->text_sigle) {
-	$new{text_sigle} = $self->text_sigle;
+        $new{text_sigle} = $self->text_sigle;
       }
     }
   };
diff --git a/lib/KorAP/XML/Tokenizer/Spans.pm b/lib/KorAP/XML/Tokenizer/Spans.pm
index fa91bc5..2722aec 100644
--- a/lib/KorAP/XML/Tokenizer/Spans.pm
+++ b/lib/KorAP/XML/Tokenizer/Spans.pm
@@ -48,10 +48,10 @@
   return if $error;
 
   if (ref $spans && $spans->{span}) {
-      $spans = $spans->{span};
+    $spans = $spans->{span};
   }
   else {
-      return [];
+    return [];
   };
 
 
diff --git a/lib/KorAP/XML/Tokenizer/Tokens.pm b/lib/KorAP/XML/Tokenizer/Tokens.pm
index 3881979..50367d4 100644
--- a/lib/KorAP/XML/Tokenizer/Tokens.pm
+++ b/lib/KorAP/XML/Tokenizer/Tokens.pm
@@ -3,7 +3,6 @@
 use Mojo::ByteStream 'b';
 use Mojo::File;
 use KorAP::XML::Tokenizer::Token;
-use Carp qw/croak carp/;
 use File::Spec::Functions qw/catdir catfile/;
 use XML::Fast;
 use Try::Tiny;
@@ -15,7 +14,6 @@
 sub parse {
   my $self = shift;
 
-  # my $path = $self->path . $self->foundry . '/' . $self->layer . '.xml';
   my $path = catfile($self->path, $self->foundry, $self->layer . '.xml');
 
   # Legacy data support
@@ -38,32 +36,31 @@
   # Bug workaround
   if ($self->foundry eq 'glemm') {
     if (index($file, "</span\n") > 0 || index($file, "</span\r") > 0) {
-	$file =~ s!</span[\n\r]!</span>\n!g;
+      $file =~ s!</span[\n\r]!</span>\n!g;
     };
   };
 
-#  my $spans = Mojo::DOM->new($file);
-#  $spans->xml(1);
+  #  my $spans = Mojo::DOM->new($file);
+  #  $spans->xml(1);
 
   my ($spans, $error);
   try {
-      local $SIG{__WARN__} = sub {
-	  $error = 1;
-      };
-      $spans = xml2hash($file, text => '#text', attr => '-')->{layer}->{spanList};
-  }
-  catch  {
-      $self->log->warn('Span error in ' . $path . ($_ ? ': ' . $_ : ''));
+    local $SIG{__WARN__} = sub {
       $error = 1;
+    };
+    $spans = xml2hash($file, text => '#text', attr => '-')->{layer}->{spanList};
+  } catch  {
+    $self->log->warn('Span error in ' . $path . ($_ ? ': ' . $_ : ''));
+    $error = 1;
   };
 
   return if $error;
 
   if (ref $spans && $spans->{span}) {
-      $spans = $spans->{span};
+    $spans = $spans->{span};
   }
   else {
-      return [];
+    return [];
   };
 
   $spans = [$spans] if ref $spans ne 'ARRAY';