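Improve error handling

The tokenizer now records the reason for a failed parse in a new
'error' attribute, and batch file processing logs that reason
instead of a fixed "no working base tokenization" message.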

Change-Id: If55488859011ac541412d1132d1f279e50178605
diff --git a/lib/KorAP/XML/Batch/File.pm b/lib/KorAP/XML/Batch/File.pm
index 33ef565..2dd5171 100644
--- a/lib/KorAP/XML/Batch/File.pm
+++ b/lib/KorAP/XML/Batch/File.pm
@@ -62,7 +62,7 @@
 
   # Unable to process base tokenization
   unless ($tokens->parse) {
-    $self->{log}->error(($output // $input) . " can't be processed - no working base tokenization");
+    $self->{log}->error(($output // $input) . " can't be processed - " . $tokens->error);
     return;
   };
 
diff --git a/lib/KorAP/XML/Tokenizer.pm b/lib/KorAP/XML/Tokenizer.pm
index a8ebb8c..92c8d1f 100644
--- a/lib/KorAP/XML/Tokenizer.pm
+++ b/lib/KorAP/XML/Tokenizer.pm
@@ -36,6 +36,8 @@
 has layer => 'Tokens';
 has non_word_tokens => 0;
 
+has 'error';
+
 has log => sub {
   if (Log::Log4perl->initialized()) {
     state $log = Log::Log4perl->get_logger(__PACKAGE__);
@@ -60,7 +62,8 @@
   my $path = $self->path . lc($self->foundry) . '/' . $layer_file;
 
   unless (-e $path) {
-    $self->log->warn('Unable to load base tokenization: ' . $path);
+    $self->error('Unable to load base tokenization: ' . $path);
+    $self->log->warn($self->error);
     return;
   };
 
@@ -93,8 +96,8 @@
     )->{layer}->{spanList};
   } catch {
 
-    $self->log->warn('Token error in ' . $path . ($_ ? ': ' . $_ : ''));
-    $error = 1;
+    $self->error('Token error in ' . $path . ($_ ? ': ' . $_ : ''));
+    $self->log->warn($self->error);
   };
 
   return if $error;
@@ -124,9 +127,8 @@
 
     # Token is undefined
     unless (defined $token) {
+      $self->error("Tokenization with failing offsets in $path");
       $self->log->warn("Unable to find substring [$from-$to] in $path");
-      $self->log->error("Tokenization with failing offsets in $path");
-      # next;
       return;
     };
 
@@ -192,7 +194,10 @@
     $have++;
   };
 
-  return if $have == 0;
+  if ($have == 0) {
+    $self->error('No tokens found in ' . $path);
+    return;
+  };
 
   # Add token count
   $mtts->add_meta('tokens', '<i>' . $have);
diff --git a/t/script/archive.t b/t/script/archive.t
index 598c8e5..93edc8d 100644
--- a/t/script/archive.t
+++ b/t/script/archive.t
@@ -170,9 +170,9 @@
 # Test without compression
 {
   local $SIG{__WARN__} = sub {};
-  my $out = stderr_from(sub { system($call); });
+  my $out = combined_from(sub { system($call); });
 
-  like($out, qr!no working base tokenization!s, $call);
+  like($out, qr!No tokens found!s, $call);
 };
 
 my $input_quotes = catfile($f, '..', 'corpus', 'archive_quotes.zip');