Fixed tiny offset issue for documents ending with non-tokens
diff --git a/lib/KorAP/Document.pm b/lib/KorAP/Document.pm
index 59a1022..5b4ef73 100644
--- a/lib/KorAP/Document.pm
+++ b/lib/KorAP/Document.pm
@@ -19,9 +19,9 @@
   my $self = shift;
   my $file = b($self->path . 'data.xml')->slurp;
 
-  state $unable = 'Unable to parse document';
+  my $unable = 'Unable to parse document ' . $self->path; # not "state": it would keep the first call's path
 
-  $self->log->trace('Parse document ' . $self->path);
+  $self->log->debug('Parse document ' . $self->path);
 
   my $dom = Mojo::DOM->new($file);
 
@@ -34,11 +34,11 @@
       $self->corpus_id($1);
     }
     else {
-      croak $unable;
+      croak $unable . ': ID not parseable';
     };
   }
   else {
-    croak $unable;
+    croak $unable . ': No raw_text found or no ID';
   };
 
   # Get primary data
diff --git a/lib/KorAP/Index/Base/Sentences.pm b/lib/KorAP/Index/Base/Sentences.pm
new file mode 100644
index 0000000..16e4673
--- /dev/null
+++ b/lib/KorAP/Index/Base/Sentences.pm
@@ -0,0 +1,29 @@
+package KorAP::Index::Base::Sentences;
+use KorAP::Index::Base;
+
+# Adds a '<>:s' term for every base/sentences span and records their count.
+sub parse {
+  my ($self) = @_;
+  my $sentence_count = 0;
+
+  # Callback receives the stream and one span; the term goes to p_start
+  my $index_sentence = sub {
+    my ($stream, $span) = @_;
+    $stream->pos($span->p_start)->add(
+      term    => '<>:s',
+      o_start => $span->o_start,
+      o_end   => $span->o_end,
+      p_end   => $span->p_end
+    );
+    $sentence_count++;
+  };
+
+  # A false result from add_spandata aborts with an empty return
+  $$self->add_spandata(
+    foundry => 'base', layer => 'sentences', cb => $index_sentence
+  ) or return;
+  $$self->stream->add_meta('sentences', '<i>' . $sentence_count);
+  return 1;
+};
+
+1;
diff --git a/lib/KorAP/Tokenizer.pm b/lib/KorAP/Tokenizer.pm
index 3b83b5f..3f2c7f2 100644
--- a/lib/KorAP/Tokenizer.pm
+++ b/lib/KorAP/Tokenizer.pm
@@ -49,6 +49,8 @@
       my $to = $span->attr('to');
       my $token = $doc->primary->data($from, $to);
 
+      # warn 'Has ' . $from . '->' . $to . "($old)";
+
       unless (defined $token) {
 	  $self->log->error("Unable to find substring [$from-$to] in $path");
 	  return;
@@ -89,7 +91,7 @@
   # Add token count
   $mtts->add_meta('tokens', '<i>' . $have);
 
-  $range->gap($old, $doc->primary->data_length, $have-1) if $doc->primary->data_length >= $old;
+  $range->gap($old, $doc->primary->data_length + 1, $have-1) if $doc->primary->data_length >= ($old - 1);
 
   # Add info
   $self->stream($mtts);
diff --git a/lib/KorAP/Tokenizer/Range.pm b/lib/KorAP/Tokenizer/Range.pm
index 110fbc6..d129dc3 100644
--- a/lib/KorAP/Tokenizer/Range.pm
+++ b/lib/KorAP/Tokenizer/Range.pm
@@ -11,11 +11,13 @@
 
 sub set {
   my $self = shift;
+  # warn 'Set range: ', join(',', @_);
   $$self->set_range(@_);
 };
 
 sub gap {
   my $self = shift;
+  # warn 'Gap range: ', join(',', @_);
   $$self->set_range($_[0], $_[1], '!' . ($_[2] - 1) . ':' . $_[2]);
 };
 
@@ -31,6 +33,7 @@
   my $found = $$self->lookup( $offset );
   unless (defined $found) {
     warn 'There is no value for ', $offset;
+    return;
   };
 
   if ($found =~ /!(\d+):(\d+)$/) {
diff --git a/script/prepare_index.pl b/script/prepare_index.pl
index 8acdc80..17a5470 100644
--- a/script/prepare_index.pl
+++ b/script/prepare_index.pl
@@ -105,7 +105,6 @@
 my $doc = KorAP::Document->new( path => $input );
 $doc->parse;
 
-
 my ($token_base_foundry, $token_base_layer) = (qw/OpenNLP Tokens/);
 if ($token_base) {
   ($token_base_foundry, $token_base_layer) = split /#/, $token_base;