Fixed gap behind last token and <base/s:t> length
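
The '<>:base/s:t' span and the trailing gap now carry the full token
count ($have) as their end position instead of $have - 1, matching the
gap registered behind the last token. A minimal sketch of the intended
invariant, assuming the serialized term layout
'<>:TERM$<b>pti<i>o_start<i>o_end<i>p_end' seen in the updated tests
(illustration only, not part of the indexer):

  # Sample term and token count taken from the updated tests
  my $term   = '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0';
  my $tokens = 18;                     # from '-:tokens$<i>18'
  # Pick the third <i> value, i.e. p_end
  my ($p_end) = $term =~ /<i>\d+<i>\d+<i>(\d+)<b>/;
  warn 'p_end should equal the token count' unless $p_end == $tokens;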

Change-Id: I7b8d9cc90280c29d3ba90a8f97ddb63315dc8b0c
diff --git a/Changes b/Changes
index a351a92..9acb4a1 100644
--- a/Changes
+++ b/Changes
@@ -1,10 +1,12 @@
-0.40 2020-03-01
+0.40 2020-03-03
         - Fixed XIP parser.
         - Added example corpus of the
           Redewiedergabe-Korpus.
         - Fixed span offset bug.
         - Fixed milestones behind the last
           token bug.
+        - Fixed gap behind last token bug.
+        - Fixed <base/s:t> length.
 
 0.39 2020-02-19
         - Added Talismane support.
diff --git a/lib/KorAP/XML/Tokenizer.pm b/lib/KorAP/XML/Tokenizer.pm
index f9595dd..59ae17f 100644
--- a/lib/KorAP/XML/Tokenizer.pm
+++ b/lib/KorAP/XML/Tokenizer.pm
@@ -138,18 +138,19 @@
     $should++;
 
     # Ignore non-word, non-number, and non-verbal tokens per default
+    # Codepoint 9646 (U+25AE) marks the musical pause used in speech corpora
     if ($self->non_verbal_tokens && ord($token) == 9646) {
       # Non-verbal token
     } elsif (!$self->non_word_tokens && $token !~ /[\w\d]/) {
       # TODO: Recognize punctuations!
-      #	if ($mtt) {
-      #	  my $term = [$token, $from, $to];
-      #	  $mtt->add(
-      #	    term => '.>:'.$token,
-      #	    payload => '<i>'.$from . '<i>' . $to . '<b>' . $distance++
-      #	  );
-      #	  push(@non_word_tokens, $term);
-      #	}
+      #  if ($mtt) {
+      #    my $term = [$token, $from, $to];
+      #    $mtt->add(
+      #      term => '.>:'.$token,
+      #      payload => '<i>'.$from . '<i>' . $to . '<b>' . $distance++
+      #    );
+      #    push(@non_word_tokens, $term);
+      #  }
       next;
     };
 
@@ -157,12 +158,12 @@
     $mtt = $mtts->add;
 
     #      while (scalar @non_word_tokens) {
-    #	local $_ = shift @non_word_tokens;
-    #	$mtt->add(
-    #	  term => '.<:' . $_->[0],
-    #	  payload => '<i>' . $_->[1] . '<i>' . $_->[2] . '<b>' . --$distance
-    #	);
-    #	$distance = 0;
+    #  local $_ = shift @non_word_tokens;
+    #  $mtt->add(
+    #    term => '.<:' . $_->[0],
+    #    payload => '<i>' . $_->[1] . '<i>' . $_->[2] . '<b>' . --$distance
+    #  );
+    #  $distance = 0;
     #      };
 
     # Add gap for later finding matching positions before or after
@@ -208,15 +209,15 @@
   $mtts->pos(0)->add(
     term => '<>:base/s:t',
     o_start => 0,
-    p_end => ($have - 1),
+    p_end => $have,
     o_end => $doc->primary->data_length,
     payload => '<b>0',
     pti => 64
   );
 
-  # Create a gap for the 
+  # Create a gap behind the last token
   if ($doc->primary->data_length >= ($old - 1)) {
-    $range->gap($old, $doc->primary->data_length + 1, $have-1)
+    $range->gap($old, $doc->primary->data_length + 1, $have)
   };
 
   # Add info
@@ -256,27 +257,27 @@
       my $from = $-[1];
       my $to = $+[1];
       $mtt->add(
-	term => 'i^1:' . substr($os, $from, $from + $to),
-	o_start => $from + $o_start,
-	o_end => $to + $o_start
+        term => 'i^1:' . substr($os, $from, $from + $to),
+        o_start => $from + $o_start,
+        o_end => $to + $o_start
       ) unless $to - $from == $l;
     };
     while ($s =~ /(0+)[^0]/g) {
       my $from = $-[1];
       my $to = $+[1];
       $mtt->add(
-	term => 'i^2:' . substr($os, $from, $from + $to),
-	o_start => $from + $o_start,
-	o_end => $to + $o_start
+        term => 'i^2:' . substr($os, $from, $from + $to),
+        o_start => $from + $o_start,
+        o_end => $to + $o_start
       ) unless $to - $from == $l;
     };
     while ($s =~ /(#)/g) {
       my $from = $-[1];
       my $to = $+[1];
       $mtt->add(
-	term => 'i^3:' . substr($os, $from, $from + $to),
-	o_start => $from + $o_start,
-	o_end => $to + $o_start
+        term => 'i^3:' . substr($os, $from, $from + $to),
+        o_start => $from + $o_start,
+        o_end => $to + $o_start
       ) unless $to - $from == $l;
     };
   };
@@ -772,10 +773,10 @@
       my ($stream, $span) = @_;
       my $mtt = $stream->pos($span->p_start);
       $mtt->add(
-	term    => '<>:s',
-	o_start => $span->o_start,
-	o_end   => $span->o_end,
-	p_end   => $span->p_end
+        term    => '<>:s',
+        o_start => $span->o_start,
+        o_end   => $span->o_end,
+        p_end   => $span->p_end
       );
     }
   );
@@ -804,9 +805,9 @@
 
       # syntax
       if ((my $found = $content->at('f[name="pos"]')) && ($found = $found->text)) {
-	$mtt->add(
-	  term => 'cnx_syn:' . $found
-	);
+        $mtt->add(
+          term => 'cnx_syn:' . $found
+        );
       };
     });
 
diff --git a/lib/KorAP/XML/Tokenizer/Units.pm b/lib/KorAP/XML/Tokenizer/Units.pm
index 22f50d5..3ace9eb 100644
--- a/lib/KorAP/XML/Tokenizer/Units.pm
+++ b/lib/KorAP/XML/Tokenizer/Units.pm
@@ -70,6 +70,10 @@
     unless (defined $end) {
       $end = $self->range->before($span->o_end);
 
+      if (DEBUG && $span->o_end == 196) {
+        warn 'SPAN ends at ' . $span->o_end . ' and maps to ' . $end;
+      };
+
       unless (defined $end) {
         if (DEBUG) {
           warn $span->id . ' has no valid end';
diff --git a/t/annotation/base_paragraphs.t b/t/annotation/base_paragraphs.t
index e5d0c6b..9f02f27 100644
--- a/t/annotation/base_paragraphs.t
+++ b/t/annotation/base_paragraphs.t
@@ -17,8 +17,8 @@
 like($data->{foundries}, qr!base/paragraphs!, 'data');
 is($data->{stream}->[0]->[0], '-:base/paragraphs$<i>1', 'Number of paragraphs');
 is($data->{stream}->[0]->[1], '-:tokens$<i>18', 'Number of tokens');
-is($data->{stream}->[0]->[2], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'Text');
-is($data->{stream}->[0]->[3], '<>:base/s:p$<b>64<i>0<i>129<i>17<b>1', 'Paragraph');
+is($data->{stream}->[0]->[2], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'Text');
+is($data->{stream}->[0]->[3], '<>:base/s:p$<b>64<i>0<i>129<i>18<b>1', 'Paragraph');
 is($data->{stream}->[0]->[4], '_0$<i>0<i>3', 'Position');
 
 done_testing;
diff --git a/t/annotation/base_sentences.t b/t/annotation/base_sentences.t
index 69bb313..8096e5b 100644
--- a/t/annotation/base_sentences.t
+++ b/t/annotation/base_sentences.t
@@ -17,8 +17,8 @@
 like($data->{foundries}, qr!base/sentences!, 'data');
 is($data->{stream}->[0]->[0], '-:base/sentences$<i>1', 'Number of paragraphs');
 is($data->{stream}->[0]->[1], '-:tokens$<i>18', 'Number of tokens');
-is($data->{stream}->[0]->[2], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'Text');
-is($data->{stream}->[0]->[3], '<>:base/s:s$<b>64<i>0<i>129<i>17<b>2', 'Sentence');
+is($data->{stream}->[0]->[2], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'Text');
+is($data->{stream}->[0]->[3], '<>:base/s:s$<b>64<i>0<i>129<i>18<b>2', 'Sentence');
 is($data->{stream}->[0]->[4], '_0$<i>0<i>3', 'Position');
 
 done_testing;
diff --git a/t/annotation/connexor_morpho.t b/t/annotation/connexor_morpho.t
index de0f704..d426571 100644
--- a/t/annotation/connexor_morpho.t
+++ b/t/annotation/connexor_morpho.t
@@ -14,7 +14,7 @@
 
 my $data = $tokens->to_data->{data};
 like($data->{foundries}, qr!connexor/morpho!, 'data');
-is($data->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'Text boundary');
+is($data->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'Text boundary');
 is($data->{stream}->[0]->[2], '_0$<i>0<i>3', 'Position');
 is($data->{stream}->[1]->[1], 'cnx/l:letzt', 'Lemma');
 is($data->{stream}->[1]->[2], 'cnx/p:A', 'POS');
diff --git a/t/annotation/connexor_phrase.t b/t/annotation/connexor_phrase.t
index a2cf6e3..f3df759 100644
--- a/t/annotation/connexor_phrase.t
+++ b/t/annotation/connexor_phrase.t
@@ -15,7 +15,7 @@
 my $data = $tokens->to_data->{data};
 
 like($data->{foundries}, qr!connexor/phrase!, 'data');
-is($data->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'Text boundary');
+is($data->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'Text boundary');
 is($data->{stream}->[1]->[0], '<>:cnx/c:np$<b>64<i>4<i>30<i>4<b>0', 'Noun phrase');
 
 done_testing;
diff --git a/t/annotation/connexor_sentences.t b/t/annotation/connexor_sentences.t
index 0c2891c..37db3fb 100644
--- a/t/annotation/connexor_sentences.t
+++ b/t/annotation/connexor_sentences.t
@@ -42,8 +42,8 @@
 like($data->{foundries}, qr!connexor/sentences!, 'data');
 is($data->{stream}->[0]->[0], '-:cnx/sentences$<i>1', 'Number of paragraphs');
 is($data->{stream}->[0]->[1], '-:tokens$<i>18', 'Number of tokens');
-is($data->{stream}->[0]->[2], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'Text boundary');
-is($data->{stream}->[0]->[3], '<>:cnx/s:s$<b>64<i>0<i>129<i>17<b>0', 'Sentence');
+is($data->{stream}->[0]->[2], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'Text boundary');
+is($data->{stream}->[0]->[3], '<>:cnx/s:s$<b>64<i>0<i>129<i>18<b>0', 'Sentence');
 is($data->{stream}->[0]->[4], '_0$<i>0<i>3', 'Position');
 
 done_testing;
diff --git a/t/annotation/corenlp_morpho.t b/t/annotation/corenlp_morpho.t
index ec6e518..cefc94e 100644
--- a/t/annotation/corenlp_morpho.t
+++ b/t/annotation/corenlp_morpho.t
@@ -15,7 +15,7 @@
 my $data = $tokens->to_data->{data};
 like($data->{foundries}, qr!corenlp/morpho!, 'data');
 like($data->{layerInfos}, qr!corenlp/p=tokens!, 'data');
-is($data->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'Text boundary');
+is($data->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'Text boundary');
 is($data->{stream}->[0]->[3], 'corenlp/p:APPRART', 'POS');
 is($data->{stream}->[1]->[1], 'corenlp/p:ADJ', 'POS');
 is($data->{stream}->[2]->[1], 'corenlp/p:ADJA', 'POS');
diff --git a/t/annotation/corenlp_sentences.t b/t/annotation/corenlp_sentences.t
index b9e9b75..09611c6 100644
--- a/t/annotation/corenlp_sentences.t
+++ b/t/annotation/corenlp_sentences.t
@@ -17,8 +17,8 @@
 like($data->{foundries}, qr!corenlp/sentences!, 'data');
 is($data->{stream}->[0]->[0], '-:corenlp/sentences$<i>1', 'Number of paragraphs');
 is($data->{stream}->[0]->[1], '-:tokens$<i>18', 'Number of tokens');
-is($data->{stream}->[0]->[2], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'Text boundary');
-is($data->{stream}->[0]->[3], '<>:corenlp/s:s$<b>64<i>0<i>129<i>17<b>0', 'Text');
+is($data->{stream}->[0]->[2], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'Text boundary');
+is($data->{stream}->[0]->[3], '<>:corenlp/s:s$<b>64<i>0<i>129<i>18<b>0', 'Text');
 is($data->{stream}->[0]->[4], '_0$<i>0<i>3', 'Position');
 is($data->{stream}->[-1]->[0], '_17$<i>124<i>128', 'Position');
 
diff --git a/t/annotation/dereko_struct.t b/t/annotation/dereko_struct.t
index 4e00bb3..921b58e 100644
--- a/t/annotation/dereko_struct.t
+++ b/t/annotation/dereko_struct.t
@@ -23,7 +23,7 @@
    'Empty element');
 
 
-is($data->{stream}->[0]->[5], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'Text boundary');
+is($data->{stream}->[0]->[5], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'Text boundary');
 
 # Attributes:
 is($data->{stream}->[0]->[11],
diff --git a/t/annotation/glemm_morpho.t b/t/annotation/glemm_morpho.t
index 208d9d1..0ac9705 100644
--- a/t/annotation/glemm_morpho.t
+++ b/t/annotation/glemm_morpho.t
@@ -17,7 +17,7 @@
 like($data->{foundries}, qr!glemm/morpho!, 'data');
 like($data->{layerInfos}, qr!glemm/l=tokens!, 'data');
 
-is($data->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'Text boundary');
+is($data->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'Text boundary');
 is($data->{stream}->[0]->[3], 'glemm/l:__zu', 'Lemma');
 is($data->{stream}->[1]->[1], 'glemm/l:__letzt-', 'Lemma');
 
diff --git a/t/annotation/malt_dependency.t b/t/annotation/malt_dependency.t
index f58204f..515ae8f 100644
--- a/t/annotation/malt_dependency.t
+++ b/t/annotation/malt_dependency.t
@@ -51,7 +51,7 @@
 is($stream->[0]->[5], '>:malt/d:ROOT$<b>33<i>0<i>49<i>0<i>6', 'Term2Term relation');
 
 # Text element
-is($stream->[0]->[4], '<>:base/s:t$<b>64<i>0<i>238<i>30<b>0', 'Text element');
+is($stream->[0]->[4], '<>:base/s:t$<b>64<i>0<i>238<i>31<b>0', 'Text element');
 
 done_testing;
 __END__
diff --git a/t/annotation/mate_morpho.t b/t/annotation/mate_morpho.t
index 736fd31..9798ba0 100644
--- a/t/annotation/mate_morpho.t
+++ b/t/annotation/mate_morpho.t
@@ -19,7 +19,7 @@
 like($data->{layerInfos}, qr!mate/l=tokens!, 'data');
 like($data->{layerInfos}, qr!mate/m=tokens!, 'data');
 
-is($data->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'Text boundary');
+is($data->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'Text boundary');
 is($data->{stream}->[0]->[4], 'mate/l:zu', 'POS');
 is($data->{stream}->[0]->[5], 'mate/m:case:dat', 'POS');
 is($data->{stream}->[0]->[6], 'mate/m:gender:neut', 'POS');
diff --git a/t/annotation/mate_morpho_attr.t b/t/annotation/mate_morpho_attr.t
index b32c12c..e556d0f 100644
--- a/t/annotation/mate_morpho_attr.t
+++ b/t/annotation/mate_morpho_attr.t
@@ -18,7 +18,7 @@
 like($data->{layerInfos}, qr!mate/p=tokens!, 'data');
 like($data->{layerInfos}, qr!mate/l=tokens!, 'data');
 
-is($data->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'Text boundary');
+is($data->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'Text boundary');
 is($data->{stream}->[0]->[2], '@:gender=neut$<b>16<s>1', 'POS');
 is($data->{stream}->[0]->[3], '@:number=sg$<b>16<s>1', 'POS');
 is($data->{stream}->[0]->[4], '@:case=dat$<b>16<s>1', 'POS');
diff --git a/t/annotation/mdp_dependency.t b/t/annotation/mdp_dependency.t
index de9144c..2fd341f 100644
--- a/t/annotation/mdp_dependency.t
+++ b/t/annotation/mdp_dependency.t
@@ -82,8 +82,8 @@
 is($stream->[0]->[10], '>:mdp/d:ROOT$<b>33<i>0<i>317<i>0<i>40', 'Term-to-Element');
 
 
-is($stream->[-1]->[0], '>:mdp/d:ROOT$<b>33<i>26130<i>26153<i>3553<i>3554', 'Term-to-Element');
-is($stream->[3553]->[1], '<:mdp/d:ROOT$<b>34<i>26130<i>26153<i>3554<i>3553', 'Element-to-Term');
+is($stream->[-1]->[0], '>:mdp/d:ROOT$<b>33<i>26130<i>26153<i>3553<i>3555', 'Term-to-Element');
+is($stream->[3553]->[1], '<:mdp/d:ROOT$<b>34<i>26130<i>26153<i>3555<i>3553', 'Element-to-Term');
 
 done_testing;
 __END__
diff --git a/t/annotation/opennlp_morpho.t b/t/annotation/opennlp_morpho.t
index c8bcbb7..64d9008 100644
--- a/t/annotation/opennlp_morpho.t
+++ b/t/annotation/opennlp_morpho.t
@@ -10,7 +10,7 @@
 
 ok(my $tokens = TestInit::tokens('0001'), 'Parse tokens');
 
-is($tokens->stream->pos(0)->to_string, '[(0-3)-:tokens$<i>18|<>:base/s:t$<b>64<i>0<i>129<i>17<b>0|_0$<i>0<i>3|i:zum|s:Zum]', 'Token is correct');
+is($tokens->stream->pos(0)->to_string, '[(0-3)-:tokens$<i>18|<>:base/s:t$<b>64<i>0<i>129<i>18<b>0|_0$<i>0<i>3|i:zum|s:Zum]', 'Token is correct');
 
 is($tokens->stream->pos(1)->to_string, '[(4-11)_1$<i>4<i>11|i:letzten|s:letzten]', 'Token is correct');
 
diff --git a/t/annotation/opennlp_sentences.t b/t/annotation/opennlp_sentences.t
index 16c8a65..02eac83 100644
--- a/t/annotation/opennlp_sentences.t
+++ b/t/annotation/opennlp_sentences.t
@@ -17,7 +17,7 @@
 like($data->{foundries}, qr!opennlp/sentences!, 'data');
 is($data->{stream}->[0]->[0], '-:opennlp/sentences$<i>1', 'Number of Sentences');
 is($data->{stream}->[0]->[1], '-:tokens$<i>18', 'Number of tokens');
-is($data->{stream}->[0]->[3], '<>:opennlp/s:s$<b>64<i>0<i>129<i>17<b>0', 'Sentence');
+is($data->{stream}->[0]->[3], '<>:opennlp/s:s$<b>64<i>0<i>129<i>18<b>0', 'Sentence');
 is($data->{stream}->[0]->[4], '_0$<i>0<i>3', 'Position');
 
 done_testing;
diff --git a/t/annotation/tt_sentences.t b/t/annotation/tt_sentences.t
index 703f1a6..a7894b8 100644
--- a/t/annotation/tt_sentences.t
+++ b/t/annotation/tt_sentences.t
@@ -19,7 +19,7 @@
 like($data->{foundries}, qr!treetagger/sentences!, 'data');
 is($data->{stream}->[0]->[0], '-:tokens$<i>18', 'Number of tokens');
 is($data->{stream}->[0]->[1], '-:tt/sentences$<i>1', 'Number of paragraphs');
-is($data->{stream}->[0]->[3], '<>:tt/s:s$<b>64<i>0<i>130<i>17<b>0', 'Text');
+is($data->{stream}->[0]->[3], '<>:tt/s:s$<b>64<i>0<i>130<i>18<b>0', 'Text');
 is($data->{stream}->[0]->[4], '_0$<i>0<i>3', 'Position');
 is($data->{stream}->[-1]->[0], '_17$<i>124<i>128', 'Position');
 
diff --git a/t/annotation/xip_constituency.t b/t/annotation/xip_constituency.t
index 026321f..0f8a0b8 100644
--- a/t/annotation/xip_constituency.t
+++ b/t/annotation/xip_constituency.t
@@ -19,8 +19,8 @@
 # The length includes the punct - but that doesn't matter
 is($data->{stream}->[0]->[1], '<>:xip/c:PREP$<b>64<i>0<i>3<i>1<b>3', 'Prep phrase');
 is($data->{stream}->[0]->[2], '<>:xip/c:PP$<b>64<i>0<i>30<i>4<b>2', 'pp phrase');
-is($data->{stream}->[0]->[4], '<>:xip/c:TOP$<b>64<i>0<i>129<i>17<b>0', 'top phrase');
-is($data->{stream}->[0]->[5], '<>:xip/c:MC$<b>64<i>0<i>129<i>17<b>1', 'mc phrase');
+is($data->{stream}->[0]->[4], '<>:xip/c:TOP$<b>64<i>0<i>129<i>18<b>0', 'top phrase');
+is($data->{stream}->[0]->[5], '<>:xip/c:MC$<b>64<i>0<i>129<i>18<b>1', 'mc phrase');
 is($data->{stream}->[-1]->[0], '<>:xip/c:VERB$<b>64<i>124<i>128<i>18<b>4', 'Noun phrase');
 
 
diff --git a/t/annotation/xip_sentences.t b/t/annotation/xip_sentences.t
index 3f2c62c..88b3e21 100644
--- a/t/annotation/xip_sentences.t
+++ b/t/annotation/xip_sentences.t
@@ -18,7 +18,7 @@
 
 is($data->{stream}->[0]->[0], '-:tokens$<i>18', 'Number of tokens');
 is($data->{stream}->[0]->[1], '-:xip/sentences$<i>1', 'Number of paragraphs');
-is($data->{stream}->[0]->[3], '<>:xip/s:s$<b>64<i>0<i>129<i>17<b>0', 'Text');
+is($data->{stream}->[0]->[3], '<>:xip/s:s$<b>64<i>0<i>129<i>18<b>0', 'Text');
 is($data->{stream}->[0]->[4], '_0$<i>0<i>3', 'Position');
 is($data->{stream}->[-1]->[0], '_17$<i>124<i>128', 'Position');
 
diff --git a/t/batch_file.t b/t/batch_file.t
index ed32401..55c4d2c 100644
--- a/t/batch_file.t
+++ b/t/batch_file.t
@@ -35,7 +35,7 @@
 is($json->{data}->{foundries}, '', 'Foundries');
 like($json->{data}->{text}, qr/^Zum letzten kulturellen/, 'Foundries');
 is($json->{data}->{stream}->[0]->[0], '-:tokens$<i>18', 'Tokens');
-is($json->{data}->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'Data');
+is($json->{data}->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'Data');
 
 # Generate with Gzip
 $bf->{gzip} = 1;
@@ -56,7 +56,7 @@
 is($json->{data}->{foundries}, '', 'Foundries');
 like($json->{data}->{text}, qr/^Zum letzten kulturellen/, 'Foundries');
 is($json->{data}->{stream}->[0]->[0], '-:tokens$<i>18', 'Tokens');
-is($json->{data}->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'Data');
+is($json->{data}->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'Data');
 
 # Generate with annotations
 $bf->{gzip} = 0;
@@ -80,7 +80,7 @@
 
 like($json->{data}->{text}, qr/Ende Schuljahr eingestellt wird\.$/, 'Primary text');
 
-is($token->[1], '<>:base/s:t$<b>64<i>0<i>129<i>17<b>0', 'base/s');
+is($token->[1], '<>:base/s:t$<b>64<i>0<i>129<i>18<b>0', 'base/s');
 is($token->[2], '_0$<i>0<i>3', 'position');
 is($token->[3], 'corenlp/p:APPRART', 'corenlp');
 is($token->[5], 'opennlp/p:APPRART', 'opennlp');
diff --git a/t/real/goethe-2.t b/t/real/goethe-2.t
index ff3cba4..691889d 100644
--- a/t/real/goethe-2.t
+++ b/t/real/goethe-2.t
@@ -138,7 +138,7 @@
 like($first_token, qr/s:Autobiographische/, 'data');
 like($first_token, qr/_0\$<i>0<i>17/, 'data');
 like($first_token, qr!<>:dereko/s:s\$<b>64<i>0<i>30<i>2<b>4!, 'data');
-like($first_token, qr!<>:base\/s:t\$<b>64<i>0<i>35242<i>5233<b>0!, 'data');
+like($first_token, qr!<>:base\/s:t\$<b>64<i>0<i>35242<i>5234<b>0!, 'data');
 # like($first_token, qr!<>:base\/s:t\$<b>64<i>0<i>35250<i>5233<b>0!, 'data');
 like($first_token, qr!<>:base/s:s\$<b>64<i>0<i>30<i>2<b>2!, 'data');
 like($first_token, qr!-:base\/paragraphs\$\<i\>14!, 'data');
diff --git a/t/real/goethe.t b/t/real/goethe.t
index 5bc5eb2..03af7cc 100644
--- a/t/real/goethe.t
+++ b/t/real/goethe.t
@@ -138,7 +138,7 @@
 like($first_token, qr/s:Autobiographische/, 'data');
 like($first_token, qr/_0\$<i>0<i>17/, 'data');
 like($first_token, qr!<>:base/s:s\$<b>64<i>0<i>30<i>2<b>2!, 'data');
-like($first_token, qr!<>:base\/s:t\$<b>64<i>0<i>35199<i>5226<b>0!, 'data');
+like($first_token, qr!<>:base\/s:t\$<b>64<i>0<i>35199<i>5227<b>0!, 'data');
 
 ## OpenNLP
 $tokens->add('OpenNLP', 'Sentences');
diff --git a/t/real/hnc.t b/t/real/hnc.t
index 9aca507..5e44f6b 100644
--- a/t/real/hnc.t
+++ b/t/real/hnc.t
@@ -88,8 +88,9 @@
 
 my $output = decode_json( $tokens->to_json );
 
-is($output->{data}->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>4368<i>577<b>0', 't');
+is($output->{data}->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>4368<i>578<b>0', 't');
 is($output->{data}->{stream}->[0]->[3], 'i:addbot', 't');
+is($output->{data}->{stream}->[-1]->[0], '_577$<i>4359<i>4368', 't');
 
 
 ## Base
diff --git a/t/real/redew.t b/t/real/redew.t
index 25f369c..a5fe45b 100644
--- a/t/real/redew.t
+++ b/t/real/redew.t
@@ -114,15 +114,15 @@
 my $first = $output->{data}->{stream}->[0];
 
 is('-:tokens$<i>13',$first->[0]);
-is('<>:base/s:t$<b>64<i>0<i>197<i>12<b>0',$first->[1]);
-is('<>:dereko/s:text$<b>64<i>0<i>197<i>12<b>0',$first->[2]);
-is('<>:dereko/s:body$<b>64<i>118<i>197<i>12<b>1',$first->[3]);
-is('<>:dereko/s:p$<b>64<i>118<i>197<i>12<b>2',$first->[4]);
-is('<>:dereko/s:said$<b>64<i>118<i>197<i>12<b>3<s>1',$first->[5]);
-is('@:dereko/s:level:1$<b>17<s>1<i>12',$first->[6]);
-is('@:dereko/s:content:speech$<b>17<s>1<i>12',$first->[7]);
-is('@:dereko/s:mode:direct$<b>17<s>1<i>12',$first->[8]);
-is('@:dereko/s:id:1$<b>17<s>1<i>12',$first->[9]);
+is('<>:base/s:t$<b>64<i>0<i>197<i>13<b>0',$first->[1]);
+is('<>:dereko/s:text$<b>64<i>0<i>197<i>13<b>0',$first->[2]);
+is('<>:dereko/s:body$<b>64<i>118<i>197<i>13<b>1',$first->[3]);
+is('<>:dereko/s:p$<b>64<i>118<i>197<i>13<b>2',$first->[4]);
+is('<>:dereko/s:said$<b>64<i>118<i>197<i>13<b>3<s>1',$first->[5]);
+is('@:dereko/s:level:1$<b>17<s>1<i>13',$first->[6]);
+is('@:dereko/s:content:speech$<b>17<s>1<i>13',$first->[7]);
+is('@:dereko/s:mode:direct$<b>17<s>1<i>13',$first->[8]);
+is('@:dereko/s:id:1$<b>17<s>1<i>13',$first->[9]);
 is('_0$<i>123<i>128',$first->[10]);
 is("drukola/l:H\x{f6}rst",$first->[11]);
 is('drukola/m:msd:rfpos',$first->[12]);
@@ -135,11 +135,11 @@
 my $nine = join(',', @{$output->{data}->{stream}->[9]});
 like($nine, qr{drukola\/l:nichts}, 'Nichts');
 like($nine, qr{_9\$<i>170<i>176}, 'Term boundaries');
-unlike($nine, qr{<>:dereko/s:said\$<b>64<i>176<i>196<i>12<b>4<s>1}, 'Term boundaries');
+unlike($nine, qr{<>:dereko/s:said\$<b>64<i>176<i>196<i>13<b>4<s>1}, 'Term boundaries');
 
 my $ten = join(',', @{$output->{data}->{stream}->[10]});
 like($ten, qr{_10\$<i>177<i>180}, 'Term boundaries');
-like($ten, qr{<>:dereko/s:said\$<b>64<i>176<i>196<i>12<b>4<s>1}, 'Term boundaries');
+like($ten, qr{<>:dereko/s:said\$<b>64<i>176<i>196<i>13<b>4<s>1}, 'Term boundaries');
 
 my $eleven = join(',', @{$output->{data}->{stream}->[11]});
 like($eleven, qr{_11\$<i>181<i>188}, 'Term boundaries');
diff --git a/t/real/rei.t b/t/real/rei.t
index d50a732..e08516d 100644
--- a/t/real/rei.t
+++ b/t/real/rei.t
@@ -209,7 +209,7 @@
 is('<>:dereko/s:docTitle$<b>64<i>0<i>91<i>11<b>3', $first->[15]);
 is('<>:dereko/s:titlePart$<b>64<i>0<i>91<i>11<b>4<s>2', $first->[16]);
 is('<>:dereko/s:s$<b>64<i>0<i>91<i>11<b>5', $first->[17]);
-is('<>:base/s:t$<b>64<i>0<i>17859<i>2640<b>0', $first->[18]);
+is('<>:base/s:t$<b>64<i>0<i>17859<i>2641<b>0', $first->[18]);
 is('>:malt/d:ROOT$<b>33<i>0<i>48<i>0<i>7', $first->[19]);
 is('<:malt/d:PP$<b>32<i>1', $first->[20]);
 is('<:malt/d:ROOT$<b>34<i>0<i>48<i>7<i>0', $first->[21]);
@@ -298,8 +298,8 @@
 is('<>:dereko/s:titlePage$<b>64<i>0<i>61<i>8<b>2<s>1', $first->[17]);
 is('<>:dereko/s:docTitle$<b>64<i>0<i>61<i>8<b>3', $first->[18]);
 is('<>:opennlp/s:s$<b>64<i>0<i>173<i>24<b>0', $first->[19]);
-is('<>:base/s:t$<b>64<i>0<i>7008<i>1008<b>0', $first->[20]);
-is('<>:dereko/s:text$<b>64<i>0<i>7008<i>1008<b>0', $first->[21]);
+is('<>:base/s:t$<b>64<i>0<i>7008<i>1009<b>0', $first->[20]);
+is('<>:dereko/s:text$<b>64<i>0<i>7008<i>1009<b>0', $first->[21]);
 is('>:malt/d:GMOD$<b>32<i>3', $first->[22]);
 is('<:malt/d:ROOT$<b>34<i>0<i>51<i>6<i>3', $first->[23]);
 is('@:dereko/s:id:bng.00071-0-titlepage$<b>17<s>1<i>8', $first->[24]);
@@ -313,16 +313,17 @@
 is('tt/p:NE', $first->[32]);
 
 $last = $output->{data}->{stream}->[-1];
-is('<>:dereko/s:back$<b>65<i>7008<i>7008<i>1008<b>1', $last->[0]);
-is('<>:dereko/s:div$<b>65<i>7008<i>7008<i>1008<b>2<s>1', $last->[1]);
-is('@:dereko/s:n:1$<b>17<s>1', $last->[2]);
-is('@:dereko/s:type:footnotes$<b>17<s>1', $last->[3]);
-is('@:dereko/s:complete:y$<b>17<s>1', $last->[4]);
-is('_1008$<i>6990<i>7006', $last->[5]);
-is('corenlp/p:NN', $last->[6]);
-is('i:befreiungsschlag', $last->[7]);
-is('opennlp/p:NN', $last->[8]);
-is('s:Befreiungsschlag', $last->[9]);
+# No longer indexed:
+#is('<>:dereko/s:back$<b>65<i>7008<i>7008<i>1009<b>1', $last->[0]);
+#is('<>:dereko/s:div$<b>65<i>7008<i>7008<i>1009<b>2<s>1', $last->[1]);
+#is('@:dereko/s:n:1$<b>17<s>1', $last->[2]);
+#is('@:dereko/s:type:footnotes$<b>17<s>1', $last->[3]);
+#is('@:dereko/s:complete:y$<b>17<s>1', $last->[4]);
+is('_1008$<i>6990<i>7006', $last->[0]);
+is('corenlp/p:NN', $last->[1]);
+is('i:befreiungsschlag', $last->[2]);
+is('opennlp/p:NN', $last->[3]);
+is('s:Befreiungsschlag', $last->[4]);
 
 done_testing;
 __END__
diff --git a/t/script/base.t b/t/script/base.t
index 08baa64..7bfe48d 100644
--- a/t/script/base.t
+++ b/t/script/base.t
@@ -58,7 +58,7 @@
 
 is($token->[5], '<>:base/s:s$<b>64<i>0<i>30<i>2<b>2', 'struct');
 is($token->[7], '<>:dereko/s:s$<b>64<i>0<i>30<i>2<b>4', 'struct');
-is($token->[8], '<>:base/s:t$<b>64<i>0<i>35242<i>5238<b>0', 'struct');
+is($token->[8], '<>:base/s:t$<b>64<i>0<i>35242<i>5239<b>0', 'struct');
 
 $token = $stream->[4];
 is($token->[0], '<>:base/s:s$<b>64<i>53<i>254<i>32<b>2', 'struct');
diff --git a/t/sgbr/base.t b/t/sgbr/base.t
index 718a0ba..073209e 100644
--- a/t/sgbr/base.t
+++ b/t/sgbr/base.t
@@ -32,8 +32,8 @@
 
 is($stream->[0]->[0], '-:base/sentences$<i>1');
 is($stream->[0]->[1], '-:tokens$<i>15');
-is($stream->[0]->[2], '<>:base/s:t$<b>64<i>0<i>115<i>14<b>0');
-is($stream->[0]->[3], '<>:base/s:s$<b>64<i>16<i>114<i>14<b>2');
+is($stream->[0]->[2], '<>:base/s:t$<b>64<i>0<i>115<i>15<b>0');
+is($stream->[0]->[3], '<>:base/s:s$<b>64<i>16<i>114<i>15<b>2');
 is($stream->[0]->[4], '_0$<i>17<i>18');
 
 done_testing;
diff --git a/t/sgbr/lemma.t b/t/sgbr/lemma.t
index 3221dba..6635175 100644
--- a/t/sgbr/lemma.t
+++ b/t/sgbr/lemma.t
@@ -32,7 +32,7 @@
 
 my $stream = $data->{stream};
 is($stream->[0]->[0], '-:tokens$<i>51', 'Token number');
-is($stream->[0]->[1], '<>:base/s:t$<b>64<i>0<i>365<i>50<b>0', 'Text Boundary');
+is($stream->[0]->[1], '<>:base/s:t$<b>64<i>0<i>365<i>51<b>0', 'Text Boundary');
 is($stream->[0]->[2], '_0$<i>0<i>18', 'Position');
 is($stream->[0]->[3], 'i:sommerüberraschung', 'First term');
 is($stream->[0]->[4], 's:Sommerüberraschung', 'First term');
diff --git a/t/sgbr/pos.t b/t/sgbr/pos.t
index 0163ed1..d750cb1 100644
--- a/t/sgbr/pos.t
+++ b/t/sgbr/pos.t
@@ -33,7 +33,7 @@
 my $stream = $data->{stream};
 
 is($stream->[0]->[0], '-:tokens$<i>51', 'Token number');
-is($stream->[0]->[1], '<>:base/s:t$<b>64<i>0<i>365<i>50<b>0', 'Text boundary');
+is($stream->[0]->[1], '<>:base/s:t$<b>64<i>0<i>365<i>51<b>0', 'Text boundary');
 is($stream->[0]->[2], '_0$<i>0<i>18', 'Position');
 is($stream->[0]->[3], 'i:sommerüberraschung', 'First term');
 is($stream->[0]->[4], 's:Sommerüberraschung', 'First term');
diff --git a/t/tokenization.t b/t/tokenization.t
index eecb1d7..da135e5 100644
--- a/t/tokenization.t
+++ b/t/tokenization.t
@@ -73,7 +73,8 @@
 
 is($json->{data}->{name}, 'tokens');
 is($json->{data}->{tokenSource}, 'opennlp#tokens');
-is($json->{data}->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>6083<i>1067<b>0');
+is($json->{data}->{stream}->[0]->[0], '-:tokens$<i>1068');
+is($json->{data}->{stream}->[0]->[1], '<>:base/s:t$<b>64<i>0<i>6083<i>1068<b>0');
 
 done_testing;
 
diff --git a/t/transform.t b/t/transform.t
index 97c5a8b..c071f81 100644
--- a/t/transform.t
+++ b/t/transform.t
@@ -133,7 +133,7 @@
 
 is_deeply(
   _t2h($tokens->stream->pos(0)->to_string),
-  _t2h('[(0-1)s:A|i:a|_0$<i>0<i>1|-:tokens$<i>923|mate/p:XY|<>:base/s:s$<b>64<i>0<i>74<i>13<b>2|<>:base/s:t$<b>64<i>0<i>6083<i>922<b>0|-:base/sentences$<i>96]'),
+  _t2h('[(0-1)s:A|i:a|_0$<i>0<i>1|-:tokens$<i>923|mate/p:XY|<>:base/s:s$<b>64<i>0<i>74<i>13<b>2|<>:base/s:t$<b>64<i>0<i>6083<i>923<b>0|-:base/sentences$<i>96]'),
   'Startinfo'
 );
 
@@ -146,7 +146,7 @@
   '-:tokens$<i>923|'.
   'mate/p:XY|'.
   '<>:base/s:s$<b>64<i>0<i>74<i>13<b>2|'.
-  '<>:base/s:t$<b>64<i>0<i>6083<i>922<b>0|'.
+  '<>:base/s:t$<b>64<i>0<i>6083<i>923<b>0|'.
   '-:base/sentences$<i>96|'.
   '<>:base/s:p$<b>64<i>0<i>224<i>34<b>1|'.
   '-:base/paragraphs$<i>76|'.