Send <EOT>\n to external tokenizer for separating texts

Terminating each text with <EOT>\n is now the standard for the KorAP tokenizer.

Change-Id: I30c2d6ca82211b1d312364899d4f56ea7908b4f8
diff --git a/t/tokenization-external.t b/t/tokenization-external.t
index 742c656..874f0fe 100644
--- a/t/tokenization-external.t
+++ b/t/tokenization-external.t
@@ -6,6 +6,8 @@
 use Test::XML::Loy;
 
 use FindBin;
+use utf8;
+
 BEGIN {
   unshift @INC, "$FindBin::Bin/../lib";
 };
@@ -22,10 +24,6 @@
 );
 
 $ext->tokenize("Der alte Mann");
-# TODO:
-#   see comments on $sep in 'lib/KorAP/XML/TEI/Tokenizer/External.pm'
-#$ext->tokenize("ging über die Straße");
-
 my $str = $ext->to_string('unknown');
 my $t = Test::XML::Loy->new($str);
 $t->attr_is('layer spanList span:nth-child(1)', 'to', 3);
@@ -35,8 +33,20 @@
 $t->attr_is('layer spanList span:nth-child(3)', 'to', 13);
 $t->element_count_is('layer spanList span', 3);
 
+$ext->tokenize("ging über die Straße");
+$str = $ext->to_string('unknown');
+$t = Test::XML::Loy->new($str);
+$t->attr_is('layer spanList span:nth-child(1)', 'to', 4);
+$t->attr_is('layer spanList span:nth-child(2)', 'from', 5);
+$t->attr_is('layer spanList span:nth-child(2)', 'to', 9);
+$t->attr_is('layer spanList span:nth-child(3)', 'from', 10);
+$t->attr_is('layer spanList span:nth-child(3)', 'to', 13);
+$t->attr_is('layer spanList span:nth-child(4)', 'from', 14);
+$t->attr_is('layer spanList span:nth-child(4)', 'to', 20);
+$t->element_count_is('layer spanList span', 4);
+
 $ext->reset;
-$ext->tokenize("Hu aha\ndas ist cool");
+$ext->tokenize("Hu aha\x{04}\ndas ist cool");
 
 $str = $ext->to_string('unknown');
 $t = Test::XML::Loy->new($str);