Fix possible IO deadlocks with KorAP tokenizer

Text separators should always have a newline in front of artificial EOTs
to make sure they are recognized and to avoid them being consumed
by regular expressions for tokens.

Change-Id: I528c903904da50312a7472c7a34775476b0955be
diff --git a/t/script.t b/t/script.t
index fbe28bf..773bb60 100644
--- a/t/script.t
+++ b/t/script.t
@@ -175,6 +175,26 @@
     ->element_count_is('spanList span', 227);
 };
 
+subtest 'Check KorAP tokenizer for infinite loop bug' => sub {
+  # Regression subtest: convert a tokenizer "challenge" fixture and check the output.
+  my $file = catfile($f, 'data', 'korap_tokenizer_challenge.xml');
+  # Skip the whole subtest when the KorAP tokenizer module cannot be loaded.
+  eval {
+    require KorAP::XML::TEI::Tokenizer::KorAP;
+    1;
+  } or do {
+    plan skip_all => "KorAP::XML::TEI::Tokenizer::KorAP cannot be used";
+  };
+  # Expect the text id to be reported on stderr and structure.xml to be readable.
+  test_tei2korapxml(
+    file => $file,
+    param => "-tk -s", # NOTE(review): presumably KorAP tokenizer + sentence split - confirm
+    tmp => 'script_bug_check'
+  )
+    ->stderr_like(qr!tei2korapxml: .*? text_id=WDD19_H0039\.87242!)
+    ->file_readable('WDD19/H0039/87242/struct/structure.xml');
+};
+
 subtest 'Sentence split with KorAP tokenizer' => sub {
 
   eval {