Handle tokenizer crashes more gracefully

Change-Id: I6b7300fd81e19ec608d892331efcdcea5611dfbc
diff --git a/t/tokenization-external.t b/t/tokenization-external.t
index 874f0fe..ac72e68 100644
--- a/t/tokenization-external.t
+++ b/t/tokenization-external.t
@@ -3,6 +3,7 @@
 use Test::More;
 use File::Basename 'dirname';
 use File::Spec::Functions qw/catfile/;
+use File::Temp qw/tempfile/;
 use Test::XML::Loy;
 
 use FindBin;
@@ -16,6 +17,7 @@
 
 my $f = dirname(__FILE__);
 my $cmd = catfile($f, 'cmd', 'tokenizer.pl');
+my $faulty_cmd = catfile($f, 'cmd', 'tokenizer_faulty.pl');
 
 # Test aggressive
 my $ext = KorAP::XML::TEI::Tokenizer::External->new(
@@ -55,5 +57,33 @@
 $t->attr_is('layer spanList span:nth-child(2)', 'to', 6);
 $t->element_count_is('layer spanList span', 2);
 
+# Crash recovery: drive the external tokenizer through a helper script
+# that is handed the path of a scratch file (presumably where it records
+# whether it has crashed before - see t/cmd/tokenizer_faulty.pl).
+# UNLINK => 1 has File::Temp delete the scratch file at program exit;
+# a bare tempfile() in list context would leave it behind.
+my (undef, $state_file) = tempfile(UNLINK => 1);
+
+$ext = KorAP::XML::TEI::Tokenizer::External->new(
+  "perl $faulty_cmd '$state_file'"
+);
+
+# A single crash: a result is still expected for this text.
+$ext->tokenize("Der __CRASH_ONCE__ Mann");
+$str = $ext->to_string('retry-doc');
+ok($str, 'Tokenization succeeds after restarting the external tokenizer');
+$t = Test::XML::Loy->new($str);
+$t->element_exists('layer spanList span:nth-child(1)', 'Retry produces token bounds');
+
+# Repeated crashes: to_string() is expected to return undef for this text.
+$ext->tokenize("Der __ALWAYS_CRASH__ Mann");
+ok(!defined $ext->to_string('skip-doc'), 'Tokenization can be skipped after repeated crashes');
+
+# The tokenizer object must remain usable for the next, harmless text.
+$ext->tokenize("Der alte Mann");
+$str = $ext->to_string('recovered-doc');
+ok($str, 'Tokenizer can continue after a skipped text');
+$t = Test::XML::Loy->new($str);
+$t->element_count_is('layer spanList span', 3);
 
 done_testing;