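Handle tokenizer crashes more gracefully

Wrap the external tokenizer call in an eval. If tokenization dies, log
the error for the affected text, reset the tokenizer, and skip writing
the tokens stream (and the tokenizer-based sentence splits) for that
text instead of letting the exception propagate.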

Change-Id: I6b7300fd81e19ec608d892331efcdcea5611dfbc
diff --git a/script/tei2korapxml b/script/tei2korapxml
index 3d35077..f8a26c2 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -10,7 +10,7 @@
 
 use File::Basename qw(dirname);
 
-use Encode qw(decode);
+use Encode qw(decode encode);
 
 use FindBin;
 BEGIN {
@@ -356,14 +356,22 @@
           # Tokenize with external tokenizer
           if ($ext_tok) {
 
-            # Tokenize and output
-            $ext_tok->tokenize($data->data)->to_zip(
-              $zipper->new_stream("$dir/$base_dir/${tokens_file}.xml"),
-              $text_id_esc
-            );
+            my $tokens_output = eval {
+              $ext_tok->tokenize($data->data)->to_string($text_id_esc);
+            };
 
-            if ($use_tokenizer_sentence_splits) {
-              $ext_tok->sentencize_from_previous_input($inline->structures);
+            if (my $err = $@) {
+              $err =~ s/\s+$//;
+              $log->error("Skipping external tokenization for '$text_id_esc': $err");
+              $ext_tok->reset;
+            }
+            elsif (defined $tokens_output) {
+              $zipper->new_stream("$dir/$base_dir/${tokens_file}.xml")
+                ->print(encode('UTF-8', $tokens_output));
+
+              if ($use_tokenizer_sentence_splits) {
+                $ext_tok->sentencize_from_previous_input($inline->structures);
+              };
             };
           };