Improve error handling for docs without text IDs or token offsets

Change-Id: I48ecdd587ea0ef5b4e95a1f3244f1374caeb9613
diff --git a/script/conllu2korapxml b/script/conllu2korapxml
index 6de032f..33e042c 100755
--- a/script/conllu2korapxml
+++ b/script/conllu2korapxml
@@ -74,7 +74,7 @@
   }
   my $i=0; my $s=0; my $first_in_sentence=0;
   my $lastDocSigle="";
-  while (<$fh>) {
+  MAIN: while (<$fh>) {
     if(/^\s*(?:#|0\.\d)/) {
       if(/^(?:#|0\.1)\s+filename\s*[:=]\s*(.*)/) {
         $filename=$1;
@@ -102,7 +102,7 @@
         } else {
           $log->debug("Ignored foundry name: ud\n");
         }
-      } elsif(/^(?:#|0\.2)\s+.*id\s*[:=]\s*(.*)/) {
+      } elsif(/^(?:#|0\.2)\s+text_id\s*[:=]\s*(.*)/) {
         $docid=$1;
         my $docSigle = $docid;
         $docSigle =~ s/\..*//;
@@ -123,10 +123,23 @@
       }  elsif (/^(?:#|0\.4)\s+(?:end_offsets|to)\s+[:=]\s*(.*)/) {
         @spansTo = split(/\s+/, $1);
       }
-    } elsif (! /^\s*$/) {
+    } elsif ( !/^\s*$/ ) {
+      if ( !$docid || scalar @spansTo == 0 || scalar @spansFrom == 0 ) {
+        if ( !$docid ) {
+          $log->warn("WARNING: No valid input document: text_id (e.g. '# text_id = GOE_AGA.00000') missing");
+        }
+        if ( scalar @spansTo == 0 || scalar @spansFrom == 0 ) {
+          $log->warn("WARNING: No valid input document: token offsets missing");
+        }
+
+        # Skip to next potentially valid document
+        while (<$fh>) {
+          next MAIN if m!^\s*$!s;
+        }
+      };
       my @parsed=split('\t');
       chomp  $parsed[9];
-      if(@parsed != 10) {
+      if (@parsed != 10) {
         $log->warn("WARNING: skipping strange parser output line in $docid");
         $i++;
         next;
@@ -175,7 +188,7 @@
   }
   $current .= "\n";
   closeDoc(1);
-  $zip->close();
+  $zip->close() if $zip;
   close($fh);
 }
 exit;
@@ -194,11 +207,11 @@
 }
 
 sub closeDoc {
-  if ($write_morpho) {
+  if ($write_morpho && $morpho_file) {
     newZipStream($morpho_file);
     $zip->print($morpho, qq( </spanList>\n</layer>\n));
   }
-  if ($write_syntax) {
+  if ($write_syntax && $parser_file) {
     $write_syntax = 0;
     newZipStream($parser_file);
     $zip->print($parse, qq(</spanList>\n</layer>\n));