Handle UDPipe comments and ignore non-interpretable comments

TODO:
* handle XPOS tags (CoNLL-U column 5)?
* convert more metadata (udpipe_model_licence, ...)

Resolves #1, resolves #2

Change-Id: Ic29125bdcdf7ba9bb8d84c94757a72cea6bcf500
diff --git a/script/conllu2korapxml b/script/conllu2korapxml
index a9bb030..6de032f 100755
--- a/script/conllu2korapxml
+++ b/script/conllu2korapxml
@@ -75,43 +75,54 @@
   my $i=0; my $s=0; my $first_in_sentence=0;
   my $lastDocSigle="";
   while (<$fh>) {
-    if(/^(?:#|0\.1)\s+filename\s*[:=]\s*(.*)/) {
-      $filename=$1;
-      if(!$first) {
-        closeDoc(0);
-      } else {
-        $first=0;
+    if(/^\s*(?:#|0\.\d)/) {
+      if(/^(?:#|0\.1)\s+filename\s*[:=]\s*(.*)/) {
+        $filename=$1;
+        if(!$first) {
+          closeDoc(0);
+        } else {
+          $first=0;
+        }
+        if($processedFilenames{$filename}) {
+          $log->warn("WARNING: $filename is already processed");
+        }
+        $processedFilenames{$filename}=1;
+        $i=0;
+      } elsif(/^#\s*foundry\s*[:=]\s*(.*)/) {
+        if(!$foundry_name) {
+          $foundry_name = $1;
+          $log->debug("Foundry: $foundry_name\n");
+        } else {
+          $log->debug("Ignored foundry name: $1\n");
+        }
+      } elsif(/^#\s*generator\s*[=]\s*udpipe/i) {
+        if(!$foundry_name) {
+          $foundry_name = "ud";
+          $log->debug("Foundry: $foundry_name\n");
+        } else {
+          $log->debug("Ignored foundry name: ud\n");
+        }
+      } elsif(/^(?:#|0\.2)\s+.*id\s*[:=]\s*(.*)/) {
+        $docid=$1;
+        my $docSigle = $docid;
+        $docSigle =~ s/\..*//;
+        if($docSigle ne $lastDocSigle) {
+          $log->info("Analyzing $docSigle");
+          $lastDocSigle = $docSigle;
+        }
+        $known=$unknown=0;
+        $current="";
+        $parser_file = dirname($filename);
+        $parser_file =~ s@(.*)/[^/]+$@$1@;
+        $morpho_file = $parser_file;
+        $morpho_file .= "/$foundry_name/morpho.xml";
+        $parser_file .= "/$foundry_name/dependency.xml";
+        $parse = $morpho = layer_header($docid);
+      }  elsif (/^(?:#|0\.3)\s+(?:start_offsets|from)\s*[:=]\s*(.*)/) {
+        @spansFrom = split(/\s+/, $1);
+      }  elsif (/^(?:#|0\.4)\s+(?:end_offsets|to)\s+[:=]\s*(.*)/) {
+        @spansTo = split(/\s+/, $1);
       }
-      if($processedFilenames{$filename}) {
-        $log->warn("WARNING: $filename is already processed");
-      }
-      $processedFilenames{$filename}=1;
-      $i=0;
-    } elsif(/^#\s*foundry\s*[:=]\s*(.*)/) {
-      if(!$foundry_name) {
-        $foundry_name = $1;
-        $log->debug("Foundry: $foundry_name\n");
-      }
-    } elsif(/^(?:#|0\.2)\s+.*id\s*[:=]\s*(.*)/) {
-      $docid=$1;
-      my $docSigle = $docid;
-      $docSigle =~ s/\..*//;
-      if($docSigle ne $lastDocSigle) {
-        $log->info("Analyzing $docSigle");
-        $lastDocSigle = $docSigle;
-      }
-      $known=$unknown=0;
-      $current="";
-      $parser_file = dirname($filename);
-      $parser_file =~ s@(.*)/[^/]+$@$1@;
-      $morpho_file = $parser_file;
-      $morpho_file .= "/$foundry_name/morpho.xml";
-      $parser_file .= "/$foundry_name/dependency.xml";
-      $parse = $morpho = layer_header($docid);
-    }  elsif (/^(?:#|0\.3)\s+(?:start_offsets|from)\s*[:=]\s*(.*)/) {
-      @spansFrom = split(/\s+/, $1);
-    }  elsif (/^(?:#|0\.4)\s+(?:end_offsets|to)\s+[:=]\s*(.*)/) {
-      @spansTo = split(/\s+/, $1);
     } elsif (! /^\s*$/) {
       my @parsed=split('\t');
       chomp  $parsed[9];