Added preliminary support for C2 def-files in VC conversion tool

Change-Id: If2a6a24e7401bc1222597670fb38b5cba7e3aa80
diff --git a/tools/list2vc.pl b/tools/list2vc.pl
index c632ec7..508f88f 100755
--- a/tools/list2vc.pl
+++ b/tools/list2vc.pl
@@ -1,24 +1,13 @@
 #!/usr/bin/env perl
-
-
-
-package main;
 use strict;
 use warnings;
 
+# 2020-05-20
+#   Preliminary support for C2 def-files.
+
+
 our @ARGV;
 
-sub shorten ($) {
-  my $line = shift;
-  if (length($line) < 20) {
-    return $line;
-  }
-  else {
-    return substr($line,0,17) . '...';
-  };
-};
-
-
 unless (@ARGV) {
   print <<'HELP';
 Convert a line-separated list of corpus sigles, doc sigles or
@@ -31,6 +20,18 @@
 exit 0;
 };
 
+
+sub shorten ($) {
+  my $line = shift;
+  if (length($line) < 20) {
+    return $line;
+  }
+  else {
+    return substr($line,0,17) . '...';
+  };
+};
+
+
 my $fh;
 if ($ARGV[0] eq '-') {
   $fh = *STDIN;
@@ -58,23 +59,54 @@
     next;
   };
 
+  my ($key, $value, $desc);
+
+  # Line-Type: <e>c</e>
+  if ($line =~ /^\s*<([^>]+)>\s*([^<]*)\s*<\/\1>\s*$/) {
+    $key = $1;
+    $value = $2 // undef;
+  }
+
+  # Line-Type: <e>c
+  elsif($line =~ /^\s*<([^>]+)>\s*([^<]+)\s*$/) {
+    $key = $1;
+    $value = $2;
+  }
+
   # Get text sigles
-  if ($line =~ m!^([^\/]+\/){2}[^\/]+$!) {
-    push @{$data{text}}, $line;
+  elsif ($line =~ m!^(?:[^\/\s]+\/){2}[^\/\s]+$!) {
+    $key = 'text';
+    $value = $line;
   }
 
   # Get doc sigles
-  elsif ($line =~ m!^[^\/]+\/[^\/]+$!) {
-    push @{$data{doc}}, $line;
+  elsif ($line =~ m!^([^\/\s]+\/[^\/\s]+?)(?:\s.+?)?$!) {
+    $key = 'doc';
+    $value = $1;
   }
 
   # Get corpus sigles
-  elsif ($line !~ m!\/!) {
-    push @{$data{corpus}}, $line;
+  elsif ($line !~ m!(?:\/|\s)!) {
+    $key = 'corpus';
+    $value = $line;
   }
 
+  # Unknown line type: warn and skip the line
   else {
     warn shorten($line) . q! isn't a valid sigle!;
+    next;
+  };
+
+  if ($key eq 'text') {
+    push @{$data{text}}, $value;
+  }
+
+  elsif ($key eq 'doc') {
+    push @{$data{doc}}, $value;
+  }
+
+  elsif ($key eq 'corpus') {
+    push @{$data{corpus}}, $value;
   };
 };