korapxml2conllu: add option --columns=<n>

Print only the first n columns, or only the token itself if n=1. Mainly
useful for passing data to tools that are not CoNLL-U-ready.

Change-Id: I078093b2484cc9ef9eb40b87c63ec3784d8eae38
diff --git a/script/korapxml2conllu b/script/korapxml2conllu
index 2dd912e..386567f 100755
--- a/script/korapxml2conllu
+++ b/script/korapxml2conllu
@@ -31,6 +31,7 @@
   'sigle-pattern|p=s'            => \(my $sigle_pattern = ''),
   'extract-attributes-regex|e=s' => \(my $extract_attributes_regex = ''),
   'log|l=s'                      => \(my $log_level = 'warn'),
+  'columns|c=n'                  => \(my $columns = 10),
 
   'help|h'                       => sub {
     pod2usage(
@@ -195,7 +196,11 @@
         push @current_lines, \@vals;
         $known++;
         $conll[$ID_idx] = $#current_lines+1;
-        $current .= join("\t", @conll) . "\n"; # conll columns
+        if ($columns == 1) {
+          $current .= "$conll[1]\n";
+        } else {
+          $current .= join("\t", @conll[0..$columns-1]) . "\n"; # conll columns
+        }
         fetch_plaintext($docid);
         if ($sentence_ends{$docid}{$current_to}) {
           $current .= "\n";
@@ -213,7 +218,11 @@
       push @current_lines, \@vals;
       # convert gathered information to CONLL
       $conll[$ID_idx] = $#current_lines+1;
-      $current .= join("\t", @conll) . "\n"; # conll columns
+      if ($columns == 1) {
+        $current .= "$conll[1]\n";
+      } else {
+        $current .= join("\t", @conll[0..$columns-1]) . "\n"; # conll columns
+      }
       if($conll[$XPOS_idx] eq '$.' || ($conll[$XPOS_idx] eq 'SENT' && $token eq '.') || $known + $unknown >= $MAX_SENTENCE_LENGTH) {
         $current .= "\n";
         if($known + $unknown > 0) { # only print sentence if it contains some words
@@ -383,6 +392,10 @@
 
 Extract element/attribute regular expressions to comments.
 
+=item B<--columns>=I<int> | B<-c> I<int>
+
+Print only the first n columns (default: 10). If n=1, only the token itself is printed.
+
 =item B<--help|-h>
 
 Print help information.