c2k: use Getopt::Long, Log::Any, and Pod::Usage

Change-Id: Ifc17f23e3b5c18ee7a2713855c6b4e55f0091dfd
diff --git a/script/conllu2korapxml b/script/conllu2korapxml
index 0058618..b4aa17a 100755
--- a/script/conllu2korapxml
+++ b/script/conllu2korapxml
@@ -2,37 +2,51 @@
 use strict;
 use warnings;
 use POSIX;
-use Getopt::Std;
+use Getopt::Long qw(GetOptions :config no_auto_abbrev);
+use Log::Any '$log';
+use Log::Any::Adapter;
 use Encode;
 use IO::Compress::Zip qw(zip $ZipError :constants);
 use File::Basename;
+use Pod::Usage;
 
 my $_COMPRESSION_METHOD = ZIP_CM_DEFLATE;
 my %opts;
 my %processedFilenames;
 
 our $VERSION = '0.4.1.9000';
+our $VERSION_MSG = "\nconllu2korapxml - v$VERSION\n";
 
-my $usage=<<EOF;
-Usage: $0 [options] [CoNLL-U-FILE...]
+use constant {
+    # Set KORAPXMLCONLLU_DEBUG=1 in the environment for a little more debug output (no need for a command-line parameter)
+    DEBUG => $ENV{KORAPXMLCONLLU_DEBUG} // 0
+};
 
-Options:
- -d        debug
-Description:
- Converts CoNLL-U files that follow KorAP-specific comment conventions
- and contain morphosyntactic and/or dependency annotations to
- corresponding KorAP-XML zip files.
+GetOptions(
+    'force-foundry|f=s'            => \(my $foundry_name = ''),
+    'log|l=s'                      => \(my $log_level = 'warn'),
 
-Examples:
- $0 zca20.spacy.conllu > zca20.spacy.zip
+    'help|h'                       => sub {
+      pod2usage(
+          -verbose  => 99,
+          -sections => 'NAME|DESCRIPTION|SYNOPSIS|ARGUMENTS|OPTIONS|EXAMPLES',
+          -msg      => $VERSION_MSG,
+          -output   => '-'
+      )
+    },
+    'version|v'                    => sub {
+      pod2usage(
+          -verbose => 0,
+          -msg     => $VERSION_MSG,
+          -output  => '-'
+      );
+    }
+);
 
- $0 < zca20.spacy.conllu > zca20.spacy.zip
-EOF
-
-
-getopts('hd', \%opts);
-die $usage if($opts{h});
-my $debug=($opts{d}? 1 : 0);
+# Establish logger
+binmode(STDERR, ':encoding(UTF-8)');
+Log::Any::Adapter->set('Stderr', log_level => $log_level);
+$log->notice('Debugging is activated') if DEBUG;
 
 my $docid="";
 my $zip = undef;
@@ -48,7 +62,6 @@
 
 my ($write_morpho, $write_syntax, $base) = (1, 0, 0);
 my $filename;
-my $foundry_name;
 my $first=1;
 my @conllu_files = @ARGV;
 push @conllu_files, "-" if (@conllu_files == 0);
@@ -70,19 +83,21 @@
         $first=0;
       }
       if($processedFilenames{$filename}) {
-        print STDERR "WARNING: $filename is already processed\n";
+        $log->warn("WARNING: $filename is already processed");
       }
       $processedFilenames{$filename}=1;
       $i=0;
     } elsif(/^#\s*foundry\s*[:=]\s*(.*)/) {
-      $foundry_name=$1;
-      print STDERR "Foundry: $foundry_name\n" if($debug);
+      if(!$foundry_name) {
+        $foundry_name = $1;
+        $log->debug("Foundry: $foundry_name\n");
+      }
     } elsif(/^(?:#|0\.2)\s+.*id\s*[:=]\s*(.*)/) {
       $docid=$1;
       my $docSigle = $docid;
       $docSigle =~ s/\..*//;
       if($docSigle ne $lastDocSigle) {
-        print STDERR "Analyzing $docSigle\n";
+        $log->info("Analyzing $docSigle");
         $lastDocSigle = $docSigle;
       }
       $known=$unknown=0;
@@ -101,7 +116,7 @@
       my @parsed=split('\t');
       chomp  $parsed[9];
       if(@parsed != 10) {
-        print STDERR "WARNING: skipping strange parser output line in $docid\n";
+        $log->warn("WARNING: skipping strange parser output line in $docid");
         $i++;
         next;
       }
@@ -125,7 +140,6 @@
    <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
     <f name="lex">
      <fs>
-      <f name="lemma">$parsed[2]</f>
       <f name="pos">$parsed[3]</f>
 );
       $morpho .= qq(      <f name="msd">$parsed[5]</f>\n) if($parsed[5] ne "_");
@@ -184,4 +198,70 @@
 <layer docid="$docid" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
 <spanList>
 ));
-}
\ No newline at end of file
+}
+
+=pod
+
+=encoding utf8
+
+=head1 NAME
+
+conllu2korapxml - Conversion of KorAP-XML CoNLL-U to KorAP-XML zips
+
+=head1 SYNOPSIS
+
+  conllu2korapxml < zca15.tree_tagger.conllu > zca15.tree_tagger.zip
+
+=head1 DESCRIPTION
+
+C<conllu2korapxml> converts CoNLL-U files that follow KorAP-specific comment
+conventions and contain morphosyntactic and/or dependency annotations to
+corresponding KorAP-XML zip files.
+
+=head1 INSTALLATION
+
+  $ cpanm https://github.com/KorAP/KorAP-XML-CoNLL-U.git
+
+=head1 OPTIONS
+
+=over 2
+
+=item B<--force-foundry|-f>
+
+Set foundry name and ignore foundry names in the input.
+
+=item B<--help|-h>
+
+Print help information.
+
+=item B<--version|-v>
+
+Print version information.
+
+
+=item B<--log|-l>
+
+Log level for I<Log::Any>. Defaults to C<warn>.
+
+=back
+
+=head1 EXAMPLES
+
+ conllu2korapxml -f tree_tagger < t/data/wdf19.morpho.conllu > wdf19.tree_tagger.zip
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2021, L<IDS Mannheim|https://www.ids-mannheim.de/>
+
+Author: Marc Kupietz
+
+Contributors: Nils Diewald
+
+L<KorAP::XML::CoNLL-U> is developed as part of the L<KorAP|https://korap.ids-mannheim.de/>
+Corpus Analysis Platform at the
+L<Leibniz Institute for the German Language (IDS)|http://ids-mannheim.de/>,
+member of the
+L<Leibniz-Gemeinschaft|http://www.leibniz-gemeinschaft.de/>.
+
+This program is free software published under the
+L<BSD-2 License|https://opensource.org/licenses/BSD-2-Clause>.