Use Test::XML::Loy instead of Test::XML::Simple for performance reasons

Change-Id: I712e1e575808ca80930bd70b8c5cfd2eea0af684
diff --git a/LICENSE b/LICENSE
new file mode 100755
index 0000000..f1083af
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,24 @@
+Copyright (c) 2015, IDS Mannheim
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, 
+   this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, 
+   this list of conditions and the following disclaimer in the documentation 
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 
+DAMAGE.
\ No newline at end of file
diff --git a/Makefile.PL b/Makefile.PL
index d4714f9..1467c1e 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -8,13 +8,14 @@
   NAME         => 'tei2korapxml',
   AUTHOR       => 'Peter Harders',
   ABSTRACT     => 'Conversion of TEI P5 based formats to KorAP-XML',
-  VERSION      => '0.1',
+  VERSION_FROM => 'script/tei2korapxml',
   LICENSE      => 'freebsd',
   BUILD_REQUIRES => {
     'Test::More' => 0,
     'Test::Output' => 0,
     'XML::Loy' => 0.49,
     'IO::Uncompress::Unzip' => '2.091',
+    'Pod::Usage'      => 0,
   },
   PREREQ_PM => {
     'XML::CompactTree::XS'     => '0.03',
@@ -22,5 +23,5 @@
     'IO::Compress::Zip' => '2.091',
   },
   MIN_PERL_VERSION => '5.016',
-  EXE_FILES => ['./script/tei2korapxml']
+  EXE_FILES => ['script/tei2korapxml']
 );
diff --git a/script/tei2korapxml b/script/tei2korapxml
index d2c5fb9..3f6d717 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -23,6 +23,7 @@
 use open qw(:std :utf8); # assume utf-8 encoding
 use Encode qw(encode_utf8 decode_utf8);
 
+use Pod::Usage;
 use Getopt::Long;
 use XML::CompactTree::XS;
 use XML::LibXML::Reader;
@@ -32,6 +33,9 @@
 my $zip; my $first_write=1;
 my $outh = *STDOUT;
 
+our $VERSION = '0.01';
+our $VERSION_MSG = "\ntei2korapxml - v$VERSION\n";
+
 my $_CORR_POS_FOR_EMPTY_S_TAGS = 1;        # this should only be deactivated for test purposes (empty s-tags produce an additional blank (look for '  ' in data.xml)
 my $_CORR_BYTE_POS_FOR_P_TAGS  = 1;
 
@@ -125,15 +129,25 @@
 my $inside_annot_tag = -1;
 ##
 
-my $help;
-
 GetOptions(
-  "base=s" => \$_root_dir,
-  "help"   => \$help
+  'base|b=s' => \$_root_dir,
+  'help|h'   => sub {
+    pod2usage(
+      -verbose => 99,
+      -sections => 'NAME|DESCRIPTION|SYNOPSIS|ARGUMENTS|OPTIONS',
+      -msg => $VERSION_MSG,
+      -output => '-'
+    )
+  },
+  'version|v' => sub {
+    pod2usage(
+      -verbose => 0,
+      -msg => $VERSION_MSG,
+      -output => '-'
+    )
+  }
 );
 
-printhelp() if $help;
-
 ######
 ###### MAIN
 ######
@@ -975,15 +989,80 @@
 
 } # end: sub write_annot
 
-sub printhelp {
-    print STDERR <<EOHELP;
-This program is called from inside another script.
-EOHELP
-    exit();
-}
 
 sub startTokenizer {
   $pid = open2($chld_out, $chld_in, 'java  -cp '. join(":", ".", glob(&dirname(__FILE__)."/../target/*.jar"))." de.ids_mannheim.korap.tokenizer.KorAPTokenizerImpl");
   $select = IO::Select->new();
   $select->add(*$chld_out);
 }
+
+__END__
+
+=pod
+
+=encoding utf8
+
+=head1 NAME
+
+tei2korapxml - Conversion of TEI P5 based formats to KorAP-XML
+
+=head1 SYNOPSIS
+
+  cat corpus.i5.xml | tei2korapxml > corpus.korapxml.zip
+
+=head1 DESCRIPTION
+
+C<tei2korapxml> is a script to convert TEI P5 and I5 based documents
+to the KorAP-XML format. If no specific input is defined, data is
+read from C<STDIN>. If no specific output is defined, data is written
+to C<STDOUT>.
+This program is usually called from inside another script.
+
+=head1 INSTALLATION
+
+C<tei2korapxml> requires C<libxml2-dev> bindings to build. When
+these bindings are available, the preferred way to install the script is
+to use L<cpanm|App::cpanminus>.
+
+  $ cpanm https://github.com/KorAP/KorAP-XML-TEI.git
+
+In case everything went well, the C<tei2korapxml> tool will
+be available on your command line immediately.
+Minimum requirement for L<KorAP::XML::TEI> is Perl 5.16.
+
+=head1 OPTIONS
+
+=over 2
+
+=item B<--base|-b>
+
+The base directory for output. Defaults to C<.>.
+
+=item B<--help|-h>
+
+Print help information.
+
+=item B<--version|-v>
+
+Print version information.
+
+=back
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2020, L<IDS Mannheim|https://www.ids-mannheim.de/>
+
+Author: Peter Harders
+
+Contributors: Marc Kupietz, Carsten Schnober, Nils Diewald
+
+L<KorAP::XML::TEI> is developed as part of the L<KorAP|https://korap.ids-mannheim.de/>
+Corpus Analysis Platform at the
+L<Leibniz Institute for the German Language (IDS)|http://ids-mannheim.de/>,
+member of the
+L<Leibniz-Gemeinschaft|http://www.leibniz-gemeinschaft.de/>.
+
+This program is free software published under the
+L<BSD 2-Clause License|https://raw.githubusercontent.com/KorAP/KorAP-XML-TEI/master/LICENSE>.
+
+=cut
diff --git a/t/script.t b/t/script.t
index 6bdbc9e..101db08 100644
--- a/t/script.t
+++ b/t/script.t
@@ -14,12 +14,19 @@
 my $script = catfile($f, '..', 'script', 'tei2korapxml');
 ok(-f $script, 'Script found');
 
-stderr_is(
+stdout_like(
   sub { system('perl', $script, '--help') },
-  "This program is called from inside another script.\n",
+  qr!This\s*program\s*is\s*usually\s*called\s*from\s*inside\s*another\s*script\.!,
   'Help'
 );
 
+stdout_like(
+  sub { system('perl', $script, '--version') },
+  qr!tei2korapxml - v\d+?\.\d+?!,
+  'Version'
+);
+
+
 # Load example file
 my $file = catfile($f, 'data', 'goe_sample.i5.xml');
 my $outzip = tmpnam();
@@ -48,6 +55,7 @@
   ->text_is('h\.author', 'Goethe, Johann Wolfgang von', 'h.author')
   ->text_is('pubDate[type=year]', '1982', 'pubDate');
 
+
 # Uncompress GOE/AGA/header.xml from zip file
 $zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/header.xml');