Add -tk option to use the standard KorAP tokenizer

Change-Id: I992fe37463926c8ecbca933fbb709f8640d6fb93
diff --git a/script/tei2korapxml b/script/tei2korapxml
index 0546658..ab1975c 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -28,6 +28,10 @@
 use KorAP::XML::TEI::Zipper;
 use KorAP::XML::TEI::Header;
 
+# Optional dependency: a failed load is ignored here; the module is only needed for -tk.
+eval {
+  require KorAP::XML::TEI::Tokenizer::KorAP;
+};
 
 our $VERSION = '0.01';
 
@@ -39,6 +43,7 @@
   "root|r=s"  => \(my $_root_dir = '.'),  # name of root directory inside zip file
   "input|i=s" => \(my $input_fname = ''), # input file (yet only TEI I5 Format accepted)
   'tokenizer-call|tc=s' => \(my $tokenizer_call), # Temporary argument for testing purposes
+  'tokenizer-korap|tk' => \(my $tokenizer_korap), # use KorAP-tokenizer
   'use-intern-tokenization|ti' => \(my $tokenizer_intern), # use intern tokenization (default = no)
   'log|l=s' => \(my $log_level = 'notice'),
   'help|h'    => sub {
@@ -76,13 +81,25 @@
 #
 
 ## extern tokenization
-my $_GEN_TOK_EXT = $tokenizer_call ? 1 : 0;
+# External tokenization is active when either a custom tokenizer call (-tc)
+# or the standard KorAP tokenizer (-tk) was requested.
+my $_GEN_TOK_EXT = ($tokenizer_call || $tokenizer_korap) ? 1 : 0;
+
   # TODO:
   #   Read tokenizer call from configuration file.
   #   was 'java  -cp '. join(":", ".", glob(&dirname(__FILE__)."/../target/*.jar")). " de.ids_mannheim.korap.tokenizer.KorAPTokenizerImpl";
   my $ext_tok;
   if ($tokenizer_call) {
     $ext_tok = KorAP::XML::TEI::Tokenizer::External->new($tokenizer_call);
+  }
+
+  elsif ($tokenizer_korap) {
+    # Only reached when -tc is absent: -tc takes precedence over -tk.
+    # The tokenizer module is loaded on a best-effort basis, so report a
+    # clear error here instead of an opaque missing-method failure.
+    KorAP::XML::TEI::Tokenizer::KorAP->can('new')
+      or die "Option -tk requires KorAP::XML::TEI::Tokenizer::KorAP, which could not be loaded\n";
+    $ext_tok = KorAP::XML::TEI::Tokenizer::KorAP->new;
   };
   my $_tok_file_ext  = "tokens.xml";
 ##
@@ -1044,6 +1054,10 @@
 Call an external tokenizer process, that will tokenize
 a single line from STDIN and outputs one token per line.
 
+=item B<--tokenizer-korap|-tk>
+
+Use the standard KorAP/DeReKo tokenizer.
+
 =item B<--use-intern-tokenization|-ti>
 
 Tokenize the data using two embedded tokenizers,