Introduce --no-tokenizer parameter

Change-Id: Icc12c24dd3d01b0e31d14b6d5a6c6214da0d9918
diff --git a/t/script.t b/t/script.t
index e7b93f3..80b67b5 100644
--- a/t/script.t
+++ b/t/script.t
@@ -503,7 +503,8 @@
   my $t = test_tei2korapxml(
     file => $file,
     env => 'KORAPXMLTEI_INLINE=1',
-    tmp => 'script_tagged'
+    tmp => 'script_tagged',
+    param => '--no-tokenizer'
   )
     ->stderr_like(qr!tei2korapxml:.*? text_id=GOE_AGA\.00000!)
     ->stderr_like(qr!KORAPXMLTEI_INLINE is deprecated!)
@@ -586,7 +587,7 @@
   my $t = test_tei2korapxml(
     file => $file,
     tmp => 'script_tagged',
-    param => '--inline-tokens=myfoundry#myfile --skip-inline-token-annotations=0'
+    param => '--inline-tokens=myfoundry#myfile --skip-inline-token-annotations=0 --no-tokenizer'
   )
     ->stderr_like(qr!tei2korapxml:.*? text_id=GOE_AGA\.00000!)
     ->stderr_unlike(qr!KORAPXMLTEI_INLINE is deprecated!)
@@ -606,7 +607,7 @@
   $t = test_tei2korapxml(
     file => $file,
     tmp => 'script_tagged',
-    param => '--inline-tokens=myfoundry --skip-inline-token-annotations=0'
+    param => '--inline-tokens=myfoundry --skip-inline-token-annotations=0 --no-tokenizer'
   )
     ->stderr_like(qr!tei2korapxml:.*? text_id=GOE_AGA\.00000!)
 
@@ -632,14 +633,14 @@
 
   # Generate zip file (unportable!)
   stderr_like(
-    sub { `cat '$file' | perl '$script' --skip-token-inline-annotations=0 - > '$outzip'` },
+    sub { `cat '$file' | perl '$script' --skip-token-inline-annotations=0 --no-tokenizer - > '$outzip'` },
     qr!tei2korapxml:.*? text_id=GOE_AGA\.00000!,
     'Processing 1'
   );
 
   # TODO: there should be a better way to test this
   stderr_unlike(
-    sub { `cat '$file' | perl '$script' --skip-token-inline-annotations=0 - > '$outzip'` },
+    sub { `cat '$file' | perl '$script' --skip-token-inline-annotations=0 --no-tokenizer - > '$outzip'` },
     qr!.*undefined value.*!,
     'Processing 2'
   );
@@ -667,7 +668,7 @@
   test_tei2korapxml(
     file => catfile($f, 'data', 'goe_sample.i5.xml'),
     tmp => 'script_utf8_enc',
-    param => '--skip-inline-token-annotations=0',
+    param => '--skip-inline-token-annotations=0 --no-tokenizer',
   )
     ->stderr_like(qr!tei2korapxml:.*? text_id=GOE_AGA\.00000!)
     ->unzip_xml('GOE/AGA/00000/data.xml')
@@ -677,7 +678,7 @@
 
   test_tei2korapxml(
     file => catfile($f, 'data', 'goe_sample.i5.iso.xml'),
-    param => '--skip-inline-token-annotations=0',
+    param => '--skip-inline-token-annotations=0 --no-tokenizer',
     tmp => 'script_iso_enc'
   )
     ->stderr_like(qr!tei2korapxml:.*? text_id=GOE_AGA\.00000!)
@@ -730,7 +731,7 @@
   test_tei2korapxml(
     tmp => 'script_out',
     file => $file,
-    param => '-l=warn'
+    param => '-l=warn --no-tokenizer'
   )->stderr_is('');
 };
 
@@ -763,13 +764,13 @@
   test_tei2korapxml(
     tmp => 'script_out',
     file => $file,
-    param => '-rv=' . $KorAP::XML::TEI::Tokenizer::KorAP::VERSION
+    param => '-rv=' . $KorAP::XML::TEI::Tokenizer::KorAP::VERSION . ' --no-tokenizer'
   )->stderr_like(qr!GOE_AGA\.00000!);
 
   test_tei2korapxml(
     tmp => 'script_out',
     file => $file,
-    param => '-rv=   "  ' . $KorAP::XML::TEI::Tokenizer::KorAP::VERSION . '  "'
+    param => '-rv=   "  ' . $KorAP::XML::TEI::Tokenizer::KorAP::VERSION . ' "  --no-tokenizer'
   )->stderr_like(qr!GOE_AGA\.00000!);
 };
 
@@ -789,4 +790,12 @@
 
 };
 
+subtest 'Require tokenizer' => sub {
+  # Run without any tokenizer-related parameter; stderr must report the missing choice
+  test_tei2korapxml(
+    file => catfile($f, 'data', 'icc_german_sample.p5.xml'),
+    tmp => 'script_utf8_enc'
+  )->stderr_like(qr!No tokenizer chosen!);
+};
+
 done_testing;
diff --git a/t/tei.t b/t/tei.t
index 4d068a6..98b945e 100644
--- a/t/tei.t
+++ b/t/tei.t
@@ -53,7 +53,8 @@
       text => "<!--\nDies ist ein\nmehrzeiligerKommentar -->Text1",
       textSigle => 'A/B.1',
       pattern => 'xx'
-    }
+    },
+    param => '--no-tokenizer'
   )
     ->file_exists('A/B/1/data.xml')
     ->unzip_xml('A/B/1/data.xml')
@@ -67,7 +68,8 @@
       text => "Nur ein Test",
       textSigle => '',
       pattern => 'missing_dir'
-    }
+    },
+    param => '--no-tokenizer'
   )
     ->file_exists_not('A/B/1/data.xml')
     ->stderr_like(qr!Empty '<textSigle />' \(L29\) in header!)