Fixed bug in tokenizer to recognize non-word-tokenizations
Change-Id: I4d9d5ffaefc45dc2220c17273dee70e05080137e
diff --git a/t/script/archive.t b/t/script/archive.t
index aee0860..a0b4dd6 100644
--- a/t/script/archive.t
+++ b/t/script/archive.t
@@ -128,6 +128,24 @@
};
ok(-d $output, 'Ouput directory exists');
+
+
+$input = catfile($f, '..', 'corpus', 'WDD15', 'A79', '83946');
+$call = join(
+ ' ',
+ 'perl', $script,
+ '--input' => $input
+);
+
+# Run the call on this input and expect "no base tokenization" in the captured output
+{
+ local $SIG{__WARN__} = sub {};
+ my $out = stderr_from(sub { system($call); });
+
+ like($out, qr!no base tokenization!s, $call);
+};
+
+
unlink($output);
done_testing;