Add UTF-8 test for tokenization
Change-Id: Iadd133bc65e404d23a6b5097446293a0492fa983
diff --git a/t/tokenization.t b/t/tokenization.t
index 9d986a0..9fbf4d1 100644
--- a/t/tokenization.t
+++ b/t/tokenization.t
@@ -4,6 +4,7 @@
use File::Basename 'dirname';
use File::Spec::Functions qw/catfile/;
use IO::Uncompress::Unzip;
+use utf8;
use open qw(:std :utf8); # assume utf-8 encoding
use FindBin;
@@ -40,6 +41,9 @@
$cons->tokenize("Der alte Mann");
is_deeply($cons, [0,3,4,8,9,13]);
+$cons->reset->tokenize("Der ältere Mann"); # same shape as the ASCII case above, but with a non-ASCII letter
+is_deeply($cons, [0,3,4,10,11,15]); # "ältere" spans 4..10: "ä" counts as one unit, not two UTF-8 bytes
+
$cons->reset->tokenize("Der alte bzw. der grau-melierte Mann");
is_deeply($cons, [0,3,4,8,9,12,12,13,14,17,18,31,32,36]);