Add -tk option to use the standard KorAP tokenizer
Change-Id: I992fe37463926c8ecbca933fbb709f8640d6fb93
diff --git a/t/cmd/tokenizer.pl b/t/cmd/tokenizer.pl
index bf948e8..4e8b2d0 100644
--- a/t/cmd/tokenizer.pl
+++ b/t/cmd/tokenizer.pl
@@ -2,6 +2,7 @@
use strict;
use warnings;
use FindBin;
+use Encode;
BEGIN {
unshift @INC, "$FindBin::Bin/../../lib";
};
@@ -14,7 +15,7 @@
# Read lines from input and return boundaries
while (!eof(STDIN)) {
- my $line = <>;
+ my $line = decode_utf8(<>);
for my $text (split(/\n?\x{04}\n?/, $line)) {
$tok->tokenize($text);
print join(' ', $tok->boundaries), "\n";