Allow specifying both tokenizations (external and internal)
Change-Id: I2e3ff83e5122f803c5e4a18a0c1b89b93269d444
diff --git a/t/tokenization.t b/t/tokenization.t
index 1d75e5f..b132a63 100644
--- a/t/tokenization.t
+++ b/t/tokenization.t
@@ -3,6 +3,7 @@
use Test::More;
use File::Basename 'dirname';
use File::Spec::Functions qw/catfile/;
+use IO::Uncompress::Unzip;
use open qw(:std :utf8); # assume utf-8 encoding
use FindBin;
@@ -10,8 +11,10 @@
unshift @INC, "$FindBin::Bin/../lib";
};
+use_ok('Test::KorAP::XML::TEI','korap_tempfile');
require_ok('KorAP::XML::TEI::Tokenizer::Aggressive');
require_ok('KorAP::XML::TEI::Tokenizer::Conservative');
+require_ok('KorAP::XML::TEI::Zipper');
# Test aggressive
my $aggr = KorAP::XML::TEI::Tokenizer::Aggressive->new;
@@ -111,4 +114,25 @@
is(302, scalar(@$cons));
+subtest 'Test Zipper' => sub {
+ # Write aggressive-tokenizer output into a temporary zip, then reopen it
+ my ($fh, $outzip) = korap_tempfile('tokenize_zipper');
+ my $zip = KorAP::XML::TEI::Zipper->new($outzip);
+ $fh->close;
+
+ my $aggr = KorAP::XML::TEI::Tokenizer::Aggressive->new;
+ $aggr->tokenize("Der alte Mann");
+ ok($aggr->to_zip(
+ $zip->new_stream('tokens.xml'),
+ 'fun'
+ ), 'Written successfully');
+
+ $zip->close;
+ ok(-e $outzip, 'Zip exists');
+ # Unzip->new returns undef on failure; guard so the test fails, not dies
+ my $unzip = IO::Uncompress::Unzip->new($outzip, Name => 'tokens.xml');
+ ok(defined($unzip) && !$unzip->eof, 'Unzip successful');
+};
+
+
done_testing;