blob: 316dceaabce74eff8c579dbb043061ff93a7faf1 [file] [log] [blame]
Akronef8544f2016-01-16 14:51:07 +01001package TestInit;
2use strict;
3use warnings;
4use File::Basename 'dirname';
5use File::Spec::Functions 'catdir';
Akrone4c2e412016-01-28 15:10:50 +01006use KorAP::XML::Tokenizer;
7use KorAP::XML::Krill;
Akronef8544f2016-01-16 14:51:07 +01008
# Build and parse a tokenizer for one of the test documents that live
# under 'corpus/doc' next to this file.
#
# Parameters:
#   $file          - name of the document below 'corpus/doc'; a trailing
#                    '/' is appended to the resulting path before it is
#                    handed to KorAP::XML::Krill.
#   $token_foundry - optional foundry for the token layer; 'OpenNLP' is
#                    used when it is undefined.
#
# Returns the KorAP::XML::Tokenizer object once its parse() call has
# returned a true value. Returns nothing (bare return) when either
# constructor yields a false value or when the tokenizer's parse() is
# false.
sub tokens {
    my ($file, $token_foundry) = @_;

    # Fall back to the default foundry only when none was given at all.
    $token_foundry //= 'OpenNLP';

    my $doc_dir = catdir(dirname(__FILE__), 'corpus', 'doc', $file) . '/';

    my $doc = KorAP::XML::Krill->new(path => $doc_dir);
    return unless $doc;

    # NOTE(review): the result of this parse is not checked, unlike the
    # tokenizer parse below - confirm whether that is intentional.
    $doc->parse;

    my $tokenizer = KorAP::XML::Tokenizer->new(
        path    => $doc->path,
        doc     => $doc,
        foundry => $token_foundry,
        layer   => 'Tokens',
        name    => 'tokens',
    );
    return unless $tokenizer;

    return unless $tokenizer->parse;

    return $tokenizer;
};
32
331;