# blob: aaec3f5790ee28acb8791e3f6a627fea77fabb63 [file] [log] [blame]
use strict;
use warnings;
use utf8;
use Test::More;
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
use Data::Dumper;
use KorAP::Tokenizer;
use KorAP::Document;

# Fixture directory TEST/BSP/1, resolved relative to this test file.
my $path = catdir(dirname(__FILE__), 'TEST', 'BSP', 1);

# NOTE(review): the trailing slash is presumably expected by
# KorAP::Document - confirm against its path handling.
ok(my $doc = KorAP::Document->new(
  path => $path . '/'
), 'Create Document');

ok($doc->parse, 'Parse document');

# Build the token stream from the 'Sgbr' foundry, 'Lemma' layer.
ok(my $tokens = KorAP::Tokenizer->new(
  path    => $doc->path,
  doc     => $doc,
  foundry => 'Sgbr',
  layer   => 'Lemma',
  name    => 'tokens'
), 'Create tokens based on lemmata');

ok($tokens->parse, 'Parse tokenization based on lemmata');

my $data   = $tokens->to_data->{data};
my $stream = $data->{stream};

# First stream entry: token-count term, position term, then the
# lowercased (i:) and original-case (s:) terms.
is($stream->[0]->[0], '-:tokens$<i>51', 'Token number');
is($stream->[0]->[1], '_0$<i>0<i>18', 'Position');
is($stream->[0]->[2], 'i:sommerüberraschung', 'First term (lowercased)');
is($stream->[0]->[3], 's:Sommerüberraschung', 'First term (original case)');

# Last stream entry: position term followed by the i: and s: terms.
# Each description is unique so a failure can be traced to one assertion.
is($stream->[-1]->[0], '_50$<i>359<i>364', 'Last position');
is($stream->[-1]->[1], 'i:kevin', 'Last term (lowercased)');
is($stream->[-1]->[2], 's:Kevin', 'Last term (original case)');

done_testing;