# Checks the token stream that KorAP::XML::Tokenizer builds for the
# TEST/BSP/1 fixture when the 'Lemma' layer of the 'Sgbr' foundry is
# used as the token source.
use strict;
use warnings;
use Test::More;
use File::Basename 'dirname';
use File::Spec::Functions 'catdir';
use Data::Dumper;
use KorAP::XML::Tokenizer;
use KorAP::XML::Krill;
use utf8;    # expected values below contain non-ASCII literals

# The whole file is skipped when SKIP_REAL is set in the environment.
plan skip_all => 'Skip real tests' if $ENV{SKIP_REAL};

# Fixture directory, resolved relative to this test file.
my $path = catdir(dirname(__FILE__), 'TEST', 'BSP', 1);

# The document path is handed over with a trailing slash.
ok(
    my $doc = KorAP::XML::Krill->new(path => $path . '/'),
    'Create Document'
);

ok($doc->parse, 'Parse document');

ok(
    my $tokens = KorAP::XML::Tokenizer->new(
        path    => $doc->path,
        doc     => $doc,
        foundry => 'Sgbr',
        layer   => 'Lemma',
        name    => 'tokens'
    ),
    'Create tokens based on lemmata'
);

ok($tokens->parse, 'Parse tokenization based on lemmata');

# Each stream entry is an array reference holding the terms of one token.
my $data   = $tokens->to_data->{data};
my $stream = $data->{stream};

# First token. The expected strings are single-quoted on purpose, so that
# '$<' is taken literally instead of being interpolated.
is($stream->[0]->[0], '-:tokens$<i>51',       'Token number');
is($stream->[0]->[2], '_0$<i>0<i>18',         'Position');
is($stream->[0]->[3], 'i:sommerüberraschung', 'First term (lowercased)');
is($stream->[0]->[4], 's:Sommerüberraschung', 'First term (original case)');

# Last token.
is($stream->[-1]->[0], '_50$<i>359<i>364', 'Last position');
is($stream->[-1]->[1], 'i:kevin',          'Last term (lowercased)');
is($stream->[-1]->[2], 's:Kevin',          'Last term (original case)');

done_testing;