blob: 2abc19841e821c1c899adf630494c269acf0d99e [file] [log] [blame]
#!/usr/bin/env perl
# Smoke test for KorAP::Document and KorAP::Tokenizer.
#
# Loads the fixture document stored under corpus/doc/text (relative to this
# test file), parses it, checks its primary data, then builds a tokenizer on
# top of the document and adds one further annotation layer to it.
use strict;
use warnings;
use utf8;
use Test::More;
use Scalar::Util qw/weaken/;

use_ok('KorAP::Document');

use File::Basename 'dirname';
use File::Spec::Functions 'catdir';

# Fixture directory, resolved relative to the location of this test file.
my $path = catdir(dirname(__FILE__), 'corpus', 'doc', 'text');

# The constructor is handed the directory with a trailing slash appended.
ok(my $doc = KorAP::Document->new(
    path => $path . '/'
), 'Load Korap::Document');

# \Q...\E quotes the interpolated path so that regex metacharacters in the
# directory name (dots, plus signs, backslashes produced by catdir on
# Windows, ...) are matched literally instead of being read as regex syntax.
like($doc->path, qr!\Q$path\E/$!, 'Path');
ok($doc->parse, 'Parse document');

# The fixture is expected to yield primary data with a data_length of 129.
ok($doc->primary->data, 'Primary data in existence');
is($doc->primary->data_length, 129, 'Data length');

use_ok('KorAP::Tokenizer');

# Tokenizer bound to the parsed document, configured for OpenNLP/Tokens.
ok(my $tokens = KorAP::Tokenizer->new(
    path    => $doc->path,
    doc     => $doc,
    foundry => 'OpenNLP',
    layer   => 'Tokens',
    name    => 'tokens'
), 'New Tokenizer');

ok($tokens->parse, 'Parse');

# Add the 'Base' / 'Paragraphs' annotation on top of the parsed tokens.
ok($tokens->add('Base', 'Paragraphs'), 'Add Structure');


done_testing;

__END__
43
44
45
46
47
48done_testing;
49__END__
50
51
# Build a KorAP::Tokenizer for the 'DeReKo' foundry / 'Structure' layer
# (named 'spans') on top of the file-level $doc.
#
# NOTE: this definition is placed after an __END__ marker, so perl does not
# compile it; it is kept as a reference snippet only.
#
# Returns whatever KorAP::Tokenizer->new returns for these arguments.
sub new_tokenizer {
    # Work on a local copy of the document reference and weaken that copy.
    my $weak_doc = $doc;
    weaken($weak_doc);

    return KorAP::Tokenizer->new(
        path    => $weak_doc->path,
        doc     => $weak_doc,
        foundry => 'DeReKo',
        layer   => 'Structure',
        name    => 'spans'
    );
};
63
64__END__