# blob: 90f739294ba606b1fdbabd0ed4de52cb73630b2b [file] [log] [blame]
# Stub logger class. An instance is passed as the `log` argument to
# KorAP::XML::Tokenizer->new further down in this file.
package MyLog;
use Mojo::Base -base;

# Debug flag, off by default.
has is_debug => 0;

# One attribute per log level. The default callbacks return nothing, and
# (per Mojo::Base accessor semantics) calling e.g. ->warn('msg') only stores
# the value on the object and returns the object - nothing is printed.
has warn  => sub {};
has debug => sub {};
has trace => sub {};
has error => sub {};

package main;
use strict;
use warnings;
use Test::More;
use Data::Dumper;
use JSON::XS;
use Log::Log4perl;

use Benchmark qw/:hireswallclock/;

# Benchmark object created at start-up; it is not read anywhere else in
# this file.
my $t = Benchmark->new;

# The source contains UTF-8 string literals (see the title check below).
use utf8;
use lib 'lib', '../lib';

use File::Basename 'dirname';
use File::Spec::Functions 'catdir';

use_ok('KorAP::XML::Krill');

# This test checks a single text (NGAFC/B14/00010) of the NGAFC corpus;
# the metadata asserted below names Usenet as its publisher.

# Fixture directory, resolved relative to this test file.
my $path = catdir(dirname(__FILE__), '../corpus/NGAFC/B14/00010');

# Load and parse the document.
ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
ok($doc->parse, 'Parse document');

# Sigles at text, document and corpus level.
is($doc->text_sigle, 'NGAFC/B14/00010', 'Correct text sigle');
is($doc->doc_sigle, 'NGAFC/B14', 'Correct document sigle');
is($doc->corpus_sigle, 'NGAFC', 'Correct corpus sigle');

# Metadata fields.
my $meta = $doc->meta;
is($meta->{T_title}, 'Re: Ranking der Zuverlässigkeit von Filesystemen, In: de.sci.informatik.misc',
   'Title');
is($meta->{A_publisher}, 'Usenet', 'Publisher');

# Tokenization
use_ok('KorAP::XML::Tokenizer');

my ($token_base_foundry, $token_base_layer) = (qw/Base Tokens/);

# Get tokenization
my $tokens = KorAP::XML::Tokenizer->new(
  path    => $doc->path,
  doc     => $doc,
  foundry => $token_base_foundry,
  layer   => $token_base_layer,
  name    => 'tokens',
  log     => MyLog->new    # stub logger, so nothing is logged
);

ok($tokens, 'Token Object is fine');

# Parsing the Base#Tokens layer is expected to return a false value for
# this text; the test asserts that failure.
ok(!$tokens->parse, 'Token parsing is not fine');

done_testing;


__END__