blob: 95eabc4582b5db63705d6174f6873f9357ca266c [file] [log] [blame]
package MyLog;
use Mojo::Base -base;

# Minimal logger stand-in that is handed to the tokenizer further down.
# It consists of plain Mojo::Base attributes only, so it never writes
# any output itself.

# Debug flag, off by default.
has is_debug => 0;

# One accessor per log level, each with an empty default callback.
has [qw/warn debug trace error/] => sub {};
9
10package main;
11use strict;
12use warnings;
13use Test::More;
14use Data::Dumper;
15use JSON::XS;
Akroneaffe932019-03-07 17:14:42 +010016
# Opt-out switch: a true SKIP_REAL in the environment skips this whole file.
plan skip_all => 'Skip real tests' if $ENV{SKIP_REAL};
20
Akroneaffe932019-03-07 17:14:42 +010021use Benchmark qw/:hireswallclock/;
22
# Benchmark object created at start-up; nothing else in this file reads $t.
my $t = Benchmark->new();
24
25use utf8;
26use lib 'lib', '../lib';
27
28use File::Basename 'dirname';
29use File::Spec::Functions 'catdir';
30
use_ok('KorAP::XML::Krill');

# Fixture under test: text 00010 of document B14 in the NGAFC corpus,
# located in the 'corpus' directory next to this test file.
# NOTE(review): an earlier comment here spoke of the Dortmund chat corpus,
# while the assertions below expect publisher 'Usenet' - confirm which
# description is correct.
my $text_dir = catdir(dirname(__FILE__), qw/corpus NGAFC B14 00010/);

# The constructor is given the directory with a trailing slash appended.
my $doc = KorAP::XML::Krill->new(path => "$text_dir/");
ok($doc, 'Load Korap::Document');
ok($doc->parse, 'Parse document');

# Sigles on text, document and corpus level: [accessor, expected, label].
my @sigle_checks = (
  [text_sigle   => 'NGAFC/B14/00010', 'Correct text sigle'],
  [doc_sigle    => 'NGAFC/B14',       'Correct document sigle'],
  [corpus_sigle => 'NGAFC',           'Correct corpus sigle'],
);

for my $check (@sigle_checks) {
  my ($accessor, $expected, $label) = @$check;
  is($doc->$accessor, $expected, $label);
}

# Spot-check two metadata fields of the parsed text.
my $metadata = $doc->meta;
is(
  $metadata->{T_title},
  'Re: Ranking der Zuverlässigkeit von Filesystemen, In: de.sci.informatik.misc',
  'Title'
);
is($metadata->{A_publisher}, 'Usenet', 'Publisher');
49
# Tokenization
use_ok('KorAP::XML::Tokenizer');

# Foundry and layer passed to the tokenizer as its base tokenization.
my $token_base_foundry = 'Base';
my $token_base_layer   = 'Tokens';

# Build the tokenizer for the parsed document; the MyLog stub declared at
# the top of this file serves as its logger.
my $tokens = KorAP::XML::Tokenizer->new(
  path    => $doc->path,
  doc     => $doc,
  foundry => $token_base_foundry,
  layer   => $token_base_layer,
  name    => 'tokens',
  log     => MyLog->new
);

ok($tokens, 'Token Object is fine');

# This assertion passes only when parse() returns a false value.
ok(!$tokens->parse, 'Token parsing is not fine');

done_testing();
69
70
71__END__