# Silent stand-in logger. The tokenizer below receives an instance of this
# class as its `log` argument; every log-level name it may call exists as an
# attribute, and none of them prints anything.
package MyLog;
use Mojo::Base -base;

# Debug mode is off unless a caller sets it.
has is_debug => 0;

# One attribute per log level, all sharing the same empty default callback.
has [qw/warn debug trace error/] => sub {};
|  | 9 |  | 
|  | 10 | package main; | 
|  | 11 | use strict; | 
|  | 12 | use warnings; | 
|  | 13 | use Test::More; | 
|  | 14 | use Data::Dumper; | 
|  | 15 | use JSON::XS; | 
| Akron | eaffe93 | 2019-03-07 17:14:42 +0100 | [diff] [blame] | 16 |  | 
# Opt-out switch: when SKIP_REAL is set in the environment, the whole file is
# reported as skipped instead of being run.
plan skip_all => 'Skip real tests' if $ENV{SKIP_REAL};
|  | 20 |  | 
| Akron | eaffe93 | 2019-03-07 17:14:42 +0100 | [diff] [blame] | 21 | use Benchmark qw/:hireswallclock/; | 
|  | 22 |  | 
# Timestamp taken at the start of the run.
# NOTE(review): $t is not read anywhere before __END__ - confirm whether it
# is still needed.
my $t = Benchmark->new();
|  | 24 |  | 
|  | 25 | use utf8; | 
|  | 26 | use lib 'lib', '../lib'; | 
|  | 27 |  | 
|  | 28 | use File::Basename 'dirname'; | 
|  | 29 | use File::Spec::Functions 'catdir'; | 
|  | 30 |  | 
|  | 31 | use_ok('KorAP::XML::Krill'); | 
|  | 32 |  | 
# This will check files from the NGAFC corpus (Usenet newsgroup postings)
|  | 34 |  | 
|  | 35 | # New | 
# Fixture directory: text 00010 of document B14 in the NGAFC corpus,
# located relative to this test file.
my $path = catdir(dirname(__FILE__), qw/corpus NGAFC B14 00010/);

# The constructor is handed the directory with a trailing slash.
my $doc = KorAP::XML::Krill->new(path => "$path/");
ok($doc, 'Load Korap::Document');
ok($doc->parse, 'Parse document');

# The three sigles must nest: corpus, then document, then text.
for my $check (
  [text_sigle   => 'NGAFC/B14/00010', 'Correct text sigle'],
  [doc_sigle    => 'NGAFC/B14',       'Correct document sigle'],
  [corpus_sigle => 'NGAFC',           'Correct corpus sigle'],
) {
  my ($accessor, $expected, $label) = @$check;
  is($doc->$accessor, $expected, $label);
}

# Spot-check two metadata fields of the parsed text.
my $meta = $doc->meta;
my $expected_title =
  'Re: Ranking der Zuverlässigkeit von Filesystemen, In: de.sci.informatik.misc';
is($meta->{T_title}, $expected_title, 'Title');
is($meta->{A_publisher}, 'Usenet', 'Publisher');
|  | 49 |  | 
|  | 50 | # Tokenization | 
|  | 51 | use_ok('KorAP::XML::Tokenizer'); | 
|  | 52 |  | 
# Foundry and layer that name the base tokenization to load.
my $token_base_foundry = 'Base';
my $token_base_layer   = 'Tokens';

# Build the tokenizer for the document parsed above; log calls go to the
# silent MyLog object.
my $tokens = KorAP::XML::Tokenizer->new(
  path    => $doc->path,
  doc     => $doc,
  foundry => $token_base_foundry,
  layer   => $token_base_layer,
  name    => 'tokens',
  log     => MyLog->new,
);

ok($tokens, 'Token Object is fine');

# Parsing is expected to return a false value for this fixture.
# NOTE(review): presumably the fixture has no Base/Tokens layer - confirm.
my $parsed = $tokens->parse;
ok(!$parsed, 'Token parsing is not fine');

done_testing();
|  | 69 |  | 
|  | 70 |  | 
|  | 71 | __END__ |