#!/usr/bin/env perl
# source ~/perl5/perlbrew/etc/bashrc
# perlbrew switch perl-blead@korap
#
# Test of the KorAP document pipeline against the GOE/AGA/03828 fixture
# located next to this test file:
#
#   1. load and parse the document,
#   2. build the base tokenization (OpenNLP/Tokens),
#   3. add every available foundry/layer annotation,
#   4. serialize the token stream to JSON and check the document title,
#   5. report the elapsed time as a diagnostic.
use strict;
use warnings;
use utf8;

use Test::More;

# ':hireswallclock' asks Benchmark for high-resolution wallclock timing.
use Benchmark qw/:hireswallclock/;

use lib 'lib', '../lib';

use File::Basename 'dirname';
use File::Spec::Functions 'catdir';

# Start the clock before any runtime work (module loading via use_ok,
# parsing, annotation) so that the final diag covers the whole run.
my $t = Benchmark->new;

use_ok('KorAP::Document');

# GOE/AGA/03828
my $path = catdir(dirname(__FILE__), 'GOE/AGA/03828');

# The document path is handed over with a trailing slash.
my $doc = KorAP::Document->new( path => $path . '/' );
ok($doc, 'Load Korap::Document');
ok($doc->parse, 'Parse document');

# Tokenization
use_ok('KorAP::Tokenizer');

my ($token_base_foundry, $token_base_layer) = (qw/OpenNLP Tokens/);

# Get tokenization
my $tokens = KorAP::Tokenizer->new(
  path    => $doc->path,
  doc     => $doc,
  foundry => $token_base_foundry,
  layer   => $token_base_layer,
  name    => 'tokens'
);
ok($tokens, 'Token Object is fine');
ok($tokens->parse, 'Token parsing is fine');

# Annotation layers to add, in order: [ foundry, layer, test description ].
# Keeping them in one table guarantees that every layer is checked the
# same way and makes enabling/disabling a layer a one-line change.
my @annotation_layers = (
  [ 'Base',       'Sentences',     'Add base sentences' ],
  [ 'Base',       'Paragraphs',    'Add base paragraphs' ],
  [ 'OpenNLP',    'Sentences',     'Add opennlp sentences' ],
  [ 'OpenNLP',    'Morpho',        'Add opennlp morpho' ],
  [ 'TreeTagger', 'Sentences',     'Add tt sentences' ],
  [ 'TreeTagger', 'Morpho',        'Add tt morpho' ],
  [ 'CoreNLP',    'NamedEntities', 'Add corenlp ne' ],
  [ 'CoreNLP',    'Sentences',     'Add corenlp sentences' ],
  [ 'CoreNLP',    'Morpho',        'Add corenlp morpho' ],
  [ 'CoreNLP',    'Constituency',  'Add corenlp constituency' ],
  [ 'Glemm',      'Morpho',        'Add glemm morpho' ],
  [ 'Connexor',   'Sentences',     'Add cnx sentences' ],
  [ 'Connexor',   'Morpho',        'Add cnx morpho' ],
  [ 'Connexor',   'Phrase',        'Add cnx phrase' ],
  [ 'Connexor',   'Syntax',        'Add cnx syntax' ],
  [ 'Mate',       'Morpho',        'Add mate morpho' ],
  # [ 'Mate',     'Dependency',    ... ],  # currently disabled
  [ 'XIP',        'Sentences',     'Add xip sentences' ],
  [ 'XIP',        'Morpho',        'Add xip morpho' ],
  [ 'XIP',        'Constituency',  'Add xip constituency' ],
  # [ 'XIP',      'Dependency',    ... ],  # currently disabled
);

for my $annotation (@annotation_layers) {
  my ($foundry, $layer, $description) = @$annotation;
  ok($tokens->add($foundry, $layer), $description);
}

ok($tokens->to_json, 'To json');

# Every assertion carries a description so a failure is identifiable in
# the TAP output.
is(
  $tokens->doc->to_hash->{title},
  'Autobiographische Einzelheiten',
  'Document title'
);

# Elapsed time since the clock was started at the top of the script.
diag timestr(timediff(Benchmark->new, $t));

# NOTE(review): no test plan and no done_testing call are visible in this
# section of the file -- confirm that done_testing is invoked after this
# point, otherwise Test::More reports a missing plan.