#!/usr/bin/env perl
# source ~/perl5/perlbrew/etc/bashrc
# perlbrew switch perl-blead@korap
#
# Integration test for a single real corpus document (GOE/AGA/03828):
# loads and parses the document, tokenizes it on the OpenNLP token layer,
# adds the available annotation layers, serializes the result to JSON,
# writes that JSON to 'AGA.03828.json' in the current working directory
# and finally reports the elapsed wall-clock time via diag().
use strict;
use warnings;
use Test::More;
use Mojo::ByteStream 'b';

use Benchmark qw/:hireswallclock/;

use utf8;
use lib 'lib', '../lib';

use File::Basename 'dirname';
use File::Spec::Functions 'catdir';

# Start the clock before any real work is done.
my $t = Benchmark->new;

use_ok('KorAP::Document');

# GOE/AGA/03828
my $path = catdir(dirname(__FILE__), 'GOE/AGA/03828');
# TODO: Test with absolute path!

ok(my $doc = KorAP::Document->new( path => $path . '/' ), 'Load Korap::Document');
ok($doc->parse, 'Parse document');

# Tokenization
use_ok('KorAP::Tokenizer');

my ($token_base_foundry, $token_base_layer) = (qw/OpenNLP Tokens/);

# Get tokenization
my $tokens = KorAP::Tokenizer->new(
  path    => $doc->path,
  doc     => $doc,
  foundry => $token_base_foundry,
  layer   => $token_base_layer,
  name    => 'tokens'
);
ok($tokens, 'Token Object is fine');
ok($tokens->parse, 'Token parsing is fine');

# Annotation layers to add, as [foundry, layer, test description].
# Commented-out rows are layers that are currently not exercised by this
# test (they were already disabled in the original script).
my @annotations = (
  ['Base',       'Sentences',     'Add base sentences'],
  ['Base',       'Paragraphs',    'Add base paragraphs'],
  ['OpenNLP',    'Sentences',     'Add opennlp sentences'],
  ['OpenNLP',    'Morpho',        'Add opennlp morpho'],
  ['TreeTagger', 'Sentences',     'Add tt sentences'],
  ['TreeTagger', 'Morpho',        'Add tt morpho'],
  ['CoreNLP',    'NamedEntities', 'Add corenlp ne'],
  ['CoreNLP',    'Sentences',     'Add corenlp sentences'],
  ['CoreNLP',    'Morpho',        'Add corenlp morpho'],
  ['CoreNLP',    'Constituency',  'Add corenlp constituency'],
  ['Glemm',      'Morpho',        'Add glemm morpho'],
  # ['Connexor', 'Sentences',     'Add cnx sentences'],
  # ['Connexor', 'Morpho',        'Add cnx morpho'],
  # ['Connexor', 'Phrase',        'Add cnx phrase'],
  # ['Connexor', 'Syntax',        'Add cnx syntax'],
  ['Mate',       'Morpho',        'Add mate morpho'],
  # ['Mate',     'Dependency'],
  # ['XIP',      'Sentences',     'Add xip sentences'],
  # ['XIP',      'Morpho',        'Add xip morpho'],
  # ['XIP',      'Constituency',  'Add xip constituency'],
  # ['XIP',      'Dependency'],
);

for my $annotation (@annotations) {
  my ($foundry, $layer, $description) = @$annotation;
  ok($tokens->add($foundry, $layer), $description);
}

# Serialize once and reuse the result for both the assertion and the dump.
my $json = $tokens->to_json;
ok($json, 'To json');

is(
  $tokens->doc->to_hash->{title},
  'Autobiographische Einzelheiten',
  'Document title'
);

# Dump the serialized document next to wherever the test is run from.
b($json)->spurt('AGA.03828.json');

diag timestr(timediff(Benchmark->new, $t));

# No plan is declared above, so the test run must be closed explicitly.
done_testing;