#!/usr/bin/env perl
# Developer notes kept from the original header (shell commands, not Perl):
#   source ~/perl5/perlbrew/etc/bashrc
#   perlbrew switch perl-blead@korap
use strict;
use warnings;
use Test::More;
use Mojo::ByteStream 'b';

use Benchmark qw/:hireswallclock/;

use utf8;
use lib 'lib', '../lib';

use File::Basename 'dirname';
use File::Spec::Functions 'catdir';

# Reference point for the elapsed-time report printed at the end of the
# script. Every `use` above is processed at compile time, so this statement
# runs only after all of them have been loaded.
my $t = Benchmark->new;

# Load the document class at runtime; the load itself counts as a test.
use_ok('KorAP::Document');

# Fixture directory GOE/AGA/03828, resolved relative to this test file.
# TODO: Test with an absolute path!
my $path = catdir(dirname(__FILE__), 'GOE/AGA/03828');

# The constructor is handed the fixture directory with a trailing slash.
my $doc = KorAP::Document->new(path => "$path/");
ok($doc, 'Load Korap::Document');
ok($doc->parse, 'Parse document');

# Tokenization
use_ok('KorAP::Tokenizer');

# Foundry and layer that supply the base tokenization.
my $token_base_foundry = 'OpenNLP';
my $token_base_layer   = 'Tokens';

# Constructor arguments, kept as an ordered list so they reach new() in the
# same order as before.
my @tokenizer_args = (
  path    => $doc->path,
  doc     => $doc,
  foundry => $token_base_foundry,
  layer   => $token_base_layer,
  name    => 'tokens',
);

# Get tokenization
my $tokens = KorAP::Tokenizer->new(@tokenizer_args);
ok($tokens, 'Token Object is fine');
ok($tokens->parse, 'Token parsing is fine');

# Add each annotation layer to the token stream, in this order. Every entry
# is [foundry, layer, test description]; each add() result is checked by ok().
for my $entry (
  ['Base',       'Sentences',     'Add base sentences'],
  ['Base',       'Paragraphs',    'Add base paragraphs'],
  ['OpenNLP',    'Sentences',     'Add opennlp sentences'],
  ['OpenNLP',    'Morpho',        'Add opennlp morpho'],
  ['TreeTagger', 'Sentences',     'Add tt sentences'],
  ['TreeTagger', 'Morpho',        'Add tt morpho'],
  ['CoreNLP',    'NamedEntities', 'Add corenlp ne'],
  ['CoreNLP',    'Sentences',     'Add corenlp sentences'],
  ['CoreNLP',    'Morpho',        'Add corenlp morpho'],
  ['CoreNLP',    'Constituency',  'Add corenlp constituency'],
  ['Glemm',      'Morpho',        'Add glemm morpho'],
  # Disabled in the original script:
  # ['Connexor', 'Sentences',     'Add cnx sentences'],
  # ['Connexor', 'Morpho',        'Add cnx morpho'],
  # ['Connexor', 'Phrase',        'Add cnx phrase'],
  # ['Connexor', 'Syntax',        'Add cnx syntax'],
  ['Mate',       'Morpho',        'Add mate morpho'],
  # Disabled; was a plain call without a test: $tokens->add('Mate', 'Dependency');
  # ['XIP',      'Sentences',     'Add xip sentences'],
  # ['XIP',      'Morpho',        'Add xip morpho'],
  # ['XIP',      'Constituency',  'Add xip constituency'],
  # Disabled; was a plain call without a test: $tokens->add('XIP', 'Dependency');
) {
  my ($foundry, $layer, $description) = @$entry;
  ok($tokens->add($foundry, $layer), $description);
}
# Serialization must yield a true value.
ok($tokens->to_json, 'To json');

# The parsed document metadata must carry the expected title.
is(
  $tokens->doc->to_hash->{title},
  'Autobiographische Einzelheiten',
  'Document title'
);

# Dump the JSON serialization to a file; the relative name resolves against
# the current working directory.
b($tokens->to_json)->spurt('AGA.03828.json');

# Report the time elapsed since $t was taken at the top of the script.
diag timestr(timediff(Benchmark->new, $t));

# No plan is declared in the visible script, so the run has to be closed
# explicitly; otherwise the harness reports a missing plan and fails.
done_testing;