blob: 5b6858cc298870d56febb2b4981445ed836c967d [file] [log] [blame]
#!/usr/bin/env perl
# Test for the MDParser dependency annotation: extracts one document from
# the 'wpd15-single' test archive (with the mdparser archive attached),
# tokenizes it, adds the MDParser/Dependency annotation and checks selected
# entries of the resulting token stream.
use strict;
use warnings;
use utf8;
use Log::Log4perl;
use Data::Dumper;

# Only report errors, colored, on STDERR.
Log::Log4perl->init({
  'log4perl.rootLogger' => 'ERROR, STDERR',
  'log4perl.appender.STDERR' => 'Log::Log4perl::Appender::ScreenColoredLevels',
  'log4perl.appender.STDERR.layout' => 'PatternLayout',
  'log4perl.appender.STDERR.layout.ConversionPattern' => '[%r] %F %L %c - %m%n'
});

use File::Basename 'dirname';
use File::Spec::Functions qw/catdir catfile/;
use Test::More;
use Scalar::Util qw/weaken/;
use Data::Dumper;
use lib 't/annotation';
use File::Temp qw/tempdir/;

use_ok('KorAP::XML::Annotation::MDParser::Dependency');
use_ok('KorAP::XML::Archive');
use_ok('KorAP::XML::Krill');
use_ok('KorAP::XML::Tokenizer');

my $name = 'wpd15-single';
my @path = (dirname(__FILE__), '..', 'corpus','archives');

my $file = catfile(@path, $name . '.zip');
ok(my $archive = KorAP::XML::Archive->new($file), 'Create archive');

# NOTE(review): the leading '#' is passed through to attach() as-is;
# its meaning is defined by KorAP::XML::Archive - confirm there.
ok($archive->attach('#' . catfile(@path, $name . '.mdparser.zip')), 'Attach mdparser archive');

# CLEANUP => 1 removes the extracted files when the test exits, so
# repeated runs do not pile up temporary directories.
my $dir = tempdir(CLEANUP => 1);

my $f_path = 'WPD15/A00/00081';
$archive->extract($f_path, $dir);

ok(my $doc = KorAP::XML::Krill->new( path => $dir . '/' . $f_path), 'Create Krill document');

ok($doc->parse, 'Krill parser works');

# A bare 'return' is not allowed at the top level of a script, so setup
# failures are reported as failed tests and abort this file explicitly.
ok(
  my $tokens = KorAP::XML::Tokenizer->new(
    path => $doc->path,
    doc => $doc,
    foundry => 'Base',
    layer => 'Tokens',
    name => 'tokens'
  ),
  'Create tokenizer'
) or die 'Unable to create tokenizer';

ok($tokens->parse, 'Tokenizer parser works')
  or die 'Unable to parse tokens';

ok($tokens->add('MDParser', 'Dependency'), 'Add Dependency');

my $data = $tokens->to_data->{data};


is($data->{tokenSource}, 'base#tokens', 'TokenSource');
like($data->{foundries}, qr!mdparser/dependency!, 'foundries');
like($data->{layerInfos}, qr!mdp/d=rels!, 'layerInfos');

my $stream = $data->{stream};

is($stream->[0]->[0], '-:tokens$<i>3555', 'Token count');

# Term-to-term
is($stream->[0]->[1], '<:mdp/d:NMOD$<b>32<i>5', 'Term-to-Term');
is($stream->[5]->[0], '>:mdp/d:NMOD$<b>32<i>0', 'Term-to-Term');

# Element-to-term
is($stream->[0]->[8], '<:mdp/d:ROOT$<b>34<i>0<i>317<i>40<i>0', 'Element-to-Term');
is($stream->[0]->[10], '>:mdp/d:ROOT$<b>33<i>0<i>317<i>0<i>40', 'Term-to-Element');


# Same relation pair at the end of the stream (last two token positions)
is($stream->[-1]->[0], '>:mdp/d:ROOT$<b>33<i>26130<i>26153<i>3553<i>3554', 'Term-to-Element');
is($stream->[3553]->[1], '<:mdp/d:ROOT$<b>34<i>26130<i>26153<i>3554<i>3553', 'Element-to-Term');

done_testing;
__END__