#!/usr/bin/env perl
use strict;
use warnings;
use Test::More;
use File::Basename 'dirname';
use File::Spec::Functions qw/catfile catdir/;
use File::Temp qw/tempdir/;

use KorAP::XML::Archive;

# Base name shared by the primary archive and its annotation archives,
# and the directory (next to this test file) they are read from.
my $name = 'wpd15-single';
my @path = (dirname(__FILE__), 'corpus', 'archives');

my $file    = catfile(@path, "$name.zip");
my $archive = KorAP::XML::Archive->new($file);

# Skip the whole file when the archive object reports no usable unzip
plan skip_all => 'unzip not found' unless $archive->test_unzip;

ok($archive->test, 'Test archive');

like($archive->path(0), qr/wpd15-single\.zip$/, 'Archive path');

ok($archive->attach(catfile(@path, 'fake.zip')), 'Attach fake archive');

# The fake archive is not a valid zip file, so the check must now fail
ok(!$archive->test, 'Test archive');

# Start over with a fresh object that only knows the primary archive
$archive = KorAP::XML::Archive->new($file);
ok($archive->test, 'Test archive');

my @list = $archive->list_texts;
is(scalar(@list), 1, 'Found all tests');

# Attach further archives: [ file name infix, test description ]
my @attachments = (
  [corenlp     => 'Add corenlp'],
  [malt        => 'Add malt'],
  [mdparser    => 'Add mdparser'],
  [opennlp     => 'Add opennlp'],
  [tree_tagger => 'Add tree tagger'],
);
for my $attachment (@attachments) {
  my ($infix, $label) = @$attachment;
  ok($archive->attach(catfile(@path, "$name.$infix.zip")), $label);
}

# Attaching further archives must not change the list of texts
@list = $archive->list_texts;
is(scalar(@list), 1, 'Found all tests');
is($list[0], 'WPD15/A00/00081', 'First document');

ok($archive->test, 'Test all archives');

# Split path: compare each returned segment with the expected one
@path = $archive->split_path($list[0]);
my @expected = ('', 'WPD15', 'A00', '00081');
is($path[$_], $expected[$_], 'Prefix') for 0 .. $#expected;

# Extract everything to a temporary directory (removed again on exit)
my $dir = tempdir(CLEANUP => 1);
{
  # Swallow any warnings emitted while extracting
  local $SIG{__WARN__} = sub {};

  # NOTE(review): the description 'Wrong path' does not match a call that
  # is asserted to succeed - confirm the intended wording.
  ok($archive->extract_text($list[0], $dir), 'Wrong path');
}

# Directories are built with catdir, files with catfile, so that the
# paths are also correct on platforms where the two differ.
ok(-d catdir($dir, 'WPD15'), 'Test corpus directory exists');
ok(-f catfile($dir, 'WPD15', 'header.xml'), 'Test corpus header exists');
ok(-d catdir($dir, 'WPD15', 'A00'), 'Test doc directory exists');
ok(-f catfile($dir, 'WPD15', 'A00', 'header.xml'), 'Test doc header exists');
ok(-d catdir($dir, 'WPD15', 'A00', '00081'), 'Test text directory exists');
ok(-f catfile($dir, 'WPD15', 'A00', '00081', 'header.xml'), 'Test text header exists');

ok(-f catfile($dir, 'WPD15', 'A00', '00081', 'data.xml'), 'Test primary data exists');

# Path segments of the extracted text below $dir
my @file = ('WPD15', 'A00', '00081');

# Expected annotation files: [ subdirectory, [ file names without .xml ] ]
my @annotation_files = (
  [base        => [qw/paragraph sentences tokens tokens_aggr tokens_conservative/]],
  [struct      => [qw/structure/]],
  [corenlp     => [qw/constituency metadata morpho sentences tokens/]],
  [malt        => [qw/dependency metadata/]],
  [mdparser    => [qw/dependency metadata/]],
  [opennlp     => [qw/metadata morpho sentences tokens/]],
  [tree_tagger => [qw/metadata morpho sentences tokens/]],
);

for my $group (@annotation_files) {
  my ($subdir, $stems) = @$group;
  for my $stem (@$stems) {
    my $xml = catfile($dir, @file, $subdir, "$stem.xml");

    # All checks share one description, so name the file on failure
    ok(-f $xml, 'Annotation data exists') or diag("Missing file: $xml");
  }
}

done_testing;
__END__