blob: 643ab8192fe0659fbb3c3345a335bba02162862e [file] [log] [blame]
Akron0c3e3752016-06-28 15:55:53 +02001#!/usr/bin/env perl
2use strict;
3use warnings;
4use Test::More;
5use File::Basename 'dirname';
6use File::Spec::Functions qw/catfile catdir/;
7use File::Temp qw/tempdir/;
8
Nils Diewaldb3e9ccd2016-10-24 15:16:52 +02009use KorAP::XML::Archive;
Akron0c3e3752016-06-28 15:55:53 +020010
# Locate the primary test archive below this test file's directory.
my $name = 'wpd15-single';
my @path = (dirname(__FILE__), 'corpus', 'archives');
my $file = catfile(@path, "$name.zip");

my $archive = KorAP::XML::Archive->new($file);

# Skip the entire file when the archive reports that unzip is unavailable.
plan skip_all => 'unzip not found' unless $archive->test_unzip;
20
# The freshly opened archive has to pass the validity test.
ok($archive->test, 'Test archive');

like($archive->path(0), qr/wpd15-single\.zip$/, 'Archive path');

ok($archive->attach(catfile(@path, 'fake.zip')), 'Attach fake archive');

# Fake archive is no valid zip file, so the test has to fail now
ok(!$archive->test, 'Test archive fails with fake archive attached');

# Recreate archive object from the primary file only
$archive = KorAP::XML::Archive->new($file);

# Test again
ok($archive->test, 'Test recreated archive');

my @list = $archive->list_texts;
is(scalar @list, 1, 'Found all texts');

# Attach further archives; each one is named "<name>.<suffix>.zip"
for my $suffix (qw/corenlp malt mdparser opennlp tree_tagger/) {
  ok($archive->attach(catfile(@path, "$name.$suffix.zip")), "Add $suffix");
}

# Attaching further archives must not change the list of texts
@list = $archive->list_texts;
is(scalar @list, 1, 'Found all texts after attaching further archives');
is($list[0], 'WPD15/A00/00081', 'First document');
49
# Test list_texts_iterator
my $next_text = $archive->list_texts_iterator;
ok($next_text, 'Iterator created');

# Collect everything the iterator yields up to the first undefined value
my @iterated;
while (1) {
  my $text_path = $next_text->();
  last unless defined $text_path;
  push @iterated, $text_path;
}
is_deeply(\@iterated, \@list, 'Iterator returns same paths as list_texts');

# Test count_texts
is($archive->count_texts, 1, 'count_texts returns correct number');
61
# With all annotation archives attached, the test still has to pass.
ok($archive->test, 'Test all archives');

# Split path into its components; each assertion names the component
# it checks, so a failure points directly at the offending part.
@path = $archive->split_path($list[0]);
is($path[0], '', 'Prefix');
is($path[1], 'WPD15', 'Corpus part');
is($path[2], 'A00', 'Document part');
is($path[3], '00081', 'Text part');
70
# Extract everything to temporary directory (removed again on exit)
my $dir = tempdir(CLEANUP => 1);
{
  # Suppress any warnings emitted while extracting
  local $SIG{__WARN__} = sub {};

  # NOTE(review): the meaning of the leading 0 argument is not visible
  # here - confirm against KorAP::XML::Archive::extract_sigle.
  ok($archive->extract_sigle(0, [$list[0]], $dir), 'Extract text to temporary directory');
};
77
# The extraction has to create the corpus/doc/text directory hierarchy,
# each level carrying its own header. Directories are built with catdir,
# files with catfile.
ok(-d catdir($dir, 'WPD15'), 'Test corpus directory exists');
ok(-f catfile($dir, 'WPD15', 'header.xml'), 'Test corpus header exists');
ok(-d catdir($dir, 'WPD15', 'A00'), 'Test doc directory exists');
ok(-f catfile($dir, 'WPD15', 'A00', 'header.xml'), 'Test doc header exists');
ok(-d catdir($dir, 'WPD15', 'A00', '00081'), 'Test text directory exists');
ok(-f catfile($dir, 'WPD15', 'A00', '00081', 'header.xml'), 'Test text header exists');

ok(-f catfile($dir, 'WPD15', 'A00', '00081', 'data.xml'), 'Test primary data exists');

# Annotation files expected below the text directory:
# [ subdirectory => basenames of the contained .xml files ]
my @text_dir = ('WPD15', 'A00', '00081');
my @expected = (
  ['base',        qw/paragraph sentences tokens tokens_aggr tokens_conservative/],
  ['struct',      qw/structure/],
  ['corenlp',     qw/constituency metadata morpho sentences tokens/],
  ['malt',        qw/dependency metadata/],
  ['mdparser',    qw/dependency metadata/],
  ['opennlp',     qw/metadata morpho sentences tokens/],
  ['tree_tagger', qw/metadata morpho sentences tokens/],
);

for my $entry (@expected) {
  my ($subdir, @basenames) = @$entry;
  for my $basename (@basenames) {
    # Put the relative path in the description so a failure names the file
    ok(
      -f catfile($dir, @text_dir, $subdir, "$basename.xml"),
      "Annotation data exists: $subdir/$basename.xml"
    );
  }
}

done_testing;
__END__
121
122