#!/usr/bin/env perl
#
# Tests for KorAP::XML::Archive, run against the 'wpd15-single' fixture
# archives located in corpus/archives next to this file:
#
#   1. validate the base archive and check its path,
#   2. attach an invalid archive and verify that validation fails,
#   3. attach the annotation archives and list the contained texts,
#   4. split a text path into its components,
#   5. extract one text into a temporary directory and verify that all
#      headers, the primary data and the annotation files are present.
#
# The whole test is skipped if the archive object reports that unzip
# is not available.
use strict;
use warnings;
use Test::More;
use File::Basename 'dirname';
use File::Spec::Functions qw/catfile catdir/;
use File::Temp qw/tempdir/;

use KorAP::XML::Archive;

my $name = 'wpd15-single';

# Directory containing the fixture archives
my @path = (dirname(__FILE__), 'corpus', 'archives');

my $file = catfile(@path, $name . '.zip');
my $archive = KorAP::XML::Archive->new($file);

# Must happen before the first test is run
unless ($archive->test_unzip) {
  plan skip_all => 'unzip not found';
}

ok($archive->test, 'Test archive');

like($archive->path(0), qr/wpd15-single\.zip$/, 'Archive path');

ok($archive->attach(catfile(@path, 'fake.zip')), 'Attach fake archive');

# The fake archive is not a valid zip file, so the test has to fail
ok(!$archive->test, 'Test archive with fake attachment fails');

# Recreate the archive object to get rid of the fake attachment
$archive = KorAP::XML::Archive->new($file);

# Test again
ok($archive->test, 'Test recreated archive');

my @list = $archive->list_texts;
is(scalar @list, 1, 'Found all texts');

# Attach further archives
ok($archive->attach(catfile(@path, $name . '.corenlp.zip')), 'Add corenlp');
ok($archive->attach(catfile(@path, $name . '.malt.zip')), 'Add malt');
ok($archive->attach(catfile(@path, $name . '.mdparser.zip')), 'Add mdparser');
ok($archive->attach(catfile(@path, $name . '.opennlp.zip')), 'Add opennlp');
ok($archive->attach(catfile(@path, $name . '.tree_tagger.zip')), 'Add tree tagger');

# Attaching annotation archives must not change the list of texts
@list = $archive->list_texts;
is(scalar @list, 1, 'Found all texts');
is($list[0], 'WPD15/A00/00081', 'First document');

ok($archive->test, 'Test all archives');

# Split path
my @parts = $archive->split_path($list[0]);
is($parts[0], '', 'Prefix');
is($parts[1], 'WPD15', 'Corpus');
is($parts[2], 'A00', 'Document');
is($parts[3], '00081', 'Text');

# Extract everything to temporary directory
my $dir = tempdir(CLEANUP => 1);
{
  # Silence warnings for the duration of the extraction
  # NOTE(review): presumably the extraction warns - confirm what is emitted
  local $SIG{__WARN__} = sub {};
  ok($archive->extract_text($list[0], $dir), 'Extract text');
}

# Directories are addressed with catdir, files with catfile
ok(-d catdir($dir, 'WPD15'), 'Test corpus directory exists');
ok(-f catfile($dir, 'WPD15', 'header.xml'), 'Test corpus header exists');
ok(-d catdir($dir, 'WPD15', 'A00'), 'Test doc directory exists');
ok(-f catfile($dir, 'WPD15', 'A00', 'header.xml'), 'Test doc header exists');
ok(-d catdir($dir, 'WPD15', 'A00', '00081'), 'Test text directory exists');
ok(-f catfile($dir, 'WPD15', 'A00', '00081', 'header.xml'), 'Test text header exists');

ok(-f catfile($dir, 'WPD15', 'A00', '00081', 'data.xml'), 'Test primary data exists');

my @file = ('WPD15', 'A00', '00081');

# Expected annotation files per directory below the text directory:
# the first element is the directory, the rest are file names
# (without the '.xml' extension)
my @annotations = (
  [base        => qw/paragraph sentences tokens tokens_aggr tokens_conservative/],
  [struct      => qw/structure/],
  [corenlp     => qw/constituency metadata morpho sentences tokens/],
  [malt        => qw/dependency metadata/],
  [mdparser    => qw/dependency metadata/],
  [opennlp     => qw/metadata morpho sentences tokens/],
  [tree_tagger => qw/metadata morpho sentences tokens/],
);

foreach my $annotation (@annotations) {
  my ($foundry, @basenames) = @$annotation;

  foreach my $basename (@basenames) {
    # All checks share one source line, so the description names the file
    ok(
      -f catfile($dir, @file, $foundry, $basename . '.xml'),
      "Annotation data exists: $foundry/$basename.xml"
    );
  }
}

done_testing;
__END__