| Akron | 797e807 | 2020-02-13 07:59:40 +0100 | [diff] [blame] | 1 | use strict; | 
 | 2 | use warnings; | 
 | 3 | use File::Basename 'dirname'; | 
 | 4 | use File::Spec::Functions qw/catfile/; | 
| Akron | 2a60c53 | 2020-02-13 15:52:18 +0100 | [diff] [blame^] | 5 | use File::Temp ':POSIX'; | 
 | 6 | use IO::Uncompress::Unzip qw(unzip $UnzipError); | 
| Akron | 797e807 | 2020-02-13 07:59:40 +0100 | [diff] [blame] | 7 |  | 
 | 8 | use Test::More; | 
 | 9 | use Test::Output; | 
 | 10 |  | 
# TODO:
#   This is almost unusably slow and, due to namespaces, extremely verbose
#   - it is probably better to switch to something based on Test::Mojo
 | 14 | use Test::XML::Simple; | 
 | 15 |  | 
# Locate the converter script relative to this test file
my $f = dirname(__FILE__);
my $script = catfile($f, '..', 'script', 'tei2korapxml');
ok(-f $script, 'Script found');

# Check what the script writes to STDERR when called with --help.
# The child is started with $^X (the interpreter running this test)
# instead of whatever "perl" happens to be first in PATH, so the script
# is executed by the same perl, with the same installed modules,
# as the test suite itself.
stderr_is(
  sub { system($^X, $script, '--help') },
  "This program is called from inside another script.\n",
  'Help'
);
 | 25 |  | 
# Load example file
my $file = catfile($f, 'data', 'goe_sample.i5.xml');

# Scalar-context tmpnam() only returns a name that is free at the time
# of the call (race condition) and nothing ever removes the generated
# zip. A File::Temp object creates the file atomically and unlinks it
# when the object is destroyed at program exit; the object therefore
# has to stay in scope for the whole test.
my $outzip_guard = File::Temp->new(SUFFIX => '.zip');
my $outzip = $outzip_guard->filename;

# Generate zip file (unportable: relies on a POSIX shell and cat).
# The converter is run with $^X so that it uses the same interpreter
# as this test. Its STDOUT is redirected to the zip file, its STDERR
# is what gets matched below.
my $generate = sprintf(
  q{cat '%s' | '%s' '%s' > '%s'},
  $file, $^X, $script, $outzip
);

stderr_like(
  sub { system($generate) },
  qr!tei2korapxml: .*? text_id=GOE_AGA\.00000!,
  'Processing'
);
 | 36 |  | 
 | 37 | # Uncompress GOE/header.xml from zip file | 
 | 38 | my $zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/header.xml'); | 
 | 39 |  | 
 | 40 | ok($zip, 'Zip-File is created'); | 
 | 41 |  | 
 | 42 | # Read GOE/header.xml | 
 | 43 | my $header_xml = ''; | 
 | 44 | $header_xml .= $zip->getline while !$zip->eof; | 
 | 45 | ok($zip->close, 'Closed'); | 
 | 46 |  | 
 | 47 | xml_is($header_xml, '//korpusSigle', 'GOE', 'korpusSigle'); | 
 | 48 | xml_is($header_xml, '//h.title[@type="main"]', 'Goethes Werke', 'h.title'); | 
 | 49 | xml_is($header_xml, '//h.author', 'Goethe, Johann Wolfgang von', 'h.author'); | 
 | 50 | xml_is($header_xml, '//pubDate[@type="year"]', '1982', 'pubDate'); | 
 | 51 |  | 
 | 52 |  | 
 | 53 | # Uncompress GOE/AGA/header.xml from zip file | 
 | 54 | $zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/header.xml'); | 
 | 55 |  | 
 | 56 | ok($zip, 'Zip-File is found'); | 
 | 57 |  | 
 | 58 | # Read GOE/AGA/header.xml | 
 | 59 | $header_xml = ''; | 
 | 60 | $header_xml .= $zip->getline while !$zip->eof; | 
 | 61 | ok($zip->close, 'Closed'); | 
 | 62 |  | 
 | 63 | xml_is($header_xml, '//dokumentSigle', 'GOE/AGA', 'dokumentSigle'); | 
 | 64 | xml_is($header_xml, '//d.title', 'Goethe: Autobiographische Schriften II, (1817-1825, 1832)', 'd.title'); | 
 | 65 | xml_is($header_xml, '//creatDate', '1820-1822', 'creatDate'); | 
 | 66 |  | 
 | 67 |  | 
 | 68 | # Uncompress GOE/AGA/00000/header.xml from zip file | 
 | 69 | $zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/00000/header.xml'); | 
 | 70 |  | 
 | 71 | ok($zip, 'Zip-File is found'); | 
 | 72 |  | 
 | 73 | # Read GOE/AGA/00000/header.xml | 
 | 74 | $header_xml = ''; | 
 | 75 | $header_xml .= $zip->getline while !$zip->eof; | 
 | 76 | ok($zip->close, 'Closed'); | 
 | 77 |  | 
 | 78 | # This is slow - should be improved for more tests | 
 | 79 | xml_is($header_xml, '//textSigle', 'GOE/AGA.00000', 'textSigle'); | 
 | 80 | xml_is($header_xml, '//analytic/h.title[@type="main"]', 'Campagne in Frankreich', 'h.title'); | 
 | 81 |  | 
 | 82 |  | 
 | 83 | # Uncompress GOE/AGA/00000/data.xml from zip file | 
 | 84 | $zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/00000/data.xml'); | 
 | 85 |  | 
 | 86 | ok($zip, 'Zip-File is found'); | 
 | 87 |  | 
 | 88 | # Read GOE/AGA/00000/data.xml | 
 | 89 | my $data_xml = ''; | 
 | 90 | $data_xml .= $zip->getline while !$zip->eof; | 
 | 91 | ok($zip->close, 'Closed'); | 
 | 92 |  | 
 | 93 | xml_node($data_xml, '/*[name()="raw_text" and @docid="GOE_AGA.00000"]', 'text id'); | 
 | 94 | xml_like($data_xml, '/*[local-name()="raw_text"]/*[local-name()="text"]', qr!^Campagne in Frankreich 1792.*?uns allein begl.cke\.$!, 'text content'); | 
 | 95 |  | 
 | 96 | # Uncompress GOE/AGA/00000/struct/structure.xml from zip file | 
 | 97 | $zip = IO::Uncompress::Unzip->new($outzip, Name => 'GOE/AGA/00000/struct/structure.xml'); | 
 | 98 |  | 
 | 99 | ok($zip, 'Zip-File is found'); | 
 | 100 |  | 
 | 101 | # Read GOE/AGA/00000/struct/structure.xml | 
 | 102 | my $struct_xml = ''; | 
 | 103 | $struct_xml .= $zip->getline while !$zip->eof; | 
 | 104 | ok($zip->close, 'Closed'); | 
 | 105 |  | 
 | 106 | xml_is($struct_xml, '//*[name()="span" and @id="s3"]//*[@name="type"]', 'Autobiographie', 'text content'); | 
 | 107 |  | 
 | 108 |  | 
| Akron | 797e807 | 2020-02-13 07:59:40 +0100 | [diff] [blame] | 109 |  | 
 | 110 | done_testing; |