Make Archive::Tar::Builder optional
Change-Id: I557617b0884b0b30d70c54003a3afda8c1a6b0e3
diff --git a/Changes b/Changes
index 1b9d1cd..4cce76f 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,8 @@
+0.45 2022-02-24
+ - Due to problems installing Archive::Tar::Builder
+ in certain environments, the module is now optional,
+ with a pure Perl fallback archiver.
+
0.44 2022-02-17
- Improve Gingko Metadata support.
- Fix data-URIs by always refering to UTF-8.
diff --git a/Makefile.PL b/Makefile.PL
index 41c2fe1..3a90159 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -43,8 +43,8 @@
'Config::Simple' => 4.58,
'String::Random' => 0.32,
'File::Path' => 2.18,
- 'Archive::Tar::Builder' => 2.5005,
'Archive::Tar' => 2.40,
+ 'Archive::Tar::Stream' => 0.02,
'Clone' => 0.45,
'List::Util' => 1.60,
'Scalar::Util' => 1.60,
diff --git a/Readme.pod b/Readme.pod
index b7445ab..50b9165 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -28,7 +28,8 @@
In case everything went well, the C<korapxml2krill> tool will
be available on your command line immediately.
Minimum requirement for L<KorAP::XML::Krill> is Perl 5.16.
-Optional support for L<Sys::Info> to calculate available cores.
+Optionally installing L<Archive::Tar::Builder> speeds up archive building.
+Optionally installing L<Sys::Info> allows the number of available cores to be calculated.
In addition to work with zip archives, the C<unzip> tool needs to be present.
=head1 ARGUMENTS
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index cd53751..229ad79 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -16,7 +16,7 @@
our @EXPORT_OK = qw(get_file_name get_file_name_from_glob);
-our $VERSION = '0.44';
+our $VERSION = '0.45';
has 'path';
has [qw/text_sigle doc_sigle corpus_sigle/];
diff --git a/lib/KorAP/XML/TarBuilder.pm b/lib/KorAP/XML/TarBuilder.pm
new file mode 100644
index 0000000..4f7ba9e
--- /dev/null
+++ b/lib/KorAP/XML/TarBuilder.pm
@@ -0,0 +1,37 @@
+package KorAP::XML::TarBuilder;
+use Archive::Tar::Stream;
+use strict;
+use warnings;
+
+# This is a fallback module for Archive::Tar::Builder
+# that may not be available on certain systems.
+
+# Build a fallback tar writer that emits its archive to the handle $fh
+sub new {
+  my ($class, $fh) = @_;
+  # The object is a blessed reference to the wrapped stream writer
+  my $stream = Archive::Tar::Stream->new(outfh => $fh);
+  return bless \$stream, $class;
+};
+
+
+# Add the file $datafile under the name $tarfilename; true on success
+sub archive_as {
+  my ($self, $datafile, $tarfilename) = @_;
+  # Three-argument open never parses the file name for mode or pipe
+  # characters; :raw keeps the bytes read in line with the size below.
+  # An unreadable file is skipped with an empty return.
+  open(my $fh, '<:raw', $datafile) or return;
+  $$self->AddFile($tarfilename, -s $fh, $fh);
+  close $fh;
+  return 1;
+};
+
+
+# Finalize the archive by delegating to the wrapped stream's FinishTar
+sub finish {
+  my ($self) = @_;
+  return ${$self}->FinishTar;
+};
+
+1;
diff --git a/script/korapxml2krill b/script/korapxml2krill
index d09786c..a4a8276 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -16,6 +16,7 @@
use Directory::Iterator;
use KorAP::XML::Krill qw!get_file_name get_file_name_from_glob!;
use KorAP::XML::Archive;
+use KorAP::XML::TarBuilder;
use KorAP::XML::Tokenizer;
use KorAP::XML::Batch::File;
use Config::Simple;
@@ -27,7 +28,6 @@
use Mojo::Collection 'c';
use String::Random qw(random_string);
use IO::File;
-use Archive::Tar::Builder;
use Fcntl qw(:flock SEEK_END);
# use KorAP::XML::ForkPool;
@@ -160,7 +160,7 @@
# - Introduced support for Gingko
# ----------------------------------------------------------
-our $LAST_CHANGE = '2022/01/17';
+our $LAST_CHANGE = '2022/02/24';
our $LOCAL = $FindBin::Bin;
our $KORAL_VERSION = 0.03;
our $VERSION_MSG = <<"VERSION";
@@ -725,9 +725,6 @@
# Initialize tar archive
if ($to_tar) {
- $tar_archive = Archive::Tar::Builder->new(
- ignore_errors => 1
- );
# Set output name
my $tar_file = $output;
@@ -740,8 +737,22 @@
$tar_fh = IO::File->new($tar_file, 'w');
$tar_fh->binmode(1);
- # Set handle
- $tar_archive->set_handle($tar_fh);
+    # Prefer Archive::Tar::Builder when it can be loaded at runtime
+    if (eval { require Archive::Tar::Builder; 1 }) {
+      $tar_archive = Archive::Tar::Builder->new(
+        ignore_errors => 1
+      );
+
+      # Set handle
+      $tar_archive->set_handle($tar_fh);
+    }
+
+    # Fallback solution: the bundled builder takes the handle directly
+    else {
+      $tar_archive = KorAP::XML::TarBuilder->new(
+        $tar_fh
+      );
+    };
# Output to temporary directory
$output_dir = File::Temp->newdir;
@@ -1059,7 +1070,8 @@
In case everything went well, the C<korapxml2krill> tool will
be available on your command line immediately.
Minimum requirement for L<KorAP::XML::Krill> is Perl 5.16.
-Optional support for L<Sys::Info> to calculate available cores.
+Optionally installing L<Archive::Tar::Builder> speeds up archive building.
+Optionally installing L<Sys::Info> allows the number of available cores to be calculated.
In addition to work with zip archives, the C<unzip> tool needs to be present.
=head1 ARGUMENTS
@@ -1317,7 +1329,7 @@
C<overwrite>, C<gzip>, C<jobs>, C<input-base>,
C<token>, C<log>, C<cache>, C<cache-size>, C<cache-delete>, C<meta>,
C<output>, C<koral>,
-C<tempary-extract>, C<sequential-extraction>,
+C<temporary-extract>, C<sequential-extraction>,
C<base-sentences>, C<base-paragraphs>,
C<base-pagebreaks>,
C<skip> (semicolon separated), C<sigle>
diff --git a/t/corpus/artificial/header.xml b/t/corpus/artificial/header.xml
index 589e75e..10e14f8 100644
--- a/t/corpus/artificial/header.xml
+++ b/t/corpus/artificial/header.xml
@@ -4,7 +4,7 @@
<idsHeader type="text" pattern="text" status="new" version="1.1" TEIform="teiHeader">
<fileDesc>
<titleStmt>
- <textSigle>ART/ABC.0001</textSigle>
+ <textSigle>ART/ABC.00001</textSigle>
<t.title assemblage="regular"/>
</titleStmt>
<publicationStmt>
diff --git a/t/tar_builder.t b/t/tar_builder.t
new file mode 100644
index 0000000..9413440
--- /dev/null
+++ b/t/tar_builder.t
@@ -0,0 +1,87 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Test::More;
+# Checks the pure Perl tar fallback and, if installed, Archive::Tar::Builder
+use File::Basename 'dirname';
+use File::Spec::Functions qw/catfile catdir/;
+use File::Temp qw/tempdir tempfile/;
+
+use_ok('Archive::Tar');
+# UNLINK removes the archive at exit; list-context tempfile() keeps it otherwise
+my ($out_tar, $out_tar_fn) = tempfile(UNLINK => 1);
+
+use_ok('KorAP::XML::TarBuilder');
+
+ok(my $tar = KorAP::XML::TarBuilder->new($out_tar), 'Create new tar');
+
+is(ref $tar, 'KorAP::XML::TarBuilder', 'Fallback builder class');
+
+my $file = catfile(dirname(__FILE__), 'corpus','artificial', 'data.xml');
+ok($tar->archive_as($file, 'example1.xml'), 'Archive data file');
+
+$file = catfile(dirname(__FILE__), 'corpus','artificial', 'header.xml');
+ok($tar->archive_as($file, 'example2.xml'), 'Archive header file');
+
+ok($tar->finish, 'Finish tar');
+# Close the writing handle before the archive is read back by file name
+ok(close($out_tar), 'Close tar handle');
+
+my $tar_read = Archive::Tar->new($out_tar_fn);
+
+ok($tar_read->contains_file('example1.xml'), 'File exists');
+ok($tar_read->contains_file('example2.xml'), 'File exists');
+
+my $content = $tar_read->get_content('example1.xml');
+like($content, qr!ART_ABC\.00001!, 'Content is correct');
+
+$content = $tar_read->get_content('example2.xml');
+like($content, qr!ART\/ABC\.00001!, 'Content is correct');
+
+
+
+
+# Now test for equivalence to Archive::Tar::Builder
+if (eval { require Archive::Tar::Builder; 1 }) {
+
+  use_ok('Archive::Tar::Builder');
+
+  # Reset to a fresh temporary archive, again removed at exit
+  ($out_tar, $out_tar_fn) = tempfile(UNLINK => 1);
+
+  $tar = Archive::Tar::Builder->new(
+    ignore_errors => 1
+  );
+
+  # Set handle
+  $tar->set_handle($out_tar);
+
+  is(ref $tar, 'Archive::Tar::Builder', 'Optional builder class');
+
+  $file = catfile(dirname(__FILE__), 'corpus','artificial', 'data.xml');
+  ok($tar->archive_as($file, 'example1.xml'), 'Archive data file');
+
+  $file = catfile(dirname(__FILE__), 'corpus','artificial', 'header.xml');
+  ok($tar->archive_as($file, 'example2.xml'), 'Archive header file');
+
+  ok($tar->finish, 'Finish tar');
+  # Close the writing handle before the archive is read back by file name
+  ok(close($out_tar), 'Close tar handle');
+
+  $tar_read = Archive::Tar->new($out_tar_fn);
+
+  ok($tar_read->contains_file('example1.xml'), 'File exists');
+  ok($tar_read->contains_file('example2.xml'), 'File exists');
+
+  $content = $tar_read->get_content('example1.xml');
+  like($content, qr!ART_ABC\.00001!, 'Content is correct');
+
+  $content = $tar_read->get_content('example2.xml');
+  like($content, qr!ART\/ABC\.00001!, 'Content is correct');
+}
+else {
+  diag 'Archive::Tar::Builder not installed.';
+};
+
+done_testing;
+