Make Archive::Tar::Builder optional

Change-Id: I557617b0884b0b30d70c54003a3afda8c1a6b0e3
diff --git a/Changes b/Changes
index 1b9d1cd..4cce76f 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,8 @@
+0.45 2022-02-24
+        - Due to problems installing Archive::Tar::Builder
+          in certain environments, this is now optional,
+          with a pure perl fallback archiver.
+
 0.44 2022-02-17
         - Improve Gingko Metadata support.
         - Fix data-URIs by always refering to UTF-8.
diff --git a/Makefile.PL b/Makefile.PL
index 41c2fe1..3a90159 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -43,8 +43,8 @@
     'Config::Simple'  => 4.58,
     'String::Random'  => 0.32,
     'File::Path'      => 2.18,
-    'Archive::Tar::Builder' => 2.5005,
     'Archive::Tar'    => 2.40,
+    'Archive::Tar::Stream' => 0.02,
     'Clone'           => 0.45,
     'List::Util'      => 1.60,
     'Scalar::Util'    => 1.60,
diff --git a/Readme.pod b/Readme.pod
index b7445ab..50b9165 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -28,7 +28,8 @@
 In case everything went well, the C<korapxml2krill> tool will
 be available on your command line immediately.
 Minimum requirement for L<KorAP::XML::Krill> is Perl 5.16.
-Optional support for L<Sys::Info> to calculate available cores.
+Optionally installing L<Archive::Tar::Builder> speeds up archive building.
+L<Sys::Info> is optionally supported to calculate the number of available cores.
 In addition to work with zip archives, the C<unzip> tool needs to be present.
 
 =head1 ARGUMENTS
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index cd53751..229ad79 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -16,7 +16,7 @@
 
 our @EXPORT_OK = qw(get_file_name get_file_name_from_glob);
 
-our $VERSION = '0.44';
+our $VERSION = '0.45';
 
 has 'path';
 has [qw/text_sigle doc_sigle corpus_sigle/];
diff --git a/lib/KorAP/XML/TarBuilder.pm b/lib/KorAP/XML/TarBuilder.pm
new file mode 100644
index 0000000..4f7ba9e
--- /dev/null
+++ b/lib/KorAP/XML/TarBuilder.pm
@@ -0,0 +1,55 @@
+package KorAP::XML::TarBuilder;
+use strict;
+use warnings;
+use Archive::Tar::Stream;
+
+# This is a fallback module for Archive::Tar::Builder
+# that may not be available on certain systems.
+# It wraps Archive::Tar::Stream and provides the
+# methods new, archive_as and finish.
+
+# Create a new TarBuilder object.
+# Expects the output file handle the tar stream
+# is written to. Returns a blessed reference to
+# the wrapped Archive::Tar::Stream object.
+sub new {
+  my $class = shift;
+  my $fh = shift;
+  my $tar = Archive::Tar::Stream->new(outfh => $fh);
+  bless \$tar, $class;
+};
+
+
+# Archive a file.
+# Expects the path of the file to read and the
+# name the file should have inside the archive.
+# Returns 1 on success and nothing in case the
+# file can't be opened.
+sub archive_as {
+  my $self = shift;
+  my ($datafile, $tarfilename) = @_;
+
+  # Use the three argument form, so special characters
+  # in the file name are never interpreted as an open mode,
+  # and read raw bytes, so the content length matches
+  # the size passed for the tar header.
+  if (open(my $fh, '<:raw', $datafile)) {
+
+    # Determine the size on the opened handle,
+    # so size and content refer to the same file
+    $$self->AddFile($tarfilename, -s $fh, $fh);
+    close $fh;
+    return 1;
+  };
+  return;
+};
+
+
+# Finish the Tar stream.
+# Returns the result of FinishTar of the wrapped stream.
+sub finish {
+  my $self = shift;
+  return $$self->FinishTar;
+};
+
+1;
diff --git a/script/korapxml2krill b/script/korapxml2krill
index d09786c..a4a8276 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -16,6 +16,7 @@
 use Directory::Iterator;
 use KorAP::XML::Krill qw!get_file_name get_file_name_from_glob!;
 use KorAP::XML::Archive;
+use KorAP::XML::TarBuilder;
 use KorAP::XML::Tokenizer;
 use KorAP::XML::Batch::File;
 use Config::Simple;
@@ -27,7 +28,6 @@
 use Mojo::Collection 'c';
 use String::Random qw(random_string);
 use IO::File;
-use Archive::Tar::Builder;
 use Fcntl qw(:flock SEEK_END);
 
 # use KorAP::XML::ForkPool;
@@ -160,7 +160,7 @@
 # - Introduced support for Gingko
 # ----------------------------------------------------------
 
-our $LAST_CHANGE = '2022/01/17';
+our $LAST_CHANGE = '2022/02/24';
 our $LOCAL = $FindBin::Bin;
 our $KORAL_VERSION = 0.03;
 our $VERSION_MSG = <<"VERSION";
@@ -725,9 +725,6 @@
 
   # Initialize tar archive
   if ($to_tar) {
-    $tar_archive = Archive::Tar::Builder->new(
-      ignore_errors => 1
-    );
 
     # Set output name
     my $tar_file = $output;
@@ -740,8 +737,22 @@
     $tar_fh = IO::File->new($tar_file, 'w');
     $tar_fh->binmode(1);
 
-    # Set handle
-    $tar_archive->set_handle($tar_fh);
+    # Prefer Archive::Tar::Builder, if the module can be loaded
+    if (eval { require Archive::Tar::Builder; 1 }) {
+      $tar_archive = Archive::Tar::Builder->new(
+        ignore_errors => 1
+      );
+
+      # Set handle
+      $tar_archive->set_handle($tar_fh);
+    }
+
+    # Fallback solution
+    else {
+      $tar_archive = KorAP::XML::TarBuilder->new(
+        $tar_fh
+      );
+    };
 
     # Output to temporary directory
     $output_dir = File::Temp->newdir;
@@ -1059,7 +1070,8 @@
 In case everything went well, the C<korapxml2krill> tool will
 be available on your command line immediately.
 Minimum requirement for L<KorAP::XML::Krill> is Perl 5.16.
-Optional support for L<Sys::Info> to calculate available cores.
+Optionally installing L<Archive::Tar::Builder> speeds up archive building.
+L<Sys::Info> is optionally supported to calculate the number of available cores.
 In addition to work with zip archives, the C<unzip> tool needs to be present.
 
 =head1 ARGUMENTS
@@ -1317,7 +1329,7 @@
 C<overwrite>, C<gzip>, C<jobs>, C<input-base>,
 C<token>, C<log>, C<cache>, C<cache-size>, C<cache-delete>, C<meta>,
 C<output>, C<koral>,
-C<tempary-extract>, C<sequential-extraction>,
+C<temporary-extract>, C<sequential-extraction>,
 C<base-sentences>, C<base-paragraphs>,
 C<base-pagebreaks>,
 C<skip> (semicolon separated), C<sigle>
diff --git a/t/corpus/artificial/header.xml b/t/corpus/artificial/header.xml
index 589e75e..10e14f8 100644
--- a/t/corpus/artificial/header.xml
+++ b/t/corpus/artificial/header.xml
@@ -4,7 +4,7 @@
 <idsHeader type="text" pattern="text" status="new" version="1.1" TEIform="teiHeader">
   <fileDesc>
     <titleStmt>
-      <textSigle>ART/ABC.0001</textSigle>
+      <textSigle>ART/ABC.00001</textSigle>
       <t.title assemblage="regular"/>
     </titleStmt>
     <publicationStmt>
diff --git a/t/tar_builder.t b/t/tar_builder.t
new file mode 100644
index 0000000..9413440
--- /dev/null
+++ b/t/tar_builder.t
@@ -0,0 +1,82 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Test::More;
+
+use File::Basename 'dirname';
+use File::Spec::Functions qw/catfile catdir/;
+use File::Temp qw/tempdir tempfile/;
+
+# Test the pure perl fallback archiver and, if installed,
+# run the same checks against Archive::Tar::Builder.
+
+use_ok('Archive::Tar');
+use_ok('KorAP::XML::TarBuilder');
+
+# Files to archive
+my $corpus_dir = catdir(dirname(__FILE__), 'corpus', 'artificial');
+my $data_file = catfile($corpus_dir, 'data.xml');
+my $header_file = catfile($corpus_dir, 'header.xml');
+
+
+# Archive both files with the given builder, finish the
+# archive and verify names and content using Archive::Tar.
+# Expects the builder object, the handle the builder writes to,
+# the name of the file behind the handle and a label
+# that is prepended to all test descriptions.
+sub archive_and_verify {
+  my ($tar, $out_fh, $out_fn, $label) = @_;
+
+  ok($tar->archive_as($data_file, 'example1.xml'), "$label: Archive data");
+  ok($tar->archive_as($header_file, 'example2.xml'), "$label: Archive header");
+  ok($tar->finish, "$label: Finish tar");
+
+  # Close the output handle before the file is read again,
+  # so possibly buffered output is written
+  close($out_fh);
+
+  my $tar_read = Archive::Tar->new($out_fn);
+
+  ok($tar_read->contains_file('example1.xml'), "$label: Data file exists");
+  ok($tar_read->contains_file('example2.xml'), "$label: Header file exists");
+
+  my $content = $tar_read->get_content('example1.xml');
+  like($content, qr!ART_ABC\.00001!, "$label: Data content is correct");
+
+  $content = $tar_read->get_content('example2.xml');
+  like($content, qr!ART\/ABC\.00001!, "$label: Header content is correct");
+};
+
+
+# Remove the temporary file on exit
+my ($out_tar, $out_tar_fn) = tempfile(UNLINK => 1);
+
+ok(my $tar = KorAP::XML::TarBuilder->new($out_tar), 'Create new tar');
+is(ref $tar, 'KorAP::XML::TarBuilder', 'Fallback builder class');
+
+archive_and_verify($tar, $out_tar, $out_tar_fn, 'Fallback');
+
+
+# Now test for equivalence to Archive::Tar::Builder
+if (eval { require Archive::Tar::Builder; 1 }) {
+
+  # Reset
+  ($out_tar, $out_tar_fn) = tempfile(UNLINK => 1);
+
+  $tar = Archive::Tar::Builder->new(
+    ignore_errors => 1
+  );
+
+  # Set handle
+  $tar->set_handle($out_tar);
+
+  is(ref $tar, 'Archive::Tar::Builder', 'Builder class');
+
+  archive_and_verify($tar, $out_tar, $out_tar_fn, 'Builder');
+}
+else {
+  diag 'Archive::Tar::Builder not installed.';
+};
+
+done_testing;
+