Use ripunzip for unzipping whole archives if available (fixes #16)

Note that ripunzip is only used for unzipping and listing complete archives.

Resolves #16

Change-Id: I12d120184f7194ca32bcd1ea4e3a76acd5f47e28
diff --git a/Changes b/Changes
index c95879a..724f7b0 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.60 2025-04-22
+        - Partially support ripunzip if available.
+
 0.59 2024-11-14
         - Use Path::Iterator::Rule instead of Mojo::File.
 
diff --git a/Readme.pod b/Readme.pod
index 6939e32..b849760 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -391,6 +391,11 @@
 By providing the number of parallel jobs using C<--jobs>, the execution can be tailored to specific
 hardware environments.
 
+=item Install ripunzip
+
+For full extraction of data, L<ripunzip|https://github.com/google/ripunzip> can be
+used for improved performance.
+
 
 =head1 ANNOTATION SUPPORT
 
diff --git a/lib/KorAP/XML/Archive.pm b/lib/KorAP/XML/Archive.pm
index a1b0526..0fc438d 100644
--- a/lib/KorAP/XML/Archive.pm
+++ b/lib/KorAP/XML/Archive.pm
@@ -6,6 +6,8 @@
 use strict;
 use warnings;
 
+our $RIPUNZIP_AVAILABLE; # Cached result of the ripunzip PATH lookup (undef until first checked)
+
 # Construct new archive helper
 sub new {
   my $class = shift;
@@ -22,12 +24,27 @@
 };
 
 
-# Check if unzip is installed
-sub test_unzip {
+# Check if classic Info-ZIP unzip is installed (i.e. an executable "unzip" is found in PATH)
+sub test_InfoZIP_unzip {
   return 1 if grep { -x "$_/unzip"} split /:/, $ENV{PATH};
   return;
 };
 
+# Check if ripunzip is installed; returns the cached number of PATH hits (0 if none)
+sub test_ripunzip {
+  # Scan PATH only on the first call; an unset PATH is treated as empty (no warning)
+  if (!defined $RIPUNZIP_AVAILABLE) {
+    $RIPUNZIP_AVAILABLE = grep { -x "$_/ripunzip" } split /:/, ($ENV{PATH} // '');
+  }
+  return $RIPUNZIP_AVAILABLE;
+};
+
+# Check if unzip is installed (ripunzip alone is not enough, it can be used only for some tasks)
+sub test_unzip {
+  test_ripunzip(); # Result ignored here; the call only fills the $RIPUNZIP_AVAILABLE cache
+  return test_InfoZIP_unzip();
+};
+
 
 # Check the compressed archive
 sub test {
@@ -144,11 +161,12 @@
   my $self = shift;
   my ($quiet, $target_dir, $jobs) = @_;
 
-  my @init_cmd = (
-    'unzip',          # Use unzip program
-    '-qo',            # quietly overwrite all existing files
-    '-uo',
-    '-d', $target_dir # Extract into target directory
+  my @init_cmd = (test_ripunzip() ?
+    # Use ripunzip program
+    ('ripunzip', 'unzip-file', '-q', '-d', $target_dir)
+    :
+    # Use InfoZIP unzip program
+    ('unzip', '-qo', '-uo', '-d', $target_dir)
   );
 
   # Iterate over all attached archives
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index a213982..d245b23 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -16,7 +16,7 @@
 
 our @EXPORT_OK = qw(get_file_name get_file_name_from_glob);
 
-our $VERSION = '0.59';
+our $VERSION = '0.60';
 
 has 'path';
 has [qw/text_sigle doc_sigle corpus_sigle/];
diff --git a/script/korapxml2krill b/script/korapxml2krill
index a40d75a..c522b32 100755
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -1504,6 +1504,12 @@
 By providing the number of parallel jobs using C<--jobs>, the execution can be tailored to specific
 hardware environments.
 
+=item Install ripunzip
+
+For full extraction of data, L<ripunzip|https://github.com/google/ripunzip> can be
+used for improved performance.
+
+
 =head1 ANNOTATION SUPPORT
 
 L<KorAP::XML::Krill> has built-in importer for some annotation foundries and layers