Use ripunzip for unzipping whole archives if available (fixes #16)
Note that ripunzip is only used for unzipping and listing complete archives.
Resolves #16
Change-Id: I12d120184f7194ca32bcd1ea4e3a76acd5f47e28
diff --git a/Changes b/Changes
index c95879a..724f7b0 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.60 2025-04-22
+ - Partially support ripunzip if available.
+
0.59 2024-11-14
- UsePath::Iterator::Rule instead of Mojo::File.
diff --git a/Readme.pod b/Readme.pod
index 6939e32..b849760 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -391,6 +391,11 @@
By providing the number of parallel jobs using C<--jobs>, the execution can be tailored to specific
hardware environments.
+=item Install ripunzip
+
+For full extraction of data, L<ripunzip|https://github.com/google/ripunzip> can be
+used for improved performance.
+
=head1 ANNOTATION SUPPORT
diff --git a/lib/KorAP/XML/Archive.pm b/lib/KorAP/XML/Archive.pm
index a1b0526..0fc438d 100644
--- a/lib/KorAP/XML/Archive.pm
+++ b/lib/KorAP/XML/Archive.pm
@@ -6,6 +6,8 @@
use strict;
use warnings;
+our $RIPUNZIP_AVAILABLE;
+
# Construct new archive helper
sub new {
my $class = shift;
@@ -22,12 +24,27 @@
};
-# Check if unzip is installed
-sub test_unzip {
+# Check if classic Info-ZIP unzip is installed
+sub test_InfoZIP_unzip {
return 1 if grep { -x "$_/unzip"} split /:/, $ENV{PATH};
return;
};
+# Check if ripunzip is installed; returns the number of PATH entries holding an executable ripunzip (0 if none)
+sub test_ripunzip {
+ # Scan PATH only on the first call; later calls reuse the value cached in $RIPUNZIP_AVAILABLE
+ if (!defined $RIPUNZIP_AVAILABLE) {
+ $RIPUNZIP_AVAILABLE = grep { -x "$_/ripunzip" } split /:/, $ENV{PATH};
+ }
+ return $RIPUNZIP_AVAILABLE;
+}
+
+# Check if unzip is installed (ripunzip can be used only for some tasks, so Info-ZIP unzip alone decides the result)
+sub test_unzip {
+ test_ripunzip(); # return value ignored; the call fills the $RIPUNZIP_AVAILABLE cache
+ return test_InfoZIP_unzip();
+};
+
# Check the compressed archive
sub test {
@@ -144,11 +161,12 @@
my $self = shift;
my ($quiet, $target_dir, $jobs) = @_;
- my @init_cmd = (
- 'unzip', # Use unzip program
- '-qo', # quietly overwrite all existing files
- '-uo',
- '-d', $target_dir # Extract into target directory
+ my @init_cmd = (test_ripunzip() ?
+ # Use ripunzip program
+ ('ripunzip', 'unzip-file', '-q', '-d', $target_dir)
+ :
+ # Use InfoZIP unzip program
+ ('unzip', '-qo', '-uo', '-d', $target_dir)
);
# Iterate over all attached archives
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index a213982..d245b23 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -16,7 +16,7 @@
our @EXPORT_OK = qw(get_file_name get_file_name_from_glob);
-our $VERSION = '0.59';
+our $VERSION = '0.60';
has 'path';
has [qw/text_sigle doc_sigle corpus_sigle/];
diff --git a/script/korapxml2krill b/script/korapxml2krill
index a40d75a..c522b32 100755
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -1504,6 +1504,12 @@
By providing the number of parallel jobs using C<--jobs>, the execution can be tailored to specific
hardware environments.
+=item Install ripunzip
+
+For full extraction of data, L<ripunzip|https://github.com/google/ripunzip> can be
+used for improved performance.
+
+
=head1 ANNOTATION SUPPORT
L<KorAP::XML::Krill> has built-in importer for some annotation foundries and layers