Remove Directory::Iterator in korapxml2krill
Change-Id: Ia372478fec6dd62e40c90a0f9bdd1a15aa445b9d
diff --git a/Changes b/Changes
index 927bf37..837c1f7 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.58 2024-09-11
+ - Remove Directory::Iterator and replace it with Mojo::File.
+
0.57 2024-07-26
- Support award notes in i5.
- Add support for idno (with @rend) in i5.
diff --git a/Makefile.PL b/Makefile.PL
index 2ca4078..8c56aff 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -27,8 +27,6 @@
'IO::Uncompress::Gunzip' => 2.102,
'IO::Dir::Recursive' => 0.03,
'File::Temp' => 0.2311,
- 'Directory::Iterator' => 1,
- 'Directory::Iterator::XS' => 1,
'Module::Install' => 1.19,
'Dumbbench' => 0.501,
'Benchmark' => 0,
diff --git a/lib/KorAP/XML/ForkPool.pm b/lib/KorAP/XML/ForkPool.pm
index b3acef8..b3453ce 100644
--- a/lib/KorAP/XML/ForkPool.pm
+++ b/lib/KorAP/XML/ForkPool.pm
@@ -1,6 +1,7 @@
package KorAP::XML::ForkPool;
use strict;
use warnings;
+use Mojo::File;
use Parallel::ForkManager;
use v5.10;
@@ -51,17 +52,17 @@
print "Reading data ...\n";
- my $it = Directory::Iterator->new($input);
my @dirs;
- my $dir;
- while (1) {
- if (!$it->is_directory && ($dir = $it->get) && $dir =~ s{/data\.xml$}{}) {
- push @dirs, $dir;
- $it->prune;
- };
- last unless $it->next;
- };
+ Mojo::File->new($input) # scalar base directory, as in the replaced iterator code (this module has no @input)
+ ->list_tree({hidden => 0, dir => 0})
+ ->grep(qr/\/data\.xml$/)
+ ->each(
+ sub {
+ s/\/data\.xml$//;
+ push @dirs, $_;
+ }
+ );
$self->{count} = scalar @dirs;
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index a613292..d7f4d58 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -16,7 +16,7 @@
our @EXPORT_OK = qw(get_file_name get_file_name_from_glob);
-our $VERSION = '0.57';
+our $VERSION = '0.58';
has 'path';
has [qw/text_sigle doc_sigle corpus_sigle/];
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 447b6c4..f67b8b5 100755
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -13,7 +13,6 @@
use Log::Any::Adapter;
use Pod::Usage;
use Cache::FastMmap;
-use Directory::Iterator;
use KorAP::XML::Krill qw!get_file_name get_file_name_from_glob!;
use KorAP::XML::Archive;
use KorAP::XML::TarBuilder;
@@ -25,6 +24,7 @@
use File::Temp qw/tempdir/;
use File::Path qw(remove_tree make_path);
use File::Basename;
+use Mojo::File;
use Mojo::Collection 'c';
use String::Random qw(random_string);
use IO::File;
@@ -876,20 +876,18 @@
# Input is a directory
if (-d $input[0]) {
- # TODO:
- # Replace with Mojo::File
- my $it = Directory::Iterator->new($input[0]);
- my @dirs;
- my $dir;
- # Todo: Make a DO WHILE
- while (1) {
- if (!$it->is_directory && ($dir = $it->get) && $dir =~ s{/data\.xml$}{}) {
- push @dirs, $dir;
- $it->prune;
- };
- last unless $it->next;
- };
+ my @dirs;
+
+ Mojo::File->new($input[0])
+ ->list_tree({hidden => 0, dir => 0})
+ ->grep(qr/\/data\.xml$/)
+ ->each(
+ sub {
+ s/\/data\.xml$//;
+ push @dirs, $_;
+ }
+ );
print "Start processing ...\n" unless $q;
$t = Benchmark->new;
@@ -1170,7 +1168,7 @@
$ korapxml2krill serial -i <archive1> -i <archive2> -o <directory> -cfg <config-file>
-Convert archives sequentially. The inputs are not merged but treated
+Convert archives in serial. The inputs are not merged but treated
as they are (so they may be premerged or globs).
the C<--out> directory is treated as the base directory where subdirectories
are created based on the archive name. In case the C<--to-tar> flag is given,
@@ -1481,7 +1479,6 @@
=back
-
=head1 ANNOTATION SUPPORT
L<KorAP::XML::Krill> has built-in importer for some annotation foundries and layers