Fix memory leak by using Path::Iterator::Rule

Change-Id: I16967fb0801e2be0491e61c6dfaa28a4ebc4b742
diff --git a/Changes b/Changes
index 5914da4..c95879a 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.59 2024-11-14
+        - Use Path::Iterator::Rule instead of Mojo::File.
+
 0.58 2024-09-11
         - Remove Directory::Iterator and replace it with Mojo::File.
         - Add performance hint.
diff --git a/Makefile.PL b/Makefile.PL
index 8c56aff..f4e3025 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -41,6 +41,7 @@
     'Config::Simple'  => 4.58,
     'String::Random'  => 0.32,
     'File::Path'      => 2.18,
+    'Path::Iterator::Rule' => 1.015,
     'Archive::Tar'    => 2.40,
     'Archive::Tar::Stream' => 0.02,
     'Clone'           => 0.45,
diff --git a/lib/KorAP/XML/ForkPool.pm b/lib/KorAP/XML/ForkPool.pm
index b3453ce..e6d407c 100644
--- a/lib/KorAP/XML/ForkPool.pm
+++ b/lib/KorAP/XML/ForkPool.pm
@@ -1,7 +1,7 @@
 package KorAP::XML::ForkPool;
 use strict;
 use warnings;
-use Mojo::File;
+use Path::Iterator::Rule;
 use Parallel::ForkManager;
 use v5.10;
 
@@ -54,15 +54,19 @@
 
   my @dirs;
 
-  Mojo::File->new($input[0])
-      ->list_tree({hidden => 0, dir => 0})
-      ->grep(qr/\/data\.xml$/)
-      ->each(
-        sub {
-          s/\/data\.xml$//;
-          push @dirs, $_;
-        }
-      );
+  my $rule = Path::Iterator::Rule->new;
+  $rule->name('data.xml')->file; # Only plain files named data.xml
+
+  my $next = $rule->iter(
+    $input[0] => { # Root directory to scan
+      sorted => 0, # Skip sorting of directory entries
+      depthfirst => -1, # Pre-order depth-first traversal
+      error_handler => undef # Disable the iterator's error handling
+    });
+  while (defined(my $file = $next->())) {
+    $file =~ s/\/data\.xml$//; # Keep the containing directory
+    push @dirs, $file;
+  };
 
   $self->{count} = scalar @dirs;
 
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index d7f4d58..a213982 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -16,7 +16,7 @@
 
 our @EXPORT_OK = qw(get_file_name get_file_name_from_glob);
 
-our $VERSION = '0.58';
+our $VERSION = '0.59';
 
 has 'path';
 has [qw/text_sigle doc_sigle corpus_sigle/];
diff --git a/lib/KorAP/XML/Tokenizer.pm b/lib/KorAP/XML/Tokenizer.pm
index ff60469..4c82bc5 100644
--- a/lib/KorAP/XML/Tokenizer.pm
+++ b/lib/KorAP/XML/Tokenizer.pm
@@ -46,6 +46,7 @@
 
 # Check if token is emoji
 sub is_emoji {
+  # Disabled alternative: return $_[0] =~ m{^\p{gc:S}\p{gc:M}*?(\x{200D}\p{gc:S}\p{gc:M}*?)*$}i;
   return $_[0] =~ m{^(\p{Extended_Pictographic}+\p{Emoji_Modifier}*)$}i;
 };
 
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 30f3384..a40d75a 100755
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -19,12 +19,12 @@
 use KorAP::XML::Tokenizer;
 use KorAP::XML::Batch::File;
 use Config::Simple;
+use Path::Iterator::Rule;
 use Parallel::ForkManager;
 use File::Glob ':bsd_glob';
 use File::Temp qw/tempdir/;
 use File::Path qw(remove_tree make_path);
 use File::Basename;
-use Mojo::File;
 use Mojo::Collection 'c';
 use String::Random qw(random_string);
 use IO::File;
@@ -178,7 +178,7 @@
 # - Improve core count logging.
 # ----------------------------------------------------------
 
-our $LAST_CHANGE = '2024/06/05';
+our $LAST_CHANGE = '2024/11/14';
 our $LOCAL = $FindBin::Bin;
 our $KORAL_VERSION = 0.03;
 our $VERSION_MSG = <<"VERSION";
@@ -879,15 +879,18 @@
 
     my @dirs;
 
-    Mojo::File->new($input[0])
-        ->list_tree({hidden => 0, dir => 0})
-        ->grep(qr/\/data\.xml$/)
-        ->each(
-      sub {
-        s/\/data\.xml$//;
-        push @dirs, $_;
-      }
-    );
+    my $rule = Path::Iterator::Rule->new;
+    $rule->name('data.xml')->file; # Only plain files named data.xml
+    my $next = $rule->iter(
+      $input[0] => { # Root directory to scan
+        sorted => 0, # Skip sorting of directory entries
+        depthfirst => -1, # Pre-order depth-first traversal
+        error_handler => undef # Disable the iterator's error handling
+      });
+    while (defined(my $file = $next->())) {
+      $file =~ s/\/data\.xml$//; # Keep the containing directory
+      push @dirs, $file;
+    };
 
     print "Start processing ...\n" unless $q;
     $t = Benchmark->new;