Fix handling of the 'cache' command-line parameter
Change-Id: I72fcaa6f2d25f79282b141daf50b1d3a4caf0188
diff --git a/Changes b/Changes
index aac2050..7fa22db 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,7 @@
+0.54 2023-04-15
+ - Fix 'cache' parameter. (reported by kupietz)
+ - Fix cache deletion (now done via Cache::FastMmap's unlink_on_exit; 'cache-delete' is also read from config files).
+
0.53 2023-03-20
- Added Spacy support. (kupietz)
- Support 'pos' as an alternative to 'ctag'
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index 2a16e6b..286ace1 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -16,7 +16,7 @@
our @EXPORT_OK = qw(get_file_name get_file_name_from_glob);
-our $VERSION = '0.53';
+our $VERSION = '0.54';
has 'path';
has [qw/text_sigle doc_sigle corpus_sigle/];
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 0a5a05f..2262db4 100755
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -176,7 +176,7 @@
#
# ----------------------------------------------------------
-our $LAST_CHANGE = '2024/03/20';
+our $LAST_CHANGE = '2024/04/15';
our $LOCAL = $FindBin::Bin;
our $KORAL_VERSION = 0.03;
our $VERSION_MSG = <<"VERSION";
@@ -211,7 +211,7 @@
'temporary-extract|te=s' => \($cfg{temporary_extract}),
'skip|s=s' => \@skip,
'sigle|sg=s' => \@sigle,
- 'cache|c=s' => \($cfg{cache_file}),
+ 'cache|c=s' => \($cfg{cache}),
'config|cfg=s' => \(my $cfg_file),
'lang=s' => \($cfg{lang}),
'log|l=s' => \($cfg{log}),
@@ -269,7 +269,7 @@
meta base-sentences base-paragraphs base-pagebreaks
gzip to-tar log lang cache non-word-tokens
non-verbal-tokens sequential-extraction
- temporary-extract cache-init
+ temporary-extract cache-init cache-delete
koral extract-dir jobs quiet!) {
my $underlined = $_ =~ tr/-/_/r;
if (!defined($cfg{$underlined}) && defined $config{$_}) {
@@ -568,7 +568,8 @@
my $cache = Cache::FastMmap->new(
share_file => $cache_file,
cache_size => ($cfg{cache_size} // '50m'),
- init_file => ($cfg{cache_init} // 1)
+ init_file => ($cfg{cache_init} // 1),
+ unlink_on_exit => $cache_delete
);
# Create batch object
@@ -601,7 +602,6 @@
$log->info("Run using $jobs jobs on $cores cores");
};
-
# Glob and prefix files
if (@input > 0) {
@@ -622,7 +622,6 @@
print 'Input is ' . join(', ', @input)."\n" unless $q;
};
-
# Process a single file
unless ($cmd) {
my $input = $input[0];
@@ -648,10 +647,8 @@
# Process file
$batch_file->process($input, $output);
- # Delete cache file
- unlink($cache_file) if $cache_delete;
-
stop_time;
+
exit;
};
@@ -983,9 +980,6 @@
$pool->wait_all_children;
- # Delete cache file
- unlink($cache_file) if $cache_delete;
-
# Close tar filehandle
if ($to_tar && $tar_fh) {
$tar_archive->finish;
@@ -1377,7 +1371,8 @@
Supported parameters are:
C<overwrite>, C<gzip>, C<jobs>, C<input-base>,
-C<token>, C<log>, C<cache>, C<cache-size>, C<cache-delete>, C<meta>,
+C<token>, C<log>,
+C<cache>, C<cache-size>, C<cache-init>, C<cache-delete>, C<meta>,
C<output>, C<koral>,
C<temporary-extract>, C<sequential-extraction>,
C<base-sentences>, C<base-paragraphs>,
diff --git a/t/script/single.t b/t/script/single.t
index 4772c1c..54ae7a1 100644
--- a/t/script/single.t
+++ b/t/script/single.t
@@ -25,7 +25,6 @@
my $output = tmpnam();
my $cache = tmpnam();
-
ok(!-f $output, 'Output does not exist');
my $call = join(
@@ -34,6 +33,7 @@
'--input' => $input,
'--output' => $output,
'--cache' => $cache,
+ '--no-cache-delete',
'-k' => 0.03,
'-t' => 'OpenNLP#Tokens',
'-l' => 'INFO'
@@ -48,6 +48,7 @@
$call
);
+ok(-f $cache, 'Cache does exist');
ok(-f $output, 'Output does exist');
ok((my $file = Mojo::File->new($output)->slurp), 'Slurp data');
ok((my $json = decode_json $file), 'decode json');
@@ -61,6 +62,7 @@
# Delete output
unlink $output;
+unlink $cache;
ok(!-f $output, 'Output does not exist');
$call .= ' -z';