Improve core counting logging
Change-Id: I3762f25ef8c6f90dc59b1f299efe44d4ccb829bc
diff --git a/Changes b/Changes
index 7fa22db..d52de30 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,8 @@
0.54 2023-04-15
- Fix 'cache' parameter. (reported by kupietz)
- Fix cache deletion for certain scenarios.
+ - Improve information on the number of jobs
+ running in parallel.
0.53 2023-03-20
- Added Spacy support. (kupietz)
diff --git a/Readme.pod b/Readme.pod
index 28db741..ccf61c3 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -210,14 +210,22 @@
If C<sequential-extraction> is not set to true, this will
also apply to extraction.
-Pass -1, and the value will be set automatically to 5
+Pass C<-1>, and the value will be set automatically to 5
times the number of available cores, in case L<Sys::Info>
-is available. Be aware, that the report of available cores
+is available and can read the CPU count (see C<--job-count>).
+Be aware that the report of available cores
may not work in certain conditions. Benchmarking the processing
speed based on the number of jobs may be valuable.
+
This is I<experimental>.
+=item B<--job-count|-jc>
+
+Print job and core information that would be used if
+C<-1> was passed to C<--jobs>.
+
+
=item B<--koral|-k>
Version of the output format. Supported versions are:
diff --git a/script/korapxml2krill b/script/korapxml2krill
index c468b01..6c8da84 100755
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -174,6 +174,8 @@
# 2024/03/20
# - Added Spacy support.
#
+# 2024/03/22
+# - Improve core count logging.
# ----------------------------------------------------------
our $LAST_CHANGE = '2024/04/15';
@@ -196,6 +198,26 @@
# Configuration hash
my %cfg = ();
+# Determine the job count used for '--jobs -1': 5 jobs per detected core.
+# Returns ($jobs, $message); falls back to 1 core if detection fails.
+sub count_jobs {
+  my $cores = 0;
+
+  # Sys::Info is optional, so it is loaded at runtime only
+  if (eval("use Sys::Info; 1;") && eval("use Sys::Info::Constants qw( :device_cpu ); 1;")) {
+    # Guard the probe: a die or an empty result must not abort the run
+    $cores = eval { Sys::Info->new->device('CPU')->count } || 0;
+  };
+
+  if ($cores < 1) {
+    $log->error('Unable to determine number of cores - set to 1');
+    $cores = 1;
+  };
+
+  my $jobs = ceil(5 * $cores);
+  return $jobs, "Run using $jobs jobs on $cores cores";
+}
+
# Parse options from the command line
GetOptions(
'input|i=s' => \@input,
@@ -245,6 +267,14 @@
-verbose => 0,
-msg => $VERSION_MSG,
-output => '-'
+ ),
+ },
+ 'job-count|jc' => sub {
+ my ($j, $msg) = count_jobs();
+ pod2usage(
+ -verbose => 0,
+ -msg => $msg,
+ -output => '-'
)
}
);
@@ -590,16 +620,8 @@
# Auto adjust jobs
if ($jobs eq '-1') {
- my $cores = 1;
- if (eval("use Sys::Info; 1;") && eval("use Sys::Info::Constants qw( :device_cpu ); 1;")) {
- $cores = Sys::Info->new->device('CPU')->count;
- }
- else {
- $log->warn("Unable to determine number of cores");
- };
-
- $jobs = ceil(5 * $cores);
- $log->info("Run using $jobs jobs on $cores cores");
+ ($jobs, my $msg) = count_jobs();
+ print $msg . "\n" unless $q;
};
# Glob and prefix files
@@ -1296,14 +1318,22 @@
If C<sequential-extraction> is not set to true, this will
also apply to extraction.
-Pass -1, and the value will be set automatically to 5
+Pass C<-1>, and the value will be set automatically to 5
times the number of available cores, in case L<Sys::Info>
-is available. Be aware, that the report of available cores
+is available and can read the CPU count (see C<--job-count>).
+Be aware that the report of available cores
may not work in certain conditions. Benchmarking the processing
speed based on the number of jobs may be valuable.
+
This is I<experimental>.
+=item B<--job-count|-jc>
+
+Print job and core information that would be used if
+C<-1> was passed to C<--jobs>.
+
+
=item B<--koral|-k>
Version of the output format. Supported versions are: