Add automatic calculation of the job count from the number of CPU cores
Change-Id: Ia74663afee12eb7a650a36fc270e2f5644b7150f
diff --git a/Makefile.PL b/Makefile.PL
index eb8ecc3..56ab8ee 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -34,7 +34,8 @@
'utf8' => 0,
'bytes' => 0,
'Pod::Usage' => 0,
- 'Cache::FastMmap' => 1.40
+ 'Cache::FastMmap' => 1.40,
+ 'Sys::Info' => 0.78
},
MIN_PERL_VERSION => '5.014',
test => {
diff --git a/lib/KorAP/XML/ForkPool.pm b/lib/KorAP/XML/ForkPool.pm
index e60135f..c42ea51 100644
--- a/lib/KorAP/XML/ForkPool.pm
+++ b/lib/KorAP/XML/ForkPool.pm
@@ -2,7 +2,9 @@
use strict;
use warnings;
use Parallel::ForkManager;
-
+use v5.10;
+use Sys::Info;
+use Sys::Info::Constants qw( :device_cpu );
# Construct a new fork pool
sub new {
@@ -31,7 +33,7 @@
my $data = pop;
print 'Convert [' . ($self->{jobs} > 0 ? "\$$pid:" : '') .
- ($self->{iter}++) . '/' . $self->{count} . ']';
+ ($self->{iter}++) . '/' . $self->{count} . ']';
print ($code ? " $code" : '') . " $$data\n";
}
);
@@ -66,14 +68,14 @@
unless ($self->{overwrite}) {
my $filename = catfile(
- $output,
- get_file_name($dirs[$i]) . '.json' . ($gzip ? '.gz' : '')
+ $output,
+ get_file_name($dirs[$i]) . '.json' . ($gzip ? '.gz' : '')
);
if (-e $filename) {
- $iter++;
- print "Skip $filename\n";
- next;
+ $iter++;
+ print "Skip $filename\n";
+ next;
};
};
@@ -121,17 +123,17 @@
unless ($self->{overwrite}) {
my $filename = catfile(
- $output,
- get_file_name(
- catfile($corpus, $doc, $text)
- . '.json' . ($gzip ? '.gz' : '')
- )
+ $output,
+ get_file_name(
+ catfile($corpus, $doc, $text)
+ . '.json' . ($gzip ? '.gz' : '')
+ )
);
if (-e $filename) {
- $iter++;
- print "Skip $filename\n";
- next;
+ $iter++;
+ print "Skip $filename\n";
+ next;
};
};
diff --git a/script/korapxml2krill b/script/korapxml2krill
index b61ae04..66af16f 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -7,6 +7,7 @@
use Getopt::Long qw/GetOptions :config no_auto_abbrev/;
use Benchmark qw/:hireswallclock/;
use IO::Compress::Gzip qw/$GzipError/;
+use POSIX qw/ceil/;
use Log::Log4perl;
use Pod::Usage;
use Cache::FastMmap;
@@ -16,9 +17,16 @@
use KorAP::XML::Tokenizer;
use KorAP::XML::Batch::File;
use Parallel::ForkManager;
+use v5.10;
+use Sys::Info;
+use Sys::Info::Constants qw( :device_cpu );
+
+# use KorAP::XML::ForkPool;
# TODO: use Parallel::Loops
# TODO: make output files
+# TODO: Use KorAP::XML::ForkPool!
+
# CHANGES:
# ----------------------------------------------------------
# 2013/11/25
@@ -103,6 +111,7 @@
my (@skip, @sigle, @anno, @input);
my $text;
+
# Parse options from the command line
GetOptions(
'input|i=s' => \@input,
@@ -170,6 +179,13 @@
my $log = Log::Log4perl->get_logger('main');
+
+if ($jobs == -1) { # -1 requests automatic sizing based on the CPU core count
+ my $cores = Sys::Info->new->device('CPU')->count || 1; # assume one core if detection fails
+ $jobs = ceil(5 * $cores);
+ $log->info("Run using $jobs jobs");
+};
+
my %skip;
$skip{lc($_)} = 1 foreach @skip;
@@ -786,6 +802,8 @@
Define the number of concurrent jobs in seperated forks
for archive processing.
Defaults to C<0> (everything runs in a single process).
+Pass C<-1> to have the value set automatically to 5
+times the number of available CPU cores.
This is I<experimental>.
=item B<--meta|-m>