Finished support for the --to-tar flag
Change-Id: I54f6fddcc8392c51eab59e0e84a60fe2455bccd4
diff --git a/Changes b/Changes
index 06eeff2..28b58c3 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.29 2017-04-23
+ - Added support for --to-tar flag.
+
0.28 2017-04-12
- Improved overwriting behaviour for unzip.
- Introduced --sequential-extraction flag.
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index e94baaa..ddc859d 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -16,7 +16,7 @@
use Data::Dumper;
use File::Spec::Functions qw/catdir catfile catpath splitdir splitpath rel2abs/;
-our $VERSION = '0.28';
+our $VERSION = '0.29';
has 'path';
has [qw/text_sigle doc_sigle corpus_sigle/];
diff --git a/script/korapxml2krill b/script/korapxml2krill
index a6aa95f..7a7b8f7 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -28,6 +28,7 @@
use String::Random qw(random_string);
use IO::File;
use Archive::Tar::Builder;
+use Fcntl qw(:flock SEEK_END);
# use KorAP::XML::ForkPool;
# TODO: use Parallel::Loops
@@ -780,6 +781,34 @@
my $count = 0; # Texts to process
my $iter = 1; # Current text in process
+ my $tar_archive;
+ my $output_dir = $output;
+ my $tar_fh;
+
+ # Initialize tar archive (only when $to_tar is set)
+ if ($to_tar) {
+ $tar_archive = Archive::Tar::Builder->new(
+ ignore_errors => 1
+ );
+
+ # Set output name, appending '.tar' unless it already ends with it
+ my $tar_file = $output;
+ unless ($tar_file =~ /\.tar$/) {
+ $tar_file .= '.tar';
+ };
+
+ # Initiate the tar file; fail early with a clear message if it cannot be opened
+ print "Writing to file $tar_file\n";
+ $tar_fh = IO::File->new($tar_file, 'w') or die "Unable to open $tar_file for writing: $!\n";
+ $tar_fh->binmode(1);
+
+ # Hand the open filehandle to the archive builder
+ $tar_archive->set_handle($tar_fh);
+
+ # Output to temporary directory; files are written there instead of $output
+ $output_dir = File::Temp->newdir;
+ };
+
# Report on fork message
$pool->run_on_finish (
sub {
@@ -790,6 +819,25 @@
($iter++) . "/$count]" .
($code ? " $code" : '') .
' ' . $data->[0] . "\n";
+
+ if (!$code && $to_tar && $data->[2]) {
+ my $filename = $data->[2];
+
+ # Lock filehandle
+ if (flock($tar_fh, LOCK_EX)) {
+
+ # Archive and remove file
+ $tar_archive->archive($filename);
+ unlink $filename;
+
+ # Unlock filehandle
+ flock($tar_fh, LOCK_UN);
+ }
+ else {
+ $log->warn("Unable to add $filename to archive");
+ };
+ };
+
$data->[1] = undef if $data->[1];
}
);
@@ -807,33 +855,6 @@
# exit(1);
# };
- my $tar_archive;
- my $output_dir = $output;
-
- # Initialize tar archive
- if ($to_tar) {
- $tar_archive = Archive::Tar::Builder->new(
- ignore_errors => 1
- );
-
- # Set output name
- my $tar_file = $output;
- unless ($tar_file =~ /\.tar$/) {
- $tar_file .= '.tar';
- };
-
- # Initiate the tar file
- print "Writing to file $tar_file\n";
- my $fh = IO::File->new($tar_file, 'w');
- $fh->binmode(1);
-
- # Set handle
- $tar_archive->set_handle($fh);
-
- # Output to temporary directory
- $output_dir = File::Temp->newdir;
- };
-
# Input is a directory
if (-d $input[0]) {
@@ -866,16 +887,13 @@
$pool->start and next DIRECTORY_LOOP;
if (my $return = $batch_file->process($dirs[$i] => $filename)) {
-
- # Add to tar archive
- if ($to_tar) {
- $tar_archive->archive($filename);
- unlink $filename;
- };
-
$pool->finish(
0,
- ["Processed " . $filename . ($return == -1 ? " - already existing" : '')]
+ [
+ "Processed " . $filename . ($return == -1 ? " - already existing" : ''),
+ undef,
+ $filename
+ ]
);
}
else {
@@ -935,16 +953,14 @@
# Write file
if (my $return = $batch_file->process($dir => $filename)) {
- # Add to tar archive
- if ($to_tar) {
- $tar_archive->archive($filename);
- unlink $filename;
- };
-
# Delete temporary file
$pool->finish(
0,
- ["Processed " . $filename . ($return == -1 ? " - already existing" : ''), $temp]
+ [
+ "Processed " . $filename . ($return == -1 ? " - already existing" : ''),
+ $temp,
+ $filename
+ ]
);
#$pool->finish(0, ["Processed " . $filename, $temp]);
}
@@ -970,6 +986,13 @@
# Delete cache file
unlink($cache_file) if $cache_delete;
+ # Close tar filehandle
+ if ($to_tar && $tar_fh) {
+ $tar_archive->finish;
+ $tar_fh->close;
+ print "Wrote to tar archive.\n";
+ };
+
print timestr(timediff(Benchmark->new, $t))."\n";
print "Done.\n";
};
diff --git a/t/script/archive_tar.t b/t/script/archive_tar.t
index aee6542..666e2c5 100644
--- a/t/script/archive_tar.t
+++ b/t/script/archive_tar.t
@@ -58,9 +58,10 @@
# Test without parameters
my $combined = combined_from( sub { system($call) });
-diag $combined;
+like($combined, qr!Input is .+?wpd15-single\.zip,.+?wpd15-single\.malt\.zip,.+?wpd15-single\.corenlp\.zip,.+?wpd15-single\.opennlp\.zip,.+?wpd15-single\.mdparser\.zip,.+?wpd15-single\.tree_tagger\.zip!is, 'Input is fine');
-#qr!Input is .+?wpd15-single\.zip,.+?wpd15-single\.malt\.zip,.+?wpd15-single\.corenlp\.zip,.+?wpd15-single\.opennlp\.zip,.+?wpd15-single\.mdparser\.zip,.+?wpd15-single\.tree_tagger\.zip!is,
+like($combined, qr!Writing to file .+?\.tar!, 'Tar file is announced');
+like($combined, qr!Wrote to tar archive!, 'Tar archive is written');