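Improved tar support

Make --to-tar a boolean flag (the tar file name is derived from the
output path), skip the output directory check when tarring, support
tar output for archive input as well, and add files to the tar
archive before the worker calls finish.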

Change-Id: I318b6f18e571c81a34752911bc9d009d726c7d14
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 226c35a..a6aa95f 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -157,7 +157,7 @@
   'primary|p!'  => \(my $primary),
   'pretty|y'    => \(my $pretty),
   'jobs|j=i'    => \(my $jobs),
-  'to-tar=s'       => \(my $to_tar),
+  'to-tar'      => \(my $to_tar),
   'sequential-extraction|se' => \(my $sequential_extraction),
   'cache-size|cs=s'  => \(my $cache_size),
   'cache-delete|cd!' => \(my $cache_delete),
@@ -350,7 +350,7 @@
 # Start serial processing
 if ($cmd eq 'serial') {
 
-  if ($output && (!-e $output || !-d $output)) {
+  if ($output && (!defined($to_tar)) && (!-e $output || !-d $output)) {
     print "Directory '$output' does not exist.\n\n";
     exit(0);
   };
@@ -383,7 +383,7 @@
     # This will create a directory
     my $new_out = catdir($output, get_file_name_from_glob($_));
 
-    # Create new path
+    # Create new path, in case the output is not meant to be tarred
     unless ($to_tar) {
       if (make_path($new_out) == 0 && !-d $new_out) {
         $log->error("Can\'t create path $new_out");
@@ -558,7 +558,7 @@
 s!^\s*([^_]+?)_([^\.]+?)\.(.+?)\s*$!$1/$2/$3! foreach @sigle;
 
 if ($cmd) {
-  if ($output && (!-e $output || !-d $output)) {
+  if ($output && (!defined($to_tar)) && (!-e $output || !-d $output)) {
     print "Directory '$output' does not exist.\n\n";
     exit(0);
   };
@@ -807,6 +807,34 @@
   #    exit(1);
   #  };
 
+  # Tar builder (only set with --to-tar) and the directory files are written to
+  my $tar_archive;
+  my $output_dir = $output;
+
+  # Initialize tar archive
+  if ($to_tar) {
+    $tar_archive = Archive::Tar::Builder->new(
+      ignore_errors => 1
+    );
+
+    # The output name is the tar file; append '.tar' unless already present
+    my $tar_file = $output;
+    unless ($tar_file =~ /\.tar$/) {
+      $tar_file .= '.tar';
+    };
+
+    # Open the tar file; abort instead of calling binmode on undef
+    print "Writing to file $tar_file\n";
+    my $fh = IO::File->new($tar_file, 'w')
+      or die "Unable to open '$tar_file' for writing: $!\n";
+    $fh->binmode(1);
+    $tar_archive->set_handle($fh);
+
+    # Write documents to a temporary directory before they are archived
+    $output_dir = File::Temp->newdir;
+  };
+
+
   # Input is a directory
   if (-d $input[0]) {
     my $it = Directory::Iterator->new($input[0]);
@@ -826,26 +854,6 @@
     $t = Benchmark->new;
     $count = scalar @dirs;
 
-    my $tar_archive;
-    my $output_dir = $output;
-    if ($to_tar) {
-      $tar_archive = Archive::Tar::Builder->new(
-        ignore_errors => 1
-      );
-
-      # Set output name
-      my $tar_file = $output;
-      unless ($tar_file =~ /\.tar$/) {
-        $tar_file .= '.tar';
-      };
-      my $fh = IO::File->new($tar_file, 'w');
-      $fh->binmode(1);
-
-      # Set handle
-      $tar_archive->set_handle($fh);
-      $output_dir = File::Temp->newdir;
-    };
-
   DIRECTORY_LOOP:
     for (my $i = 0; $i < $count; $i++) {
 
@@ -858,16 +866,17 @@
       $pool->start and next DIRECTORY_LOOP;
 
       if (my $return = $batch_file->process($dirs[$i] => $filename)) {
-        $pool->finish(
-          0,
-          ["Processed " . $filename . ($return == -1 ? " - already existing" : '')]
-        );
 
         # Add to tar archive
         if ($to_tar) {
           $tar_archive->archive($filename);
           unlink $filename;
         };
+
+        $pool->finish(
+          0,
+          ["Processed " . $filename . ($return == -1 ? " - already existing" : '')]
+        );
       }
       else {
         $pool->finish(1, ["Unable to process " . $dirs[$i]]);
@@ -898,7 +907,7 @@
       my ($prefix, $corpus, $doc, $text) = $archive->split_path($dirs[$i]);
 
       my $filename = catfile(
-        $output,
+        $output_dir,
         get_file_name(
           catfile($corpus, $doc, $text)
             . '.json' . ($gzip ? '.gz' : '')
@@ -925,6 +934,13 @@
 
         # Write file
         if (my $return = $batch_file->process($dir => $filename)) {
+
+          # Archive the written file and remove it (done before $pool->finish)
+          if ($to_tar) {
+            $tar_archive->archive($filename);
+            unlink $filename;
+          };
+
           # Delete temporary file
           $pool->finish(
             0,