Introduced sequential extraction flag to circumvent troubles with parallel extraction

Change-Id: I7fb50e60ff527c7e79b59fac3cf957fdf4b989ac
diff --git a/Changes b/Changes
index 2bc208c..06eeff2 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,6 @@
-0.28 2017-04-11
+0.28 2017-04-12
         - Improved overwriting behaviour for unzip.
+        - Introduced --sequential-extraction flag.
 
 0.27 2017-04-10
         - Support configuration files.
diff --git a/lib/KorAP/XML/Archive.pm b/lib/KorAP/XML/Archive.pm
index c37164e..bbdcfbd 100644
--- a/lib/KorAP/XML/Archive.pm
+++ b/lib/KorAP/XML/Archive.pm
@@ -176,10 +176,10 @@
       system(@$_);
 
       # Check for return code
-      if ($? != 0) {
-        carp("System call '" . join(' ', @$_) . "' errors " . $?);
-        return;
-      };
+      my $code = $?;
+
+      print "Extract" .
+        ($code ? " $code" : '') . " " . join(' ', @$_) . "\n";
     };
   }
 
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 00e9216..1b994c2 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -106,12 +106,13 @@
 # - support configuration option
 # - support for temporary extraction
 #
-# 2017/04/10
+# 2017/04/12
 # - support serial processing
 # - support input root
+# - introduced --sequential-extraction flag
 # ----------------------------------------------------------
 
-our $LAST_CHANGE = '2017/04/07';
+our $LAST_CHANGE = '2017/04/12';
 our $LOCAL = $FindBin::Bin;
 our $VERSION_MSG = <<"VERSION";
 Version $KorAP::XML::Krill::VERSION - diewald\@ids-mannheim.de - $LAST_CHANGE
@@ -154,6 +155,7 @@
   'primary|p!'  => \(my $primary),
   'pretty|y'    => \(my $pretty),
   'jobs|j=i'    => \(my $jobs),
+  'sequential-extraction|se!' => \(my $sequential_extraction), # negatable: --no-sequential-extraction
   'cache-size|cs=s'  => \(my $cache_size),
   'cache-delete|cd!' => \(my $cache_delete),
   'cache-init|ci!'   => \(my $cache_init),
@@ -231,6 +233,11 @@
     $cache_init = $config{'cache-init'} ;
   };
 
+  # Sequential extraction
+  if (!(defined $sequential_extraction) && defined $config{'sequential-extraction'}) {
+    $sequential_extraction = $config{'sequential-extraction'} ;
+  };
+
   # Meta
   if (!(defined $meta) && defined $config{'meta'}) {
     $meta = $config{'meta'} ;
@@ -279,16 +286,17 @@
 
 
 # Set default token base
-$token_base      //= 'OpenNLP#tokens';
-$cache_file      //= 'korapxml2krill.cache';
-$cache_size      //= '50m';
-$jobs            //= 0;
-$cache_delete    //= 1;
-$cache_init      //= 1;
-$log_level       //= 'ERROR';
-$base_sentences  //= '';
-$base_paragraphs //= '';
-$base_pagebreaks //= '';
+$token_base          //= 'OpenNLP#tokens';
+$cache_file          //= 'korapxml2krill.cache';
+$cache_size          //= '50m';
+$jobs                //= 0;
+$cache_delete        //= 1;
+$cache_init          //= 1;
+$sequential_extraction //= 0;
+$log_level           //= 'ERROR';
+$base_sentences      //= '';
+$base_paragraphs     //= '';
+$base_pagebreaks     //= '';
 
 $base_sentences  = lc $base_sentences;
 $base_paragraphs = lc $base_paragraphs;
@@ -524,13 +532,13 @@
 
 sub get_file_name_from_glob ($) {
   my $glob = shift;
-  $glob =~ s/\.zip$//;          # Remove file extension
   $glob =~ s![\\\/]!-!g;        # Transform paths
   $glob =~ s/[\*\?]//g;         # Remove arbitrary fills
   $glob =~ s/[\{\}\[\]]/-/g;    # Remove class and multiple brackets
   $glob =~ s/\-\-+/-/g;         # Remove sequences of binding characters
   $glob =~ s/^-//;              # Clean beginning
   $glob =~ s/-$//;              # Clean end
+  $glob =~ s/\.zip$//;          # Remove file extension
   return $glob;
 };
 
@@ -660,7 +668,7 @@
 
           print '... ' . (
             $archive->extract_doc(
-              $path, $output, $jobs
+              $path, $output, $sequential_extraction ? 1 : $jobs
             ) ? '' : 'not '
           );
           print "extracted.\n";
@@ -736,7 +744,7 @@
       $extract_dir = catdir($extract_dir, random_string('cccccc'));
 
       # Extract to temprary directory
-      if ($archive->extract_all($extract_dir, $jobs)) {
+      if ($archive->extract_all($extract_dir, $sequential_extraction ? 1: $jobs)) {
         @input = ($extract_dir);
       }
       else {
@@ -1104,10 +1112,22 @@
 Define the number of concurrent jobs in seperated forks
 for archive processing.
 Defaults to C<0> (everything runs in a single process).
+
+If C<sequential-extraction> is not set to true, this will
+also apply to extraction.
+
 Pass -1, and the value will be set automatically to 5
 times the number of available cores.
 This is I<experimental>.
 
+=item B<--sequential-extraction|-se>
+
+Flag to indicate that archives are extracted sequentially, i.e. that the C<jobs> value does not apply to extraction.
+Some systems may have problems with extracting multiple archives
+to the same folder at the same time.
+Can be flagged using C<--no-sequential-extraction> as well.
+Defaults to C<false>.
+
 =item B<--meta|-m>
 
 Define the metadata parser to use. Defaults to C<I5>.
@@ -1157,8 +1177,11 @@
 Supported parameters are:
 C<overwrite>, C<gzip>, C<jobs>, C<input-base>,
 C<token>, C<log>, C<cache>, C<cache-size>, C<cache-delete>, C<meta>,
-C<output>, C<base-sentences>, C<temp-extract>, C<base-paragraphs>,
-C<base-pagebreaks>, C<skip> (semicolon separated), C<sigle>
+C<output>,
+C<temp-extract>, C<sequential-extraction>,
+C<base-sentences>, C<base-paragraphs>,
+C<base-pagebreaks>,
+C<skip> (semicolon separated), C<sigle>
 (semicolon separated), C<anno> (semicolon separated).
 
 =item B<--temporary-extract|-te>
diff --git a/t/script/extract.t b/t/script/extract.t
index 884cdb5..736647b 100644
--- a/t/script/extract.t
+++ b/t/script/extract.t
@@ -56,7 +56,8 @@
   sub {
     system($call);
   },
-  qr!TEST/BSP/1 $sep extracted.!s,
+  qr!TEST/BSP/1 $sep extracted!s,
+#  qr!TEST/BSP/1 $sep extracted.!s,
   $call
 );
 
@@ -125,7 +126,7 @@
   sub {
     system($call);
   },
-  qr!REI/BNG $sep extracted!s,
+  qr!Extract .+? REI/BNG!s,
   $call
 );
 
@@ -134,7 +135,7 @@
   sub {
     system($call);
   },
-  qr!REI/RBR $sep extracted!s,
+  qr!Extract .+? REI/RBR!s,
   $call
 );
 
@@ -161,7 +162,7 @@
   sub {
     system($call);
   },
-  qr!REI/BN\* $sep extracted!s,
+  qr!Extract .+? REI/BN\*!s,
   $call
 );
 
@@ -203,7 +204,7 @@
   sub {
     system($call);
   },
-  qr!WPD15/A00/00081 $sep extracted.!s,
+  qr!WPD15/A00/00081 $sep extracted!s,
   $call
 );
 
@@ -237,6 +238,7 @@
     system($call);
   },
   qr!TEST/BSP "Example"\/1 $sep extracted!s,
+  # qr!Extract .+? TEST/BSP "Example"\/1!s,
   $call
 );