Fixed tempdir issue in script

Change-Id: I8421bd0c83629350ef4d6efce8fbb9fce22fab4e
diff --git a/Changes b/Changes
index 8a28f26..e0477b1 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,7 @@
+0.09 2016-02-15
+        - Fixed temporary directory handling in scripts.
+	- Improved skipping for archive handling in scripts.
+
 0.08 2016-02-14
         - Added support for archive streaming.
 	- Improved scripts.
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index decae35..c11588c 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -17,7 +17,7 @@
 #       Due to the kind of processing, processed metadata may be stored in
 #       a multiprocess cache instead.
 
-our $VERSION = '0.08';
+our $VERSION = '0.09';
 
 our @ATTR = qw/text_sigle
 	       doc_sigle
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 9e2d1e8..6443c8a 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -40,9 +40,9 @@
   --output|-o <filename>          Document name for output (optional),
                                   Writes to <STDOUT> by default
   --overwrite|-w                  Overwrite files that already exist
-  --token|-t <foundry>[#<layer>]  Define the default tokenization by specifying
+  --token|-t <foundry>[#<file>]   Define the default tokenization by specifying
                                   the name of the foundry and optionally the name
-                                  of the layer. Defaults to OpenNLP#tokens.
+                                  of the layer-file. Defaults to OpenNLP#tokens.
   --skip|-s <foundry>[#<layer>]   Skip specific foundries by specifying the name
                                   or specific layers by defining the name
                                   with a # in front of the foundry,
@@ -61,7 +61,7 @@
   --help|-h                       Print this document (optional)
   --version|-v                    Print version information
 
-diewald@ids-mannheim.de, 2016/02/14
+diewald@ids-mannheim.de, 2016/02/15
 
 EOHELP
   exit(defined $_[0] ? $_[0] : 0);
diff --git a/script/korapxml2krill_dir b/script/korapxml2krill_dir
index 5b09566..6293a7d 100644
--- a/script/korapxml2krill_dir
+++ b/script/korapxml2krill_dir
@@ -24,6 +24,11 @@
 #
 # 2016/02/14
 # - Added version information
+# - Added support for archive files
+#
+# 2016/02/15
+# - Fixed temporary directory bug
+# - Improved skipping before unzipping
 
 sub printversion {
   print "Version " . $KorAP::XML::Krill::VERSION . "\n\n";
@@ -63,7 +68,7 @@
   --help|-h                       Print this document (optional)
   --version|-v                    Print version information
 
-diewald@ids-mannheim.de, 2016/02/14
+diewald@ids-mannheim.de, 2016/02/15
 
 EOHELP
 
@@ -90,14 +95,18 @@
 
 printhelp(1) if !$input || !$output;
 
+sub get_file_name {               # Map a path to a flat output base name ('/' -> '-').
+  my $file = shift;               # Path to flatten; works on a copy of the argument.
+  $file =~ s{/?\Q$input\E}{};     # Drop first "$input" (and a '/' before it); \Q..\E keeps the path literal.
+  $file =~ tr{/}{-};              # Remaining path separators become dashes.
+  $file =~ s{^-+}{};              # Result must not start with a dash.
+  return $file;
+};
 
 # write file
 sub write_file {
   my $anno = shift;
-  my $file = $anno;
-  $file =~ s/^?\/?$input//;
-  $file =~ tr/\//-/;
-  $file =~ s{^-+}{};
+  my $file = get_file_name($anno);
 
   my $call = 'perl ' . $local . '/korapxml2krill -i ' . $anno . ' -o ' . $output . '/' . $file . '.json';
   $call .= '.gz -z' if $gzip;
@@ -154,14 +163,26 @@
     # Split path information
     my ($prefix, $corpus, $doc, $text) = $archive->split_path($dirs[$i]);
 
+    unless ($overwrite) {
+
+      my $filename = catfile(
+	$output,
+	get_file_name(catdir($doc, $text)) . '.json' . ($gzip ? '.gz' : '')
+      );
+      if (-e $filename) {
+	print "Skip $filename\n";
+	next;
+      };
+    };
+
     # Create temporary file
-    my $temp = tempdir(CLEANUP => 1);
+    my $temp = File::Temp->newdir;
 
     # Extract from archive
     if ($archive->extract($dirs[$i], $temp)) {
 
       # Create corpus directory
-      $input = catdir($temp, $corpus);
+      $input = catdir("$temp", $corpus);
 
       # Temporary directory
       my $dir = catdir($input, $doc, $text);
@@ -173,7 +194,7 @@
       print "Unable to extract " . $dirs[$i] . "\n";
     };
 
-    $temp = 0;
+    $temp = undef;
   };
 }