Fixed tempdir issue in script
Change-Id: I8421bd0c83629350ef4d6efce8fbb9fce22fab4e
diff --git a/script/korapxml2krill_dir b/script/korapxml2krill_dir
index 5b09566..6293a7d 100644
--- a/script/korapxml2krill_dir
+++ b/script/korapxml2krill_dir
@@ -24,6 +24,11 @@
#
# 2016/02/14
# - Added version information
+# - Added support for archive files
+#
+# 2016/02/15
+# - Fixed temporary directory bug
+# - Improved skipping before unzipping
sub printversion {
print "Version " . $KorAP::XML::Krill::VERSION . "\n\n";
@@ -63,7 +68,7 @@
--help|-h Print this document (optional)
--version|-v Print version information
-diewald@ids-mannheim.de, 2016/02/14
+diewald@ids-mannheim.de, 2016/02/15
EOHELP
@@ -90,14 +95,18 @@
printhelp(1) if !$input || !$output;
+# Derive the flat output file name for a document path:
+# strip the leading input directory (file-level $input), then turn
+# the remaining path separators into dashes and drop leading dashes.
+#
+# Takes the path as its only argument and returns the resulting
+# name without any file extension.
+sub get_file_name {
+  my $file = shift;
+
+  # Remove the input directory only when it is a real prefix of the
+  # path. The former pattern '^?' made the anchor optional, so the
+  # directory name could be cut out of the middle of the path, and the
+  # unquoted $input let regex metacharacters in the directory name
+  # ('.', '+', '(' ...) alter the match. \Q...\E keeps them literal.
+  $file =~ s{^/?\Q$input\E}{};
+
+  # Flatten the remaining directory structure into a single name
+  $file =~ tr/\//-/;
+  $file =~ s{^-+}{};
+  return $file;
+};
# write file
sub write_file {
my $anno = shift;
- my $file = $anno;
- $file =~ s/^?\/?$input//;
- $file =~ tr/\//-/;
- $file =~ s{^-+}{};
+ my $file = get_file_name($anno);
my $call = 'perl ' . $local . '/korapxml2krill -i ' . $anno . ' -o ' . $output . '/' . $file . '.json';
$call .= '.gz -z' if $gzip;
@@ -154,14 +163,26 @@
# Split path information
my ($prefix, $corpus, $doc, $text) = $archive->split_path($dirs[$i]);
+ unless ($overwrite) {
+
+ my $filename = catfile(
+ $output,
+ get_file_name(catdir($doc, $text)) . '.json' . ($gzip ? '.gz' : '')
+ );
+ if (-e $filename) {
+ print "Skip $filename\n";
+ next;
+ };
+ };
+
# Create temporary file
- my $temp = tempdir(CLEANUP => 1);
+ my $temp = File::Temp->newdir;
# Extract from archive
if ($archive->extract($dirs[$i], $temp)) {
# Create corpus directory
- $input = catdir($temp, $corpus);
+ $input = catdir("$temp", $corpus);
# Temporary directory
my $dir = catdir($input, $doc, $text);
@@ -173,7 +194,7 @@
print "Unable to extract " . $dirs[$i] . "\n";
};
- $temp = 0;
+ $temp = undef;
};
}