Ignore temporary extraction when archiving a directory
Change-Id: I4fb13c5b80f76d91837a3b9fa97eed6b9de162d6
diff --git a/Changes b/Changes
index 64d6a84..bd13616 100644
--- a/Changes
+++ b/Changes
@@ -1,9 +1,11 @@
-0.37 2019-02-07
+0.37 2019-02-13
- Support for 'koral:field' array.
- Support for Koral versioning.
- Added tests for english sources.
- Added support for external links for
Wikipedia resources.
+ - Ignore temporary extraction parameter
+ on directory archiving.
0.36 2019-01-22
- Support for non-word tokens (fixes #5).
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 669c0d3..3530288 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -133,9 +133,11 @@
# 2019/01/22
# - Support for non-word tokens.
#
-# 2019/02/07
+# 2019/02/13
# - Support for 'koral:field' array.
# - Support for Koral versioning.
+# - Ignore temporary extract parameter on
+# directory archiving.
# ----------------------------------------------------------
our $LAST_CHANGE = '2019/02/07';
@@ -786,7 +788,7 @@
my $archive_output;
# First extract, then archive
- if (defined $extract_dir) {
+ if (defined $extract_dir && !-d $input[0]) {
# Create new archive object
if (-f($input[0]) && (my $archive = KorAP::XML::Archive->new($input[0]))) {
diff --git a/t/script/archive.t b/t/script/archive.t
index db4f856..8d0fba1 100644
--- a/t/script/archive.t
+++ b/t/script/archive.t
@@ -9,7 +9,7 @@
use Mojo::JSON qw/decode_json/;
use IO::Uncompress::Gunzip;
use Test::More;
-use Test::Output qw/:stdout :stderr :functions/;
+use Test::Output qw/:stdout :stderr :combined :functions/;
use Data::Dumper;
use KorAP::XML::Archive;
use utf8;
@@ -44,7 +44,6 @@
my $cache = tmpnam();
-
ok(-d $output, 'Output directory exists');
$call = join(
@@ -136,6 +135,31 @@
ok(-d $output, 'Ouput directory exists');
+my $temp_extract = tmpnam();
+
+# The -te option must be ignored when the input is a directory
+$call = join(
+ ' ',
+ 'perl', $script,
+ 'archive',
+ '--input' => $input,
+ '--output' => $output,
+ '--cache' => $cache,
+ '-t' => 'Tree_Tagger#Tokens',
+ '-j' => 4, # 4 jobs!
+ '-te' => $temp_extract
+);
+
+{
+ local $SIG{__WARN__} = sub {};
+
+ my $out = combined_from(sub { system($call); });
+
+ ok($out =~ m!Processed .+?\/corpus-doc-0001\.json!s, $call);
+ ok($out =~ m!Processed .+?\/corpus-doc-0002\.json!s, $call);
+};
+
+
$input = catfile($f, '..', 'corpus', 'WDD15', 'A79', '83946');
$call = join(
' ',