Fixed archive handling and added support for multiple jobs during extraction
Change-Id: I656cb0aa31c7139bf30b223928725ded195254a1
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 1a418af..cedd84e 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -75,6 +75,7 @@
#
# 1016/10/27
# - Added wildcard support for document extraction
+#
# ----------------------------------------------------------
our $LAST_CHANGE = '2016/10/27';
@@ -322,7 +323,7 @@
};
# Add further annotation archived
- $archive->attach($_) foreach @input;
+ $archive->attach($_) foreach @input[1..$#input];
my $prefix = 1;
@@ -351,8 +352,8 @@
# Sigle is a doc sigle
if ($_ =~ m!^(?:\.[/\\])?[^/\\]+?[/\\][^/\\]+?$!) {
- print "$_ ";
+ print "$_ ...\n";
# Check if a prefix is needed
unless ($prefix_check) {
$prefix = $archive->check_prefix;
@@ -360,9 +361,11 @@
};
# TODO: Make this OS independent
- print '' . (
+ my $path = ($prefix ? './' : '') . $_;
+
+ print '... ' . (
$archive->extract_doc(
- ($prefix ? './' : '') . $_, $output
+ $path, $output, $jobs
) ? '' : 'not '
);
print "extracted.\n";
@@ -376,10 +379,10 @@
# Iterate over all given sigles and extract
foreach (@sigle) {
- print "$_ ";
+ print "$_ ...\n";
# TODO: Make this OS independent
- print '' . (
+ print '... ' . (
$archive->extract_text(
($prefix ? './' : '') . $_, $output
) ? '' : 'not '
@@ -483,7 +486,7 @@
};
# Add further annotation archived
- $archive->attach($_) foreach @input;
+ $archive->attach($_) foreach @input[1..$#input];
print "Start processing ...\n";
$t = Benchmark->new;