Support --quiet flag (fixes #15)
Change-Id: I2675131953f1ac455c85862278df9a702ad5d523
diff --git a/Changes b/Changes
index efd5116..684dd95 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+0.52 2023-01-23
+ - Introduced 'quiet' flag.
+
0.51 2023-12-23
- Support ICC meta.
- Fix date handling for years of length < 2.
diff --git a/Readme.pod b/Readme.pod
index 08aac66..a9ab89c 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -342,6 +342,11 @@
The L<Log::Any> log level, defaults to C<ERROR>.
+=item B<--quiet>
+
+Silence all information (non-log) outputs.
+
+
=item B<--help|-h>
Print help information.
@@ -478,8 +483,8 @@
=head1 About KorAP-XML
-KorAP-XML (Banski et al. 2012) is an implementation of the KorAP
-data model (Banski et al. 2013), where text data are stored physically
+KorAP-XML (Bański et al. 2012) is an implementation of the KorAP
+data model (Bański et al. 2013), where text data are stored physically
separated from their interpretations (i.e. annotations).
A text document in KorAP-XML therefore consists of several files
containing primary data, metadata and annotations.
@@ -506,7 +511,7 @@
The C<data.xml> contains the primary data, the C<header.xml> contains
the metadata, and the annotation layers are stored in subfolders
like C<base>, C<struct> or C<corenlp>
-(so-called "foundries"; Banski et al. 2013).
+(so-called "foundries"; Bański et al. 2013).
Metadata is available in the TEI-P5 variant I5
(Lüngen and Sperberg-McQueen 2012). See the documentation in
@@ -567,15 +572,15 @@
=head2 References
-Piotr Banski, Cyril Belica, Helge Krause, Marc Kupietz, Carsten Schnober, Oliver Schonefeld, and Andreas Witt (2011):
+Piotr Bański, Cyril Belica, Helge Krause, Marc Kupietz, Carsten Schnober, Oliver Schonefeld, and Andreas Witt (2011):
KorAP data model: first approximation, December.
-Piotr Banski, Peter M. Fischer, Elena Frick, Erik Ketzan, Marc Kupietz, Carsten Schnober, Oliver Schonefeld and Andreas Witt (2012):
+Piotr Bański, Peter M. Fischer, Elena Frick, Erik Ketzan, Marc Kupietz, Carsten Schnober, Oliver Schonefeld and Andreas Witt (2012):
"The New IDS Corpus Analysis Platform: Challenges and Prospects",
Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC 2012).
L<PDF|http://www.lrec-conf.org/proceedings/lrec2012/pdf/789_Paper.pdf>
-Piotr Banski, Elena Frick, Michael Hanl, Marc Kupietz, Carsten Schnober and Andreas Witt (2013):
+Piotr Bański, Elena Frick, Michael Hanl, Marc Kupietz, Carsten Schnober and Andreas Witt (2013):
"Robust corpus architecture: a new look at virtual collections and data access",
Corpus Linguistics 2013. Abstract Book. Lancaster: UCREL, pp. 23-25.
L<PDF|https://ids-pub.bsz-bw.de/frontdoor/deliver/index/docId/4485/file/Ba%c5%84ski_Frick_Hanl_Robust_corpus_architecture_2013.pdf>
@@ -604,7 +609,7 @@
=head1 COPYRIGHT AND LICENSE
-Copyright (C) 2015-2023, L<IDS Mannheim|https://www.ids-mannheim.de/>
+Copyright (C) 2015-2024, L<IDS Mannheim|https://www.ids-mannheim.de/>
Author: L<Nils Diewald|https://www.nils-diewald.de/>
diff --git a/lib/KorAP/XML/Archive.pm b/lib/KorAP/XML/Archive.pm
index 3606086..a1b0526 100644
--- a/lib/KorAP/XML/Archive.pm
+++ b/lib/KorAP/XML/Archive.pm
@@ -142,7 +142,7 @@
sub extract_all {
my $self = shift;
- my ($target_dir, $jobs) = @_;
+ my ($quiet, $target_dir, $jobs) = @_;
my @init_cmd = (
'unzip', # Use unzip program
@@ -163,12 +163,12 @@
push @cmds, \@cmd;
};
- $self->_extract($jobs, @cmds);
+ $self->_extract($quiet, $jobs, @cmds);
};
sub _extract {
- my ($self, $jobs, @cmds) = @_;
+ my ($self, $quiet, $jobs, @cmds) = @_;
# Only single call
if (!$jobs || $jobs == 1) {
@@ -179,8 +179,10 @@
# Check for return code
my $code = $?;
- print "Extract" .
- ($code ? " $code" : '') . " " . join(' ', @$_) . "\n";
+ unless ($quiet) {
+ print "Extract" .
+ ($code ? " $code" : '') . " " . join(' ', @$_) . "\n";
+ };
};
}
@@ -191,8 +193,10 @@
sub {
my ($pid, $code) = @_;
my $data = pop;
- print "Extract [\$$pid] " .
- ($code ? " $code" : '') . " $$data\n";
+ unless ($quiet) {
+ print "Extract [\$$pid] " .
+ ($code ? " $code" : '') . " $$data\n";
+ };
}
);
@@ -215,7 +219,7 @@
# Extract from sigle
sub extract_sigle {
- my ($self, $sigle, $target_dir, $jobs) = @_;
+ my ($self, $quiet, $sigle, $target_dir, $jobs) = @_;
my @cmds = $self->cmds_from_sigle($sigle);
@cmds = map {
@@ -223,7 +227,7 @@
$_;
} @cmds;
- return $self->_extract($jobs, @cmds);
+ return $self->_extract($quiet, $jobs, @cmds);
};
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index 7f5e904..06dd102 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -16,7 +16,7 @@
our @EXPORT_OK = qw(get_file_name get_file_name_from_glob);
-our $VERSION = '0.51';
+our $VERSION = '0.52';
has 'path';
has [qw/text_sigle doc_sigle corpus_sigle/];
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 4185020..51352e8 100755
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -216,6 +216,7 @@
'primary|p!' => sub {
warn 'Primary flag no longer supported!';
},
+ 'quiet' => \($cfg{quiet}),
'pretty|y' => sub {
warn 'Pretty flag no longer supported!';
},
@@ -266,7 +267,7 @@
gzip to-tar log lang cache non-word-tokens
non-verbal-tokens sequential-extraction
temporary-extract cache-init
- koral extract-dir jobs!) {
+ koral extract-dir jobs quiet!) {
my $underlined = $_ =~ tr/-/_/r;
if (!defined($cfg{$underlined}) && defined $config{$_}) {
$cfg{$underlined} = $config{$_};
@@ -303,6 +304,7 @@
my $base_paragraphs = lc($cfg{base_paragraphs} // '');
my $base_pagebreaks = lc($cfg{base_pagebreaks} // '');
my $sequential_extraction = $cfg{sequential_extraction} // 0;
+my $q = !!($cfg{quiet}) // 0;
# Get tokenization basis
my ($token_base_foundry, $token_base_layer) = split(/#/, $token_base) if $token_base;
@@ -395,8 +397,10 @@
# Create archive command
my @archive_cmd = ($^X, $0, 'archive', @keep_argv, '-i', $_, '-o', $new_out);
- print "Start serial processing of $_ to $new_out\n";
- print 'Command: ', join(' ', @archive_cmd), "\n";
+ unless ($q) {
+ print "Start serial processing of $_ to $new_out\n";
+ print 'Command: ', join(' ', @archive_cmd), "\n";
+ };
# Start archiving
system @archive_cmd;
@@ -608,7 +612,7 @@
# Sort files by length
@input = sort { length($a) <=> length($b) } @new_input;
- print 'Input is ' . join(', ', @input)."\n";
+ print 'Input is ' . join(', ', @input)."\n" unless $q;
};
@@ -669,17 +673,21 @@
# Iterate over all given sigles and extract
foreach (@sigle) {
- print "$_ ...\n";
+ unless ($q) {
+ print "$_ ...\n";
- # TODO: Make this OS independent
- print '... ' . (
+ # TODO: Make this OS independent
+ print '... ' . (
- # TODO:
- # - prefix???
- $archive->extract_sigle([$_], $output, $jobs)
- ? '' : 'not '
- );
- print "extracted.\n";
+ # TODO:
+ # - prefix???
+ $archive->extract_sigle(0, [$_], $output, $jobs)
+ ? '' : 'not '
+ );
+ print "extracted.\n";
+ } else {
+ $archive->extract_sigle(1, [$_], $output, $jobs);
+ }
};
}
@@ -714,15 +722,15 @@
# Create a temporary directory
if ($extract_dir eq ':temp:') {
$extract_dir = tempdir(CLEANUP => 0);
- print "Temporarily extract to $extract_dir\n";
+ print "Temporarily extract to $extract_dir\n" unless $q;
};
# Add some random extra to avoid clashes with multiple archives
$extract_dir = catdir($extract_dir, random_string('cccccc'));
# Extract to temporary directory
- if ($archive->extract_all($extract_dir, $sequential_extraction ? 1: $jobs)) {
- print "Extract sequentially to $extract_dir\n";
+ if ($archive->extract_all($q, $extract_dir, $sequential_extraction ? 1: $jobs)) {
+ print "Extract sequentially to $extract_dir\n" unless $q;
@input = ($extract_dir);
}
else {
@@ -759,7 +767,7 @@
};
# Initiate the tar file
- print "Writing to file $tar_file\n";
+ print "Writing to file $tar_file\n" unless $q;
$tar_fh = IO::File->new($tar_file, 'w');
$tar_fh->binmode(1);
@@ -790,10 +798,13 @@
my ($pid, $code) = @_;
my $data = pop;
- print 'Convert ['. ($jobs > 0 ? "\$$pid:" : '') .
- ($iter++) . "/$count]" .
- ($code ? " $code" : '') .
- ' ' . $data->[0] . "\n";
+ unless ($q) {
+ print 'Convert ['. ($jobs > 0 ? "\$$pid:" : '') .
+ $iter . "/$count]" .
+ ($code ? " $code" : '') .
+ ' ' . $data->[0] . "\n";
+ };
+ $iter++;
if (!$code && $to_tar && $data->[2]) {
my $filename = $data->[2];
@@ -821,7 +832,7 @@
my $t;
my $temp;
- print "Reading data ...\n";
+ print "Reading data ...\n" unless $q;
# unless (Cache::FastMmap->new(
# share_file => $cache_file,
@@ -850,7 +861,7 @@
last unless $it->next;
};
- print "Start processing ...\n";
+ print "Start processing ...\n" unless $q;
$t = Benchmark->new;
$count = scalar @dirs;
@@ -895,7 +906,7 @@
# Get sigles to extract
my $prefix = set_sigle($archive);
- print "Start processing ...\n";
+ print "Start processing ...\n" unless $q;
$t = Benchmark->new;
my @dirs = $archive->list_texts;
$count = scalar @dirs;
@@ -925,7 +936,7 @@
# because extraction can be horrible slow!
# Extract from archive
- if ($archive->extract_sigle([join('/', $corpus, $doc, $text)], $temp, $sequential_extraction ? 1 : $jobs)) {
+ if ($archive->extract_sigle($q, [join('/', $corpus, $doc, $text)], $temp, $sequential_extraction ? 1 : $jobs)) {
# Create corpus directory
my $input = catdir("$temp", $corpus);
@@ -960,7 +971,7 @@
}
else {
- print "Input is neither a directory nor an archive.\n\n";
+ print "Input is neither a directory nor an archive.\n\n" unless $q;
};
$pool->wait_all_children;
@@ -972,11 +983,12 @@
if ($to_tar && $tar_fh) {
$tar_archive->finish;
$tar_fh->close;
- print "Wrote to tar archive.\n";
+ print "Wrote to tar archive.\n" unless $q;
};
-
- print timestr(timediff(Benchmark->new, $t))."\n";
- print "Done.\n";
+ unless ($q) {
+ print timestr(timediff(Benchmark->new, $t))."\n";
+ print "Done.\n";
+ };
};
@@ -1016,23 +1028,28 @@
# Sigle is a doc sigle
if ($_ =~ m!^(?:\.[/\\])?[^/\\]+?[/\\][^/\\]+?$!) {
- print "$_ ...";
+ print "$_ ..." unless $q;
# Check if a prefix is needed
unless ($prefix_check) {
- if ($prefix = $archive->check_prefix) {
+ if ($prefix = $archive->check_prefix && !$q) {
print " with prefix ...";
};
$prefix_check = 1;
};
- print "\n";
+ unless ($q) {
+ print "\n";
- print '... ' . (
- $archive->extract_sigle([$_], $output, $sequential_extraction ? 1 : $jobs)
+ print '... ' . (
+ $archive->extract_sigle($q, [$_], $output, $sequential_extraction ? 1 : $jobs)
? '' : 'not '
- );
- print "extracted.\n";
+ );
+ print "extracted.\n";
+ }
+ else {
+ $archive->extract_sigle($q, [$_], $output, $sequential_extraction ? 1 : $jobs)
+ };
}
# Sigle is a text sigle
@@ -1041,7 +1058,7 @@
unless ($prefix_check) {
- if ($prefix = $archive->check_prefix) {
+ if ($prefix = $archive->check_prefix && !$q) {
print " with prefix ...";
};
$prefix_check = 1;
@@ -1410,6 +1427,11 @@
The L<Log::Any> log level, defaults to C<ERROR>.
+=item B<--quiet>
+
+Silence all information (non-log) outputs.
+
+
=item B<--help|-h>
Print help information.
@@ -1672,7 +1694,7 @@
=head1 COPYRIGHT AND LICENSE
-Copyright (C) 2015-2023, L<IDS Mannheim|https://www.ids-mannheim.de/>
+Copyright (C) 2015-2024, L<IDS Mannheim|https://www.ids-mannheim.de/>
Author: L<Nils Diewald|https://www.nils-diewald.de/>
diff --git a/t/annotation/mdp_dependency.t b/t/annotation/mdp_dependency.t
index f7f29a2..d5bdd97 100644
--- a/t/annotation/mdp_dependency.t
+++ b/t/annotation/mdp_dependency.t
@@ -33,7 +33,7 @@
my $dir = tempdir();
my $f_path = 'WPD15/A00/00081';
-$archive->extract_sigle([$f_path], $dir);
+$archive->extract_sigle(0, [$f_path], $dir);
ok(my $doc = KorAP::XML::Krill->new( path => $dir . '/' . $f_path));
diff --git a/t/archive.t b/t/archive.t
index 71bedce..77026d7 100644
--- a/t/archive.t
+++ b/t/archive.t
@@ -6,6 +6,7 @@
use File::Basename 'dirname';
use File::Spec::Functions qw/catfile catdir/;
use File::Temp qw/tempdir/;
+use Test::Output qw/:stdout :stderr :functions/;
use KorAP::XML::Archive;
@@ -36,7 +37,12 @@
{
local $SIG{__WARN__} = sub {};
- ok($archive->extract_sigle(['TEST/BSP/8'], $dir), 'Wrong path');
+ my $stdout = stdout_from(
+ sub {
+ ok($archive->extract_sigle(0, ['TEST/BSP/8'], $dir), 'Wrong path');
+ }
+ );
+ like($stdout, qr!Extract unzip!);
};
ok(-d catdir($dir, 'TEST'), 'Test corpus directory exists');
@@ -64,9 +70,15 @@
{
local $SIG{__WARN__} = sub {};
- ok($archive->extract_sigle(['REI/RB*', 'REI/BNG/00071'], $dir), 'Fine');
+ my $stdout = stdout_from(
+ sub {
+ ok($archive->extract_sigle(1, ['REI/RB*', 'REI/BNG/00071'], $dir), 'Fine');
+ }
+ );
+ is($stdout, '');
};
+
ok(-d catdir($dir, 'REI'), 'Test corpus directory exists');
ok(-d catdir($dir, 'REI','BNG'), 'Test corpus directory exists');
ok(-d catdir($dir, 'REI','BNG','00071'), 'Test corpus directory exists');
diff --git a/t/multiple_archives.t b/t/multiple_archives.t
index 7e2a0e7..e57bf1e 100644
--- a/t/multiple_archives.t
+++ b/t/multiple_archives.t
@@ -60,7 +60,7 @@
my $dir = tempdir(CLEANUP => 1);
{
local $SIG{__WARN__} = sub {};
- ok($archive->extract_sigle([$list[0]], $dir), 'Wrong path');
+ ok($archive->extract_sigle(0, [$list[0]], $dir), 'Wrong path');
};
ok(-d catdir($dir, 'WPD15'), 'Test corpus directory exists');
diff --git a/t/script/archive.t b/t/script/archive.t
index 1bb4f92..0f57183 100644
--- a/t/script/archive.t
+++ b/t/script/archive.t
@@ -238,6 +238,30 @@
ok(-f $json, 'Json file exists');
+# Test quiet
+
+# my $input = catfile($f, '..', 'corpus', 'archive.zip');
+# ok(-f $input, 'Input archive found');
+
+$call = join(
+ ' ',
+ 'perl', $script,
+ 'archive',
+ '--input' => '' . $input,
+ '--quiet',
+ '--output' => $output,
+ '--sigle' => 'TEST/BSP/2',
+ '--sigle' => 'TEST/BSP/5',
+ '-t' => 'Base#tokens_aggr',
+ '-m' => 'Sgbr'
+);
+
+{
+ local $SIG{__WARN__} = sub {};
+ my $out = stdout_from(sub { system($call); });
+
+ is($out, "\n", $call);
+};
done_testing;
__END__