Support --quiet flag (fixes #15) Change-Id: I2675131953f1ac455c85862278df9a702ad5d523

commit: a351837ef17c26ce1ade9e503a1617c624bc41b4 [log] [tgz]
author: Akron <nils@diewald-online.de> Mon Jan 22 23:29:00 2024 +0100
committer: Akron <nils@diewald-online.de> Tue Jan 23 08:47:38 2024 +0100
tree: a4a1be4faa6d8d2c212b900cd0a8535b754556ab
parent: 8ce23f791247643f9e2315bf1d046bed77eb8c48 [diff]
diff --git a/Changes b/Changes
index efd5116..684dd95 100644
--- a/Changes
+++ b/Changes

@@ -1,3 +1,6 @@
+0.52 2023-01-23
+        - Introduced 'quiet' flag.
+
 0.51 2023-12-23
         - Support ICC meta.
         - Fix date handling for years of length < 2.

diff --git a/Readme.pod b/Readme.pod
index 08aac66..a9ab89c 100644
--- a/Readme.pod
+++ b/Readme.pod

@@ -342,6 +342,11 @@
 The L<Log::Any> log level, defaults to C<ERROR>.
 
 
+=item B<--quiet>
+
+Silence all information (non-log) outputs.
+
+
 =item B<--help|-h>
 
 Print help information.
@@ -478,8 +483,8 @@
 
 =head1 About KorAP-XML
 
-KorAP-XML (Banski et al. 2012) is an implementation of the KorAP
-data model (Banski et al. 2013), where text data are stored physically
+KorAP-XML (Bański et al. 2012) is an implementation of the KorAP
+data model (Bański et al. 2013), where text data are stored physically
 separated from their interpretations (i.e. annotations).
 A text document in KorAP-XML therefore consists of several files
 containing primary data, metadata and annotations.
@@ -506,7 +511,7 @@
 The C<data.xml> contains the primary data, the C<header.xml> contains
 the metadata, and the annotation layers are stored in subfolders
 like C<base>, C<struct> or C<corenlp>
-(so-called "foundries"; Banski et al. 2013).
+(so-called "foundries"; Bański et al. 2013).
 
 Metadata is available in the TEI-P5 variant I5
 (Lüngen and Sperberg-McQueen 2012). See the documentation in
@@ -567,15 +572,15 @@
 
 =head2 References
 
-Piotr Banski, Cyril Belica, Helge Krause, Marc Kupietz, Carsten Schnober, Oliver Schonefeld, and Andreas Witt (2011):
+Piotr Bański, Cyril Belica, Helge Krause, Marc Kupietz, Carsten Schnober, Oliver Schonefeld, and Andreas Witt (2011):
 KorAP data model: first approximation, December.
 
-Piotr Banski, Peter M. Fischer, Elena Frick, Erik Ketzan, Marc Kupietz, Carsten Schnober, Oliver Schonefeld and Andreas Witt (2012):
+Piotr Bański, Peter M. Fischer, Elena Frick, Erik Ketzan, Marc Kupietz, Carsten Schnober, Oliver Schonefeld and Andreas Witt (2012):
 "The New IDS Corpus Analysis Platform: Challenges and Prospects",
 Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC 2012).
 L<PDF|http://www.lrec-conf.org/proceedings/lrec2012/pdf/789_Paper.pdf>
 
-Piotr Banski, Elena Frick, Michael Hanl, Marc Kupietz, Carsten Schnober and Andreas Witt (2013):
+Piotr Bański, Elena Frick, Michael Hanl, Marc Kupietz, Carsten Schnober and Andreas Witt (2013):
 "Robust corpus architecture: a new look at virtual collections and data access",
 Corpus Linguistics 2013. Abstract Book. Lancaster: UCREL, pp. 23-25.
 L<PDF|https://ids-pub.bsz-bw.de/frontdoor/deliver/index/docId/4485/file/Ba%c5%84ski_Frick_Hanl_Robust_corpus_architecture_2013.pdf>
@@ -604,7 +609,7 @@
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2015-2023, L<IDS Mannheim|https://www.ids-mannheim.de/>
+Copyright (C) 2015-2024, L<IDS Mannheim|https://www.ids-mannheim.de/>
 
 Author: L<Nils Diewald|https://www.nils-diewald.de/>
 

diff --git a/lib/KorAP/XML/Archive.pm b/lib/KorAP/XML/Archive.pm
index 3606086..a1b0526 100644
--- a/lib/KorAP/XML/Archive.pm
+++ b/lib/KorAP/XML/Archive.pm

@@ -142,7 +142,7 @@
 
 sub extract_all {
   my $self = shift;
-  my ($target_dir, $jobs) = @_;
+  my ($quiet, $target_dir, $jobs) = @_;
 
   my @init_cmd = (
     'unzip',          # Use unzip program
@@ -163,12 +163,12 @@
     push @cmds, \@cmd;
   };
 
-  $self->_extract($jobs, @cmds);
+  $self->_extract($quiet, $jobs, @cmds);
 };
 
 
 sub _extract {
-  my ($self, $jobs, @cmds) = @_;
+  my ($self, $quiet, $jobs, @cmds) = @_;
 
   # Only single call
   if (!$jobs || $jobs == 1) {
@@ -179,8 +179,10 @@
       # Check for return code
       my $code = $?;
 
-      print "Extract" .
-        ($code ? " $code" : '') . " " . join(' ', @$_) . "\n";
+      unless ($quiet) {
+        print "Extract" .
+          ($code ? " $code" : '') . " " . join(' ', @$_) . "\n";
+      };
     };
   }
 
@@ -191,8 +193,10 @@
       sub {
         my ($pid, $code) = @_;
         my $data = pop;
-        print "Extract [\$$pid] " .
-          ($code ? " $code" : '') . " $$data\n";
+        unless ($quiet) {
+          print "Extract [\$$pid] " .
+            ($code ? " $code" : '') . " $$data\n";
+        };
       }
     );
 
@@ -215,7 +219,7 @@
 
 # Extract from sigle
 sub extract_sigle {
-  my ($self, $sigle, $target_dir, $jobs) = @_;
+  my ($self, $quiet, $sigle, $target_dir, $jobs) = @_;
   my @cmds = $self->cmds_from_sigle($sigle);
 
   @cmds = map {
@@ -223,7 +227,7 @@
     $_;
   } @cmds;
 
-  return $self->_extract($jobs, @cmds);
+  return $self->_extract($quiet, $jobs, @cmds);
 };
 
 

diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index 7f5e904..06dd102 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm

@@ -16,7 +16,7 @@
 
 our @EXPORT_OK = qw(get_file_name get_file_name_from_glob);
 
-our $VERSION = '0.51';
+our $VERSION = '0.52';
 
 has 'path';
 has [qw/text_sigle doc_sigle corpus_sigle/];

diff --git a/script/korapxml2krill b/script/korapxml2krill
index 4185020..51352e8 100755
--- a/script/korapxml2krill
+++ b/script/korapxml2krill

@@ -216,6 +216,7 @@
   'primary|p!'  => sub {
     warn 'Primary flag no longer supported!';
   },
+  'quiet'       => \($cfg{quiet}),
   'pretty|y'    => sub {
     warn 'Pretty flag no longer supported!';
   },
@@ -266,7 +267,7 @@
               gzip to-tar log lang cache non-word-tokens
               non-verbal-tokens sequential-extraction
               temporary-extract cache-init
-              koral extract-dir jobs!) {
+              koral extract-dir jobs quiet!) {
     my $underlined = $_ =~ tr/-/_/r;
     if (!defined($cfg{$underlined}) && defined $config{$_}) {
       $cfg{$underlined} = $config{$_};
@@ -303,6 +304,7 @@
 my $base_paragraphs  = lc($cfg{base_paragraphs}  // '');
 my $base_pagebreaks  = lc($cfg{base_pagebreaks}  // '');
 my $sequential_extraction = $cfg{sequential_extraction} // 0;
+my $q                = !!($cfg{quiet}) // 0;
 
 # Get tokenization basis
 my ($token_base_foundry, $token_base_layer) = split(/#/, $token_base) if $token_base;
@@ -395,8 +397,10 @@
 
     # Create archive command
     my @archive_cmd = ($^X, $0, 'archive', @keep_argv, '-i', $_, '-o', $new_out);
-    print "Start serial processing of $_ to $new_out\n";
-    print 'Command: ', join(' ', @archive_cmd), "\n";
+    unless ($q) {
+      print "Start serial processing of $_ to $new_out\n";
+      print 'Command: ', join(' ', @archive_cmd), "\n";
+    };
 
     # Start archiving
     system @archive_cmd;
@@ -608,7 +612,7 @@
   # Sort files by length
   @input = sort { length($a) <=> length($b) } @new_input;
 
-  print 'Input is ' . join(', ', @input)."\n";
+  print 'Input is ' . join(', ', @input)."\n" unless $q;
 };
 
 
@@ -669,17 +673,21 @@
     # Iterate over all given sigles and extract
     foreach (@sigle) {
 
-      print "$_ ...\n";
+      unless ($q) {
+        print "$_ ...\n";
 
-      # TODO: Make this OS independent
-      print '... ' . (
+        # TODO: Make this OS independent
+        print '... ' . (
 
-        # TODO:
-        #   - prefix???
-        $archive->extract_sigle([$_], $output, $jobs)
-        ? '' : 'not '
-      );
-      print "extracted.\n";
+          # TODO:
+          #   - prefix???
+          $archive->extract_sigle(0, [$_], $output, $jobs)
+          ? '' : 'not '
+        );
+        print "extracted.\n";
+      } else {
+        $archive->extract_sigle(1, [$_], $output, $jobs);
+      }
     };
   }
 
@@ -714,15 +722,15 @@
       # Create a temporary directory
       if ($extract_dir eq ':temp:') {
         $extract_dir = tempdir(CLEANUP => 0);
-        print "Temporarily extract to $extract_dir\n";
+        print "Temporarily extract to $extract_dir\n" unless $q;
       };
 
       # Add some random extra to avoid clashes with multiple archives
       $extract_dir = catdir($extract_dir, random_string('cccccc'));
 
       # Extract to temporary directory
-      if ($archive->extract_all($extract_dir, $sequential_extraction ? 1: $jobs)) {
-        print "Extract sequentially to $extract_dir\n";
+      if ($archive->extract_all($q, $extract_dir, $sequential_extraction ? 1: $jobs)) {
+        print "Extract sequentially to $extract_dir\n" unless $q;
         @input = ($extract_dir);
       }
       else {
@@ -759,7 +767,7 @@
     };
 
     # Initiate the tar file
-    print "Writing to file $tar_file\n";
+    print "Writing to file $tar_file\n" unless $q;
     $tar_fh = IO::File->new($tar_file, 'w');
     $tar_fh->binmode(1);
 
@@ -790,10 +798,13 @@
       my ($pid, $code) = @_;
       my $data = pop;
 
-      print 'Convert ['. ($jobs > 0 ? "\$$pid:" : '') .
-        ($iter++) . "/$count]" .
-        ($code ? " $code" : '') .
-        ' ' . $data->[0] . "\n";
+      unless ($q) {
+        print 'Convert ['. ($jobs > 0 ? "\$$pid:" : '') .
+          $iter . "/$count]" .
+          ($code ? " $code" : '') .
+          ' ' . $data->[0] . "\n";
+      };
+      $iter++;
 
       if (!$code && $to_tar && $data->[2]) {
         my $filename = $data->[2];
@@ -821,7 +832,7 @@
 
   my $t;
   my $temp;
-  print "Reading data ...\n";
+  print "Reading data ...\n" unless $q;
 
   #  unless (Cache::FastMmap->new(
   #    share_file => $cache_file,
@@ -850,7 +861,7 @@
       last unless $it->next;
     };
 
-    print "Start processing ...\n";
+    print "Start processing ...\n" unless $q;
     $t = Benchmark->new;
     $count = scalar @dirs;
 
@@ -895,7 +906,7 @@
     # Get sigles to extract
     my $prefix = set_sigle($archive);
 
-    print "Start processing ...\n";
+    print "Start processing ...\n" unless $q;
     $t = Benchmark->new;
     my @dirs = $archive->list_texts;
     $count = scalar @dirs;
@@ -925,7 +936,7 @@
       # because extraction can be horrible slow!
 
       # Extract from archive
-      if ($archive->extract_sigle([join('/', $corpus, $doc, $text)], $temp, $sequential_extraction ? 1 : $jobs)) {
+      if ($archive->extract_sigle($q, [join('/', $corpus, $doc, $text)], $temp, $sequential_extraction ? 1 : $jobs)) {
 
         # Create corpus directory
         my $input = catdir("$temp", $corpus);
@@ -960,7 +971,7 @@
   }
 
   else {
-    print "Input is neither a directory nor an archive.\n\n";
+    print "Input is neither a directory nor an archive.\n\n" unless $q;
   };
 
   $pool->wait_all_children;
@@ -972,11 +983,12 @@
   if ($to_tar && $tar_fh) {
     $tar_archive->finish;
     $tar_fh->close;
-    print "Wrote to tar archive.\n";
+    print "Wrote to tar archive.\n" unless $q;
   };
-
-  print timestr(timediff(Benchmark->new, $t))."\n";
-  print "Done.\n";
+  unless ($q) {
+    print timestr(timediff(Benchmark->new, $t))."\n";
+    print "Done.\n";
+  };
 };
 
 
@@ -1016,23 +1028,28 @@
       # Sigle is a doc sigle
       if ($_ =~ m!^(?:\.[/\\])?[^/\\]+?[/\\][^/\\]+?$!) {
 
-        print "$_ ...";
+        print "$_ ..." unless $q;
         # Check if a prefix is needed
         unless ($prefix_check) {
 
-          if ($prefix = $archive->check_prefix) {
+          if ($prefix = $archive->check_prefix && !$q) {
             print " with prefix ...";
           };
           $prefix_check = 1;
         };
 
-        print "\n";
+        unless ($q) {
+          print "\n";
 
-        print '... ' . (
-          $archive->extract_sigle([$_], $output, $sequential_extraction ? 1 : $jobs)
+          print '... ' . (
+            $archive->extract_sigle($q, [$_], $output, $sequential_extraction ? 1 : $jobs)
             ? '' : 'not '
-        );
-        print "extracted.\n";
+          );
+          print "extracted.\n";
+        }
+        else {
+          $archive->extract_sigle($q, [$_], $output, $sequential_extraction ? 1 : $jobs)
+        };
       }
 
       # Sigle is a text sigle
@@ -1041,7 +1058,7 @@
 
         unless ($prefix_check) {
 
-          if ($prefix = $archive->check_prefix) {
+          if ($prefix = $archive->check_prefix && !$q) {
             print " with prefix ...";
           };
           $prefix_check = 1;
@@ -1410,6 +1427,11 @@
 The L<Log::Any> log level, defaults to C<ERROR>.
 
 
+=item B<--quiet>
+
+Silence all information (non-log) outputs.
+
+
 =item B<--help|-h>
 
 Print help information.
@@ -1672,7 +1694,7 @@
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2015-2023, L<IDS Mannheim|https://www.ids-mannheim.de/>
+Copyright (C) 2015-2024, L<IDS Mannheim|https://www.ids-mannheim.de/>
 
 Author: L<Nils Diewald|https://www.nils-diewald.de/>
 

diff --git a/t/annotation/mdp_dependency.t b/t/annotation/mdp_dependency.t
index f7f29a2..d5bdd97 100644
--- a/t/annotation/mdp_dependency.t
+++ b/t/annotation/mdp_dependency.t

@@ -33,7 +33,7 @@
 my $dir = tempdir();
 
 my $f_path = 'WPD15/A00/00081';
-$archive->extract_sigle([$f_path], $dir);
+$archive->extract_sigle(0, [$f_path], $dir);
 
 ok(my $doc = KorAP::XML::Krill->new( path => $dir . '/' . $f_path));
 

diff --git a/t/archive.t b/t/archive.t
index 71bedce..77026d7 100644
--- a/t/archive.t
+++ b/t/archive.t

@@ -6,6 +6,7 @@
 use File::Basename 'dirname';
 use File::Spec::Functions qw/catfile catdir/;
 use File::Temp qw/tempdir/;
+use Test::Output qw/:stdout :stderr :functions/;
 
 use KorAP::XML::Archive;
 
@@ -36,7 +37,12 @@
 
 {
   local $SIG{__WARN__} = sub {};
-  ok($archive->extract_sigle(['TEST/BSP/8'], $dir), 'Wrong path');
+  my $stdout = stdout_from(
+    sub {
+      ok($archive->extract_sigle(0, ['TEST/BSP/8'], $dir), 'Wrong path');
+    }
+  );
+  like($stdout, qr!Extract unzip!);
 };
 
 ok(-d catdir($dir, 'TEST'), 'Test corpus directory exists');
@@ -64,9 +70,15 @@
 
 {
   local $SIG{__WARN__} = sub {};
-  ok($archive->extract_sigle(['REI/RB*', 'REI/BNG/00071'], $dir), 'Fine');
+  my $stdout = stdout_from(
+    sub {
+      ok($archive->extract_sigle(1, ['REI/RB*', 'REI/BNG/00071'], $dir), 'Fine');
+    }
+  );
+  is($stdout, '');
 };
 
+
 ok(-d catdir($dir, 'REI'), 'Test corpus directory exists');
 ok(-d catdir($dir, 'REI','BNG'), 'Test corpus directory exists');
 ok(-d catdir($dir, 'REI','BNG','00071'), 'Test corpus directory exists');

diff --git a/t/multiple_archives.t b/t/multiple_archives.t
index 7e2a0e7..e57bf1e 100644
--- a/t/multiple_archives.t
+++ b/t/multiple_archives.t

@@ -60,7 +60,7 @@
 my $dir = tempdir(CLEANUP => 1);
 {
   local $SIG{__WARN__} = sub {};
-  ok($archive->extract_sigle([$list[0]], $dir), 'Wrong path');
+  ok($archive->extract_sigle(0, [$list[0]], $dir), 'Wrong path');
 };
 
 ok(-d catdir($dir, 'WPD15'), 'Test corpus directory exists');

diff --git a/t/script/archive.t b/t/script/archive.t
index 1bb4f92..0f57183 100644
--- a/t/script/archive.t
+++ b/t/script/archive.t

@@ -238,6 +238,30 @@
 
 ok(-f $json, 'Json file exists');
 
+# Test quiet
+
+# my $input = catfile($f, '..', 'corpus', 'archive.zip');
+# ok(-f $input, 'Input archive found');
+
+$call = join(
+  ' ',
+  'perl', $script,
+  'archive',
+  '--input' => '' . $input,
+  '--quiet',
+  '--output' => $output,
+  '--sigle' => 'TEST/BSP/2',
+  '--sigle' => 'TEST/BSP/5',
+  '-t' => 'Base#tokens_aggr',
+  '-m' => 'Sgbr'
+);
+
+{
+  local $SIG{__WARN__} = sub {};
+  my $out = stdout_from(sub { system($call); });
+
+  is($out, "\n", $call);
+};
 
 done_testing;
 __END__
commit	a351837ef17c26ce1ade9e503a1617c624bc41b4	[log] [tgz]
author	Akron <nils@diewald-online.de>	Mon Jan 22 23:29:00 2024 +0100
committer	Akron <nils@diewald-online.de>	Tue Jan 23 08:47:38 2024 +0100
tree	a4a1be4faa6d8d2c212b900cd0a8535b754556ab
parent	8ce23f791247643f9e2315bf1d046bed77eb8c48 [diff]