Support single dash for STDIN
Change-Id: Idaa8ab3632f5787b06193892b583763276ea0fb8
diff --git a/Changes b/Changes
index 887d6ba..ffc191f 100644
--- a/Changes
+++ b/Changes
@@ -2,6 +2,7 @@
- Improve handling of unknown header types
- Check for valid sigles to avoid broken directories
- Introduce exclusivity for inline tokens handling.
+ - Use single dash for STDIN.
2.2.0 2021-08-26 Release
- Remove unnecessary branch in recursive call
diff --git a/lib/Test/KorAP/XML/TEI.pm b/lib/Test/KorAP/XML/TEI.pm
index 2208103..7f81a68 100644
--- a/lib/Test/KorAP/XML/TEI.pm
+++ b/lib/Test/KorAP/XML/TEI.pm
@@ -116,7 +116,7 @@
# we pipe stdout through a temp file.
my (undef, $fn) = korap_tempfile($pattern);
- $call = "cat '$file' | $env $call $param > $fn";
+ $call = "$env $call $param -i '$file' > $fn";
my $stderr = capture_stderr { `$call` };
# Read from written file
diff --git a/script/tei2korapxml b/script/tei2korapxml
index 0d196a5..6f3ad1f 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -29,7 +29,7 @@
1;
};
-our $VERSION = '2.2.0';
+our $VERSION = '2.3.0';
our $VERSION_MSG = "\ntei2korapxml - v$VERSION\n";
@@ -64,6 +64,7 @@
'header-file=s' => \(my $header_file = 'header'),
'tokens-file=s' => \(my $tokens_file = 'tokens'),
'log|l=s' => \(my $log_level = 'notice'),
+ '' => \(my $stdio),
'help|h' => sub {
pod2usage(
-verbose => 99,
@@ -161,12 +162,29 @@
# Input file handle (default: stdin)
-my $input_fh = *STDIN;
+my $input_fh;
-if ($input_fname ne '') {
+# Single dash was set
+if ($stdio) {
+ $input_fh = *STDIN;
+}
+
+# Input flag was passed
+elsif ($input_fname ne '') {
unless (open($input_fh, '<', $input_fname)) {
die $log->fatal("File '$input_fname' could not be opened.");
};
+}
+
+# No input to process
+else {
+ pod2usage(
+ -verbose => 99,
+ -sections => 'NAME|SYNOPSIS',
+ -msg => $VERSION_MSG,
+ -output => '-'
+ );
+ exit;
};
# Prevents segfaulting (see notes on segfault prevention)
@@ -408,7 +426,7 @@
=head1 SYNOPSIS
- cat corpus.i5.xml | tei2korapxml > corpus.korapxml.zip
+ cat corpus.i5.xml | tei2korapxml - > corpus.korapxml.zip
=head1 DESCRIPTION
@@ -416,9 +434,6 @@
L<I5|https://www.ids-mannheim.de/digspra/kl/projekte/korpora/textmodell>
based documents to the
L<KorAP-XML format|https://github.com/KorAP/KorAP-XML-Krill#about-korap-xml>.
-If no specific input is defined, data is
-read from C<STDIN>. If no specific output is defined, data is written
-to C<STDOUT>.
This program is usually called from inside another script.
@@ -490,6 +505,12 @@
=over 2
+=item B<--input|-i>
+
+The input file to process. If a single dash C<-> is passed as an argument,
+data is read from C<STDIN> instead (the dash takes precedence over this option).
+
+
=item B<--root|-r>
The root directory for output. Defaults to C<.>.
diff --git a/t/script.t b/t/script.t
index bd3f28d..f311db0 100644
--- a/t/script.t
+++ b/t/script.t
@@ -377,7 +377,7 @@
binmode STDERR;
stderr_like(
- sub { `cat '$tplfile' | perl '$script' -ti > '$outzip'` },
+ sub { `cat '$tplfile' | perl '$script' -ti - > '$outzip'` },
qr!tei2korapxml:.*? text_id=$text_sigle_esc!, # see above: print $fh encode_utf8($tpl);
);
};
@@ -632,14 +632,14 @@
# Generate zip file (unportable!)
stderr_like(
- sub { `cat '$file' | perl '$script' --skip-token-inline-annotations=0 > '$outzip'` },
+ sub { `cat '$file' | perl '$script' --skip-token-inline-annotations=0 - > '$outzip'` },
qr!tei2korapxml:.*? text_id=GOE_AGA\.00000!,
'Processing 1'
);
# TODO: there should be a better way to test this
stderr_unlike(
- sub { `cat '$file' | perl '$script' --skip-token-inline-annotations=0 > '$outzip'` },
+ sub { `cat '$file' | perl '$script' --skip-token-inline-annotations=0 - > '$outzip'` },
qr!.*undefined value.*!,
'Processing 2'
);
@@ -748,5 +748,4 @@
};
-
done_testing;