Introducing --output parameter
Change-Id: Ibeb1bb625f9ef30ccb6207d8c4d20d4c0d0c9056
diff --git a/Changes b/Changes
index c060b03..6c8f440 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+2.6.0 2023-06-06
+ - Add --output (-o) parameter.
+
2.5.0 2023-01-24
- Upgrade minimal Perl version to 5.36 to improve
unicode handling.
diff --git a/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm b/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
index 665d2e2..13baa9d 100644
--- a/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
+++ b/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
@@ -4,7 +4,7 @@
use warnings;
use File::Share ':all';
-our $VERSION = '2.5.0';
+our $VERSION = '2.6.0';
my $MIN_JAVA_VERSION = 17;
use constant {
diff --git a/lib/KorAP/XML/TEI/Zipper.pm b/lib/KorAP/XML/TEI/Zipper.pm
index 3f96370..f4f43af 100644
--- a/lib/KorAP/XML/TEI/Zipper.pm
+++ b/lib/KorAP/XML/TEI/Zipper.pm
@@ -33,7 +33,7 @@
$root_dir =~ s/^\.?\/+//;
};
- bless [$out // '-', undef, $root_dir // ''], $class;
+ bless [$out || '-', undef, $root_dir // ''], $class;
};
diff --git a/lib/Test/KorAP/XML/TEI.pm b/lib/Test/KorAP/XML/TEI.pm
index 7f81a68..5542c33 100644
--- a/lib/Test/KorAP/XML/TEI.pm
+++ b/lib/Test/KorAP/XML/TEI.pm
@@ -181,6 +181,41 @@
);
};
+# Check for stdout equality: run an 'is' test of the stored
+# stdout against the expected value.
+sub stdout_is {
+  my ($self, $expected, $desc) = @_;
+
+  # Collect the arguments first, then delegate to the _test method
+  my @check = ('is', $self->{stdout}, $expected);
+  push @check, _desc($desc, 'exact match for stdout');
+  return $self->_test(@check);
+};
+
+
+# Check for stdout similarity: run a 'like' test of the stored
+# stdout against the given pattern.
+sub stdout_like {
+  my ($self, $pattern, $desc) = @_;
+
+  # Collect the arguments first, then delegate to the _test method
+  my @check = ('like', $self->{stdout}, $pattern);
+  push @check, _desc($desc, 'similar to stdout');
+  return $self->_test(@check);
+};
+
+
+# Check for stdout non-similarity: run an 'unlike' test of the
+# stored stdout against the given pattern.
+sub stdout_unlike {
+  my ($self, $pattern, $desc) = @_;
+
+  # Collect the arguments first, then delegate to the _test method
+  my @check = ('unlike', $self->{stdout}, $pattern);
+  push @check, _desc($desc, 'not similar to stdout');
+  return $self->_test(@check);
+};
+
# Check if a zip exists
sub file_exists {
diff --git a/script/tei2korapxml b/script/tei2korapxml
index eff4fc5..c150c04 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -24,7 +24,7 @@
use KorAP::XML::TEI::Header;
use KorAP::XML::TEI::Inline;
-our $VERSION = '2.5.0';
+our $VERSION = '2.6.0';
our $VERSION_MSG = "\ntei2korapxml - v$VERSION\n";
@@ -44,6 +44,7 @@
GetOptions(
'root|r=s' => \(my $root_dir = '.'),
'input|i=s' => \(my $input_fname = ''),
+ 'output|o=s' => \(my $output_fname = ''),
'tokenizer-call|tc=s' => \(my $tokenizer_call),
'tokenizer-korap|tk' => \(my $tokenizer_korap),
'tokenizer-internal|ti' => \(my $tokenizer_intern),
@@ -166,7 +167,7 @@
};
# Initialize zipper
-my $zipper = KorAP::XML::TEI::Zipper->new($root_dir);
+my $zipper = KorAP::XML::TEI::Zipper->new($root_dir, $output_fname);
# text directory (below $root_dir)
my $dir = '';
@@ -563,6 +564,10 @@
The input file to process. If no specific input is defined and a single
dash C<-> is passed as an argument, data is read from C<STDIN>.
+=item B<--output|-o>
+
+The output zip file to be created. If no specific output is defined,
+data is written to C<STDOUT>.
=item B<--root|-r>
diff --git a/t/script.t b/t/script.t
index 67d46d0..e87f1b7 100644
--- a/t/script.t
+++ b/t/script.t
@@ -829,4 +829,37 @@
;
};
+subtest 'Write to output' => sub {
+
+  my $temp_out = korap_tempfile('out');
+
+  # With -o the archive is expected in the file, so stdout has to be empty
+  my $t = test_tei2korapxml(
+    file => catfile($f, 'data', 'stadigmer.p5.xml'),
+    tmp => 'script_out',
+    param => '-s -ti -o "' . $temp_out . '"',
+  )->stderr_like(qr!tei2korapxml:.*? text_id=NO_000\.00000!)
+    ->stdout_is('');
+
+  # Slurp the zip in raw mode into the stdout slot (presumably read by unzip_xml)
+  open(my $zip_fh, '<:raw', $temp_out)
+    or die "Unable to open '$temp_out': $!";
+  $t->{stdout} = do { local $/; <$zip_fh> };
+  close($zip_fh);
+
+  $t->unzip_xml('NO/000/00000/data.xml')
+    ->content_like(qr/har lurt/)
+    ->content_like(qr/etter at/)
+    ->content_like(qr/en stund/)
+    ->content_like(qr/skjønner med/)
+    ->content_like(qr/og det/)
+    ->content_like(qr/stadig mer/)
+    ->content_like(qr/sitt, og/)
+    ->content_like(qr/tenkt å bli/)
+    ->content_like(qr/er både/)
+  ;
+
+  unlink $temp_out;
+};
+
done_testing;