Switch input encoding based on XML processing instruction
Change-Id: I89e20c8af762615d37c216b0c89227fc3644fcb3
diff --git a/t/script.t b/t/script.t
index 88b3d2d..6bf98e4 100644
--- a/t/script.t
+++ b/t/script.t
@@ -306,11 +306,11 @@
my (undef, $outzip) = korap_tempfile('script_out5');
# because output 'textid=...' goes to STDERR (see script/tei2korapxml)
- binmode STDERR, qw{ :encoding(UTF-8) };
+ binmode STDERR;
stderr_like(
sub { `cat '$tplfile' | perl '$script' -ti > '$outzip'` },
- qr!tei2korapxml: .*? text_id=$text_sigle_lax!, # see above: print $fh encode_utf8($tpl);
+ qr!tei2korapxml: .*? text_id=$text_sigle_esc!, # see above: print $fh encode_utf8($tpl);
);
};
@@ -408,6 +408,32 @@
};
+subtest 'Check input encoding' => sub {
+ # Both runs below must report the same text id on STDERR and yield the same strings in data.xml.
+ # Case 1: 'goe_sample.i5.xml' (presumably the UTF-8 encoded sample, cf. the tmp name; TODO confirm)
+ test_tei2korapxml(
+ file => catfile($f, 'data', 'goe_sample.i5.xml'),
+ env => 'KORAPXMLTEI_INLINE=1',
+ tmp => 'script_utf8_enc'
+ )
+ ->stderr_like(qr!tei2korapxml: .*? text_id=GOE_AGA\.00000!)
+ ->unzip_xml('GOE/AGA/00000/data.xml')
+ ->content_like(qr/\Q"Kriegstheater"\E/)
+ ->content_like(qr/\QTür'\E/)
+ ;
+ # Case 2: 'goe_sample.i5.iso.xml' (presumably an ISO-8859 encoded copy of the same sample; TODO confirm)
+ test_tei2korapxml(
+ file => catfile($f, 'data', 'goe_sample.i5.iso.xml'),
+ env => 'KORAPXMLTEI_INLINE=1',
+ tmp => 'script_iso_enc'
+ )
+ ->stderr_like(qr!tei2korapxml: .*? text_id=GOE_AGA\.00000!)
+ ->unzip_xml('GOE/AGA/00000/data.xml')
+ ->content_like(qr/\Q"Kriegstheater"\E/)
+ ->content_like(qr/\QTür'\E/)
+ ;
+};
+
subtest 'Test Log' => sub {
test_tei2korapxml(
tmp => 'script_out',