Fix handling of UTF-8 characters in sigles
... by avoiding repeated UTF-8 encoding
Change-Id: Ifd600c4dcfe2e3374b8416a3e033d6bd5a79931e
diff --git a/t/script.t b/t/script.t
index 6bf98e4..57e055e 100644
--- a/t/script.t
+++ b/t/script.t
@@ -434,6 +434,31 @@
;
};
+subtest 'Check encoding with utf-8 sigle' => sub {
+ # Regression check: a sigle containing a non-ASCII character (ß) must show up with that exact character in the log and in every generated XML file.
+ # Path of the sample input: data/wdd_sample.i5.xml below $f
+ my $file = catfile($f, 'data', 'wdd_sample.i5.xml');
+ # Convert it (test_tei2korapxml is defined elsewhere); stderr must name the text id with the ß intact and must not announce debugging.
+ my $t = test_tei2korapxml(
+ tmp => 'script_sigle',
+ file => $file,
+ param => "-ti"
+ )->stderr_like(qr!tei2korapxml: .*? text_id=WDD19_ß0000\.10317!)
+ ->stderr_unlike(qr!Debugging is activated!);
+ # header.xml: the textSigle element carries the slash-separated sigle with the ß.
+ $t->unzip_xml('WDD19/ß0000/10317/header.xml')
+ ->text_is('idsHeader fileDesc titleStmt textSigle', 'WDD19/ß0000.10317');
+ # data.xml: the docid attribute of raw_text uses the underscore form of the same id.
+ $t->unzip_xml('WDD19/ß0000/10317/data.xml')
+ ->attr_is('raw_text', 'docid', 'WDD19_ß0000.10317');
+ # struct/structure.xml: the layer element has the same docid.
+ $t->unzip_xml('WDD19/ß0000/10317/struct/structure.xml')
+ ->attr_is('layer', 'docid', 'WDD19_ß0000.10317');
+ # base/tokens_conservative.xml: the layer element has the same docid.
+ $t->unzip_xml('WDD19/ß0000/10317/base/tokens_conservative.xml')
+ ->attr_is('layer', 'docid', 'WDD19_ß0000.10317');
+};
+
subtest 'Test Log' => sub {
test_tei2korapxml(
tmp => 'script_out',