Fix bug in comment removal: use remove_xml_comments return value
Change-Id: Ia9fa8bd9f52f7b1404412602b1c04fca46a3d88d
diff --git a/script/tei2korapxml b/script/tei2korapxml
index da2cec6..df4150a 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -259,7 +259,7 @@
# TODO: yet not tested fo big amounts of data
# must-have, otherwise comments in input could be fatal (e.g.: ...<!--\n<idsHeader...\n-->...)
- remove_xml_comments( $input_fh, $_ ); # remove HTML comments (<!--...-->)
+ $_ = remove_xml_comments( $input_fh, $_ ); # remove XML comments (<!--...-->)
if ( $data_fl && ($pos = index($_, '</' . $_TEXT_BODY)) >= 0) {
diff --git a/t/tei.t b/t/tei.t
index 928d28f..94f7577 100644
--- a/t/tei.t
+++ b/t/tei.t
@@ -7,7 +7,7 @@
unshift @INC, "$FindBin::Bin/../lib";
};
-use Test::KorAP::XML::TEI qw!korap_tempfile!;
+use Test::KorAP::XML::TEI qw!korap_tempfile test_tei2korapxml!;
use_ok('KorAP::XML::TEI', 'remove_xml_comments', 'escape_xml');
@@ -45,6 +45,21 @@
close($fh);
};
+
+subtest 'remove_xml_comments in script' => sub {
+ test_tei2korapxml(
+ template => {
+ text => "<!--\nDies ist ein\nmehrzeiligerKommentar -->Text1",
+ textSigle => 'A/B.1',
+ pattern => 'xx'
+ }
+ )
+ ->file_exists('A/B/1/data.xml')
+ ->unzip_xml('A/B/1/data.xml')
+ ->text_is('text', 'Text1');
+};
+
+
subtest 'escape_xml' => sub {
is(
escape_xml('"""'),