| use utf8; | 
 | use strict; | 
 | use warnings; | 
 | use Test::More; | 
 |  | 
 | use FindBin; | 
 | BEGIN { | 
 |   unshift @INC, "$FindBin::Bin/../lib"; | 
 | }; | 
 |  | 
 | use Test::KorAP::XML::TEI qw!korap_tempfile test_tei2korapxml!; | 
 |  | 
 | use_ok('KorAP::XML::TEI', 'remove_xml_comments', 'escape_xml', 'escape_xml_minimal', 'replace_entities'); | 
 |  | 
 | subtest 'remove_xml_comments' => sub { | 
 |   my ($fh, $filename) = korap_tempfile('tei'); | 
 |  | 
 |   print $fh <<'HTML'; | 
 | mehrzeiliger | 
 | Kommentar | 
 |   --> | 
 | Test | 
 | HTML | 
 |  | 
 |   is(remove_xml_comments($fh, "hallo"),"hallo"); | 
 |   is(remove_xml_comments($fh, "hallo <!-- Test -->"),"hallo "); | 
 |   is(remove_xml_comments($fh, "<!-- Test --> hallo")," hallo"); | 
 |  | 
 |   seek($fh, 0, 0); | 
 |  | 
 |   is(remove_xml_comments($fh, '<!--'), "Test\n"); | 
 |  | 
 |   seek($fh, 0, 0); | 
 |  | 
 |   print $fh <<'HTML'; | 
 | mehrzeiliger | 
 | Kommentar | 
 |   --><!-- Versuch | 
 | -->ist <!-- a --><!-- b --> ein Test | 
 | HTML | 
 |  | 
 |   seek($fh, 0, 0); | 
 |  | 
 |   is(remove_xml_comments($fh, 'Dies <!--'), "Dies ist  ein Test\n"); | 
 |  | 
 |   close($fh); | 
 | }; | 
 |  | 
 |  | 
 | subtest 'remove_xml_comments in script' => sub { | 
 |   test_tei2korapxml( | 
 |     template => { | 
 |       text => "<!--\nDies ist ein\nmehrzeiligerKommentar -->Text1", | 
 |       textSigle => 'A/B.1', | 
 |       pattern => 'xx' | 
 |     } | 
 |   ) | 
 |     ->file_exists('A/B/1/data.xml') | 
 |     ->unzip_xml('A/B/1/data.xml') | 
 |     ->text_is('text', 'Text1'); | 
 | }; | 
 |  | 
 |  | 
 | subtest 'skip missing dir in script' => sub { | 
 |   test_tei2korapxml( | 
 |     template => { | 
 |       text => "Nur ein Test", | 
 |       textSigle => '', | 
 |       pattern => 'missing_dir' | 
 |     } | 
 |   ) | 
 |     ->file_exists_not('A/B/1/data.xml') | 
 |     ->stderr_like(qr!Empty '<textSigle />' \(L29\) in header!) | 
 |     ->stderr_like(qr!skipping this text!) | 
 |     ; | 
 | }; | 
 |  | 
 |  | 
 | subtest 'escape_xml' => sub { | 
 |   is( | 
 |     escape_xml('"""'), | 
 |     '"""' | 
 |   ); | 
 |  | 
 |   is( | 
 |     escape_xml('&&&'), | 
 |     '&&&' | 
 |   ); | 
 |  | 
 |   is( | 
 |     escape_xml('<<<'), | 
 |     '<<<' | 
 |   ); | 
 |  | 
 |   is( | 
 |     escape_xml('>>>'), | 
 |     '>>>' | 
 |   ); | 
 |  | 
 |   is( | 
 |     escape_xml('<tag att1="foo" att2="bar">C&A</tag>'), | 
 |     '<tag att1="foo" att2="bar">C&A</tag>' | 
 |   ); | 
 | }; | 
 |  | 
 | subtest 'escape_xml_minimal' => sub { | 
 |   is( | 
 |       escape_xml_minimal('"""'), | 
 |       '"""' | 
 |   ); | 
 |  | 
 |   is( | 
 |       escape_xml_minimal('&&&'), | 
 |       '&&&' | 
 |   ); | 
 |  | 
 |   is( | 
 |       escape_xml_minimal('<<<'), | 
 |       '<<<' | 
 |   ); | 
 |  | 
 |   is( | 
 |       escape_xml_minimal('>>>'), | 
 |       '>>>' | 
 |   ); | 
 |  | 
 |   is( | 
 |       escape_xml_minimal('<tag att1="foo" att2="bar">C&A</tag>'), | 
 |       '<tag att1="foo" att2="bar">C&A</tag>' | 
 |   ); | 
 | }; | 
 |  | 
 | subtest 'Replace all entities' => sub { | 
 |   is( | 
 |     replace_entities('α≈„▒░▓█╗┐┌╔═─┬╦┴╩╝┘└╚│║┼╬┤╣╠├•ˇčˆ†‡ě€ƒ…&Horbar;ıι“„▄‹‘‚—–νœŒωΩ‰φπϖř”ρ›’’šŠσ□&squb;▪⊂˜™▀ŸžŽ'), | 
 |     'α≈„▒░▓█╗┐┌╔═─┬╦┴╩╝┘└╚│║┼╬┤╣╠├•ˇčˆ†‡ě€ƒ…‗ıι“„▄‹‘‚—–νœŒωΩ‰φπϖř”ρ›’‘šŠσ□■▪⊂˜™▀ŸžŽ' | 
 |   ); | 
 |   is(replace_entities('A'), 'A'); | 
 |   is(replace_entities('«'), replace_entities('«')); | 
 |   is(replace_entities('A'), 'A'); | 
 |   is(replace_entities('&<>'), '&<>') | 
 | }; | 
 |  | 
 | done_testing; |