# Source blob: 69b4ee1e1c4ebe5d244ac54ba9781e2c3165f25e (repository viewer header, not part of the test)
use strict;
use warnings;
use Test::More;

use FindBin;
BEGIN {
  # Put the lib directory next to this test directory at the front of the
  # module search path, so it is found before any other location in @INC.
  unshift @INC, "$FindBin::Bin/../lib";
};

# Helpers used below: korap_tempfile() and test_tei2korapxml().
use Test::KorAP::XML::TEI qw!korap_tempfile test_tei2korapxml!;

# Load the module under test and import the three functions exercised here.
use_ok('KorAP::XML::TEI', 'remove_xml_comments', 'escape_xml', 'escape_xml_minimal');

# Exercises remove_xml_comments($fh, $line) directly.
#
# $line is the text to clean; $fh is a filehandle that supplies the following
# lines when a comment opened in $line is not closed within it (this is what
# the multi-line cases below rely on).
subtest 'remove_xml_comments' => sub {
  # Only the handle is needed; the list assignment keeps the call in list
  # context, as in the original two-variable form.
  my ($fh) = korap_tempfile('tei');

  # Fixture 1: the remainder of a comment that the caller's line has opened,
  # followed by one line of real content.
  print $fh <<'HTML';
mehrzeiliger
Kommentar
 -->
Test
HTML

  # Lines whose comments are complete (or absent). Whitespace around a
  # removed comment is expected to survive untouched.
  is(remove_xml_comments($fh, "hallo"), "hallo",
     'line without comment is returned unchanged');
  is(remove_xml_comments($fh, "hallo <!-- Test -->"), "hallo ",
     'trailing comment is removed, preceding space kept');
  is(remove_xml_comments($fh, "<!-- Test --> hallo"), " hallo",
     'leading comment is removed, following space kept');

  # Rewind so that the fixture can be read from its first line.
  seek($fh, 0, 0);

  is(remove_xml_comments($fh, '<!--'), "Test\n",
     'comment continued in the file is skipped up to its end');

  # Rewind again and overwrite the file with fixture 2 (it is longer than
  # fixture 1, so no bytes of the old content remain).
  seek($fh, 0, 0);

  # Fixture 2: the open comment ends, a second multi-line comment follows
  # immediately, and the final line carries two adjacent inline comments.
  print $fh <<'HTML';
mehrzeiliger
Kommentar
 --><!-- Versuch
-->ist <!-- a --><!-- b --> ein Test
HTML

  seek($fh, 0, 0);

  # Cutting out '<!-- a --><!-- b -->' leaves the space before the first
  # and the space after the second comment, hence the two spaces between
  # "ist" and "ein" (consistent with the whitespace-preserving cases above).
  is(remove_xml_comments($fh, 'Dies <!--'), "Dies ist  ein Test\n",
     'consecutive multi-line and inline comments are all removed');

  # Buffered write errors only surface on close, so check its result.
  ok(close($fh), 'temporary file closed without error');
};


# End-to-end variant of the comment removal check: the input text starts with
# a comment spanning several lines and only 'Text1' is expected to remain in
# the 'text' element of the produced data.xml.
# NOTE(review): test_tei2korapxml() presumably renders the template into a TEI
# document and runs the converter script on it -- confirm against the helper.
subtest 'remove_xml_comments in script' => sub {
  my %template = (
    text      => "<!--\nDies ist ein\nmehrzeiligerKommentar -->Text1",
    textSigle => 'A/B.1',
    pattern   => 'xx'
  );

  # The path of the expected file follows from the text sigle 'A/B.1'.
  my $data_file = 'A/B/1/data.xml';

  test_tei2korapxml(template => \%template)
    ->file_exists($data_file)
    ->unzip_xml($data_file)
    ->text_is('text', 'Text1');
};


# escape_xml() is expected to replace the characters ", &, < and > by their
# XML entities. Each case is [ input, expected output, description ].
subtest 'escape_xml' => sub {
  my @cases = (
    [ '"""', '&quot;&quot;&quot;', 'double quotes' ],
    [ '&&&', '&amp;&amp;&amp;',    'ampersands' ],
    [ '<<<', '&lt;&lt;&lt;',       'less-than signs' ],
    [ '>>>', '&gt;&gt;&gt;',       'greater-than signs' ],
    [
      '<tag att1="foo" att2="bar">C&A</tag>',
      '&lt;tag att1=&quot;foo&quot; att2=&quot;bar&quot;&gt;C&amp;A&lt;/tag&gt;',
      'element with attributes and text'
    ],
  );

  for my $case (@cases) {
    my ($input, $expected, $label) = @$case;
    is(escape_xml($input), $expected, "escape_xml: $label");
  }
};

# escape_xml_minimal() is expected to escape &, < and > like escape_xml(), but
# to leave double quotes as they are. Each case is
# [ input, expected output, description ].
subtest 'escape_xml_minimal' => sub {
  my @cases = (
    [ '"""', '"""',             'double quotes stay untouched' ],
    [ '&&&', '&amp;&amp;&amp;', 'ampersands' ],
    [ '<<<', '&lt;&lt;&lt;',    'less-than signs' ],
    [ '>>>', '&gt;&gt;&gt;',    'greater-than signs' ],
    [
      '<tag att1="foo" att2="bar">C&A</tag>',
      '&lt;tag att1="foo" att2="bar"&gt;C&amp;A&lt;/tag&gt;',
      'element with attributes and text'
    ],
  );

  for my $case (@cases) {
    my ($input, $expected, $label) = @$case;
    is(escape_xml_minimal($input), $expected, "escape_xml_minimal: $label");
  }
};

# No fixed plan is declared; tell the harness that the script ran to its end.
done_testing;