# Test script for the XML helper functions exported by KorAP::XML::TEI:
# remove_xml_comments, escape_xml and escape_xml_minimal.
use strict;
use warnings;

use FindBin;
use Test::More;

# Prepend the lib directory one level above this script's directory to @INC
# at compile time, so the `use` statements below are resolved against it first.
BEGIN {
  unshift @INC, "$FindBin::Bin/../lib";
}

# Test helpers used by the subtests in this file.
use Test::KorAP::XML::TEI qw(korap_tempfile test_tei2korapxml);

# Load the module under test and import the three functions exercised here.
use_ok('KorAP::XML::TEI', qw(remove_xml_comments escape_xml escape_xml_minimal));

# Exercise remove_xml_comments($fh, $string) in two situations:
#   * the string holds no comment, or only comments that are closed inline;
#   * the string ends inside an open comment, in which case the expected
#     results below contain text that follows the closing '-->' in the file
#     behind $fh.
subtest 'remove_xml_comments' => sub {
  my ($fh) = korap_tempfile('tei');

  # First fixture: the tail of a comment, followed by one line of text.
  print {$fh} <<'HTML';
mehrzeiliger
Kommentar
-->
Test
HTML

  # Inputs that need nothing from the filehandle: [ input, expected ].
  my @inline_cases = (
    [ 'hallo',               'hallo'  ],
    [ 'hallo <!-- Test -->', 'hallo ' ],
    [ '<!-- Test --> hallo', ' hallo' ],
  );
  for my $case (@inline_cases) {
    my ($input, $expected) = @{$case};
    is(remove_xml_comments($fh, $input), $expected);
  }

  # Rewind so the open comment below is completed from the first fixture.
  seek $fh, 0, 0;
  is(remove_xml_comments($fh, '<!--'), "Test\n");

  # Second fixture, written from offset 0: several comments in a row, some
  # of them spanning lines.
  # NOTE(review): whitespace inside this fixture and in the expected string
  # below should be confirmed against the repository copy of this test.
  seek $fh, 0, 0;
  print {$fh} <<'HTML';
mehrzeiliger
Kommentar
--><!-- Versuch
-->ist <!-- a --><!-- b --> ein Test
HTML

  seek $fh, 0, 0;
  is(remove_xml_comments($fh, 'Dies <!--'), "Dies ist ein Test\n");

  close $fh;
};


# Hand test_tei2korapxml() a template whose text starts with a multi-line
# XML comment, then check that A/B/1/data.xml exists in the result and that
# its 'text' element contains only 'Text1'.
# NOTE(review): presumably test_tei2korapxml runs the conversion script on a
# document built from the template -- confirm in Test::KorAP::XML::TEI.
subtest 'remove_xml_comments in script' => sub {
  my %template = (
    text      => "<!--\nDies ist ein\nmehrzeiligerKommentar -->Text1",
    textSigle => 'A/B.1',
    pattern   => 'xx',
  );
  my $data_file = 'A/B/1/data.xml';

  my $result = test_tei2korapxml(template => \%template);

  $result->file_exists($data_file)
    ->unzip_xml($data_file)
    ->text_is('text', 'Text1');
};


# Check escape_xml() on each of the characters ", &, < and > in isolation and
# on a complete element with attributes.
# The expected values spell out the predefined XML entity references; an
# expectation equal to the raw input would not exercise any escaping.
# NOTE(review): assumes escape_xml emits the named entities &quot; &amp; &lt;
# and &gt; -- confirm against KorAP::XML::TEI.
subtest 'escape_xml' => sub {
  is(
    escape_xml('"""'),
    '&quot;&quot;&quot;',
    'double quotes are escaped'
  );

  is(
    escape_xml('&&&'),
    '&amp;&amp;&amp;',
    'ampersands are escaped'
  );

  is(
    escape_xml('<<<'),
    '&lt;&lt;&lt;',
    'opening angle brackets are escaped'
  );

  is(
    escape_xml('>>>'),
    '&gt;&gt;&gt;',
    'closing angle brackets are escaped'
  );

  is(
    escape_xml('<tag att1="foo" att2="bar">C&A</tag>'),
    '&lt;tag att1=&quot;foo&quot; att2=&quot;bar&quot;&gt;C&amp;A&lt;/tag&gt;',
    'mixed markup is escaped'
  );
};

# Check escape_xml_minimal() with the same inputs as the escape_xml subtest.
# The expected values spell out entity references for &, < and >; an
# expectation equal to the raw input would not exercise any escaping.
# NOTE(review): assumes the minimal variant escapes &, < and > but leaves
# double quotes untouched -- confirm against KorAP::XML::TEI.
subtest 'escape_xml_minimal' => sub {
  is(
    escape_xml_minimal('"""'),
    '"""',
    'double quotes are kept'
  );

  is(
    escape_xml_minimal('&&&'),
    '&amp;&amp;&amp;',
    'ampersands are escaped'
  );

  is(
    escape_xml_minimal('<<<'),
    '&lt;&lt;&lt;',
    'opening angle brackets are escaped'
  );

  is(
    escape_xml_minimal('>>>'),
    '&gt;&gt;&gt;',
    'closing angle brackets are escaped'
  );

  is(
    escape_xml_minimal('<tag att1="foo" att2="bar">C&A</tag>'),
    '&lt;tag att1="foo" att2="bar"&gt;C&amp;A&lt;/tag&gt;',
    'mixed markup is escaped, quotes are kept'
  );
};

# No fixed plan was declared; tell Test::More that all tests have run.
done_testing();