Automatically replace entities with their corresponding characters

The source of the symbolic entities is the entity file of the TEI-I5 DTD
(http://corpora.ids-mannheim.de/I5/DTD/ids-lat1.ent), which contains all
entities that have been used in DeReKo. The list is very similar, but not
identical, to the Mathematical, Greek and Symbolic characters for XHTML
(http://www.w3.org/MarkUp/DTD/xhtml-lat1.ent).

Numeric decimal and hexadecimal entities are replaced, too.

Change-Id: Id00376c6953e9ac96ef04703872f38d37ef68096
diff --git a/t/tei.t b/t/tei.t
index 69b4ee1..be5cb3d 100644
--- a/t/tei.t
+++ b/t/tei.t
@@ -1,3 +1,4 @@
+use utf8;
 use strict;
 use warnings;
 use Test::More;
@@ -9,7 +10,7 @@
 
 use Test::KorAP::XML::TEI qw!korap_tempfile test_tei2korapxml!;
 
-use_ok('KorAP::XML::TEI', 'remove_xml_comments', 'escape_xml', 'escape_xml_minimal');
+use_ok('KorAP::XML::TEI', 'remove_xml_comments', 'escape_xml', 'escape_xml_minimal', 'replace_entities');
 
 subtest 'remove_xml_comments' => sub {
   my ($fh, $filename) = korap_tempfile('tei');
@@ -114,4 +115,15 @@
   );
 };
 
+subtest 'Replace all entities' => sub {  # checks replace_entities() on symbolic and numeric entity references
+  is(  # NOTE(review): &rsquor; is assumed to map to U+2018 (ISO 8879 mapping) - confirm against ids-lat1.ent
+    replace_entities('α≈„▒░▓█╗┐┌╔═─┬╦┴╩╝┘└╚│║┼╬┤╣╠├•ˇčˆ†‡ě€ƒ…&Horbar;ıι“„▄‹‘‚—–νœŒωΩ‰φπϖř”ρ›’&rsquor;šŠσ□&squb;▪⊂˜™▀ŸžŽ'),
+    'α≈„▒░▓█╗┐┌╔═─┬╦┴╩╝┘└╚│║┼╬┤╣╠├•ˇčˆ†‡ě€ƒ…‗ıι“„▄‹‘‚—–νœŒωΩ‰φπϖř”ρ›’‘šŠσ□■▪⊂˜™▀ŸžŽ', 'symbolic entities are replaced, literal characters are kept'
+  );
+  is(replace_entities('&#65;'), 'A', 'decimal numeric entity is replaced');
+  is(replace_entities('&#171;'), replace_entities('&laquo;'), 'numeric and symbolic form yield the same character');  # assumes &laquo; is defined in ids-lat1.ent - TODO confirm
+  is(replace_entities('&#x41;'), 'A', 'hexadecimal numeric entity is replaced');
+  is(replace_entities('&<>'), '&<>', 'bare &, < and > pass through unchanged');
+};
+
 done_testing;