Automatically replace entities with their corresponding characters

The source of the symbolic entities is the entity file from the TEI-I5
DTD, http://corpora.ids-mannheim.de/I5/DTD/ids-lat1.ent, which contains
all entities that have been used in DeReKo. The list is very similar,
but not identical, to the Mathematical, Greek and Symbolic characters
for XHTML, http://www.w3.org/MarkUp/DTD/xhtml-lat1.ent.

Numeric decimal and hexadecimal entities are replaced as well.

Change-Id: Id00376c6953e9ac96ef04703872f38d37ef68096
diff --git a/script/tei2korapxml b/script/tei2korapxml
index f15376f..bd54462 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -19,7 +19,7 @@
   unshift @INC, "$FindBin::Bin/../lib";
 };
 
-use KorAP::XML::TEI qw!remove_xml_comments!;
+use KorAP::XML::TEI qw!remove_xml_comments replace_entities!;
 use KorAP::XML::TEI::Tokenizer::External;
 use KorAP::XML::TEI::Tokenizer::Conservative;
 use KorAP::XML::TEI::Tokenizer::Aggressive;
@@ -236,6 +236,7 @@
   };
 
   $_ = decode($input_enc, $_);
+  $_ = replace_entities($_);
 
   if ( index($_, $_TEXT_BODY) >= 0 && m#^(.*)<${_TEXT_BODY}(?: [^>]*)?>(.*)$# ){
 
@@ -258,6 +259,7 @@
 
       $_ = remove_xml_comments( $input_fh, $_ );
       $_ = decode($input_enc, $_);
+      $_ = replace_entities($_);
 
       # ~ end of text body ~
       if (($pos = index($_, '</' . $_TEXT_BODY)) >= 0) {