Do not escape double quotes inside raw_text elements

Escaping them is not necessary and breaks tokenization compatibility.

Change-Id: Ib43733cf7264ee07b010a3478e8c4b728f7bd708
diff --git a/lib/KorAP/XML/TEI.pm b/lib/KorAP/XML/TEI.pm
index 23b6625..8f1678d 100644
--- a/lib/KorAP/XML/TEI.pm
+++ b/lib/KorAP/XML/TEI.pm
@@ -3,16 +3,21 @@
 use warnings;
 
 use Exporter 'import';
-our @EXPORT_OK = qw(remove_xml_comments escape_xml);
+our @EXPORT_OK = qw(remove_xml_comments escape_xml escape_xml_minimal);
 
 # convert '&', '<' and '>' into their corresponding sgml-entities
-my %ent = (
-  '"' => '&quot;',
+my %ent_without_quot = (
   '&' => '&amp;',
   '<' => '&lt;',
   '>' => '&gt;'
 );
 
+my %ent = (
+  %ent_without_quot,
+  '"' => '&quot;'
+);
+
+
 # remove xml comments
 sub remove_xml_comments {
   my ($fh, $html) = @_;
@@ -71,4 +76,12 @@
 };
 
 
+# Escape '&', '<' and '>' as entities, but leave double quotes untouched
+sub escape_xml_minimal {
+  my $data = shift // '';
+  $data =~ s/([&<>])/$ent_without_quot{$1}/ge;
+  return $data;
+};
+
+
 1;
diff --git a/script/tei2korapxml b/script/tei2korapxml
index ab1975c..eed322d 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -20,7 +20,7 @@
   unshift @INC, "$FindBin::Bin/../lib";
 };
 
-use KorAP::XML::TEI qw!remove_xml_comments escape_xml!;
+use KorAP::XML::TEI qw!remove_xml_comments escape_xml escape_xml_minimal!;
 use KorAP::XML::TEI::Tokenizer::External;
 use KorAP::XML::TEI::Tokenizer::Conservative;
 use KorAP::XML::TEI::Tokenizer::Aggressive;
@@ -364,7 +364,7 @@
             };
 
             # Encode and escape data
-            $data = escape_xml(encode( "UTF-8", $data ));
+            $data = escape_xml_minimal(encode( "UTF-8", $data ));
             # note: the index still refers to the 'single character'-versions,
             # which are counted as 1 (search for '&amp;' in data.xml and see
             # corresponding indices in $_tokens_file)
diff --git a/t/script.t b/t/script.t
index 4254937..9521d73 100644
--- a/t/script.t
+++ b/t/script.t
@@ -66,7 +66,8 @@
 # Uncompress GOE/AGA/00000/data.xml from zip file
   $t->unzip_xml('GOE/AGA/00000/data.xml')
     ->attr_is('raw_text', 'docid', 'GOE_AGA.00000', 'text id')
-    ->text_like('raw_text > text', qr!^Campagne in Frankreich 1792.*?uns allein begl.*cke\.$!, 'text content');
+    ->text_like('raw_text > text', qr!^Campagne in Frankreich 1792.*?uns allein begl.*cke\.$!, 'text content')
+    ->text_like('raw_text > text', qr!unter dem Titel "Kriegstheater"!, 'text content');
 
   $t->unzip_xml('GOE/AGA/00000/struct/structure.xml')
     ->text_is('span[id=s3] *[name=type]', 'Autobiographie', 'text content')
diff --git a/t/tei.t b/t/tei.t
index 94f7577..69b4ee1 100644
--- a/t/tei.t
+++ b/t/tei.t
@@ -9,7 +9,7 @@
 
 use Test::KorAP::XML::TEI qw!korap_tempfile test_tei2korapxml!;
 
-use_ok('KorAP::XML::TEI', 'remove_xml_comments', 'escape_xml');
+use_ok('KorAP::XML::TEI', 'remove_xml_comments', 'escape_xml', 'escape_xml_minimal');
 
 subtest 'remove_xml_comments' => sub {
   my ($fh, $filename) = korap_tempfile('tei');
@@ -87,5 +87,31 @@
   );
 };
 
+subtest 'escape_xml_minimal' => sub {
+  is(
+      escape_xml_minimal('"""'),
+      '"""'
+  );
+
+  is(
+      escape_xml_minimal('&&&'),
+      '&amp;&amp;&amp;'
+  );
+
+  is(
+      escape_xml_minimal('<<<'),
+      '&lt;&lt;&lt;'
+  );
+
+  is(
+      escape_xml_minimal('>>>'),
+      '&gt;&gt;&gt;'
+  );
+
+  is(
+      escape_xml_minimal('<tag att1="foo" att2="bar">C&A</tag>'),
+      '&lt;tag att1="foo" att2="bar"&gt;C&amp;A&lt;/tag&gt;'
+  );
+};
 
 done_testing;