Zip data.xml before tokens.xml

Makes the resulting zip more (binary) compatible with private/Ingestion and
current DeReKo zips, and less likely to provoke errors in old scripts.

Change-Id: Id3fbb94a0decaaa61f9659572c5cfad6520b471e
diff --git a/script/tei2korapxml b/script/tei2korapxml
index eed322d..0e72931 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -335,6 +335,20 @@
             #
 
 
+            # Encode and escape data
+            my $escaped_data = escape_xml_minimal(encode( "UTF-8", $data ));
+            # note: the index still refers to the 'single character'-versions,
+            # which are counted as 1 (search for '&' in data.xml and see
+            # corresponding indices in $_tokens_file)
+
+            if ($_DEBUG) {
+              $log->debug("Writing (utf8-formatted) xml file $dir/$_data_file");
+            };
+
+            $zipper->new_stream("$dir/$_data_file")
+              ->print("$data_prfx1$text_id_esc$data_prfx2$escaped_data$data_sfx");
+
+
             # ~ tokenization ~
 
             if ( $_GEN_TOK_EXT ){
@@ -363,19 +377,6 @@
               $cons_tok->reset;
             };
 
-            # Encode and escape data
-            $data = escape_xml_minimal(encode( "UTF-8", $data ));
-            # note: the index still refers to the 'single character'-versions,
-            # which are counted as 1 (search for '&' in data.xml and see
-            # corresponding indices in $_tokens_file)
-
-            if ($_DEBUG) {
-              $log->debug("Writing (utf8-formatted) xml file $dir/$_data_file");
-            };
-
-            $zipper->new_stream("$dir/$_data_file")
-              ->print("$data_prfx1$text_id_esc$data_prfx2$data$data_sfx");
-
             # ~ write structures ~
 
             write_structures() if @structures;