Add KorAP::XML::TEI::Zipper factory to simplify Zip stream handling

Change-Id: I66fb1e980437f9b931d71b8bc9fde54bda2aee6f
diff --git a/script/tei2korapxml b/script/tei2korapxml
index 8fffecd..6136d47 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -41,7 +41,6 @@
 
 use XML::CompactTree::XS;
 use XML::LibXML::Reader;
-use IO::Compress::Zip qw(zip $ZipError :constants);
 use IPC::Open2 qw(open2);
 
 use FindBin;
@@ -51,6 +50,7 @@
 
 use KorAP::XML::TEI;
 use KorAP::XML::TEI::Tokenization;
+use KorAP::XML::TEI::Zipper;
 
 our $VERSION = '0.01';
 
@@ -114,13 +114,6 @@
   my ( @tok_tokens_con, @tok_tokens_agg, $txt, $offset );
 my $_base_tokenization_dir     = "base"; # name of directory for storing files of dummy tokenization (only used in func. select_tokenization)
 
-# man IO::Compress::Zip
-# At present three compression methods are supported by IO::Compress::Zip, namely
-# Store (no compression at all), Deflate, Bzip2 and LZMA.
-# Note that to create Bzip2 content, the module "IO::Compress::Bzip2" must be installed.
-# Note that to create LZMA content, the module "IO::Compress::Lzma" must be installed.
-my $_COMPRESSION_METHOD = ZIP_CM_DEFLATE; # The symbols, ZIP_CM_STORE, ZIP_CM_DEFLATE, ZIP_CM_BZIP2 and ZIP_CM_LZMA are used to select the compression method.
-
 my $_DEBUG           = 0;                            # set to 1 for minimal more debug output (no need to be parametrized)
 my $_XCT_LN          = 0;                            # only for debugging: include line numbers in elements of $tree_data
                                                      #  (see also manpage of XML::CompactTree::XS)
@@ -155,9 +148,8 @@
 # ~~~ variables ~~~
 #
 
-my $zip;                                             # IO::Compress::Zip object
-my $zip_outh;                                        # handle for zip file output (stdout)
-my $first_write;                                     # needed to decide wether to call '$zip->newStream' (for appending to zip file)
+# Initialize zipper
+my $zipper = KorAP::XML::TEI::Zipper->new;
 my $input_fh;                                        # input file handle (default: stdin)
 
 my $buf_in;                                          # text body data extracted from input document ($input_fh), further processed by XML::LibXML::Reader
@@ -287,9 +279,7 @@
 
   $input_fh = *STDIN;  # input file handle (default: stdin)
 
-  $zip_outh = *STDOUT; # output file handle (default: stdout)
-
-  $data_fl = 0; $first_write = 1;
+  $data_fl = 0;
 
   $buf_in = $data = $dir = $dir_doc = $dir_crp = "";
   $header_txt = $header_doc = $header_crp = "";
@@ -376,25 +366,11 @@
       
         print STDERR "DEBUG ($0): main(): Writing (utf8-formatted) xml file $_root_dir$dir/$_data_file\n" if $_DEBUG;
 
-        if ( $first_write ){
-
-          $first_write = 0;
-
-          # 1st time: create instance
-          $zip = new IO::Compress::Zip $zip_outh, Zip64 => 1, TextFlag => 1, Method => $_COMPRESSION_METHOD, Append => 0, Name => "$_root_dir$dir/$_data_file"
-            or die "ERROR ('$_root_dir$dir/$_data_file'): zip failed: $ZipError\n"
-
-        } else {
-
-          # closes the current compressed data stream and starts a new one.
-          $zip->newStream( Zip64 => 1, TextFlag => 1, Method => $_COMPRESSION_METHOD, Append => 1, Name => "$_root_dir$dir/$_data_file" )
-            or die "ERROR ('$_root_dir$dir/$_data_file'): zip failed: $ZipError\n"
-        }
 
         $data =~ s/(&|<|>)/$ent{$1}/g;
 
-        $zip->print( "$data_prfx1$text_id_esc$data_prfx2$data$data_sfx" );
-
+        $zipper->new_stream("$_root_dir$dir/$_data_file")
+          ->print("$data_prfx1$text_id_esc$data_prfx2$data$data_sfx");
 
         # ~ write structures ~
 
@@ -488,23 +464,10 @@
 
         print STDERR "DEBUG ($0): Writing file $_root_dir$dir/$_header_file\n" if $_DEBUG;
 
-        if ( $first_write ){
-
-          $first_write = 0;
-
-          $zip = new IO::Compress::Zip $zip_outh, Zip64 => 1, TextFlag => 1, Method => $_COMPRESSION_METHOD,
-                 Append => 0, Name => "$_root_dir$dir/$_header_file"
-            or die "ERROR ('$_root_dir$dir/$_header_file'): zip failed: $ZipError\n"
-
-        } else {
-
-          $zip->newStream( Zip64 => 1, TextFlag => 1, Method => $_COMPRESSION_METHOD, Append => 1, Name => "$_root_dir$dir/$_header_file" )
-            or die "ERROR ('$_root_dir$dir/$_header_file'): zip failed: $ZipError\n"
-        }
-
         $header_txt = encode_utf8( $header_txt );
 
-        $zip->print( "$header_prfx$header_txt" );
+        $zipper->new_stream("$_root_dir$dir/$_header_file")
+          ->print("$header_prfx$header_txt");
 
         $header_txt = "";
       }
@@ -547,7 +510,6 @@
 
       # ~ end of document header ~
 
-
       #print STDERR "end of doc header\n";
 
       # write it to header.xml
@@ -569,23 +531,10 @@
 
         print STDERR "DEBUG ($0): Writing file $_root_dir$dir_doc/$_header_file\n" if $_DEBUG;
 
-        if ( $first_write ){
-
-          $first_write = 0;
-
-          $zip = new IO::Compress::Zip $zip_outh, Zip64 => 1, TextFlag => 1, Method => $_COMPRESSION_METHOD, Append => 0,
-            Name => "$_root_dir$dir_doc/$_header_file"
-              or die "ERROR ('$_root_dir$dir_doc/$_header_file'): zip failed: $ZipError\n"
-
-        } else {
-
-          $zip->newStream( Zip64 => 1, TextFlag => 1, Method => $_COMPRESSION_METHOD, Append => 1, Name => "$_root_dir$dir_doc/$_header_file" )
-            or die "ERROR ('$_root_dir$dir_doc/$_header_file'): zip failed: $ZipError\n"
-        }
-
         $header_doc = encode_utf8( $header_doc );
 
-        $zip->print( "$header_prfx$header_doc" );
+        $zipper->new_stream("$_root_dir$dir_doc/$_header_file")
+          ->print("$header_prfx$header_doc");
 
         $header_doc = $dir_doc = "";
       }
@@ -682,23 +631,10 @@
 
         print STDERR "DEBUG ($0): Writing file $_root_dir$dir_crp/$_header_file\n" if $_DEBUG;
 
-        if ( $first_write ){
-
-          $first_write = 0;
-
-          $zip = new IO::Compress::Zip $zip_outh, Zip64 => 1, TextFlag => 1, Method => $_COMPRESSION_METHOD,
-            Append => 0, Name => "$_root_dir$dir_crp/$_header_file"
-               or die "ERROR ('$_root_dir$dir_crp/$_header_file'): zip failed: $ZipError\n";
-
-        } else {
-
-          $zip->newStream( Zip64 => 1, TextFlag => 1, Method => $_COMPRESSION_METHOD, Append => 1, Name => "$_root_dir$dir_crp/$_header_file" )
-            or die "ERROR ('$_root_dir$dir_crp/$_header_file'): zip failed: $ZipError\n"
-        }
-
         $header_crp = encode_utf8( $header_crp );
 
-        $zip->print( "$header_prfx$header_crp" );
+        $zipper->new_stream("$_root_dir$dir_crp/$_header_file")
+          ->print("$header_prfx$header_crp");
 
         $header_crp = $dir_crp = "";
       }
@@ -745,7 +681,7 @@
 
   } #end: while
 
-  $zip->close();
+  $zipper->close;
 
   ## DEPRECATED (only IDS-intern)
   if( $_GEN_TOK_BAS ){
@@ -1145,7 +1081,7 @@
         }
       }
     }else{
-      $zip->close();
+      $zipper->close;
       die "ERROR ($0): cannot retrieve token bounds from external tokenizer for text '$text_id' => Aborting ...\n";
     }
   ## 
@@ -1162,9 +1098,6 @@
 
   my ( $fname, $textid_esc, $bounds ) = @_;
 
-  $zip->newStream( Zip64 => 1, TextFlag => 1, Method => $_COMPRESSION_METHOD, Append => 1, Name => $fname)
-    or die "ERROR ('$fname'): zip failed: $ZipError\n";
-
   $output = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<?xml-model href=\"span.rng\" type=\"application/xml\""
     ." schematypens=\"http://relaxng.org/ns/structure/1.0\"?>\n\n<layer docid=\"$text_id_esc\" xmlns=\"http://ids-mannheim.de/ns/KorAP\""
     ." version=\"KorAP-0.4\">\n  <spanList>\n";
@@ -1180,7 +1113,7 @@
 
   $output .= "  </spanList>\n</layer>";
 
-  $zip->print ( "$output" );
+  $zipper->new_stream($fname)->print($output);
 
 } # end: sub write_tokenization
 
@@ -1197,9 +1130,6 @@
     return;
   }
 
-  $zip->newStream( Zip64 => 1, TextFlag => 1, Method => $_COMPRESSION_METHOD, Append => 1, Name => "$_root_dir$dir/$_structure_dir/$_structure_file" )
-      or die "ERROR ('$_root_dir$dir/$_structure_dir/$_structure_file'): zip failed: $ZipError\n";
-
   $output = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<?xml-model href=\"span.rng\" type=\"application/xml\""
            ." schematypens=\"http://relaxng.org/ns/structure/1.0\"?>\n\n<layer docid=\""
            .decode_utf8($text_id_esc)."\" xmlns=\"http://ids-mannheim.de/ns/KorAP\" version=\"KorAP-0.4\">\n  <spanList>\n";
@@ -1262,7 +1192,8 @@
 
   $output = encode_utf8( $output );
 
-  $zip->print( "$output" );
+  $zipper->new_stream("$_root_dir$dir/$_structure_dir/$_structure_file")
+    ->print($output);
 
   #print STDERR "$0: write_structures(): DONE\n";
 
@@ -1281,9 +1212,6 @@
     return;
   }
 
-  $zip->newStream( Zip64 => 1, TextFlag => 1, Method => $_COMPRESSION_METHOD, Append => 1, Name => "$_root_dir$dir/$_tokens_dir/$_tokens_file" )
-      or die "ERROR ('$_root_dir$dir/$_tokens_dir/$_tokens_file'): zip failed: $ZipError\n";
-
   $output = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<?xml-model href=\"span.rng\" type=\"application/xml\""
            ." schematypens=\"http://relaxng.org/ns/structure/1.0\"?>\n\n<layer docid=\""
            .decode_utf8($text_id_esc)."\" xmlns=\"http://ids-mannheim.de/ns/KorAP\" version=\"KorAP-0.4\">\n  <spanList>\n";
@@ -1361,7 +1289,8 @@
 
   $output = encode_utf8( $output );
 
-  $zip->print( "$output" );
+  $zipper->new_stream("$_root_dir$dir/$_tokens_dir/$_tokens_file")
+    ->print($output);
 
   #print STDERR "$0: write_tokens(): DONE\n";