Added 'extract' method support

Change-Id: I624e79f3400b1935f9b96ceaac43553ed2f4c73c
diff --git a/Changes b/Changes
index 78b7966..f4f2d5e 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,8 @@
+0.12 2016-02-27
+        - Added extract command to korapxml2krill.
+        - Fixed Mate/Dependency.
+        - Fixed skip flag in korapxml2krill.
+
 0.11 2016-02-23
         - Merged korap2krill and korap2krill_dir.
 
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index a0a8cbe..18a12b1 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -18,7 +18,7 @@
 #       Due to the kind of processing, processed metadata may be stored in
 #       a multiprocess cache instead.
 
-our $VERSION = '0.11';
+our $VERSION = '0.12';
 
 our @ATTR = qw/text_sigle
 	       doc_sigle
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 03a8088..289c2f4 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -42,9 +42,12 @@
 #
 # 2016/02/23
 # - Merge korapxml2krill and korapxml2krill_dir
+#
+# 2016/02/27
+# - Added extract function
 # ----------------------------------------------------------
 
-our $LAST_CHANGE = '2016/02/23';
+our $LAST_CHANGE = '2016/02/27';
 our $LOCAL = $FindBin::Bin;
 our $VERSION_MSG = <<"VERSION";
 Version $KorAP::XML::Krill::VERSION - diewald\@ids-mannheim.de - $LAST_CHANGE
@@ -58,6 +61,8 @@
   $cmd = shift @ARGV;
 };
 
+my (@skip, @sigle);
+
 # Parse options from the command line
 GetOptions(
   'input|i=s'   => \(my $input),
@@ -66,7 +71,8 @@
   'human|m'     => \(my $text),
   'token|t=s'   => \(my $token_base),
   'gzip|z'      => \(my $gzip),
-  'skip|s=s'    => \(my @skip),
+  'skip|s=s'    => \@skip,
+  'sigle|sg=s'  => \@sigle,
   'log|l=s'     => \(my $log_level = 'ERROR'),
   'allow|a=s'   => \(my @allow),
   'primary|p!'  => \(my $primary),
@@ -142,6 +148,9 @@
 };
 
 
+# Convert each sigle "A_B.C" in place to the path "A/B/C" (outer blanks dropped)
+s!^\s*([^_]+?)_([^\.]+?)\.(.+?)\s*$!$1/$2/$3! foreach @sigle;
+
 # Process a single file
 unless ($cmd) {
 
@@ -292,9 +301,44 @@
   stop_time;
 }
 
+# Extract the XML files of all given sigles from a Zip archive to a directory
+elsif ($cmd eq 'extract') {
+  pod2usage(%ERROR_HASH) unless $output;
+
+  # TODO: Support sigles and full archives
+
+  # -d is also false for missing paths, so no separate -e test is needed
+  unless (-d $output) {
+    print "Directory '$output' does not exist.\n\n";
+    exit(1);
+  };
+
+  # Fail loudly if the input is no regular file or no archive object is returned
+  my $archive = (defined $input && -f $input) ? KorAP::XML::Archive->new($input) : undef;
+  unless ($archive) {
+    print "Unable to extract from archive.\n\n";
+    exit(1);
+  };
+
+  unless ($archive->test_unzip) {
+    print "Unzip is not installed or incompatible.\n\n";
+    exit(1);
+  };
+
+  # Report the outcome per sigle; a failed extraction does not stop the loop
+  foreach my $path (@sigle) {
+    my $ok = $archive->extract('./' . $path, $output);
+    print "$path " . ($ok ? '' : 'not ') . "extracted.\n";
+  };
+  print "\n";
+  exit(0);  # All sigles were attempted; a completed run is exit status 0
+}
+
 # Process an archive
 elsif ($cmd eq 'archive') {
 
+  # TODO: Support sigles
+
   pod2usage(%ERROR_HASH) unless $output;
 
   if ($output && (!-e $output || !-d $output)) {
@@ -488,7 +532,11 @@
 
 =item B<archive>
 
-Process an archive as a Zip-File or a folder of KorAP-XML documents.
+Process an archive as a Zip-file or a folder of KorAP-XML documents.
+
+=item B<extract>
+
+Extract KorAP-XML files from a Zip-file.
 
 =back
 
@@ -552,6 +600,12 @@
 
 Compress the output (expects a defined output file in single processing).
 
+=item B<--sigle|-sg>
+
+Extract the given text sigles.
+Currently only supported on C<extract>.
+Can be set multiple times.
+
 =item B<--log|-l>
 
 The L<Log4perl> log level, defaults to C<ERROR>.