Added test for sigles support in extract

Change-Id: I5f5596a88da6314f0d1f7e8299fef7425f89a52f
diff --git a/Changes b/Changes
index 499883c..401602e 100644
--- a/Changes
+++ b/Changes
@@ -12,6 +12,7 @@
         - Fixed setting multiple annotations in
           script.
         - Fixed output of version and help messages.
+        - Added extraction test.
 
 0.17 2016-03-22
         - Rewrite siglen to use slashes as separators.
diff --git a/MANIFEST b/MANIFEST
index 2496911..856b8f2 100755
--- a/MANIFEST
+++ b/MANIFEST
@@ -99,6 +99,7 @@
 t/sgbr/token.t
 t/script/single.t
 t/script/usage.t
+t/script/extract.t
 t/corpus/archive.zip
 t/corpus/BZK/header.xml
 t/corpus/GOE/header.xml
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 250c68d..4539f5d 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -220,20 +220,21 @@
 my $cache = Cache::FastMmap->new(
   share_file => $cache_file,
   cache_size => $cache_size,
-  init_file => $cache_init
+  init_file  => $cache_init
 );
 
+# Create batch object
 my $batch_file = KorAP::XML::Batch::File->new(
-  cache => $cache,
+  cache     => $cache,
   meta_type => $meta,
   overwrite => $overwrite,
-  foundry => $token_base_foundry,
-  layer => $token_base_layer,
-  gzip => $gzip,
-  log => $log,
-  primary => $primary,
-  pretty => $pretty,
-  anno => \@filtered_anno
+  foundry   => $token_base_foundry,
+  layer     => $token_base_layer,
+  gzip      => $gzip,
+  log       => $log,
+  primary   => $primary,
+  pretty    => $pretty,
+  anno      => \@filtered_anno
 );
 
 
@@ -313,13 +314,10 @@
 # Extract XML files
 elsif ($cmd eq 'extract') {
 
-warn '!!!!!!!!!!!!!------------> ';
-
-if ($output && (!-e $output || !-d $output)) {
-  print "Directory '$output' does not exist.\n\n";
-  exit(0);
-};
-
+  if ($output && (!-e $output || !-d $output)) {
+    print "Directory '$output' does not exist.\n\n";
+    exit(0);
+  };
 
   # TODO: Support sigles and full archives
 
@@ -333,9 +331,24 @@
     # Add further annotation archived
     $archive->attach($_) foreach @input;
 
+    # No sigles given: fall back to every text listed in the archive
+    unless (@sigle) {
+
+      # Get files (named loop variable instead of the global $_)
+      foreach my $text_path ($archive->list_texts) {
+
+        # Split path information; the leading prefix element is not needed
+        my (undef, $corpus, $doc, $text) = $archive->split_path($text_path);
+
+        # TODO: Make this OS independent
+        push @sigle, join '/', $corpus, $doc, $text;
+      };
+    };
+
     # Iterate over all given sigles and extract
     foreach (@sigle) {
       print "$_ ";
+      # TODO: Make this OS independent
       print '' . ($archive->extract('./' . $_, $output) ? '' : 'not ');
       print "extracted.\n";
     };
diff --git a/t/script/extract.t b/t/script/extract.t
new file mode 100644
index 0000000..4429a08
--- /dev/null
+++ b/t/script/extract.t
@@ -0,0 +1,113 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use File::Basename 'dirname';
+use File::Spec::Functions qw/catdir catfile/;
+use File::Temp qw/tempdir/;
+use Mojo::Util qw/slurp/;
+use Mojo::JSON qw/decode_json/;
+use IO::Uncompress::Gunzip;
+use Test::More;
+use Test::Output;
+use Data::Dumper;
+use utf8;
+
+my $f = dirname(__FILE__);  # Directory containing this test file
+my $script = catfile($f, '..', '..', 'script', 'korapxml2krill');
+
+my $call = join(
+  ' ',
+  'perl', $script,
+  'extract'
+);
+
+# Test without parameters: the usage text of 'extract' is expected
+stdout_like(
+  sub {
+    system($call);
+  },
+  qr!extract.+?Extract KorAP-XML files!s,
+  $call
+);
+
+my $input = catfile($f, '..', 'corpus', 'archive.zip');
+ok(-f $input, 'Input archive found');
+
+my $output = tempdir(CLEANUP => 1);
+ok(-d $output, 'Output directory exists');
+
+$call = join(
+  ' ',
+  'perl', $script,
+  'extract',
+  '--input' => $input,
+  '--output' => $output,
+);
+
+# Test extraction without a sigle (texts of the archive are extracted)
+stdout_like(
+  sub {
+    system($call);
+  },
+  qr!TEST/BSP/1 extracted.!s,
+  $call
+);
+
+ok(-d catdir($output, 'TEST', 'BSP', '1'), 'Directory created');
+ok(-d catdir($output, 'TEST', 'BSP', '1', 'base'), 'Directory created');
+ok(-d catdir($output, 'TEST', 'BSP', '1', 'sgbr'), 'Directory created');
+ok(-d catdir($output, 'TEST', 'BSP', '1', 'struct'), 'Directory created');
+ok(-f catfile($output, 'TEST', 'BSP', '1', 'data.xml'), 'File created');
+ok(-f catfile($output, 'TEST', 'BSP', '1', 'header.xml'), 'File created');
+ok(-d catdir($output, 'TEST', 'BSP', '2'), 'Directory created');
+ok(-d catdir($output, 'TEST', 'BSP', '3'), 'Directory created');
+
+# Check sigles (fresh output directory, so earlier results do not interfere)
+my $output2 = tempdir(CLEANUP => 1);
+ok(-d $output2, 'Output directory exists');
+
+$call = join(
+  ' ',
+  'perl', $script,
+  'extract',
+  '--input' => $input,
+  '--output' => $output2,
+  '-sg' => 'TEST/BSP/4'
+);
+
+# Test with sigle: the requested text is reported as extracted
+stdout_like(
+  sub {
+    system($call);
+  },
+  qr!TEST/BSP/4 extracted.!s,
+  $call
+);
+
+# Test with sigle: a text that was not requested is not reported
+stdout_unlike(
+  sub {
+    system($call);
+  },
+  qr!TEST/BSP/5 extracted.!s,
+  $call
+);
+
+ok(!-d catdir($output2, 'TEST', 'BSP', '1'), 'Directory not created');
+ok(!-d catdir($output2, 'TEST', 'BSP', '2'), 'Directory not created');
+ok(!-d catdir($output2, 'TEST', 'BSP', '3'), 'Directory not created');
+ok(-d catdir($output2, 'TEST', 'BSP', '4'), 'Directory created');
+ok(!-d catdir($output2, 'TEST', 'BSP', '5'), 'Directory not created');
+
+
+done_testing;
+__END__
+
+
+
+
+
+# Test sigle!
+# Test multiple archives
+
+
diff --git a/t/script/single.t b/t/script/single.t
index a05e5e9..9d8d28f 100644
--- a/t/script/single.t
+++ b/t/script/single.t
@@ -203,8 +203,6 @@
 is($json->{keywords}, 'sgbrKodex:T', 'keywords');
 is($json->{publisher}, 'Dorfblatt GmbH', 'publisher');
 
-# Test sigle!
-
 done_testing;
 __END__