Added extraction method for documents in archives
Change-Id: Id4ea7d9801a5750c77f81a2251d389adb6e06d31
diff --git a/t/annotation/mdp_dependency.t b/t/annotation/mdp_dependency.t
index 30a8708..478bbc1 100644
--- a/t/annotation/mdp_dependency.t
+++ b/t/annotation/mdp_dependency.t
@@ -42,7 +42,7 @@
my $dir = tempdir();
my $f_path = 'WPD15/A00/00081';
-$archive->extract($f_path, $dir);
+$archive->extract_text($f_path, $dir);
ok(my $doc = KorAP::XML::Krill->new( path => $dir . '/' . $f_path));
diff --git a/t/archive.t b/t/archive.t
index 3d40549..0a17165 100644
--- a/t/archive.t
+++ b/t/archive.t
@@ -18,6 +18,8 @@
ok($archive->test, 'Test archive');
like($archive->path(0), qr/archive\.zip$/, 'Archive path');
+ok($archive->check_prefix, 'Archive has dot prefix');
+
my @list = $archive->list_texts;
is(scalar @list, 10, 'Found all tests');
is($list[0], './TEST/BSP/1', 'First document');
@@ -33,7 +35,7 @@
{
local $SIG{__WARN__} = sub {};
- ok($archive->extract('./TEST/BSP/8', $dir), 'Wrong path');
+ ok($archive->extract_text('./TEST/BSP/8', $dir), 'Wrong path');
};
ok(-d catdir($dir, 'TEST'), 'Test corpus directory exists');
@@ -41,6 +43,10 @@
ok(-d catdir($dir, 'TEST', 'BSP'), 'Test doc directory exists');
ok(-f catdir($dir, 'TEST', 'BSP', 'header.xml'), 'Test doc header exists');
+$file = catfile(dirname(__FILE__), 'corpus','archive_rei.zip');
+$archive = KorAP::XML::Archive->new($file);
+ok(!$archive->check_prefix, 'Archive has no prefix');
+
# TODO: Test attaching!
diff --git a/t/corpus/archive_rei.zip b/t/corpus/archive_rei.zip
new file mode 100644
index 0000000..8a00aaa
--- /dev/null
+++ b/t/corpus/archive_rei.zip
Binary files differ
diff --git a/t/multiple_archives.t b/t/multiple_archives.t
index 7865101..1da4e9d 100644
--- a/t/multiple_archives.t
+++ b/t/multiple_archives.t
@@ -60,7 +60,7 @@
my $dir = tempdir(CLEANUP => 1);
{
local $SIG{__WARN__} = sub {};
- ok($archive->extract($list[0], $dir), 'Wrong path');
+ ok($archive->extract_text($list[0], $dir), 'Wrong path');
};
ok(-d catdir($dir, 'WPD15'), 'Test corpus directory exists');
diff --git a/t/script/extract.t b/t/script/extract.t
index 0f8f0b7..6c689f9 100644
--- a/t/script/extract.t
+++ b/t/script/extract.t
@@ -104,6 +104,42 @@
ok(-d catdir($output2, 'TEST', 'BSP', '4'), 'Directory created');
ok(!-d catdir($output2, 'TEST', 'BSP', '5'), 'Directory created');
+
+# Test with document sigle
+my $input_rei = catdir($f, '..', 'corpus', 'archive_rei.zip');
+ok(-f $input_rei, 'Input archive found');
+
+$call = join(
+ ' ',
+ 'perl', $script,
+ 'extract',
+ '--input' => $input_rei,
+ '--output' => $output2,
+ '-sg' => 'REI/BNG'
+);
+
+# The requested document sigle is reported as extracted
+stdout_like(
+ sub {
+ system($call);
+ },
+ qr!REI/BNG extracted!s,
+ $call
+);
+
+# Documents outside the requested sigle are not reported as extracted
+stdout_unlike(
+ sub {
+ system($call);
+ },
+ qr!REI/RBR extracted!s,
+ $call
+);
+
+ok(-d catdir($output2, 'REI', 'BNG', '00071'), 'Directory created');
+ok(-d catdir($output2, 'REI', 'BNG', '00128'), 'Directory created');
+ok(!-d catdir($output2, 'REI', 'RBR', '00610'), 'Directory not created');
+
# Check multiple archives
$output = tempdir(CLEANUP => 1);
ok(-d $output, 'Output directory exists');