Merge "Bugfix: intern tokenization"
diff --git a/t/script.t b/t/script.t
index eaf6348..85c2cea 100644
--- a/t/script.t
+++ b/t/script.t
@@ -2,7 +2,7 @@
use warnings;
use File::Basename 'dirname';
use File::Spec::Functions qw/catfile/;
-use File::Temp ':POSIX';
+use File::Temp qw/tempfile/;
use IO::Uncompress::Unzip qw(unzip $UnzipError);
use Test::More;
@@ -10,6 +10,11 @@
use Test::XML::Loy;
+our %ENV;
+# By default, the temporary file created by tempfile() is removed on exit.
+# To keep temporary files, run e.g. 'KORAPXMLTEI_DONTUNLINK=1 prove -lr t/script.t'
+my $_UNLINK = $ENV{KORAPXMLTEI_DONTUNLINK}?0:1;
+
my $f = dirname(__FILE__);
my $script = catfile($f, '..', 'script', 'tei2korapxml');
ok(-f $script, 'Script found');
@@ -29,11 +34,24 @@
# Load example file
my $file = catfile($f, 'data', 'goe_sample.i5.xml');
-my $outzip = tmpnam();
+
+my ($fh, $outzip) = tempfile("KorAP-XML-TEI_script_XXXXXXXXXX", SUFFIX => ".tmp", TMPDIR => 1, UNLINK => $_UNLINK);
# Generate zip file (unportable!)
stderr_like(
sub { `cat '$file' | perl '$script' > '$outzip'` },
+# Alternative approaches that write through $fh (in that case, better use the File::Temp OO interface):
+# sub { open STDOUT, '>&', $fh; system("cat '$file' | perl '$script'") },
+# sub { open(my $pipe, "cat '$file' | perl '$script'|"); while(<$pipe>){$fh->print($_)}; $fh->close },
+# sub {
+# defined(my $pid = fork) or die "fork: $!";
+# if (!$pid) {
+# open STDOUT, '>&', $fh;
+# exec "cat '$file' | perl '$script'"
+# }
+# waitpid $pid, 0;
+# $fh->close;
+# },
qr!tei2korapxml: .*? text_id=GOE_AGA\.00000!,
'Processing'
);
@@ -45,6 +63,7 @@
ok($zip, 'Zip-File is created');
+# TODO: check wrong encoding in header-files (compare with input document)!
# Read GOE/header.xml
my $header_xml = '';
$header_xml .= $zip->getline while !$zip->eof;
@@ -110,6 +129,7 @@
# Read GOE/AGA/00000/struct/structure.xml
my $struct_xml = '';
$struct_xml .= $zip->getline while !$zip->eof;
+
ok($zip->close, 'Closed');
$t = Test::XML::Loy->new($struct_xml);
diff --git a/t/tei.t b/t/tei.t
index f9b5959..b602df6 100644
--- a/t/tei.t
+++ b/t/tei.t
@@ -8,9 +8,14 @@
unshift @INC, "$FindBin::Bin/../lib";
};
+our %ENV;
+# By default, the temporary file created by tempfile() is removed on exit.
+# To keep temporary files, run e.g. 'KORAPXMLTEI_DONTUNLINK=1 prove -lr t/tei.t'
+my $_UNLINK = $ENV{KORAPXMLTEI_DONTUNLINK}?0:1;
+
use_ok('KorAP::XML::TEI', 'remove_xml_comments');
-my ($fh, $filename) = tempfile();
+my ($fh, $filename) = tempfile("KorAP-XML-TEI_tei_XXXXXXXXXX", SUFFIX => ".tmp", TMPDIR => 1, UNLINK => $_UNLINK);
print $fh <<'HTML';
mehrzeiliger
diff --git a/t/tokenization-external.t b/t/tokenization-external.t
index e867aed..742c656 100644
--- a/t/tokenization-external.t
+++ b/t/tokenization-external.t
@@ -2,9 +2,7 @@
use warnings;
use Test::More;
use File::Basename 'dirname';
-use Data::Dumper;
use File::Spec::Functions qw/catfile/;
-use File::Temp 'tempfile';
use Test::XML::Loy;
use FindBin;
diff --git a/t/tokenization.t b/t/tokenization.t
index dfe05bb..d063eed 100644
--- a/t/tokenization.t
+++ b/t/tokenization.t
@@ -3,7 +3,6 @@
use Test::More;
use File::Basename 'dirname';
use File::Spec::Functions qw/catfile/;
-use File::Temp 'tempfile';
use FindBin;
BEGIN {
diff --git a/t/zipper.t b/t/zipper.t
index c9eabf0..6147f62 100644
--- a/t/zipper.t
+++ b/t/zipper.t
@@ -1,9 +1,8 @@
use strict;
use warnings;
use Test::More;
-use File::Basename 'dirname';
use File::Spec::Functions qw/catfile/;
-use File::Temp ':POSIX';
+use File::Temp qw/tempfile/;
use IO::Uncompress::Unzip;
use FindBin;
@@ -11,12 +10,18 @@
unshift @INC, "$FindBin::Bin/../lib";
};
+our %ENV;
+# By default, the temporary file created by tempfile() is removed on exit.
+# To keep temporary files, run e.g. 'KORAPXMLTEI_DONTUNLINK=1 prove -lr t/zipper.t'
+my $_UNLINK = $ENV{KORAPXMLTEI_DONTUNLINK}?0:1;
+
require_ok('KorAP::XML::TEI::Zipper');
my $data;
-my $outzip = tmpnam();
+my ($fh, $outzip) = tempfile("KorAP-XML-TEI_zipper_XXXXXXXXXX", SUFFIX => ".tmp", TMPDIR => 1, UNLINK => $_UNLINK);
my $zip = KorAP::XML::TEI::Zipper->new($outzip);
+$fh->close;
ok($zip, 'Zipper initialized');