Improve i5 template testing
Change-Id: I1bfa22acf3ff9173d26edd9c0edaefe4c7fe3208
diff --git a/t/data/template.i5.xml b/t/data/template.i5.xml
deleted file mode 100644
index 9e0b26d..0000000
--- a/t/data/template.i5.xml
+++ /dev/null
@@ -1,32 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-I5 1.0//EN" "http://corpora.ids-mannheim.de/I5/DTD/i5.dtd">
-<idsCorpus>
- <idsHeader type="corpus">
- <fileDesc>
- <titleStmt>
- <korpusSigle>[KORPUSSIGLE]</korpusSigle>
- </titleStmt>
- </fileDesc>
- </idsHeader>
- <idsDoc version="1.0">
- <idsHeader type="document">
- <fileDesc>
- <titleStmt>
- <dokumentSigle>[DOKUMENTSIGLE]</dokumentSigle>
- </titleStmt>
- </fileDesc>
- </idsHeader>
- <idsText version="1.0">
- <idsHeader type="text">
- <fileDesc>
- <titleStmt>
- <textSigle>[TEXTSIGLE]</textSigle>
- </titleStmt>
- </fileDesc>
- </idsHeader>
- <text>
- [TEXT]
- </text>
- </idsText>
- </idsDoc>
-</idsCorpus>
diff --git a/t/script.t b/t/script.t
index 6ceadd9..742b417 100644
--- a/t/script.t
+++ b/t/script.t
@@ -14,7 +14,7 @@
unshift @INC, "$FindBin::Bin/../lib";
};
-use Test::KorAP::XML::TEI qw!korap_tempfile!;
+use Test::KorAP::XML::TEI qw!korap_tempfile i5_template!;
my $f = dirname(__FILE__);
my $script = catfile($f, '..', 'script', 'tei2korapxml');
@@ -344,23 +344,16 @@
subtest 'Test utf-8 handling' => sub {
-
- # Load template file
- $file = catfile($f, 'data', 'template.i5.xml');
- my $tpl = '';
- {
- open($fh, $file);
- $tpl .= <$fh> while !eof($fh);
- close($fh);
- }
-
# Introduce invalid utf-8 characters
my $text_sigle;
- { no warnings;
- # $text_sigle printed to file, without encoding: Aþ¿¿¿¿¿A_Bþ¿¿¿¿¿B.Cþ¿¿¿¿¿C
- # the utf8-sequence 'þ¿¿¿¿¿' encodes 32 bit of data (see 0x7FFF_FFFF in perlunicode)
- $text_sigle = "A\x{FFFF_FFFF}A_B\x{FFFF_FFFF}B.C\x{FFFF_FFFF}C" }
- # If CHECK is 0, encoding and decoding replace any malformed character with a substitution character.
+ {
+ no warnings;
+ # $text_sigle printed to file, without encoding: Aþ¿¿¿¿¿A_Bþ¿¿¿¿¿B.Cþ¿¿¿¿¿C
+    # the utf8 sequence 'þ¿¿¿¿¿' encodes 32 bits of data (see 0x7FFF_FFFF in perlunicode)
+ $text_sigle = "A\x{FFFF_FFFF}A_B\x{FFFF_FFFF}B.C\x{FFFF_FFFF}C"
+ }
+ # If CHECK is 0, encoding and decoding replace any malformed character
+ # with a substitution character.
# � = substitution character
my $text_sigle_lax = encode_utf8($text_sigle);
my $text_sigle_esc = encode('UTF-8', $text_sigle);
@@ -369,11 +362,17 @@
is(length($text_sigle_lax), 29); # Aþ¿¿¿¿¿A_Bþ¿¿¿¿¿B.Cþ¿¿¿¿¿C (byte string)
is(length($text_sigle_esc), 17); # A�A_B�B.C�C (byte string => length(�) = 3)
- { no warnings;
- $tpl =~ s!\[KORPUSSIGLE\]!A\x{FFFF_FFFF}A!;
- $tpl =~ s!\[DOKUMENTSIGLE\]!A\x{FFFF_FFFF}A_B\x{FFFF_FFFF}B!;
- $tpl =~ s!\[TEXT\]!<p>d\x{FFFF_FFFF}d e\x{FFFF_FFFF}e f\x{FFFF_FFFF}f</p>! }
- $tpl =~ s!\[TEXTSIGLE\]!$text_sigle!;
+
+ my $tpl;
+ {
+ no warnings;
+ $tpl = i5_template(
+ korpusSigle => "A\x{FFFF_FFFF}A",
+ dokumentSigle => "A\x{FFFF_FFFF}A_B\x{FFFF_FFFF}B",
+ text => "<p>d\x{FFFF_FFFF}d e\x{FFFF_FFFF}e f\x{FFFF_FFFF}f</p>",
+ textSigle => $text_sigle
+ );
+ };
my ($fh, $tplfile) = korap_tempfile('script_out4');
binmode($fh);
@@ -382,7 +381,8 @@
my (undef, $outzip) = korap_tempfile('script_out5');
- binmode STDERR, qw{ :encoding(UTF-8) }; # because output 'textid=...' goes to STDERR (see script/tei2korapxml)
+  # Encode STDERR as UTF-8, because the 'textid=...' output goes to STDERR (see script/tei2korapxml)
+ binmode STDERR, qw{ :encoding(UTF-8) };
stderr_like(
sub { `cat '$tplfile' | perl '$script' -ti > '$outzip'` },
diff --git a/t/test.t b/t/test.t
index 5b6fbe1..de0c65e 100644
--- a/t/test.t
+++ b/t/test.t
@@ -1,26 +1,53 @@
use strict;
use warnings;
use Test::More;
+use Test::XML::Loy;
use FindBin;
BEGIN {
unshift @INC, "$FindBin::Bin/../lib";
};
-use_ok('Test::KorAP::XML::TEI','korap_tempfile');
+use_ok('Test::KorAP::XML::TEI','korap_tempfile', 'i5_template');
-my ($fh, $filename) = korap_tempfile('test');
-ok($fh, 'Filehandle created');
-ok($filename, 'Filename returned');
-close($fh);
+subtest 'korap_tempfile' => sub {
+ my ($fh, $filename) = korap_tempfile('test');
+ ok($fh, 'Filehandle created');
+ ok($filename, 'Filename returned');
+ close($fh);
-like($filename, qr!KorAP-XML-TEI_test_.+?\.tmp$!, 'Filename pattern');
+ like($filename, qr!KorAP-XML-TEI_test_.+?\.tmp$!, 'Filename pattern');
-($fh, $filename) = korap_tempfile();
-ok($fh, 'Filehandle created');
-ok($filename, 'Filename returned');
-close($fh);
+ ($fh, $filename) = korap_tempfile();
+ ok($fh, 'Filehandle created');
+ ok($filename, 'Filename returned');
+ close($fh);
-like($filename, qr!KorAP-XML-TEI_.+?\.tmp$!, 'Filename pattern');
+ like($filename, qr!KorAP-XML-TEI_.+?\.tmp$!, 'Filename pattern');
+};
+
+subtest 'i5_template' => sub {
+ my $tpl = i5_template();
+ my $t = Test::XML::Loy->new($tpl);
+ $t->text_is('korpusSigle', 'AAA')
+ ->text_is('dokumentSigle', 'AAA/BBB')
+ ->text_is('textSigle', 'AAA/BBB.00000')
+ ->text_like('text', qr!Lorem ipsum!)
+ ;
+
+ $tpl = i5_template(
+ korpusSigle => 'BBB',
+ dokumentSigle => 'BBB/CCC',
+ textSigle => 'BBB/CCC.11111',
+ text => 'Ein Versuch'
+ );
+ $t = Test::XML::Loy->new($tpl);
+ $t->text_is('korpusSigle', 'BBB')
+ ->text_is('dokumentSigle', 'BBB/CCC')
+ ->text_is('textSigle', 'BBB/CCC.11111')
+ ->text_unlike('text', qr!Lorem ipsum!)
+ ->text_like('text', qr!Ein Versuch!)
+ ;
+};
done_testing;