Improve i5 template testing

Change-Id: I1bfa22acf3ff9173d26edd9c0edaefe4c7fe3208
diff --git a/lib/Test/KorAP/XML/TEI.pm b/lib/Test/KorAP/XML/TEI.pm
index e94c8bb..f43843e 100644
--- a/lib/Test/KorAP/XML/TEI.pm
+++ b/lib/Test/KorAP/XML/TEI.pm
@@ -4,7 +4,12 @@
 use File::Temp qw/tempfile/;
 use Exporter 'import';
 
-our @EXPORT_OK = qw(korap_tempfile);
+our @EXPORT_OK = qw(korap_tempfile i5_template);
+
+our $data;
+
+# Read the template from the __DATA__ section once, at load time.
+$data or do { $data .= <DATA> until eof(DATA) };
 
 # Create a temporary file and file handle
 # That will stay intact, if KORAPXMLTEI_DONTUNLINK is set to true.
@@ -22,4 +27,69 @@
   )
 };
 
+
+# Return a basic i5 document with replaceable parts.
+# Supports:
+# - korpusSigle
+# - dokumentSigle
+# - textSigle
+# - text
+sub i5_template {
+  # Caller-supplied values override the defaults (later pairs win).
+  my %value = (
+    korpusSigle   => 'AAA',
+    dokumentSigle => 'AAA/BBB',
+    textSigle     => 'AAA/BBB.00000',
+    text          => 'Lorem ipsum',
+    @_
+  );
+
+  # Substitute in a single pass, so that inserted values are never
+  # rescanned for placeholders and keys are never treated as regex.
+  # Unknown placeholders are left untouched.
+  my $tpl = $data;
+  $tpl =~ s{<% (\w+) %>}{exists $value{$1} ? $value{$1} : "<% $1 %>"}ge;
+  return $tpl;
+};
+
+
 1;
+
+
+__DATA__
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE idsCorpus
+  PUBLIC
+  "-//IDS//DTD IDS-I5 1.0//EN"
+  "http://corpora.ids-mannheim.de/I5/DTD/i5.dtd">
+<idsCorpus>
+  <idsHeader type="corpus">
+    <fileDesc>
+      <titleStmt>
+        <korpusSigle><% korpusSigle %></korpusSigle>
+      </titleStmt>
+    </fileDesc>
+  </idsHeader>
+  <idsDoc version="1.0">
+    <idsHeader type="document">
+      <fileDesc>
+        <titleStmt>
+          <dokumentSigle><% dokumentSigle %></dokumentSigle>
+        </titleStmt>
+      </fileDesc>
+    </idsHeader>
+    <idsText version="1.0">
+      <idsHeader type="text">
+        <fileDesc>
+          <titleStmt>
+            <textSigle><% textSigle %></textSigle>
+          </titleStmt>
+        </fileDesc>
+      </idsHeader>
+      <text>
+        <% text %>
+      </text>
+    </idsText>
+  </idsDoc>
+</idsCorpus>
+__END__
diff --git a/t/data/template.i5.xml b/t/data/template.i5.xml
deleted file mode 100644
index 9e0b26d..0000000
--- a/t/data/template.i5.xml
+++ /dev/null
@@ -1,32 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE idsCorpus  PUBLIC "-//IDS//DTD IDS-I5 1.0//EN" "http://corpora.ids-mannheim.de/I5/DTD/i5.dtd">
-<idsCorpus>
-  <idsHeader type="corpus">
-    <fileDesc>
-      <titleStmt>
-        <korpusSigle>[KORPUSSIGLE]</korpusSigle>
-      </titleStmt>
-    </fileDesc>
-  </idsHeader>
-  <idsDoc version="1.0">
-    <idsHeader type="document">
-      <fileDesc>
-        <titleStmt>
-          <dokumentSigle>[DOKUMENTSIGLE]</dokumentSigle>
-        </titleStmt>
-      </fileDesc>
-    </idsHeader>
-    <idsText version="1.0">
-      <idsHeader type="text">
-        <fileDesc>
-          <titleStmt>
-            <textSigle>[TEXTSIGLE]</textSigle>
-          </titleStmt>
-        </fileDesc>
-      </idsHeader>
-      <text>
-        [TEXT]
-      </text>
-    </idsText>
-  </idsDoc>
-</idsCorpus>
diff --git a/t/script.t b/t/script.t
index 6ceadd9..742b417 100644
--- a/t/script.t
+++ b/t/script.t
@@ -14,7 +14,7 @@
   unshift @INC, "$FindBin::Bin/../lib";
 };
 
-use Test::KorAP::XML::TEI qw!korap_tempfile!;
+use Test::KorAP::XML::TEI qw!korap_tempfile i5_template!;
 
 my $f = dirname(__FILE__);
 my $script = catfile($f, '..', 'script', 'tei2korapxml');
@@ -344,23 +344,16 @@
 
 
 subtest 'Test utf-8 handling' => sub {
-
-  # Load template file
-  $file = catfile($f, 'data', 'template.i5.xml');
-  my $tpl = '';
-  {
-    open($fh, $file);
-    $tpl .= <$fh> while !eof($fh);
-    close($fh);
-  }
-
   # Introduce invalid utf-8 characters
   my $text_sigle;
-  { no warnings;
-  # $text_sigle printed to file, without encoding: Aþƒ¿¿¿¿¿A_Bþƒ¿¿¿¿¿B.Cþƒ¿¿¿¿¿C
-  # the utf8-sequence 'þƒ¿¿¿¿¿' encodes 32 bit of data (see 0x7FFF_FFFF in perlunicode)
-  $text_sigle = "A\x{FFFF_FFFF}A_B\x{FFFF_FFFF}B.C\x{FFFF_FFFF}C" }
-  # If CHECK is 0, encoding and decoding replace any malformed character with a substitution character.
+  {
+    no warnings;
+    # $text_sigle printed to file, without encoding: Aþƒ¿¿¿¿¿A_Bþƒ¿¿¿¿¿B.Cþƒ¿¿¿¿¿C
+    # the utf8-sequence 'þƒ¿¿¿¿¿' encodes 32 bit of data (see 0x7FFF_FFFF in perlunicode)
+    $text_sigle = "A\x{FFFF_FFFF}A_B\x{FFFF_FFFF}B.C\x{FFFF_FFFF}C"
+  }
+  # If CHECK is 0, encoding and decoding replace any malformed character
+  # with a substitution character.
   # � = substitution character
   my $text_sigle_lax = encode_utf8($text_sigle);
   my $text_sigle_esc = encode('UTF-8', $text_sigle);
@@ -369,11 +362,17 @@
   is(length($text_sigle_lax), 29); # Aþƒ¿¿¿¿¿A_Bþƒ¿¿¿¿¿B.Cþƒ¿¿¿¿¿C (byte string)
   is(length($text_sigle_esc), 17); # A�A_B�B.C�C (byte string => length(�) = 3)
 
-  { no warnings;
-  $tpl =~ s!\[KORPUSSIGLE\]!A\x{FFFF_FFFF}A!;
-  $tpl =~ s!\[DOKUMENTSIGLE\]!A\x{FFFF_FFFF}A_B\x{FFFF_FFFF}B!;
-  $tpl =~ s!\[TEXT\]!<p>d\x{FFFF_FFFF}d e\x{FFFF_FFFF}e f\x{FFFF_FFFF}f</p>! }
-  $tpl =~ s!\[TEXTSIGLE\]!$text_sigle!;
+
+  my $tpl;
+  {
+    no warnings;
+    $tpl = i5_template(
+      korpusSigle => "A\x{FFFF_FFFF}A",
+      dokumentSigle => "A\x{FFFF_FFFF}A_B\x{FFFF_FFFF}B",
+      text => "<p>d\x{FFFF_FFFF}d e\x{FFFF_FFFF}e f\x{FFFF_FFFF}f</p>",
+      textSigle => $text_sigle
+    );
+  };
 
   my ($fh, $tplfile) = korap_tempfile('script_out4');
   binmode($fh);
@@ -382,7 +381,8 @@
 
   my (undef, $outzip) = korap_tempfile('script_out5');
 
-  binmode STDERR, qw{ :encoding(UTF-8) }; # because output 'textid=...' goes to STDERR (see script/tei2korapxml)
+  # because output 'textid=...' goes to STDERR (see script/tei2korapxml)
+  binmode STDERR, qw{ :encoding(UTF-8) };
 
   stderr_like(
     sub { `cat '$tplfile' | perl '$script' -ti > '$outzip'` },
diff --git a/t/test.t b/t/test.t
index 5b6fbe1..de0c65e 100644
--- a/t/test.t
+++ b/t/test.t
@@ -1,26 +1,53 @@
 use strict;
 use warnings;
 use Test::More;
+use Test::XML::Loy;
 
 use FindBin;
 BEGIN {
   unshift @INC, "$FindBin::Bin/../lib";
 };
 
-use_ok('Test::KorAP::XML::TEI','korap_tempfile');
+use_ok('Test::KorAP::XML::TEI','korap_tempfile', 'i5_template');
 
-my ($fh, $filename) = korap_tempfile('test');
-ok($fh, 'Filehandle created');
-ok($filename, 'Filename returned');
-close($fh);
+subtest 'korap_tempfile' => sub {
+  my ($fh, $filename) = korap_tempfile('test');
+  ok($fh, 'Filehandle created');
+  ok($filename, 'Filename returned');
+  close($fh);
 
-like($filename, qr!KorAP-XML-TEI_test_.+?\.tmp$!, 'Filename pattern');
+  like($filename, qr!KorAP-XML-TEI_test_.+?\.tmp$!, 'Filename pattern');
 
-($fh, $filename) = korap_tempfile();
-ok($fh, 'Filehandle created');
-ok($filename, 'Filename returned');
-close($fh);
+  ($fh, $filename) = korap_tempfile();
+  ok($fh, 'Filehandle created');
+  ok($filename, 'Filename returned');
+  close($fh);
 
-like($filename, qr!KorAP-XML-TEI_.+?\.tmp$!, 'Filename pattern');
+  like($filename, qr!KorAP-XML-TEI_.+?\.tmp$!, 'Filename pattern');
+};
+
+subtest 'i5_template' => sub {
+  # Without arguments, every placeholder takes its default value
+  Test::XML::Loy->new(i5_template())
+    ->text_is('korpusSigle', 'AAA')
+    ->text_is('dokumentSigle', 'AAA/BBB')
+    ->text_is('textSigle', 'AAA/BBB.00000')
+    ->text_like('text', qr!Lorem ipsum!);
+
+  # Passed values replace the defaults
+  my %custom = (
+    korpusSigle   => 'BBB',
+    dokumentSigle => 'BBB/CCC',
+    textSigle     => 'BBB/CCC.11111',
+    text          => 'Ein Versuch'
+  );
+
+  Test::XML::Loy->new(i5_template(%custom))
+    ->text_is('korpusSigle', 'BBB')
+    ->text_is('dokumentSigle', 'BBB/CCC')
+    ->text_is('textSigle', 'BBB/CCC.11111')
+    ->text_unlike('text', qr!Lorem ipsum!)
+    ->text_like('text', qr!Ein Versuch!);
+};
 
 done_testing;