Change utf8_encode and utf8_decode to
ensure strictly valid UTF-8 output by using utf-8-strict instead of utf8
(see 'encode_utf8' and 'UTF-8 vs. utf8 vs. UTF8' in the Encode documentation,
and 'What's the difference between "UTF-8" and "utf8"?' in perlunifaq).
Change-Id: I6d8797ddd24339ecf2ab4ccacad3801a6a054ca2
diff --git a/t/data/template.i5.xml b/t/data/template.i5.xml
new file mode 100644
index 0000000..9e0b26d
--- /dev/null
+++ b/t/data/template.i5.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE idsCorpus PUBLIC "-//IDS//DTD IDS-I5 1.0//EN" "http://corpora.ids-mannheim.de/I5/DTD/i5.dtd">
+<idsCorpus>
+ <idsHeader type="corpus">
+ <fileDesc>
+ <titleStmt>
+ <korpusSigle>[KORPUSSIGLE]</korpusSigle>
+ </titleStmt>
+ </fileDesc>
+ </idsHeader>
+ <idsDoc version="1.0">
+ <idsHeader type="document">
+ <fileDesc>
+ <titleStmt>
+ <dokumentSigle>[DOKUMENTSIGLE]</dokumentSigle>
+ </titleStmt>
+ </fileDesc>
+ </idsHeader>
+ <idsText version="1.0">
+ <idsHeader type="text">
+ <fileDesc>
+ <titleStmt>
+ <textSigle>[TEXTSIGLE]</textSigle>
+ </titleStmt>
+ </fileDesc>
+ </idsHeader>
+ <text>
+ [TEXT]
+ </text>
+ </idsText>
+ </idsDoc>
+</idsCorpus>