Add genre classification based on metadata keywords
Partially addresses #16: some keywords and genres are not yet covered.
diff --git a/test/test-xml.sh b/test/test-xml.sh
index 3a77a95..622c554 100755
--- a/test/test-xml.sh
+++ b/test/test-xml.sh
@@ -31,6 +31,15 @@
observed=$(grep -Ec '^Copyright' target/dnb13.i5.xml)
assert_eq "$observed" "2" "spaces at <br> elements are inserted correctly"
+# Genre classification checks on target/dnb18.i5.xml: at least one textType mentions 'Arztroman', and no textType/textTypeRef element is empty.
+observed=$(xmlstarlet sel --net -t -v "count(/idsCorpus/idsDoc/idsText/idsHeader/profileDesc/textDesc/textType[contains(., 'Arztroman')])" target/dnb18.i5.xml)
+assert_gt "$observed" "0" "at least one textType contains 'Arztroman'"
+
+observed=$(xmlstarlet sel --net -t -v "count(/idsCorpus/idsDoc/idsText/idsHeader/profileDesc/textDesc/textType[normalize-space(.)=''])" target/dnb18.i5.xml)
+assert_eq "$observed" "0" "no empty textType elements"
+
+observed=$(xmlstarlet sel --net -t -v "count(/idsCorpus/idsDoc/idsText/idsHeader/profileDesc/textDesc/textTypeRef[normalize-space(.)=''])" target/dnb18.i5.xml)
+assert_eq "$observed" "0" "no empty textTypeRef elements"
exit_with_test_summary
diff --git a/xslt/epub2i5.xsl b/xslt/epub2i5.xsl
index 34ed090..277dfbf 100644
--- a/xslt/epub2i5.xsl
+++ b/xslt/epub2i5.xsl
@@ -83,6 +83,26 @@
<xsl:variable name="texttype" select="replace(($dnbBookdata//dc:subject[matches(., '^[A-Z] ')])[1], '^[A-Z] (.*)', '$1')"/>
+ <!-- Ordered keyword table: each keyRegex is matched case-insensitively against $dnbBookdata and the first matching row (in document order) supplies the genre label. -->
+ <xsl:variable name="genretable">
+ <genres>
+ <genre keyRegex="krimi" genre="Roman: Kriminalroman"/>
+ <genre keyRegex="arztroman" genre="Roman: Arztroman"/>
+ <genre keyRegex="liebesroman" genre="Roman: Liebesroman"/>
+ <genre keyRegex="science.?fiction" genre="Roman: Science-Fiction-Roman"/>
+ <genre keyRegex="horror" genre="Roman: Horrorroman"/>
+ <genre keyRegex="western" genre="Roman: Westernroman"/>
+ <genre keyRegex="fantasy" genre="Roman: Fantasyroman"/>
+ <genre keyRegex="historischer\s+roman" genre="Roman: Historischer Roman"/>
+ <genre keyRegex="erzählung" genre="Erzählung"/>
+ <genre keyRegex="novelle" genre="Novelle"/>
+ <genre keyRegex="anthologie" genre="Anthologie"/>
+ <genre keyRegex="kurzgeschichte" genre="Kurzgeschichte"/>
+ </genres>
+ </xsl:variable>
+
+ <!-- Falls back to the literal 'Roman' when no row matches (including empty metadata, which a '.' catch-all row cannot match), so the value is never empty. -->
+ <xsl:variable name="textFullGenre" select="($genretable/genres/genre[matches($dnbBookdata, @keyRegex, 'i')]/@genre, 'Roman')[1]"/>
<xsl:variable name="verlag">
<xsl:choose>
<xsl:when test="contains(($dnbBookdata//dc:publisher)[1], ':')">
@@ -307,8 +327,8 @@
</creation>
<textClass/>
<textDesc>
- <textType><xsl:value-of select="$texttype"/></textType>
- <textTypeRef><xsl:value-of select="$texttype"/></textTypeRef>
+ <textType><xsl:value-of select="$textFullGenre"/></textType> <!-- full genre label selected from $genretable, e.g. "Roman: Arztroman" -->
+ <textTypeRef><xsl:value-of select="replace($textFullGenre, '.*: *', '')"/></textTypeRef> <!-- same label with everything up to the last ':' and any following spaces removed, e.g. "Arztroman" -->
<textDomain/>
</textDesc>
</profileDesc>