blob: 13cd23a576d1de24d33b4e0eed962278e7d017be [file] [log] [blame]
Marc Kupietz28f48e12024-04-14 16:17:05 +02001<xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
2 xmlns:saxon="http://saxon.sf.net/"
Marc Kupietza5538652024-04-21 15:49:30 +02003 xmlns:xs="http://www.w3.org/2001/XMLSchema"
4 xmlns:TextClassifier="java:de.ids_mannheim.TextClassifier"
5 exclude-result-prefixes="saxon xs TextClassifier">
Marc Kupietz28f48e12024-04-14 16:17:05 +02006
<!-- Serialization settings: indented XML output carrying the IDS-I5 DOCTYPE.
     saxon:line-length is a Saxon serialization extension attribute; it is
     set to 1000 here. -->
<xsl:output
    method="xml"
    indent="yes"
    saxon:line-length="1000"
    doctype-public="-//IDS//DTD IDS-I5 1.0//EN"
    doctype-system="http://corpora.ids-mannheim.de/I5/DTD/i5.dtd"/>

<!-- Global classifier object, created through the Java extension binding of
     the TextClassifier prefix, with the model path passed as its argument. -->
<xsl:variable
    name="domainClassifier"
    select="TextClassifier:new('models/dereko_domains_s.classifier')"/>

<!-- Nodes that no template rule in the default mode matches are
     shallow-copied, so the stylesheet acts as an identity transform plus the
     overrides declared in the template rules. -->
<xsl:mode on-no-match="shallow-copy"/>
15
<!--
  Replaces every textClass element with a freshly built one.

  The text element found three levels above textClass is handed to the
  classifier extension function; its result is split at ';' into fields.
  Fields 1 and 2 become the n and target attributes of the first catRef.
  A second catRef (fields 3 and 4) is emitted only when field 3, read as
  xs:decimal, is greater than 0.0000001.
  The original children of textClass are not copied.
-->
<xsl:template match="textClass">
  <xsl:variable name="rawResult"
                select="TextClassifier:topicDomainsFromText($domainClassifier, ../../../text)"/>
  <xsl:variable name="fields" select="tokenize($rawResult, ';')"/>
  <textClass>
    <catRef n="{$fields[1]}" target="{$fields[2]}" scheme="topic"/>
    <xsl:if test="0.0000001 &lt; xs:decimal($fields[3])">
      <catRef n="{$fields[3]}" target="{$fields[4]}" scheme="topic"/>
    </xsl:if>
  </textClass>
</xsl:template>
25
<!-- Removes p and div elements whose whitespace-normalized string value is
     empty. The explicit priority of 1.0 lets this rule win over the other
     p/div rules of this stylesheet, which use 0.9 or the default priority. -->
<xsl:template match="(p|div)[normalize-space() = '']" priority="1.0"/>
29
<!-- A p that contains a div or another p somewhere below it, and that is not
     located inside an item, is rewritten as div type="section". Only the
     children are processed further; attributes of the p are not carried
     over. -->
<xsl:template match="p[not(ancestor::item)][descendant::div or descendant::p]" priority="0.9">
  <div type="section"><xsl:apply-templates/></div>
</xsl:template>
35
<!--
  Wraps inline content that is a direct child of a div (ref, emph and hi
  elements as well as text nodes) in a paragraph.

  Elements: the children of the matched element are processed inside the
  new p; the element itself is not copied.

  Text nodes: a text node has no children, so xsl:apply-templates would
  select nothing and the text would be replaced by an empty p. The text
  value is therefore written into the paragraph explicitly. Text that
  consists only of whitespace or non-breaking spaces produces no paragraph,
  in line with the rules of this stylesheet that remove empty paragraphs.
-->
<xsl:template match="(ref|emph|hi|text())[parent::div]" priority="0.9">
  <xsl:choose>
    <xsl:when test="self::text()">
      <!-- Treat NBSP like ordinary whitespace when testing for emptiness. -->
      <xsl:if test="normalize-space(translate(., '&#160;', ' '))">
        <p>
          <xsl:value-of select="."/>
        </p>
      </xsl:if>
    </xsl:when>
    <xsl:otherwise>
      <p>
        <xsl:apply-templates/>
      </p>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
41
<!-- A head that is a direct child of a p is replaced by hi rend="bold"
     holding the string value of the head; markup nested inside the head is
     flattened to text. -->
<xsl:template match="p/head">
  <hi rend="bold"><xsl:value-of select="."/></hi>
</xsl:template>
47
<!-- Turns a hi that is a direct child of a div into a paragraph containing
     the processed children of the hi.
     NOTE(review): this rule has the default priority 0.5, while the rule for
     (ref|emph|hi|text())[parent::div] in this stylesheet matches the same
     nodes with priority 0.9, so this rule is not selected as long as that
     one is present. -->
<xsl:template match="div/hi">
  <p><xsl:apply-templates/></p>
</xsl:template>
53
<!-- Any div located at any depth inside an item is rewritten as a p; its
     children are processed further, its attributes are not carried over. -->
<xsl:template match="item//div">
  <p><xsl:apply-templates/></p>
</xsl:template>
59
<!-- Removes a p whose whitespace-normalized content is exactly one
     non-breaking space (U+00A0), which normalize-space() does not strip. -->
<xsl:template match="p[normalize-space() = '&#xA0;']"/>
Marc Kupietz8d293632024-04-15 06:54:26 +020061
Marc Kupietz28f48e12024-04-14 16:17:05 +020062</xsl:stylesheet>