blob: a919fc67d51eded09e2cb40fc3e782d516167ee3 [file] [log] [blame]
Marc Kupietz28f48e12024-04-14 16:17:05 +02001<xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
2 xmlns:saxon="http://saxon.sf.net/"
Marc Kupietza5538652024-04-21 15:49:30 +02003 xmlns:xs="http://www.w3.org/2001/XMLSchema"
4 xmlns:TextClassifier="java:de.ids_mannheim.TextClassifier"
5 exclude-result-prefixes="saxon xs TextClassifier">
Marc Kupietz28f48e12024-04-14 16:17:05 +02006
<!--
  Serialization settings: indented XML output carrying the IDS-I5 DOCTYPE
  (public and system identifiers below). The Saxon-specific line-length
  is set to 1000; presumably this keeps the indenting serializer from
  wrapping long lines (confirm against the Saxon documentation).
-->
<xsl:output
    method="xml"
    indent="yes"
    doctype-public="-//IDS//DTD IDS-I5 1.0//EN"
    doctype-system="http://corpora.ids-mannheim.de/I5/DTD/i5.dtd"
    saxon:line-length="1000"/>
11
<!--
  Path of the classifier model handed to the TextClassifier constructor.
  Exposed as a stylesheet parameter so callers can point to a different
  model without editing the stylesheet; the default is the path that was
  previously hard-coded, so existing invocations behave as before.
-->
<xsl:param name="domainClassifierModel" as="xs:string"
           select="'models/dereko_domains_s.classifier'"/>

<!--
  Global classifier object, constructed through the Java extension class
  bound to the TextClassifier prefix. It is passed to
  TextClassifier:insertCatRefs by the textClass template.
-->
<xsl:variable name="domainClassifier"
              select="TextClassifier:new($domainClassifierModel)"/>
13
<!-- Default (unnamed) mode: declared streamable; nodes that match no
     template are shallow-copied. -->
<xsl:mode on-no-match="shallow-copy" streamable="yes"/>

<!-- Named mode "no-Streaming": declared non-streamable, with the same
     shallow-copy fallback. The idsText template hands its in-memory copy
     of each idsText element to this mode. -->
<xsl:mode name="no-Streaming" on-no-match="shallow-copy" streamable="no"/>
Marc Kupietz28f48e12024-04-14 16:17:05 +020016
<!--
  Entry point in the streamable default mode. Each idsText element is
  materialized as an in-memory copy, and that copy is processed in the
  "no-Streaming" mode, where templates may navigate freely within the
  copied subtree (for example up to parents and across to siblings).
-->
<xsl:template match="idsText">
  <xsl:apply-templates select="copy-of(.)" mode="no-Streaming"/>
</xsl:template>
23
<!--
  Rebuilds textClass as a fresh element (attributes of the source element
  are not copied). The new element contains, in order:
    1. a line feed,
    2. the string returned by TextClassifier:insertCatRefs, written with
       output escaping disabled (presumably serialized catRef markup;
       confirm against the Java class),
    3. the processed original children of textClass.
  The text passed to the classifier is the "text" child of the element
  three levels above textClass.
  NOTE(review): the meaning of the arguments 'topic' and 0.0001 is defined
  by the Java extension and is not visible here.
-->
<xsl:template match="textClass" mode="no-Streaming">
  <xsl:variable name="textBody" select="../../../text"/>
  <textClass>
    <xsl:text>&#xa;</xsl:text>
    <xsl:value-of
        select="TextClassifier:insertCatRefs($domainClassifier, 'topic', $textBody, 0.0001)"
        disable-output-escaping="yes"/>
    <xsl:apply-templates mode="no-Streaming"/>
  </textClass>
</xsl:template>
32
<!-- Drops every p whose string value is empty or whitespace-only; child
     elements without text do not keep it alive. Priority 1.0 lets this
     rule win over the other p templates in this mode. -->
<xsl:template mode="no-Streaming" priority="1.0"
              match="p[normalize-space() = '']"/>
Marc Kupietz28f48e12024-04-14 16:17:05 +020034
<!-- Drops every div whose string value is empty or whitespace-only; child
     elements without text do not keep it alive. Priority 1.0 lets this
     rule win over the other div templates in this mode. -->
<xsl:template mode="no-Streaming" priority="1.0"
              match="div[normalize-space() = '']"/>
Marc Kupietz164a2832024-04-14 21:00:48 +020036
<!--
  A p that contains a nested div or p somewhere below it, and that is not
  itself inside an item, is replaced by <div type="section">. The
  attributes of the original p are not carried over; its children are
  processed in the same mode.
-->
<xsl:template mode="no-Streaming" priority="0.9"
              match="p[(descendant::div or descendant::p) and not(ancestor::item)]">
  <div type="section">
    <xsl:apply-templates mode="no-Streaming"/>
  </div>
</xsl:template>
42
<!--
  Wraps inline content that sits directly inside a div in a p element.
  Matches ref, emph and hi elements as well as text nodes whose parent is
  a div.

  For the matched elements the element itself is not copied; only its
  children are processed inside the new p.

  For matched text nodes the text is written into the new p explicitly.
  A text node has no children, so xsl:apply-templates on its own selects
  nothing there: the template used to emit an empty p and the text was
  silently lost.

  NOTE(review): whitespace-only text nodes between the children of a div
  also match if they are present in the input tree; they still produce a
  p, now containing that whitespace instead of being empty.
-->
<xsl:template match="(ref|emph|hi|text())[parent::div]" priority="0.9" mode="no-Streaming">
  <p>
    <xsl:choose>
      <xsl:when test="self::text()">
        <xsl:value-of select="."/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:apply-templates mode="no-Streaming"/>
      </xsl:otherwise>
    </xsl:choose>
  </p>
</xsl:template>
48
<!-- A head whose parent is a p is replaced by <hi rend="bold"> holding
     the head's string value; any markup inside the head is flattened to
     plain text. -->
<xsl:template mode="no-Streaming" match="p/head">
  <hi rend="bold"><xsl:value-of select="."/></hi>
</xsl:template>
54
<!--
  A hi that is a direct child of a div is replaced by a p containing the
  hi's processed children.
  NOTE(review): this stylesheet also has a template in the same mode that
  matches (ref|emph|hi|text())[parent::div] with priority 0.9. This rule
  only has the default priority, so that template takes precedence for
  every hi child of a div.
-->
<xsl:template mode="no-Streaming" match="div/hi">
  <p>
    <xsl:apply-templates mode="no-Streaming"/>
  </p>
</xsl:template>
60
<!-- A div located anywhere inside an item is replaced by a p; the div's
     attributes are not carried over and its children are processed in
     the same mode. -->
<xsl:template mode="no-Streaming" match="item//div">
  <p>
    <xsl:apply-templates mode="no-Streaming"/>
  </p>
</xsl:template>
66
<!-- Drops a p whose whitespace-normalized string value is exactly one
     no-break space (U+00A0), which normalize-space() does not remove. -->
<xsl:template mode="no-Streaming" match="p[normalize-space() = '&#xA0;']"/>
Marc Kupietz8d293632024-04-15 06:54:26 +020068
Marc Kupietz28f48e12024-04-14 16:17:05 +020069</xsl:stylesheet>