| Marc Kupietz | 28f48e1 | 2024-04-14 16:17:05 +0200 | [diff] [blame] | 1 | <xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" |
| 2 | xmlns:saxon="http://saxon.sf.net/" |
| Marc Kupietz | a553865 | 2024-04-21 15:49:30 +0200 | [diff] [blame^] | 3 | xmlns:xs="http://www.w3.org/2001/XMLSchema" |
| 4 | xmlns:TextClassifier="java:de.ids_mannheim.TextClassifier" |
| 5 | exclude-result-prefixes="saxon xs TextClassifier"> |
| Marc Kupietz | 28f48e1 | 2024-04-14 16:17:05 +0200 | [diff] [blame] | 6 | |
<!-- Serialization settings: indented XML output carrying the IDS I5 DOCTYPE.
     saxon:line-length is a Saxon-specific serialization attribute. -->
<xsl:output method="xml" indent="yes" saxon:line-length="1000"
            doctype-public="-//IDS//DTD IDS-I5 1.0//EN"
            doctype-system="http://corpora.ids-mannheim.de/I5/DTD/i5.dtd"/>

<!-- Location of the classifier model handed to the TextClassifier constructor.
     Exposed as a stylesheet parameter so callers can point to another model
     without editing the stylesheet; the default is the previously hard-coded path. -->
<xsl:param name="domainClassifierModel" as="xs:string"
           select="'models/dereko_domains_s.classifier'"/>

<!-- Classifier instance created through the Java extension binding
     (java:de.ids_mannheim.TextClassifier) and shared by the templates below. -->
<xsl:variable name="domainClassifier" select="TextClassifier:new($domainClassifierModel)"/>

<!-- Anything not matched by an explicit template is copied through unchanged. -->
<xsl:mode on-no-match="shallow-copy"/>
| 15 | |
<!-- Replaces each textClass element with a new one holding topic catRef entries
     derived from the classifier result.

     The classifier is called with the `text` element found three levels above
     textClass (../../../text) and its result is split on ';'. Fields 1 and 2
     become @n and @target of the first catRef; fields 3 and 4 become those of
     an optional second catRef.
     NOTE(review): the result is presumably "score;label;score;label", to be
     confirmed against TextClassifier.topicDomainsFromText.

     The second catRef is emitted only if field 3 is numeric and greater than
     1e-7. The value is cast via xs:double rather than xs:decimal because
     xs:decimal rejects exponent notation such as "1.0E-8" (the form Java's
     default double-to-string conversion yields for small values), which would
     abort the whole transformation. A missing or non-numeric field 3 simply
     suppresses the second entry. -->
<xsl:template match="textClass">
  <xsl:variable name="classification"
                select="tokenize(TextClassifier:topicDomainsFromText($domainClassifier, ../../../text), ';')"/>
  <xsl:variable name="secondScore" select="$classification[3]"/>
  <textClass>
    <catRef n="{$classification[1]}" target="{$classification[2]}" scheme="topic"/>
    <!-- Conditional expression (not `and`) so the cast is never attempted on an uncastable value. -->
    <xsl:if test="if ($secondScore castable as xs:double)
                  then xs:double($secondScore) gt 1e-7
                  else false()">
      <catRef n="{$classification[3]}" target="{$classification[4]}" scheme="topic"/>
    </xsl:if>
  </textClass>
</xsl:template>
| 25 | |
<!-- Drop p and div elements whose string value is empty after whitespace
     normalization. The explicit priority of 1.0 lets this rule win over the
     other p/div rewriting templates in this stylesheet. -->
<xsl:template match="p[normalize-space() = ''] | div[normalize-space() = '']" priority="1.0"/>
| 29 | |
<!-- A p that contains a div or another p anywhere below it is re-emitted as
     <div type="section">; its children are processed by the usual templates.
     Attributes of the original p are not carried over. -->
<xsl:template match="p[.//div or .//p]" priority="0.9">
  <div type="section">
    <xsl:apply-templates select="node()"/>
  </div>
</xsl:template>
| 35 | |
<!-- Inline content sitting directly inside a div is wrapped in a p.

     * ref, emph and hi children: the element itself is not copied; its
       processed children become the content of the new p.
     * text node children: the text is copied into the new p. A text node has
       no children, so xsl:apply-templates alone would emit an empty p and
       lose the text.

     Whitespace-only text nodes are excluded from the match so that
     indentation between the children of a div does not turn into empty p
     elements; they fall through to the shallow-copy default. -->
<xsl:template match="(ref|emph|hi|text()[normalize-space()])[parent::div]" priority="0.9">
  <p>
    <xsl:choose>
      <xsl:when test="self::text()">
        <xsl:value-of select="."/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:apply-templates/>
      </xsl:otherwise>
    </xsl:choose>
  </p>
</xsl:template>
| 41 | |
<!-- A head occurring directly inside a p is rendered as bold highlighted
     text. Only the string value of the head is kept, so any markup nested
     inside it is flattened. -->
<xsl:template match="p/head">
  <hi rend="bold"><xsl:value-of select="string(.)"/></hi>
</xsl:template>
| 47 | |
<!-- A hi directly inside a div becomes a p containing the processed children
     of the hi.
     NOTE(review): this rule has the default priority, while the stylesheet's
     priority-0.9 rule for ref/emph/hi/text() children of div also matches
     these nodes, so this template appears never to be selected. -->
<xsl:template match="div/hi">
  <p><xsl:apply-templates select="node()"/></p>
</xsl:template>
| 53 | |
<!-- Drop paragraphs that contain nothing but no-break spaces and/or ordinary
     whitespace.

     normalize-space() never returns a lone ordinary space, so a comparison
     against ' ' can never succeed. It does leave U+00A0 untouched, which is
     presumably what this rule was meant to catch. The no-break space is
     written as a character reference so it cannot be silently turned into a
     plain space by an editor or export, and translate() maps it to a space
     first so that any number of no-break spaces, mixed with whitespace, is
     recognised. -->
<xsl:template match="p[not(normalize-space(translate(., '&#160;', ' ')))]"/>
| 55 | |
| Marc Kupietz | 28f48e1 | 2024-04-14 16:17:05 +0200 | [diff] [blame] | 56 | </xsl:stylesheet> |