Use streaming mode when possible in secondary XSLT passes
This prevents errors with text nodes > 2GB.
Thanks @luengen !
diff --git a/xslt/pass2.xsl b/xslt/pass2.xsl
index 13cd23a..a919fc6 100644
--- a/xslt/pass2.xsl
+++ b/xslt/pass2.xsl
@@ -11,52 +11,59 @@
<xsl:variable name="domainClassifier" select="TextClassifier:new('models/dereko_domains_s.classifier')"/>
- <xsl:mode on-no-match="shallow-copy"/>
+ <xsl:mode streamable="yes" on-no-match="shallow-copy"/>
+ <xsl:mode streamable="no" on-no-match="shallow-copy" name="no-Streaming"/>
- <xsl:template match="textClass">
- <xsl:variable name="classification" select="tokenize(TextClassifier:topicDomainsFromText($domainClassifier, ../../../text), ';')"/>
+ <xsl:template match="idsText">
+ <xsl:variable name="idsText" as="node()">
+ <xsl:copy-of select="."/>
+ </xsl:variable>
+ <xsl:apply-templates select="$idsText" mode="no-Streaming"/>
+ </xsl:template>
+
+ <xsl:template match="textClass" mode="no-Streaming">
<textClass>
- <catRef n="{$classification[1]}" target="{$classification[2]}" scheme="topic"/>
- <xsl:if test="xs:decimal($classification[3]) > 0.0000001">
- <catRef n="{$classification[3]}" target="{$classification[4]}" scheme="topic"/>
- </xsl:if>
+ <xsl:text>
</xsl:text>
+ <xsl:value-of disable-output-escaping="yes"
+ select="TextClassifier:insertCatRefs($domainClassifier, 'topic', ../../../text, 0.0001)"/>
+ <xsl:apply-templates mode="no-Streaming"/>
</textClass>
</xsl:template>
- <xsl:template match="p[not(normalize-space())]" priority="1.0"/>
+ <xsl:template match="p[not(normalize-space())]" priority="1.0" mode="no-Streaming"/>
- <xsl:template match="div[not(normalize-space())]" priority="1.0"/>
+ <xsl:template match="div[not(normalize-space())]" priority="1.0" mode="no-Streaming"/>
- <xsl:template match="p[descendant::div|descendant::p and not(ancestor::item)]" priority="0.9">
+ <xsl:template match="p[descendant::div|descendant::p and not(ancestor::item)]" priority="0.9" mode="no-Streaming">
<div type="section">
- <xsl:apply-templates/>
+ <xsl:apply-templates mode="no-Streaming"/>
</div>
</xsl:template>
- <xsl:template match="(ref|emph|hi|text())[parent::div]" priority="0.9">
+ <xsl:template match="(ref|emph|hi|text())[parent::div]" priority="0.9" mode="no-Streaming">
<p>
- <xsl:apply-templates/>
+ <xsl:apply-templates mode="no-Streaming"/>
</p>
</xsl:template>
- <xsl:template match="head[parent::p]">
+ <xsl:template match="head[parent::p]" mode="no-Streaming">
<hi rend="bold">
<xsl:value-of select="."/>
</hi>
</xsl:template>
- <xsl:template match="hi[parent::div]">
+ <xsl:template match="hi[parent::div]" mode="no-Streaming">
<p>
- <xsl:apply-templates/>
+ <xsl:apply-templates mode="no-Streaming"/>
</p>
</xsl:template>
- <xsl:template match="div[ancestor::item]">
+ <xsl:template match="div[ancestor::item]" mode="no-Streaming">
<p>
- <xsl:apply-templates/>
+ <xsl:apply-templates mode="no-Streaming"/>
</p>
</xsl:template>
- <xsl:template match="p[normalize-space(.) = ' ']"/>
+ <xsl:template match="p[normalize-space(.) = ' ']" mode="no-Streaming"/>
</xsl:stylesheet>