blob: 46b4bb625474e25f1be556bb9ba040c646f2478f [file] [log] [blame]
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f map nkjp tei"
version="3.0" expand-text="yes">
<!-- ===== Stylesheet parameters ===== -->
<!-- Identifiers of the corpus, document and text being converted. They are typed xs:string and
     have no default value, so the caller is expected to supply all three. -->
<xsl:param name="corpusID" as="xs:string"/>
<xsl:param name="docID" as="xs:string"/>
<xsl:param name="textID" as="xs:string"/>
<!-- Directory containing the input files (text.xml, ann_segmentation.xml, ann_morphosyntax.xml).
     The default is only a fallback that shows reviewers what kind of value is expected. -->
<xsl:param name="sourceDir" select="'test/resources/nkjp2korap_sample1/KOT/'" as="xs:string"/>
<!-- Root directory of the generated output. The value is concatenated directly with the path
     segments below, so it must end with a slash. The default keeps the previous fixed location. -->
<xsl:param name="targetDir" select="'test/output/'" as="xs:string"/>

<!-- ===== Output locations ===== -->
<!-- Directory of the text-level files: {targetDir}{corpusID}/{docID}/{textID}/ -->
<xsl:variable name="targetTextDir" as="xs:string"
  select="$targetDir || $corpusID || '/' || $docID || '/' || $textID || '/'"/>
<!-- Directory of the corpus-level header: {targetDir}{corpusID}/ -->
<xsl:variable name="targetCorpusDir" as="xs:string" select="$targetDir || $corpusID || '/'"/>

<!-- ===== Constants ===== -->
<!-- DOCTYPE identifiers of the I5 DTD; currently referenced only from commented-out code. -->
<xsl:variable name="systemDoctypeI5" as="xs:string" static="true"
  select="'http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd'"/>
<xsl:variable name="publicDoctypeI5" as="xs:string" static="true"
  select="'-//IDS//DTD IDS-XCES 1.0//EN'"/>
<!-- Namespace of the generated KorAP-XML elements. -->
<xsl:variable name="KorAP_namespace" as="xs:string" static="true"
  select="'http://ids-mannheim.de/ns/KorAP'"/>
<!-- Value written to the @version attribute of generated layers. -->
<xsl:variable name="KorAP-XML_version" as="xs:string" static="true" select="'KorAP-0.4'"/>
<!-- Identifier of the form corpusID_docID.textID; it is written as @docid on the generated
     data and structure documents. -->
<xsl:variable name="compoundID" as="xs:string"
  select="$corpusID || '_' || $docID || '.' || $textID"/>

<!-- ===== Modes ===== -->
<!-- Header modes: anything without an explicit template rule is dropped together with its subtree. -->
<xsl:mode name="corpus" on-no-match="deep-skip"/>
<xsl:mode name="text" on-no-match="deep-skip"/>
<!-- Structure mode: unmatched elements are traversed without producing output and unmatched text
     is dropped, so the whitespace between source elements is not copied into the span list. -->
<xsl:mode name="struct" on-no-match="shallow-skip"/>
<!-- <xsl:variable name="text_depth" as="xs:integer" select="xs:integer('2')" static="true"/>
<!-\- this magic number indicates the depth of the <TEI> element inside teiCorpus/TEI -\->
-->
<!-- Returns the nesting level of $node: the number of elements on its ancestor-or-self axis,
     leaving out every element whose local name is 'TEI' or 'teiCorpus'. -->
<xsl:function name="f:compute_nesting" as="xs:integer">
  <xsl:param name="node" as="node()"/>
  <xsl:sequence
    select="count($node/ancestor-or-self::*[not(local-name() = ('TEI', 'teiCorpus'))])"/>
</xsl:function>
<!-- Helper for f:calc_content_length: parses one @corresp value and returns offset + length.
     The part of the value that lies before the first ')' and after the first ',' is expected to
     be two integers separated by a comma, e.g. '...(some-id,3,5)' yields 8.
     A value that does not have this shape makes the xs:integer casts fail. -->
<xsl:function name="f:corresp_range_end" as="xs:integer">
  <xsl:param name="corresp" as="xs:string?"/>
  <xsl:variable name="offset_and_length"
    select="substring-after(substring-before($corresp, ')'), ',')"/>
  <xsl:sequence
    select="xs:integer(substring-before($offset_and_length, ','))
            + xs:integer(substring-after($offset_and_length, ','))"/>
</xsl:function>

<!-- Returns the end position (offset + length) described by the @corresp that closes $node:
       * tei:text / tei:body: last tei:seg of the last tei:s of the last tei:p
       * tei:p:               last tei:seg of the last tei:s
       * tei:s:               last tei:seg
       * any other node:      the node's own @corresp
     For text, body, p and s exactly one such @corresp must exist, otherwise the typed variable
     raises a type error.
     NOTE(review): the numbers are used exactly as found in @corresp; if they are relative to a
     single paragraph of text.xml rather than to the whole text, the result is paragraph-relative
     as well. Confirm against multi-paragraph input. -->
<xsl:function name="f:calc_content_length" as="xs:integer">
  <xsl:param name="node" as="node()"/>
  <xsl:choose>
    <xsl:when test="exists($node/(self::tei:text | self::tei:body | self::tei:p | self::tei:s))">
      <xsl:variable name="closing_corresp" as="attribute(corresp)"
        select="
          if ($node/self::tei:s) then
            $node/descendant::tei:seg[last()]/@corresp
          else if ($node/self::tei:p) then
            $node/descendant::tei:s[last()]/descendant::tei:seg[last()]/@corresp
          else
            $node/descendant::tei:p[last()]/descendant::tei:s[last()]/descendant::tei:seg[last()]/@corresp"/>
      <xsl:sequence select="f:corresp_range_end($closing_corresp)"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:sequence select="f:corresp_range_end($node/@corresp)"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
<!-- Entry point: loads the three input documents from $sourceDir and calls the named templates
     that write data.xml, struct/structure.xml, nkjp/morpho.xml, the text-level header.xml and the
     corpus-level header.xml. -->
<xsl:template name="xsl:initial-template">
  <!-- input documents -->
  <xsl:variable name="text_doc" as="document-node()" select="doc($sourceDir || 'text.xml')"/>
  <xsl:variable name="segmentation_doc" as="document-node()"
    select="doc($sourceDir || 'ann_segmentation.xml')"/>
  <xsl:variable name="morphosyntax_doc" as="document-node()"
    select="doc($sourceDir || 'ann_morphosyntax.xml')"/>

  <!-- raw text -->
  <xsl:call-template name="create_data">
    <xsl:with-param name="text.xml" select="$text_doc"/>
    <xsl:with-param name="target" select="$targetTextDir || 'data.xml'"/>
  </xsl:call-template>

  <!-- structural spans -->
  <xsl:call-template name="create_struct">
    <xsl:with-param name="text.xml" select="$text_doc"/>
    <xsl:with-param name="ann_segmentation.xml" select="$segmentation_doc"/>
    <xsl:with-param name="target" select="$targetTextDir || 'struct/structure.xml'"/>
  </xsl:call-template>

  <!-- morphosyntactic layer -->
  <xsl:call-template name="create_morpho">
    <xsl:with-param name="text.xml" select="$text_doc"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$morphosyntax_doc"/>
    <xsl:with-param name="target" select="$targetTextDir || 'nkjp/morpho.xml'"/>
  </xsl:call-template>

  <!-- text-level header -->
  <xsl:call-template name="create_text_header">
    <xsl:with-param name="text.xml" select="$text_doc"/>
    <xsl:with-param name="target" select="$targetTextDir || 'header.xml'"/>
  </xsl:call-template>

  <!-- corpus-level header -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param name="text.xml" select="$text_doc"/>
    <xsl:with-param name="target" select="$targetCorpusDir || 'header.xml'"/>
  </xsl:call-template>
</xsl:template>
<!-- ************************** data.xml ******************* -->
<!-- Writes the raw-text document (data.xml) to $target.
     Parameters:
       text.xml  the parsed text.xml input document
       target    URI of the file to write
     Output: a KorAP raw_text element carrying @docid, with a metadata reference and a text
     element holding the string value of every element in text.xml whose local name is 'ab'. -->
<xsl:template name="create_data">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>
  <!-- xpath-default-namespace is not an attribute value template, so it cannot take
       {$KorAP_namespace}; the URI is written out literally, as in the other templates. -->
  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">
    <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>
      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <!-- When several 'ab' elements are selected, xsl:value-of joins their values with a
             single space (the default separator).
             NOTE(review): confirm that this separator agrees with the character offsets used
             by the structure layer. -->
        <xsl:value-of select="$text.xml//*[local-name() = 'ab']"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
<!-- ************************** struct ******************* -->
<!-- Writes the structure layer (struct/structure.xml) to $target.
     Parameters:
       text.xml              the parsed text.xml input document (not read by this template)
       ann_segmentation.xml  the parsed segmentation document whose tei:text elements are processed
       target                URI of the file to write
     Output: a KorAP layer element with @docid and @version, wrapping a spanList that is filled
     by the templates of mode 'struct'. -->
<xsl:template name="create_struct">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model"
      select="'href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;'"/>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid">{$compoundID}</xsl:attribute>
      <xsl:attribute name="version">{$KorAP-XML_version}</xsl:attribute>
      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:apply-templates mode="struct" select="$ann_segmentation.xml//tei:text"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
<!-- Emits one KorAP span for the current TEI element and then processes its children in the
     same mode.
     Parameters (all optional):
       ini    'from' value of the enclosing span (default 0); passed down, not read here
       fin    'to' value of the enclosing span (default 999999999); passed down, not read here
       index  running number of the enclosing span (default 1)
     The span carries @id ('s' + running number), @from, @to and @l (nesting level), followed by
     a TEI feature structure naming the source element and listing its attributes. -->
<xsl:template match="tei:*" mode="struct">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>

  <!-- Preceding siblings that have the same local name as the current element. -->
  <xsl:variable name="same_name_before"
    select="preceding-sibling::*[local-name() eq local-name(current())]"/>
  <!-- For tei:s and tei:p: the total number of descendant elements of those siblings;
       0 for every other element (and when there are no such siblings). -->
  <xsl:variable name="nested_before" as="xs:integer"
    select="
      if (self::tei:s or self::tei:p) then
        sum($same_name_before ! count(descendant::*))
      else
        0"/>
  <!-- Running number of this span, derived from the number handed down by the parent. -->
  <xsl:variable name="span_no" as="xs:integer"
    select="$index + 1 + count($same_name_before) + $nested_before"/>

  <!-- Start offset: 0 for text and body; otherwise the first number after the first comma of
       the relevant @corresp (own attribute for seg, first seg for s, first seg of the first s
       for p).
       NOTE(review): there is no fallback branch, so for any other tei:* element this variable
       is empty and the xs:integer type check fails. Confirm whether other elements can occur. -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p or self::tei:s">
        <xsl:variable name="opening_corresp" as="attribute(corresp)"
          select="
            if (self::tei:p) then
              descendant::tei:s[1]/descendant::tei:seg[1]/@corresp
            else
              descendant::tei:seg[1]/@corresp"/>
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before($opening_corresp, ')'), ','), ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:seg">
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before(@corresp, ')'), ','), ','))"/>
      </xsl:when>
    </xsl:choose>
  </xsl:variable>
  <!-- End offset as computed by f:calc_content_length for this element. -->
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id">s{$span_no}</xsl:attribute>
    <xsl:attribute name="from">{$start}</xsl:attribute>
    <xsl:attribute name="to">{$end}</xsl:attribute>
    <xsl:attribute name="l">{f:compute_nesting(.)}</xsl:attribute>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <!-- name of the source element -->
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name">{local-name()}</xsl:attribute>
      </xsl:element>
      <!-- attributes of the source element, if it has any -->
      <xsl:if test="exists(@*)">
        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
          <xsl:attribute name="name">attr</xsl:attribute>
          <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="type">attr</xsl:attribute>
            <xsl:for-each select="@*">
              <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                <xsl:attribute name="name">{local-name()}</xsl:attribute>
                <xsl:value-of select="."/>
              </xsl:element>
            </xsl:for-each>
          </xsl:element>
        </xsl:element>
      </xsl:if>
    </xsl:element>
  </xsl:element>

  <!-- Descend, handing this span's boundaries and running number to the children. -->
  <xsl:apply-templates mode="struct">
    <xsl:with-param name="index" select="$span_no"/>
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
  </xsl:apply-templates>
</xsl:template>
<!-- ************************** morpho ******************* -->
<!-- Writes the morphosyntactic layer (nkjp/morpho.xml) to $target.
     Parameters:
       text.xml              the parsed text.xml input document (not read yet)
       ann_morphosyntax.xml  the parsed morphosyntax document (not read yet)
       target                URI of the file to write
     At present only the xml-model processing instruction is written; no element content is
     generated. -->
<xsl:template name="create_morpho">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model"
      select="'href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;'"/>
  </xsl:result-document>
</xsl:template>
<!-- ************************** TEXT header ******************* -->
<!-- Writes the text-level header (header.xml) to $target.
     Parameters:
       text.xml  the parsed text.xml input document
       target    URI of the file to write
     Output: an idsHeader element (in no namespace) whose content is produced by applying the
     templates of mode 'text' to the fileDesc of the TEI header. -->
<xsl:template name="create_text_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:element name="idsHeader">
      <xsl:attribute name="type">document</xsl:attribute>
      <xsl:attribute name="pattern">text</xsl:attribute>
      <xsl:attribute name="status">new</xsl:attribute>
      <xsl:attribute name="version">1.1</xsl:attribute>
      <xsl:attribute name="TEIform">teiHeader</xsl:attribute>
      <xsl:apply-templates mode="text"
        select="$text.xml//tei:TEI/tei:teiHeader/tei:fileDesc"/>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
<!-- Text header: wraps whatever the children of tei:fileDesc produce in mode 'text' in a
     fileDesc element (no namespace). -->
<xsl:template match="tei:fileDesc" mode="text">
  <xsl:element name="fileDesc">
    <xsl:apply-templates mode="text"/>
  </xsl:element>
</xsl:template>
<!-- Text header: turns tei:title into t.title. The children are processed in the default
     (unnamed) mode rather than in mode 'text'. -->
<xsl:template match="tei:title" mode="text">
  <xsl:element name="t.title">
    <xsl:apply-templates/>
  </xsl:element>
</xsl:template>
<!-- Text header: turns tei:titleStmt into titleStmt, starting with a textSigle element and
     followed by whatever the children produce in mode 'text'.
     The sigle includes the document identifier (corpusID/docID.textID), mirroring the three
     components of $compoundID and of the output directory; without $docID, texts that share a
     textID in different documents of the same corpus would receive the same sigle.
     NOTE(review): the separator between docID and textID follows $compoundID; confirm the sigle
     format expected by the consumer of header.xml. -->
<xsl:template match="tei:titleStmt" mode="text">
  <titleStmt>
    <textSigle>
      <xsl:value-of select="$corpusID || '/' || $docID || '.' || $textID"/>
    </textSigle>
    <xsl:apply-templates mode="text"/>
  </titleStmt>
</xsl:template>
<!-- ************************** CORPUS header ******************* -->
<!-- Writes the corpus-level header (header.xml) to $target.
     Parameters:
       text.xml  the parsed text.xml input document
       target    URI of the file to write
     Output: an idsHeader element (in no namespace) whose content is produced by applying the
     templates of mode 'corpus' to the fileDesc of the TEI header. -->
<xsl:template name="create_corpus_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>
  <!-- The doctype-public="{$publicDoctypeI5}" and doctype-system="{$systemDoctypeI5}"
       attributes are deliberately not set on the result document; the original author found
       them to be of no use here. -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes">
    <xsl:element name="idsHeader">
      <xsl:attribute name="type">corpus</xsl:attribute>
      <xsl:attribute name="pattern">text</xsl:attribute>
      <xsl:attribute name="status">new</xsl:attribute>
      <xsl:attribute name="version">1.1</xsl:attribute>
      <xsl:attribute name="TEIform">teiHeader</xsl:attribute>
      <xsl:apply-templates mode="corpus"
        select="$text.xml//tei:TEI/tei:teiHeader/tei:fileDesc"/>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
<!-- Corpus header: wraps whatever the children of tei:fileDesc produce in mode 'corpus' in a
     fileDesc element (no namespace). -->
<xsl:template match="tei:fileDesc" mode="corpus">
  <xsl:element name="fileDesc">
    <xsl:apply-templates mode="corpus"/>
  </xsl:element>
</xsl:template>
<!-- Corpus header: turns tei:title into c.title. The children are processed in the default
     (unnamed) mode rather than in mode 'corpus'. -->
<xsl:template match="tei:title" mode="corpus">
  <xsl:element name="c.title">
    <xsl:apply-templates/>
  </xsl:element>
</xsl:template>
<!-- Corpus header: turns tei:titleStmt into titleStmt, starting with a korpusSigle element that
     holds $corpusID and followed by whatever the children produce in mode 'corpus'. -->
<xsl:template match="tei:titleStmt" mode="corpus">
  <xsl:element name="titleStmt">
    <xsl:element name="korpusSigle">
      <xsl:value-of select="$corpusID"/>
    </xsl:element>
    <xsl:apply-templates mode="corpus"/>
  </xsl:element>
</xsl:template>
<!-- The template below could be called by the XSpec test; TODO: find a way to call the main() template directly. -->
<!-- It is disabled for now because of XSpec design issues, so the passing of parameters, which would have to be
kludged in just for the sake of XSpec, is not fully handled. Relevant links include:
https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html
In short: the internal design of XSpec forces kludges on anyone who wants to use xsl:result-document in a stylesheet.
I don't want to be strangled by kludges at the beginning of the work, and I have already lost quite a bit of time on
this investigation. I will therefore "just code" for now; bits of the templates can be externalized later if we want
to play with tests. For the time being I don't want to have to handle context items in a special way inside
variables, etc., because I'm not sure it's worth it.
-->
<!--<xsl:template name="test_full">
<xsl:param name="corpusID"/>
<xsl:param name="docID"/>
<xsl:param name="textID"/>
<xsl:call-template name="xsl:initial-template"/>
</xsl:template>-->
</xsl:stylesheet>