<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
xmlns:fn="http://www.w3.org/2005/xpath-functions"
xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f fn map nkjp tei"
version="3.0" expand-text="yes">
<!-- PARAMETERS -->
<!-- sourceDir: the directory containing NKJP files, in the form of a collection of
     text-level dirs (that is how we know both the $corpusID and the $docID) -->
<xsl:param name="sourceDir" as="xs:string" select="'test/resources/nkjp2korap_sample2'"/>
<!-- targetDir: where the corpus/document/text/annotations hierarchy is going to be created -->
<xsl:param name="targetDir" as="xs:string" select="'test/output'"/>
<!-- skip_docID: comma-separated list of document IDs to be skipped from processing
     example: HellerPodgladanie,KOT
     no functionality beyond string identity is supported
     (this is just for testing) -->
<xsl:param name="skip_docID" as="xs:string"
    select="'HellerPodgladanie,IsakowiczZaleskiMoje,KolakowskiOco,MysliwskiKamien,WilkWilczy,ZycieWarszawy_Zycie'"/>
<!-- SHOW_ORTH_IN_STRUCT: for debugging structure.xml production -->
<xsl:param name="SHOW_ORTH_IN_STRUCT" select="true()" as="xs:boolean"/>
<!-- VARIABLES (= constants...) -->
<xsl:variable name="corpusID" static="yes" as="xs:string" select="'NKJP'"/>
<xsl:variable name="docID" static="yes" as="xs:string" select="'NKJP'"/>
<!-- target directory of the corpus, always ending in a slash -->
<xsl:variable name="targetCorpusDir_slashed" as="xs:string"
    select="concat($targetDir, '/', $corpusID, '/')"/>
<xsl:variable name="systemDoctypeI5" static="yes" as="xs:string"
    select="'http://corpora.ids-mannheim.de/I5/DTD/i5.dtd'"/>
<xsl:variable name="publicDoctypeI5" static="yes" as="xs:string"
    select="'-//IDS//DTD I5 1.0//EN'"/>
<xsl:variable name="KorAP_namespace" static="yes" as="xs:string"
    select="'http://ids-mannheim.de/ns/KorAP'"/>
<xsl:variable name="KorAP-XML_version" static="yes" as="xs:string" select="'KorAP-0.4'"/>
<!-- this is only a bit funny -->
<!-- query parameters appended to $sourceDir when the collection of text.xml files is opened;
     see https://www.saxonica.com/documentation11/index.html#!sourcedocs/collections/collection-directories -->
<xsl:variable name="collection_params" static="yes" as="xs:string"
    select="'recurse=yes;validation=strip;select=text.xml;content-type=application/xml;on-error=warning;xinclude=yes'"/>
<!-- every text.xml document found under $sourceDir (at least one is required by the declared type) -->
<xsl:variable name="collection_of_text" as="document-node()+"
    select="collection(concat($sourceDir, '?', $collection_params))"/>
<!-- MODES -->
<!-- the unnamed mode: all accumulators are declared as usable -->
<xsl:mode use-accumulators="#all"/>
<!-- header-text: nodes without a matching template contribute only their text -->
<xsl:mode on-no-match="text-only-copy" name="header-text"/>
<!-- text and corpus (the two header modes): nodes without a matching template
     are dropped together with their descendants -->
<xsl:mode on-no-match="deep-skip" name="text"/>
<xsl:mode on-no-match="deep-skip" name="corpus"/>
<!-- element-index: running count of TEI elements that are tei:text or lie inside it;
     incremented when such an element is entered -->
<xsl:accumulator name="element-index" initial-value="0" as="xs:integer">
  <xsl:accumulator-rule phase="start" match="tei:*[exists(ancestor-or-self::tei:text)]">
    <xsl:sequence select="$value + 1"/>
  </xsl:accumulator-rule>
</xsl:accumulator>
<!-- elem-offset-seq: character offsets of structural elements.
     The value is a growing sequence of single-entry maps, each keyed by an xml:id and holding
     two integers. It starts with the dummy entry 'null' -> (0,0).
     Every rule first reads the most recently appended map (head(reverse($value))) and takes the
     second integer of its only entry as $preceding_index, i.e. the offset reached so far.
     Start rules for p and s append a provisional entry whose second integer is the element's
     start offset; the matching end rules look that entry up again and append the final
     (start, end) pair under the same key. -->
<xsl:accumulator name="elem-offset-seq" as="map(xs:string, item()+)+" initial-value="(map{'null':(0,0)})">
<!-- paragraph start: provisional entry (offset so far, start offset of this paragraph) -->
<xsl:accumulator-rule match="tei:body/tei:p" phase="start">
<xsl:variable name="preceding_index" as="xs:integer">
<xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
<xsl:sequence select="map:get($the_tail,map:keys($the_tail)[1])[2]"/>
</xsl:variable>
<xsl:variable name="our_base" as="xs:integer" select="if($preceding_index eq 0) then $preceding_index else $preceding_index + 1"/>
<!-- for paragraphs the start offset depends only on whether anything precedes:
     at offset 0 the paragraph starts right there, otherwise one position is added -->
<xsl:sequence select="
$value,
map {
string(@xml:id): ($preceding_index,$our_base)
}"/>
</xsl:accumulator-rule>
<!-- sentence start: provisional entry; one position is added when f:is_preceded_by_ws says so -->
<xsl:accumulator-rule match="tei:s" phase="start">
<xsl:variable name="preceding_index" as="xs:integer">
<xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
<xsl:sequence select="map:get($the_tail,map:keys($the_tail)[1])[2]"/>
</xsl:variable>
<!-- NOTE(review): the second '+' below is a unary plus and has no effect; looks like a typo -->
<xsl:variable name="our_base" as="xs:integer" select="if($preceding_index eq 0) then $preceding_index else $preceding_index + + xs:integer(f:is_preceded_by_ws(.,true()))"/>
<xsl:sequence select="
$value,
map {
string(@xml:id): ($preceding_index,$our_base)
}"/>
</xsl:accumulator-rule>
<!-- end of a w inside a seg that carries no nkjp:rejected attribute: final entry, keyed by the
     xml:id of the parent seg, running from the (possibly whitespace-shifted) start to
     start + string length of the w -->
<xsl:accumulator-rule match="tei:w[parent::tei:seg[count(@nkjp:rejected) eq 0]]" phase="end">
<xsl:variable name="preceding_index" as="xs:integer">
<xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
<xsl:sequence select="map:get($the_tail,map:keys($the_tail)[1])[2]"/>
</xsl:variable>
<xsl:variable name="our_base" as="xs:integer" select="$preceding_index + xs:integer(f:is_preceded_by_ws(parent::tei:seg,true()))"/>
<xsl:sequence select="
$value,
map {
string(parent::tei:seg/@xml:id): ($our_base,$our_base + string-length())
}"/>
</xsl:accumulator-rule>
<!-- sentence end: final entry (start taken from the provisional entry, end = offset so far) -->
<xsl:accumulator-rule match="tei:s" phase="end">
<xsl:variable name="preceding_index" as="xs:integer">
<xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
<xsl:sequence select="map:get($the_tail,map:keys($the_tail)[1])[2]"/>
</xsl:variable>
<xsl:variable name="our_base" as="xs:integer">
<!-- map:find returns every value stored under this id; member 1 is the entry appended first -->
<xsl:variable name="incomplete" select="map:find($value,string(@xml:id))(1)" as="xs:integer+"/>
<xsl:sequence select="$incomplete[2]"/>
</xsl:variable>
<xsl:sequence select="
$value,
map {
string(@xml:id): ($our_base,$preceding_index)
}"/>
</xsl:accumulator-rule>
<!-- paragraph end: final entry, built the same way as for sentences -->
<xsl:accumulator-rule match="tei:body/tei:p" phase="end">
<xsl:variable name="preceding_index" as="xs:integer">
<xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
<xsl:sequence select="map:get($the_tail,map:keys($the_tail)[1])[2]"/>
</xsl:variable>
<xsl:variable name="our_base" as="xs:integer">
<xsl:variable name="incomplete" select="map:find($value,string(@xml:id))(1)" as="xs:integer+"/>
<xsl:sequence select="$incomplete[2]"/>
</xsl:variable>
<xsl:sequence select="
$value,
map {
string(@xml:id): ($our_base,$preceding_index)
}"/>
</xsl:accumulator-rule>
<!-- body end: entry from 0 to the offset reached so far
     (the key is the empty string when the element has no xml:id) -->
<xsl:accumulator-rule match="tei:body" phase="end">
<xsl:variable name="preceding_index" as="xs:integer">
<xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
<xsl:sequence select="map:get($the_tail, map:keys($the_tail)[1])[2]"/>
</xsl:variable>
<xsl:sequence select="
$value,
map {
string(@xml:id): (0, $preceding_index)
}"/>
</xsl:accumulator-rule>
<!-- text end: same as body end -->
<xsl:accumulator-rule match="tei:text" phase="end">
<xsl:variable name="preceding_index" as="xs:integer">
<xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
<xsl:sequence select="map:get($the_tail, map:keys($the_tail)[1])[2]"/>
</xsl:variable>
<xsl:sequence select="
$value,
map {
string(@xml:id): (0, $preceding_index)
}"/>
</xsl:accumulator-rule>
</xsl:accumulator>
<!-- FUNCTIONS -->
<!-- f:compute_nesting($node): nesting level of $node, counted as the number of its
     ancestors (itself included) whose local name is neither TEI nor teiCorpus -->
<xsl:function name="f:compute_nesting" as="xs:integer">
  <xsl:param name="node" as="element()"/>
  <xsl:sequence
      select="count($node/ancestor-or-self::*[not(local-name() = ('TEI', 'teiCorpus'))])"/>
</xsl:function>
<!-- f:is_preceded_by_ws($node, $suppress_initial): tells whether a whitespace character is
     to be assumed in front of $node.
     $node             a seg, s or p element (decided by local name); anything else
                       terminates the transformation with a message
     $suppress_initial when true, an element that opens its container is never reported
                       as preceded by whitespace (ignored for p)
     Returns true when whitespace is to be assumed. -->
<xsl:function name="f:is_preceded_by_ws" as="xs:boolean">
  <xsl:param name="node" as="element()"/>
  <xsl:param name="suppress_initial" as="xs:boolean"/>
  <xsl:choose>
    <xsl:when test="local-name($node) eq 'seg'">
      <xsl:choose>
        <!-- a seg carrying nkjp:nps is never preceded by whitespace -->
        <xsl:when test="$node/@nkjp:nps"><xsl:sequence select="fn:false()"/></xsl:when>
        <!-- this is checking if we are preceded by a non-rejected seg under the same tei:s,
             modulo choice/paren. The sentence must contain $node itself, hence the identity
             test with 'is': a general comparison with '=' would compare string values and
             would also accept an earlier sentence that merely contains the same word. -->
        <!-- THIS CHECK should be streamlined for efficiency, maybe try comparing positions in the descendant axis -->
        <xsl:when test="$node/preceding::tei:seg[count(@nkjp:rejected) eq 0][ancestor::tei:s[descendant::tei:seg[. is $node]]]"><xsl:sequence select="fn:true()"/></xsl:when>
        <xsl:otherwise>
          <!-- $node opens its sentence: whitespace only if initial suppression is off and
               this is not the first seg of the first s of the first p -->
          <xsl:sequence
              select="not($suppress_initial) and not($node[count(preceding-sibling::tei:seg) eq 0]/ancestor::tei:s[count(preceding-sibling::tei:s) eq 0]/ancestor::tei:p[count(preceding-sibling::tei:p) eq 0])"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <xsl:when test="local-name($node) eq 's'">
      <xsl:choose>
        <xsl:when test="exists($node/preceding-sibling::tei:s)"><xsl:sequence select="fn:true()"/></xsl:when>
        <xsl:otherwise>
          <!-- first sentence of its paragraph: whitespace only if initial suppression is off
               and an earlier paragraph exists -->
          <xsl:sequence select="not($suppress_initial) and exists($node/ancestor::tei:p[1]/preceding-sibling::tei:p)"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <xsl:when test="local-name($node) eq 'p'">
      <xsl:sequence select="exists($node/preceding-sibling::tei:p)"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:message terminate="yes" select="'Wrong argument passed to f:is_preceded_by_ws(): ' || local-name($node) || ' Only p, s, seg are allowed.'"></xsl:message>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
<!-- UTILITY TEMPLATES -->
<!-- this is to delete some auto-inserted attribute throughout -->
<xsl:template mode="#all" match="@default"/>
<!-- NKJP-SGJP has apparently resigned from standoff representations by adding <w> everywhere;
     we reach for them, but from the level of <seg>, so we do not need to process <w> separately -->
<xsl:template mode="#all" match="tei:w"/>
<!-- fall-thru, skipping the potential <paren> element and filtering out the bad guys:
     only the seg descendants without an nkjp:rejected attribute are processed -->
<xsl:template mode="struct" match="tei:choice">
  <xsl:apply-templates mode="struct" select="descendant::tei:seg[empty(@nkjp:rejected)]"/>
</xsl:template>
<!-- MAIN PROCESSING -->
<!-- Entry point: writes the corpus header once, then converts every text.xml of the
     collection, unless the ID of its text directory is listed in $skip_docID. -->
<xsl:template name="xsl:initial-template">
  <xsl:variable name="IDs_to_skip" select="tokenize($skip_docID,',')" as="xs:string*"/>
  <!-- we only want to call the template below once, and we process a random NKJP corpus file for that purpose,
       because all we need is the main corpus header, and we can (should) get to that from any NKJP corpus document -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param name="text.xml" select="$collection_of_text[1]" as="document-node()"/>
    <xsl:with-param name="target" select="$targetCorpusDir_slashed || 'header.xml'" as="xs:string"/>
  </xsl:call-template>
  <xsl:for-each select="$collection_of_text">
    <!-- the directory of this text: the base URI minus its final '/text.xml'; the regex is
         anchored at the end so that the same string elsewhere in the path is left alone -->
    <xsl:variable name="my_dir" as="xs:string" select="replace(base-uri(),'/text\.xml$','')"/>
    <!-- the last path segment of that directory serves as the text ID -->
    <xsl:variable name="my_textID" as="xs:string" select="tokenize($my_dir,'/')[last()]"/>
    <xsl:variable name="ann_morphosyntax.uri" select="$my_dir || '/ann_morphosyntax.xml'" as="xs:string"/>
    <xsl:variable name="ann_segmentation.uri" select="$my_dir || '/ann_segmentation.xml'" as="xs:string"/>
    <!-- skipping is a utility step, for when we want to ignore some texts for any reason
         (debugging, selective update) -->
    <xsl:if test="not($my_textID = $IDs_to_skip)">
      <xsl:call-template name="process_single_sample">
        <xsl:with-param name="text.xml" as="document-node()" select="."/>
        <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
            select="doc($ann_morphosyntax.uri)"/>
        <xsl:with-param name="ann_segmentation.xml" as="document-node()"
            select="doc($ann_segmentation.uri)"/>
        <xsl:with-param name="my_textID" select="$my_textID" as="xs:string"/>
      </xsl:call-template>
    </xsl:if>
  </xsl:for-each>
</xsl:template>
<!-- process_single_sample: produces the output files of one text, namely data.xml,
     struct/structure.xml and header.xml (the morpho layer is switched off for now).
     text.xml, ann_morphosyntax.xml, ann_segmentation.xml: the source documents of the text
     my_textID: ID of the text, used in the output path and in the compound ID -->
<xsl:template name="process_single_sample">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <!-- empty textID should never happen, but if it does, it will be signalled at the top of the output -->
  <xsl:param name="my_textID" as="xs:string" select="'0-BAD_textID'"/>
  <!-- directory that receives all files of this text -->
  <xsl:variable name="outDir" as="xs:string"
      select="concat($targetCorpusDir_slashed, $docID, '/', $my_textID)"/>
  <!-- this is what occurs in the text and data layers as @docid -->
  <xsl:variable name="sigle" as="xs:string"
      select="concat($corpusID, '_', $docID, '.', $my_textID)"/>
  <xsl:call-template name="create_data">
    <!--<xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>-->
    <xsl:with-param name="target" as="xs:string" select="concat($outDir, '/data.xml')"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$sigle"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
  </xsl:call-template>
  <xsl:call-template name="create_struct">
    <xsl:with-param name="target" as="xs:string" select="concat($outDir, '/struct/structure.xml')"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$sigle"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
  </xsl:call-template>
  <!-- <xsl:call-template name="create_morpho">
    <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
    <xsl:with-param name="compoundID" select="$sigle" as="xs:string"/>
    <xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml"
    as="document-node()"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml"
    as="document-node()"/>
    <xsl:with-param name="target" select="concat($outDir, '/nkjp/morpho.xml')" as="xs:string"/>
    </xsl:call-template>
  -->
  <xsl:call-template name="create_text_header">
    <xsl:with-param name="target" as="xs:string" select="concat($outDir, '/header.xml')"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$sigle"/>
    <xsl:with-param name="text.xml" as="document-node()" select="$text.xml"/>
  </xsl:call-template>
</xsl:template>
<!-- ************************** data.xml ******************* -->
<!-- create_data: writes data.xml, i.e. the raw text of one document.
     ann_segmentation.xml: segmentation document the text is rebuilt from
     compoundID: value of the docid attribute
     target: URI of the file to write
     The text is the concatenation of the w children of all seg elements below p/s that carry
     no nkjp:rejected attribute, each prefixed by one space when f:is_preceded_by_ws
     (called without initial suppression) says so. -->
<xsl:template name="create_data">
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>
  <!-- create the data.xml file -->
  <!-- no xpath-default-namespace here: that attribute is not an attribute value template,
       so a value written with curly braces would be taken literally; every name test
       in this template is prefixed anyway -->
  <xsl:result-document encoding="UTF-8" method="xml" indent="yes" href="{$target}">
    <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>
      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <!-- one (separator, word) pair per accepted segment; the declared type
             atomizes the w elements into strings -->
        <xsl:variable name="content" as="xs:string+">
          <xsl:for-each select="$ann_segmentation.xml/tei:teiCorpus/tei:TEI/tei:text/tei:body/tei:p/tei:s//tei:seg[count(@nkjp:rejected) eq 0]">
            <xsl:sequence select="
                if (f:is_preceded_by_ws(.,false())) then
                  ' '
                else
                  '', ./tei:w"/>
          </xsl:for-each>
        </xsl:variable>
        <xsl:value-of select="string-join($content)"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
<!-- ************************** struct ******************* -->
<!-- create_struct: writes the structure layer of one document, a KorAP layer/spanList
     filled by applying mode struct to every tei:text of the segmentation document.
     compoundID: value of the docid attribute
     ann_segmentation.xml: segmentation document to read
     target: URI of the file to write -->
<xsl:template name="create_struct">
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
      xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element namespace="{$KorAP_namespace}" name="layer">
      <xsl:attribute name="docid">
        <xsl:value-of select="$compoundID"/>
      </xsl:attribute>
      <xsl:attribute name="version">
        <xsl:value-of select="$KorAP-XML_version"/>
      </xsl:attribute>
      <xsl:element namespace="{$KorAP_namespace}" name="spanList">
        <xsl:apply-templates mode="struct" select="$ann_segmentation.xml/descendant::tei:text"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
<!-- Mode struct, any TEI element: emits one KorAP span describing the element, then
     processes its children in the same mode.
     id   = 's' plus the element-index accumulator value at the start of the element
     from = first offset, to = second offset stored for the element's xml:id in the
            elem-offset-seq accumulator (value after the element)
     l    = nesting level from f:compute_nesting -->
<xsl:template match="tei:*" mode="struct">
  <!-- all collected entries folded into one map, the initial dummy entry left out; for an id
       that occurs more than once the entry appended last wins -->
  <!--test later if the merger is really needed here, given how accus work-->
  <xsl:variable name="offset_lookup" as="map(xs:string,xs:integer+)"
      select="map:merge(tail(fn:accumulator-after('elem-offset-seq')), map {'duplicates': 'use-last'})"/>
  <xsl:variable name="offsets" as="xs:integer+" select="$offset_lookup(string(@xml:id))"/>
  <xsl:variable name="my_index" as="xs:integer" select="fn:accumulator-before('element-index')"/>
  <xsl:element namespace="{$KorAP_namespace}" name="span">
    <xsl:attribute name="id" select="concat('s', $my_index)"/>
    <xsl:attribute name="from" select="$offsets[1]"/>
    <xsl:attribute name="to" select="$offsets[2]"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <!-- debugging aid: show the text of a seg as a comment -->
    <xsl:if test="$SHOW_ORTH_IN_STRUCT and local-name() eq 'seg'">
      <xsl:comment><xsl:value-of select="fn:normalize-space(.)"/></xsl:comment>
    </xsl:if>
    <xsl:element namespace="http://www.tei-c.org/ns/1.0" name="fs">
      <xsl:attribute name="type" select="'struct'"/> <!-- STRUCT vs. LEX for morpho -->
      <xsl:element namespace="http://www.tei-c.org/ns/1.0" name="f">
        <xsl:attribute name="name" select="'name'"/>
        <xsl:value-of select="local-name()"/>
      </xsl:element>
      <!-- the attributes of the source element, one f per attribute, if there are any -->
      <xsl:if test="exists(@*)">
        <xsl:element namespace="http://www.tei-c.org/ns/1.0" name="f">
          <xsl:attribute name="name" select="'attr'"/>
          <xsl:element namespace="http://www.tei-c.org/ns/1.0" name="fs">
            <xsl:attribute name="type" select="'attr'"/>
            <xsl:for-each select="@*">
              <xsl:element namespace="http://www.tei-c.org/ns/1.0" name="f">
                <xsl:attribute name="name" select="local-name(.)"/>
                <xsl:value-of select="."/>
              </xsl:element>
            </xsl:for-each>
          </xsl:element>
        </xsl:element>
      </xsl:if>
    </xsl:element>
  </xsl:element>
  <xsl:apply-templates mode="struct">
    <!--<xsl:with-param name="index" select="$my_index"/>-->
  </xsl:apply-templates>
</xsl:template>
<!-- ************************** morpho ******************* -->
<!-- create_morpho: writes the morphological layer of one document, a KorAP layer/spanList
     filled by applying mode morpho to every tei:text of the segmentation document.
     text.xml: source text document (not read in this template)
     compoundID: value of the docid attribute
     ann_segmentation.xml: segmentation document to traverse
     ann_morphosyntax.xml: morphosyntax document, handed on to the morpho templates
     target: URI of the file to write -->
<xsl:template name="create_morpho">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
      xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element namespace="{$KorAP_namespace}" name="layer">
      <xsl:attribute name="docid">
        <xsl:value-of select="$compoundID"/>
      </xsl:attribute>
      <xsl:attribute name="version">
        <xsl:value-of select="$KorAP-XML_version"/>
      </xsl:attribute>
      <xsl:element namespace="{$KorAP_namespace}" name="spanList">
        <xsl:apply-templates mode="morpho" select="$ann_segmentation.xml/descendant::tei:text">
          <xsl:with-param name="ann_morphosyntax.xml" as="document-node()" select="$ann_morphosyntax.xml"/>
        </xsl:apply-templates>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
<!-- Mode morpho, any TEI element: produces no output of its own; it computes a start and an
     end offset for the element and hands them, together with the morphosyntax document, to
     the templates applied to its children.
     ini, fin: offsets received from the parent; not read in this template
     ann_morphosyntax.xml: morphosyntax document, passed through unchanged -->
<xsl:template match="tei:*" mode="morpho">
<xsl:param name="ini" as="xs:integer" required="no" select="0"/>
<xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
<xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
<!-- NOTE(review): $preceding, $preceding-count, $outside-preceding-count and $my_index are
     computed below but not referenced further down in this template -->
<xsl:variable name="my_name" select="local-name()" as="xs:string"/>
<xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
<xsl:variable name="preceding-count" select="count($preceding)"/>
<!-- for s and p: total number of descendant elements of the preceding same-named siblings;
     0 for everything else -->
<xsl:variable name="outside-preceding-count" as="xs:integer">
<xsl:choose>
<xsl:when test="self::tei:s or self::tei:p">
<xsl:choose>
<xsl:when test="$preceding-count">
<xsl:sequence select="
sum(for $p in $preceding
return
count($p/descendant::*))"/>
</xsl:when>
<xsl:otherwise>
<xsl:sequence select="0"/>
</xsl:otherwise>
</xsl:choose>
</xsl:when>
<xsl:otherwise>
<xsl:sequence select="0"/>
</xsl:otherwise>
</xsl:choose>
</xsl:variable>
<xsl:variable name="my_index" select="fn:accumulator-before('element-index')" as="xs:integer"/>
<!-- start offset: 0 for text and body; for p and s the first of the two numbers found in the
     corresp attribute of the first seg (the part between the first comma and the closing
     parenthesis is split at its comma) -->
<!-- NOTE(review): there is no branch for other elements, so for them the variable would be
     empty, which the declared type xs:integer does not allow; confirm which elements can
     reach this template -->
<xsl:variable name="start" as="xs:integer">
<xsl:choose>
<xsl:when test="self::tei:text or self::tei:body">
<xsl:sequence select="0"/>
</xsl:when>
<xsl:when test="self::tei:p">
<xsl:variable name="first_corresp"
select="descendant::tei:s[1]/descendant::tei:seg[1]/attribute::corresp"
as="attribute(corresp)"/>
<xsl:variable name="numbers" select="substring-after(substring-before($first_corresp,')'),',')"/>
<xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
</xsl:when>
<xsl:when test="self::tei:s">
<xsl:variable name="first_corresp"
select="descendant::tei:seg[1]/attribute::corresp"
as="attribute(corresp)"/>
<xsl:variable name="numbers" select="substring-after(substring-before($first_corresp,')'),',')"/>
<xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
</xsl:when>
<!--<xsl:when test="self::tei:seg">
<xsl:variable name="numbers" select="substring-after(substring-before(@corresp,')'),',')"/>
<xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
</xsl:when>-->
</xsl:choose>
</xsl:variable>
<!-- end offset as computed by f:calc_content_length for this element -->
<xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)">
</xsl:variable>
<xsl:apply-templates mode="morpho">
<xsl:with-param name="ini" select="$start" as="xs:integer"/>
<xsl:with-param name="fin" select="$end" as="xs:integer"/>
<xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
</xsl:apply-templates>
</xsl:template>
<!-- Mode morpho, tei:seg: emits one KorAP span carrying the disambiguated morphological
     reading of the segment.
     ini, fin: offsets received from the parent; not read in this template
     index: number the span id is derived from (id = 's' + index + 1 + number of preceding
            same-named siblings)
     ann_morphosyntax.xml: morphosyntax document in which the reading is looked up
     A stray comment-close token that used to follow the final apply-templates has been
     removed; it was ordinary text and would have been copied into the output. -->
<xsl:template match="tei:seg" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <!-- I have made a major mess here, but it works... it's so spread out
       because I wanted to make sure to be able to look up the individual
       constituent values, should anything go wrong -->
  <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
  <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
  <!-- the morphosyntax seg whose corresp, after the '#', names this segment -->
  <xsl:variable name="my_morph-seg" as="node()" select="$ann_morphosyntax.xml//tei:seg[substring-after(@corresp,'#') eq $my_id]"/>
  <!-- its disamb feature, and the id of the reading that feature points to through choice/@fVal -->
  <xsl:variable name="my_disamb" select="$my_morph-seg//tei:fs/tei:f[@name eq 'disamb']" as="node()"/>
  <xsl:variable name="my_choice-id" select="substring-after($my_disamb//tei:f[@name eq 'choice']/@fVal,'#')" as="xs:string"/>
  <!-- the lex feature structure under interps that contains the symbol with that id -->
  <xsl:variable name="my_choice-lex" select="$my_morph-seg//tei:f[@name eq 'interps']/tei:fs[@type eq 'lex'][descendant::tei:symbol[@xml:id eq $my_choice-id]]" as="node()"/>
  <!-- the value of that symbol is output as msd below -->
  <xsl:variable name="chosen-msd" as="xs:string" select="$my_choice-lex/descendant::tei:symbol[@xml:id eq $my_choice-id]/@value"/>
  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)"/>
  <!--<xsl:variable name="outside-preceding-count" as="xs:integer">
<xsl:choose>
<xsl:when test="self::tei:s or self::tei:p"> <!-\- THIS NEEDS TO BE REVISITED AFTER THIS TEMPLATE HAS BECOME MORE SPECIFIC -\->
<xsl:choose>
<xsl:when test="$preceding-count"> commented out for now
<xsl:sequence select="
sum(for $p in $preceding
return
count($p/descendant::*))"/>
</xsl:when>
<xsl:otherwise>
<xsl:sequence select="0"/>
</xsl:otherwise>
</xsl:choose>
</xsl:when>
<xsl:otherwise>
<xsl:sequence select="0"/>
</xsl:otherwise>
</xsl:choose>
</xsl:variable>-->
  <xsl:variable name="my_index" select="$index + 1 + $preceding-count" as="xs:integer"/>
  <!-- start offset: the first of the two numbers found in @corresp (the part between the
       first comma and the closing parenthesis is split at its comma) -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:variable name="numbers" select="substring-after(substring-before(@corresp,')'),',')"/>
    <xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>
  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <xsl:attribute name="type" select="'lex'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'lex'"/>
        <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
          <!-- the orthographic form, shown as a comment only -->
          <xsl:comment select="$my_morph-seg//tei:fs/tei:f[@name eq 'orth']/tei:string"/>
          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'lemma'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'base']/tei:string"/>
          </xsl:element>
          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'pos'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'ctag']/tei:symbol/@value"/>
          </xsl:element>
          <xsl:if test="string-length($chosen-msd)">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'msd'"/>
              <xsl:value-of select="$chosen-msd"/>
            </xsl:element>
          </xsl:if>
          <!-- a morphosyntax seg with an nps feature is marked as joined to the left -->
          <xsl:if test="$my_morph-seg//tei:fs/tei:f[@name eq 'nps']">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'join'"/>
              <xsl:value-of select="'left'"/>
            </xsl:element>
          </xsl:if>
        </xsl:element>
      </xsl:element>
    </xsl:element>
  </xsl:element>
  <xsl:apply-templates mode="morpho">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
  </xsl:apply-templates>
</xsl:template>
<!-- ************************** TEXT header ******************* -->
<!-- create_text_header: writes the header.xml of one text, an idsHeader element filled by
     applying mode text to the children of TEI/teiHeader.
     text.xml: source text document
     compoundID: handed to the templates as a tunnel parameter
     target: URI of the file to write -->
<xsl:template name="create_text_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>
  <!-- create the local header.xml file -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
      xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="text" select="$text.xml/descendant::tei:TEI/tei:teiHeader/tei:*">
        <xsl:with-param name="compoundID" tunnel="yes" as="xs:string" select="$compoundID"/>
      </xsl:apply-templates>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
<!-- mode text: containers rebuilt under their local name, child nodes processed in mode text -->
<xsl:template mode="text" match="tei:fileDesc | tei:publicationStmt | tei:profileDesc">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates mode="text"/>
  </xsl:element>
</xsl:template>
<!-- mode text: containers rebuilt under their local name; only their attributes and
     child elements are processed -->
<xsl:template mode="text" match="tei:availability | tei:textClass">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates select="@* | *" mode="text"/>
  </xsl:element>
</xsl:template>
<!-- mode text: a title becomes t.title; its content is processed in the unnamed mode -->
<xsl:template mode="text" match="tei:title">
  <t.title>
    <xsl:apply-templates/>
  </t.title>
</xsl:template>
<!-- mode text: titleStmt starts with a textSigle holding the tunnelled compound ID -->
<xsl:template mode="text" match="tei:titleStmt">
  <xsl:param name="compoundID" tunnel="yes" as="xs:string"/>
  <titleStmt>
    <textSigle>
      <xsl:value-of select="$compoundID"/>
    </textSigle>
    <xsl:apply-templates mode="text"/>
  </titleStmt>
</xsl:template>
<!-- modes text and corpus: catRef rebuilt under its local name; its attributes and child
     elements are processed in mode text -->
<xsl:template mode="text corpus" match="tei:catRef">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates select="@* | *" mode="text"/>
  </xsl:element>
</xsl:template>
<!-- modes text and corpus: the attributes that are carried over unchanged -->
<xsl:template mode="text corpus"
    match="@status | @scheme | @target | @type | @xml:id[ancestor::tei:classDecl] | @xml:lang">
  <xsl:copy/>
</xsl:template>
<!-- modes text and corpus: paragraphs rebuilt under their local name, content processed
     in mode header-text -->
<xsl:template mode="text corpus" match="tei:p">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates mode="header-text"/>
  </xsl:element>
</xsl:template>
<!-- OPTIMIZATION has to take modes into account -->
<!-- ************************** CORPUS header ******************* -->
<!-- create_corpus_header: writes the corpus-level header.xml, an idsHeader element filled by
     applying mode corpus to the children of teiCorpus/teiHeader.
     text.xml: any text document of the corpus
     target: URI of the file to write -->
<xsl:template name="create_corpus_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>
  <!-- create the corpus-level header.xml file -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes">
    <!--doctype-public="{$publicDoctypeI5}"
        doctype-system="{$systemDoctypeI5}">
        these are, sadly, useless
    -->
    <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="corpus" select="$text.xml/tei:teiCorpus/tei:teiHeader/tei:*"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
<!-- mode corpus: containers rebuilt under their local name, child nodes processed in mode corpus -->
<xsl:template mode="corpus" match="tei:fileDesc | tei:publicationStmt | tei:encodingDesc">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates mode="corpus"/>
  </xsl:element>
</xsl:template>
<!-- mode corpus: containers rebuilt under their local name; only their attributes and
     child elements are processed -->
<xsl:template mode="corpus"
    match="tei:availability | tei:classDecl | tei:taxonomy | tei:category | tei:taxonomy/tei:bibl">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates select="@* | *" mode="corpus"/>
  </xsl:element>
</xsl:template>
<!-- mode corpus: a title becomes c.title; attributes go through mode corpus, content through
     mode header-text -->
<xsl:template mode="corpus" match="tei:title">
  <c.title>
    <xsl:apply-templates select="@*" mode="corpus"/>
    <xsl:apply-templates mode="header-text"/>
  </c.title>
</xsl:template>
<!-- mode corpus: titleStmt starts with a korpusSigle holding $corpusID -->
<xsl:template mode="corpus" match="tei:titleStmt">
  <titleStmt>
    <korpusSigle>
      <xsl:value-of select="$corpusID"/>
    </korpusSigle>
    <xsl:apply-templates mode="corpus"/>
  </titleStmt>
</xsl:template>
<!-- mode corpus: text-bearing elements rebuilt under their local name; attributes go through
     mode corpus, content through mode header-text -->
<xsl:template mode="corpus" match="tei:bibl/tei:title | tei:edition | tei:desc">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates select="@*" mode="corpus"/>
    <xsl:apply-templates mode="header-text"/>
  </xsl:element>
</xsl:template>
<!--
<xsl:template match="tei:textClass" mode="corpus">
<xsl:element name="{local-name()}">
<xsl:apply-templates mode="corpus" select="@* | *"/>
</xsl:element>
</xsl:template>
<xsl:template match="tei:catRef" mode="corpus">
<xsl:element name="{local-name()}">
<xsl:apply-templates mode="corpus" select="@* | *"/>
</xsl:element>
</xsl:template>
-->
<!-- this template can be called by the XSPEC test; TODO: find a way to call the main() template directly -->
<!-- I have not fully handled the param transmission, which would have to be kludged in just for the sake of XSPec,
because I'm disabling this for now, due to XSpec design issues; relevant links, a.o.:
https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html
In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in their stylesheets. But I don't
want to be strangled by kludges at the beginning of work, I've already lost quite a bit of time on this investigation,
I will therefore "just code" and then can think of externalizing bits of templates if we want to play with tests. For now,
I don't want to have to handle context items in a special way inside variables, etc., because I'm not sure it's worth it.
-->
<!--<xsl:template name="test_full">
<xsl:param name="corpusID"/>
<xsl:param name="docID"/>
<xsl:param name="textID"/>
<xsl:call-template name="xsl:initial-template"/>
</xsl:template>-->
<!-- f:calc_content_length($node): returns the sum of the two integers found in a corresp
     attribute, taken from the text between its first comma and the following closing
     parenthesis (presumably a start offset and a length, which would make the sum an end
     offset; confirm against the NKJP corresp format).
     The attribute is chosen according to $node:
       tei:text, tei:body  last accepted seg of the last s of the last p
       tei:p               last accepted seg of the last s
       tei:s               last accepted seg
       anything else       the corresp attribute of $node itself
     A seg is accepted when it has no nkjp:rejected attribute or one that is not 'true'.
     A missing attribute is a type error in every branch. -->
<xsl:function name="f:calc_content_length" as="xs:integer">
  <xsl:param name="node" as="node()"/>
  <xsl:variable name="corresp" as="attribute(corresp)">
    <xsl:choose>
      <xsl:when test="$node/self::tei:text or $node/self::tei:body">
        <xsl:sequence
            select="$node/descendant::tei:p[last()]/descendant::tei:s[last()]/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/attribute::corresp"/>
      </xsl:when>
      <xsl:when test="$node/self::tei:p">
        <xsl:sequence
            select="$node/descendant::tei:s[last()]/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/attribute::corresp"/>
      </xsl:when>
      <xsl:when test="$node/self::tei:s">
        <xsl:sequence
            select="$node/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/attribute::corresp"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:sequence select="$node/@corresp"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>
  <!-- the two comma-separated numbers, e.g. '0,5' -->
  <xsl:variable name="numbers" as="xs:string"
      select="substring-after(substring-before($corresp,')'),',')"/>
  <xsl:sequence
      select="xs:integer(substring-before($numbers,',')) + xs:integer(substring-after($numbers,','))"/>
</xsl:function>
</xsl:stylesheet>
<!-- template for serializing maps in messages <xsl:message select="('map:',serialize($map, map{'method':'adaptive'}))"/> -->