blob: 03f66f3558532a2b761f5f651738bfd61421905d [file] [log] [blame]
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f map nkjp tei"
version="3.0" expand-text="yes">
<!-- PARAMETERS -->
<!-- sourceDir: the directory containing NKJP files, in the form of a collection of text-level dirs
     (that is how we know both the $corpusID and the $docID) -->
<xsl:param name="sourceDir" as="xs:string" select="'test/resources/nkjp2korap_sample2'"/>
<!-- targetDir: where the corpus/document/text/annotations hierarchy is going to be created -->
<xsl:param name="targetDir" as="xs:string" select="'test/output'"/>
<!-- skip_docID: comma-separated list of document IDs to be skipped from processing
     example: HellerPodgladanie,KOT
     entries are matched by plain string identity, nothing more is supported
     (this is just for testing) -->
<xsl:param name="skip_docID" as="xs:string"
  select="'HellerPodgladanie,IsakowiczZaleskiMoje,KolakowskiOco,MysliwskiKamien,WilkWilczy,ZycieWarszawy_Zycie'"/>
<!-- VARIABLES (= constants...) -->
<!-- corpus and document sigles; both are fixed strings for this conversion -->
<xsl:variable name="corpusID" select="'NKJP'" as="xs:string" static="yes"/>
<xsl:variable name="docID" select="'NKJP'" as="xs:string" static="yes"/>
<!-- output directory of the corpus, with a trailing slash -->
<xsl:variable name="targetCorpusDir_slashed" as="xs:string"
  select="concat($targetDir, '/', $corpusID, '/')"/>
<!-- I5 DOCTYPE identifiers -->
<xsl:variable name="systemDoctypeI5" select="'http://corpora.ids-mannheim.de/I5/DTD/i5.dtd'"
  as="xs:string" static="yes"/>
<xsl:variable name="publicDoctypeI5" select="'-//IDS//DTD I5 1.0//EN'"
  as="xs:string" static="yes"/>
<!-- namespace of the generated KorAP-XML elements -->
<xsl:variable name="KorAP_namespace" select="'http://ids-mannheim.de/ns/KorAP'"
  as="xs:string" static="yes"/>
<xsl:variable name="KorAP-XML_version" select="'KorAP-0.4'" as="xs:string" static="yes"/>
<!-- this is only a bit funny -->
<!-- query parameters appended to the collection URI;
     see https://www.saxonica.com/documentation11/index.html#!sourcedocs/collections/collection-directories -->
<xsl:variable name="collection_params" as="xs:string" static="yes"
  select="'recurse=yes;validation=strip;select=text.xml;content-type=application/xml;on-error=warning;xinclude=yes'"/>
<!-- every text.xml found under $sourceDir; at least one document is required -->
<xsl:variable name="collection_of_text" as="document-node()+"
  select="collection(concat($sourceDir, '?', $collection_params))"/>
<!-- these two 'flags' are meant to increase the readability of the code:
     they index into the sequence (start, end) returned by the calc_offsets() function
     remove together with the function!
-->
<xsl:variable name="OFFSET_START" select="1" as="xs:integer" static="yes"/>
<xsl:variable name="OFFSET_END" select="2" as="xs:integer" static="yes"/>
<!-- MODES -->
<!-- corpus / text: a node with no matching template is skipped together with its whole subtree -->
<xsl:mode on-no-match="deep-skip" name="corpus"/>
<xsl:mode on-no-match="deep-skip" name="text"/>
<!-- header-text: unmatched markup is dropped, only the text content is copied -->
<xsl:mode on-no-match="text-only-copy" name="header-text"/>
<!-- FUNCTIONS -->
<!-- Returns the nesting level of $node: the number of elements on its ancestor-or-self axis,
     leaving out any element whose local name is 'TEI' or 'teiCorpus'. -->
<xsl:function name="f:compute_nesting" as="xs:integer">
  <xsl:param name="node" as="element()"/>
  <xsl:sequence
    select="count($node/ancestor-or-self::*[not(local-name() = ('TEI', 'teiCorpus'))])"/>
</xsl:function>
<!-- Tells whether whitespace is to be assumed in front of $node, which must be a seg, s or p element
     (the decision is made on the local name only).
       seg: true unless the seg carries @nkjp:nps, or it has no preceding seg sibling inside an s
            without a preceding s sibling inside a p without a preceding p sibling;
       s:   true when an s sibling precedes it, or a p sibling precedes its nearest p ancestor;
       p:   true when a p sibling precedes it.
     Any other element terminates the transformation with an error message. -->
<xsl:function name="f:is_preceded_by_ws" as="xs:boolean">
  <xsl:param name="node" as="element()"/>
  <xsl:choose>
    <xsl:when test="local-name($node) eq 'seg'">
      <xsl:sequence
        select="not(exists($node/@nkjp:nps)) and not($node[count(preceding-sibling::tei:seg) eq 0]/ancestor::tei:s[count(preceding-sibling::tei:s) eq 0]/ancestor::tei:p[count(preceding-sibling::tei:p) eq 0])"/>
      <!--and not($node/ancestor::tei:s[count(preceding-sibling::tei:s) eq 0]/ancestor::tei:p[count(preceding-sibling::tei:p) eq 0])-->
    </xsl:when>
    <xsl:when test="local-name($node) eq 's'">
      <!-- debugging output; string-join() is needed because '||' only accepts a single item per operand
           and would raise a type error as soon as there are two or more preceding siblings -->
      <xsl:message select="'s - prec s: ' || string-join($node/preceding-sibling::tei:s, ' ')"/>
      <xsl:message select="'same s - prec p: ' || string-join($node/ancestor::tei:p[1]/preceding-sibling::tei:p, ' ') || '&#10;'"/>
      <xsl:sequence select="exists($node/preceding-sibling::tei:s) or exists($node/ancestor::tei:p[1]/preceding-sibling::tei:p)"/>
    </xsl:when>
    <xsl:when test="local-name($node) eq 'p'">
      <!-- debugging output, made safe for several preceding paragraphs (see above) -->
      <xsl:message select="'p : ' || string-join($node/preceding-sibling::tei:p, ' ')"/>
      <xsl:sequence select="exists($node/preceding-sibling::tei:p)"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:message terminate="yes" select="'Wrong argument passed to f:is_preceded_by_ws(): ' || local-name($node) || ' Only p, s, seg are allowed.'"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
<!-- UTILITY TEMPLATES -->
<!-- delete an auto-inserted attribute (@default) throughout, whatever the mode -->
<xsl:template mode="#all" match="@default"/>
<!--<xsl:template match="tei:w" mode="#all"/> w is better than ab, now ... -->
<!-- NKJP-SGJP has apparently resigned from standoff representations by adding <w> everywhere;
     for the time being, we'll just stick to the standoff offsets, although that may need to
     be revisited as the NKJP format has now begun to stray from its schemas and assumptions -->
<!-- Suppressing tei:choice in every mode IS ONLY TEMPORARY,
     because an interesting challenge came up where I will
     probably have to abandon straightforward mapping because of TOKENIZATION alternatives;
     but now, I just want this stylesheet to work, even if it eats some occasional token
     (which it now does: 'komuś' and 'czym' vanish)
-->
<xsl:template mode="#all" match="tei:choice"/>
<!-- MAIN PROCESSING -->
<!-- Entry point. Writes the corpus-level header once, then converts every text.xml of the
     collection, except those whose text ID is listed in $skip_docID. -->
<xsl:template name="xsl:initial-template">
  <xsl:variable name="IDs_to_skip" as="xs:string*" select="tokenize($skip_docID, ',')"/>
  <!-- we only want to call the template below once, and we process a random NKJP corpus file for that purpose,
       because all we need is the main corpus header, and we can (should) get to that from any NKJP corpus document -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param name="text.xml" select="$collection_of_text[1]" as="document-node()"/>
    <xsl:with-param name="target" select="$targetCorpusDir_slashed || 'header.xml'" as="xs:string"/>
  </xsl:call-template>
  <xsl:for-each select="$collection_of_text">
    <!-- the text-level directory is the document URI minus its final '/text.xml';
         the pattern is anchored so that only the trailing file name can ever be removed -->
    <xsl:variable name="my_dir" as="xs:string" select="replace(base-uri(), '/text\.xml$', '')"/>
    <!-- the name of that directory serves as the text ID -->
    <xsl:variable name="my_textID" as="xs:string" select="tokenize($my_dir, '/')[last()]"/>
    <!-- skipping is a utility step, for when we want to ignore some texts for any reason
         (debugging, selective update) -->
    <xsl:if test="not($my_textID = $IDs_to_skip)">
      <xsl:variable name="ann_morphosyntax.uri" as="xs:string" select="$my_dir || '/ann_morphosyntax.xml'"/>
      <xsl:variable name="ann_segmentation.uri" as="xs:string" select="$my_dir || '/ann_segmentation.xml'"/>
      <xsl:call-template name="process_single_sample">
        <xsl:with-param name="text.xml" as="document-node()" select="."/>
        <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
          select="doc($ann_morphosyntax.uri)"/>
        <xsl:with-param name="ann_segmentation.xml" as="document-node()"
          select="doc($ann_segmentation.uri)"/>
        <xsl:with-param name="my_textID" select="$my_textID" as="xs:string"/>
      </xsl:call-template>
    </xsl:if>
  </xsl:for-each>
</xsl:template>
<!-- Produces the output files of one text: data.xml, struct/structure.xml and header.xml
     (the morphological layer is switched off for now, see the commented-out call below). -->
<xsl:template name="process_single_sample">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <!-- an empty textID should never happen, but if it does, it will be signalled at the top of the output -->
  <xsl:param name="my_textID" as="xs:string" select="'0-BAD_textID'"/>
  <!-- this is what occurs in the text and data layers as @docid -->
  <xsl:variable name="sigle" as="xs:string"
    select="concat($corpusID, '_', $docID, '.', $my_textID)"/>
  <!-- directory receiving all files of this text -->
  <xsl:variable name="out_dir" as="xs:string"
    select="concat($targetCorpusDir_slashed, $docID, '/', $my_textID)"/>
  <xsl:call-template name="create_data">
    <!--<xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>-->
    <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$sigle"/>
    <xsl:with-param name="target" as="xs:string" select="concat($out_dir, '/data.xml')"/>
  </xsl:call-template>
  <xsl:call-template name="create_struct">
    <xsl:with-param name="compoundID" as="xs:string" select="$sigle"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
    <xsl:with-param name="target" as="xs:string" select="concat($out_dir, '/struct/structure.xml')"/>
  </xsl:call-template>
  <!-- <xsl:call-template name="create_morpho">
    <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
    <xsl:with-param name="compoundID" select="$sigle" as="xs:string"/>
    <xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml"
      as="document-node()"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml"
      as="document-node()"/>
    <xsl:with-param name="target" select="$out_dir || '/nkjp/morpho.xml'" as="xs:string"/>
  </xsl:call-template>
  -->
  <xsl:call-template name="create_text_header">
    <xsl:with-param name="text.xml" as="document-node()" select="$text.xml"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$sigle"/>
    <xsl:with-param name="target" as="xs:string" select="concat($out_dir, '/header.xml')"/>
  </xsl:call-template>
</xsl:template>
<!-- ************************** data.xml ******************* -->
<!-- Writes data.xml: a raw_text element (KorAP namespace) holding the plain text rebuilt from the
     non-rejected segs of the segmentation layer.
       ann_segmentation.xml  the parsed ann_segmentation.xml of the text
       compoundID            value of the @docid attribute
       target                URI of the file to be written -->
<xsl:template name="create_data">
  <!--<xsl:param name="text.xml" as="document-node()"/>-->
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>
  <!-- create the data.xml file;
       xpath-default-namespace is a standard attribute, not an attribute value template, so the
       namespace has to be spelled out literally here (as in the other create_* templates) -->
  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">
    <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>
      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <!-- per seg: a blank (or nothing, when no whitespace precedes the seg) followed by the
             content of its w child; the pieces are glued together without any separator -->
        <xsl:variable name="content" as="xs:string+">
          <xsl:for-each select="$ann_segmentation.xml/tei:teiCorpus/tei:TEI/tei:text/tei:body/tei:p/tei:s//tei:seg[count(@nkjp:rejected) eq 0]">
            <xsl:sequence select="
                if (f:is_preceded_by_ws(.)) then
                  ' '
                else
                  '', ./tei:w"/>
          </xsl:for-each>
        </xsl:variable>
        <xsl:value-of select="string-join($content)"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
<!-- ************************** struct ******************* -->
<!-- Writes struct/structure.xml. For now the file only carries an empty spanList inside a layer
     element; a lookup map over all non-rejected segs is built and its size is reported.
       compoundID            value of the @docid attribute
       ann_segmentation.xml  the parsed ann_segmentation.xml of the text
       target                URI of the file to be written -->
<xsl:template name="create_struct">
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>
  <!-- map the entire document, so that the processing only takes place once, and for fast lookups;
       key: @xml:id of the seg
       value: (paragraph id, sentence id, position of the seg within its sentence group,
               whitespace flag, whitespace-normalized content of the w child) -->
  <xsl:variable name="map_w" as="map(xs:untypedAtomic,item()+)">
    <xsl:variable name="accepted_segs" as="element()+"
      select="$ann_segmentation.xml/tei:teiCorpus/tei:TEI/tei:text/tei:body/tei:p/tei:s//tei:seg[not(@nkjp:rejected)]"/>
    <xsl:map>
      <!-- outer grouping: by paragraph; inner grouping: by sentence -->
      <xsl:for-each-group select="$accepted_segs" group-by="ancestor::tei:p[1]/@xml:id">
        <xsl:variable name="p_id" select="current-grouping-key()"/>
        <xsl:for-each-group select="current-group()" group-by="ancestor::tei:s[1]/@xml:id">
          <xsl:variable name="s_id" select="current-grouping-key()"/>
          <xsl:for-each select="current-group()">
            <xsl:map-entry key="@xml:id"
              select="$p_id, $s_id, position(), f:is_preceded_by_ws(.), normalize-space(tei:w)"/>
          </xsl:for-each>
        </xsl:for-each-group>
      </xsl:for-each-group>
    </xsl:map>
  </xsl:variable>
  <xsl:message select="'size: ' || map:size($map_w)"/>
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:attribute name="version" select="$KorAP-XML_version"/>
      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <!--<xsl:apply-templates select="$ann_segmentation.xml//tei:text" mode="struct"/> -->
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
<!-- struct mode: emits one KorAP span per TEI element and then recurses into its children.
       ini, fin  offsets handed down by the parent (not used for the computations below)
       index     running index handed down by the parent; the span id is derived from it -->
<xsl:template match="tei:*" mode="struct">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <!-- The values are kept apart on purpose, so that each constituent can be looked up should
       anything go wrong; optimization will come when this has been run against a larger dataset -->
  <xsl:variable name="elem_name" as="xs:string" select="local-name()"/>
  <!-- preceding siblings bearing the same local name -->
  <xsl:variable name="earlier_namesakes" select="preceding-sibling::*[local-name(.) eq $elem_name]"/>
  <xsl:variable name="namesake_count" select="count($earlier_namesakes)"/>
  <!-- for s and p only: total number of elements nested in those preceding siblings
       (mind @nkjp:rejected) -->
  <xsl:variable name="nested_before" as="xs:integer"
    select="
      if ((self::tei:s or self::tei:p) and $namesake_count) then
        sum(for $sib in $earlier_namesakes
          return
            count($sib/descendant::*))
      else
        0"/>
  <xsl:variable name="span_no" as="xs:integer"
    select="$index + 1 + $namesake_count + $nested_before"/>
  <!--<xsl:copy select="//tei:seg[count(@nkjp:rejected) ne 0 and @nkjp:rejected ne 'true']"></xsl:copy>-->
  <!-- start offset: 0 for text and body; otherwise the first number after the first comma of the
       relevant @corresp (of the first seg for p and s, of the element itself for seg) -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p">
        <xsl:variable name="anchor" as="attribute(corresp)"
          select="descendant::tei:s[1]/descendant::tei:seg[1]/@corresp"/>
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before($anchor, ')'), ','), ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:s">
        <xsl:variable name="anchor" as="attribute(corresp)"
          select="descendant::tei:seg[1]/@corresp"/>
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before($anchor, ')'), ','), ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:seg">
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before(@corresp, ')'), ','), ','))"/>
      </xsl:when>
    </xsl:choose>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>
  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $span_no"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <!-- STRUCT vs. LEX -->
      <xsl:attribute name="type" select="'struct'"/>
      <!-- the name of the source element -->
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'name'"/>
        <xsl:value-of select="local-name()"/>
      </xsl:element>
      <!-- the attributes of the source element, one f per attribute -->
      <xsl:if test="exists(@*)">
        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
          <xsl:attribute name="name" select="'attr'"/>
          <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="type" select="'attr'"/>
            <xsl:for-each select="@*">
              <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                <xsl:attribute name="name" select="local-name(.)"/>
                <xsl:value-of select="."/>
              </xsl:element>
            </xsl:for-each>
          </xsl:element>
        </xsl:element>
      </xsl:if>
    </xsl:element>
  </xsl:element>
  <xsl:apply-templates mode="struct">
    <xsl:with-param name="ini" as="xs:integer" select="$start"/>
    <xsl:with-param name="fin" as="xs:integer" select="$end"/>
    <xsl:with-param name="index" select="$span_no"/>
  </xsl:apply-templates>
</xsl:template>
<!-- ************************** morpho ******************* -->
<!-- Writes the morphological layer: a layer/spanList skeleton (KorAP namespace) filled by
     processing the text element(s) of the segmentation layer in morpho mode.
       text.xml              accepted, but not used in this template
       compoundID            value of the @docid attribute
       ann_segmentation.xml  the parsed ann_segmentation.xml of the text
       ann_morphosyntax.xml  the parsed ann_morphosyntax.xml, passed on to the morpho templates
       target                URI of the file to be written -->
<xsl:template name="create_morpho">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:attribute name="version" select="$KorAP-XML_version"/>
      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:apply-templates mode="morpho" select="$ann_segmentation.xml//tei:text">
          <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
            select="$ann_morphosyntax.xml"/>
        </xsl:apply-templates>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
<!-- morpho mode, generic TEI element: produces no output of its own; it only works out the
     running index and the offsets and hands them down to the children.
       ini, fin              offsets handed down by the parent (not used for the computations below)
       index                 running index handed down by the parent
       ann_morphosyntax.xml  the morphosyntactic layer, passed through unchanged -->
<xsl:template match="tei:*" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:variable name="elem_name" as="xs:string" select="local-name()"/>
  <!-- preceding siblings bearing the same local name -->
  <xsl:variable name="earlier_namesakes" select="preceding-sibling::*[local-name(.) eq $elem_name]"/>
  <xsl:variable name="namesake_count" select="count($earlier_namesakes)"/>
  <!-- for s and p only: total number of elements nested in those preceding siblings -->
  <xsl:variable name="nested_before" as="xs:integer"
    select="
      if ((self::tei:s or self::tei:p) and $namesake_count) then
        sum(for $sib in $earlier_namesakes
          return
            count($sib/descendant::*))
      else
        0"/>
  <xsl:variable name="span_no" as="xs:integer"
    select="$index + 1 + $namesake_count + $nested_before"/>
  <!-- start offset: 0 for text and body; for p and s the first number after the first comma of the
       @corresp of their first seg; there is no branch for any other element
       (the one for seg is commented out, segs have a template of their own) -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p">
        <xsl:variable name="anchor" as="attribute(corresp)"
          select="descendant::tei:s[1]/descendant::tei:seg[1]/@corresp"/>
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before($anchor, ')'), ','), ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:s">
        <xsl:variable name="anchor" as="attribute(corresp)"
          select="descendant::tei:seg[1]/@corresp"/>
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before($anchor, ')'), ','), ','))"/>
      </xsl:when>
      <!--<xsl:when test="self::tei:seg">
        <xsl:variable name="numbers" select="substring-after(substring-before(@corresp,')'),',')"/>
        <xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
      </xsl:when>-->
    </xsl:choose>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>
  <xsl:apply-templates mode="morpho">
    <xsl:with-param name="ini" as="xs:integer" select="$start"/>
    <xsl:with-param name="fin" as="xs:integer" select="$end"/>
    <xsl:with-param name="index" select="$span_no"/>
    <xsl:with-param name="ann_morphosyntax.xml" as="document-node()" select="$ann_morphosyntax.xml"/>
  </xsl:apply-templates>
</xsl:template>
<!-- morpho mode, seg: emits one KorAP span carrying the lemma, the part of speech and, when
     present, the morphosyntactic description chosen by the disambiguation in the
     morphosyntactic layer.
       ini, fin              offsets handed down by the parent (not used here)
       index                 running index handed down by the parent; the span id is derived from it
       ann_morphosyntax.xml  the morphosyntactic layer in which the analysis of this seg is looked up -->
<xsl:template match="tei:seg" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <!-- I have made a major mess here, but it works... it's so spread out
       because I wanted to make sure to be able to look up the individual
       constituent values, should anything go wrong -->
  <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
  <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
  <!-- the seg of the morphosyntactic layer whose @corresp points (after the '#') at this seg;
       exactly one such node is required -->
  <xsl:variable name="my_morph-seg" as="node()" select="$ann_morphosyntax.xml//tei:seg[substring-after(@corresp,'#') eq $my_id]"/>
  <!-- the disambiguation feature and the id of the interpretation it selects -->
  <xsl:variable name="my_disamb" select="$my_morph-seg//tei:fs/tei:f[@name eq 'disamb']" as="node()"/>
  <xsl:variable name="my_choice-id" select="substring-after($my_disamb//tei:f[@name eq 'choice']/@fVal,'#')" as="xs:string"/>
  <!-- the lexical feature structure containing the selected symbol, and the value of that symbol -->
  <xsl:variable name="my_choice-lex" select="$my_morph-seg//tei:f[@name eq 'interps']/tei:fs[@type eq 'lex'][descendant::tei:symbol[@xml:id eq $my_choice-id]]" as="node()"/>
  <xsl:variable name="chosen-msd" as="xs:string" select="$my_choice-lex/descendant::tei:symbol[@xml:id eq $my_choice-id]/@value"/>
  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)"/>
  <!-- unlike in the generic tei:* template, the elements nested in preceding siblings are not
       added to the index here; THIS NEEDS TO BE REVISITED now that the template is seg-specific -->
  <xsl:variable name="my_index" select="$index + 1 + $preceding-count" as="xs:integer"/>
  <!-- start offset: the first number after the first comma of @corresp -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:variable name="numbers" select="substring-after(substring-before(@corresp,')'),',')"/>
    <xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>
  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <xsl:attribute name="type" select="'lex'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'lex'"/>
        <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
          <!-- the orthographic form goes into a comment, for the human reader only -->
          <xsl:comment select="$my_morph-seg//tei:fs/tei:f[@name eq 'orth']/tei:string"/>
          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'lemma'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'base']/tei:string"/>
          </xsl:element>
          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'pos'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'ctag']/tei:symbol/@value"/>
          </xsl:element>
          <!-- msd is only written when the chosen symbol has a non-empty value -->
          <xsl:if test="string-length($chosen-msd)">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'msd'"/>
              <xsl:value-of select="$chosen-msd"/>
            </xsl:element>
          </xsl:if>
          <!-- an 'nps' feature in the morphosyntactic layer is rendered as join=left -->
          <xsl:if test="$my_morph-seg//tei:fs/tei:f[@name eq 'nps']">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'join'"/>
              <xsl:value-of select="'left'"/>
            </xsl:element>
          </xsl:if>
        </xsl:element>
      </xsl:element>
    </xsl:element>
  </xsl:element>
  <!-- A stray comment terminator used to follow this instruction and was written to the output
       as literal text after every span; it has been removed.
       NOTE(review): that leftover suggests the recursion below was once commented out; confirm
       that descending into the children of seg is still wanted. -->
  <xsl:apply-templates mode="morpho">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
  </xsl:apply-templates>
</xsl:template>
<!-- ************************** TEXT header ******************* -->
<!-- Writes the text-level header.xml: an idsHeader filled by processing the children of the
     teiHeader of the text in text mode.
       text.xml    the parsed text.xml of the text
       compoundID  sigle of the text, tunnelled down to the template that writes textSigle
       target      URI of the file to be written -->
<xsl:template name="create_text_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>
  <!-- create the local header.xml file -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="text" select="$text.xml//tei:TEI/tei:teiHeader/tei:*">
        <xsl:with-param name="compoundID" tunnel="yes" as="xs:string" select="$compoundID"/>
      </xsl:apply-templates>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
<!-- containers re-created under their local name; all their children are processed in text mode -->
<xsl:template match="tei:fileDesc | tei:publicationStmt | tei:profileDesc" mode="text">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates mode="text"/>
  </xsl:element>
</xsl:template>
<!-- containers re-created under their local name; only attributes and child elements are processed -->
<xsl:template match="tei:availability | tei:textClass" mode="text">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates select="@* | *" mode="text"/>
  </xsl:element>
</xsl:template>
<!-- the title of a text becomes t.title; its content is processed in the default mode -->
<xsl:template match="tei:title" mode="text">
  <t.title>
    <xsl:apply-templates/>
  </t.title>
</xsl:template>
<!-- titleStmt gets the sigle of the text (a tunnel parameter) in front of its processed children -->
<xsl:template match="tei:titleStmt" mode="text">
  <xsl:param name="compoundID" tunnel="yes" as="xs:string"/>
  <titleStmt>
    <textSigle>
      <xsl:value-of select="$compoundID"/>
    </textSigle>
    <xsl:apply-templates mode="text"/>
  </titleStmt>
</xsl:template>
<!-- catRef, shared by the text and corpus headers; its attributes and child elements are
     always processed in text mode -->
<xsl:template match="tei:catRef" mode="text corpus">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates select="@* | *" mode="text"/>
  </xsl:element>
</xsl:template>
<!-- the attributes that are carried over unchanged (xml:id only inside classDecl) -->
<xsl:template mode="text corpus"
  match="@status | @scheme | @target | @type | @xml:id[ancestor::tei:classDecl] | @xml:lang">
  <xsl:copy/>
</xsl:template>
<!-- paragraphs of either header: re-created under their local name, content handled in header-text mode -->
<xsl:template match="tei:p" mode="text corpus">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates mode="header-text"/>
  </xsl:element>
</xsl:template>
<!-- OPTIMIZATION has to take modes into account -->
<!-- ************************** CORPUS header ******************* -->
<!-- Writes the corpus-level header.xml: an idsHeader filled by processing the children of the
     teiCorpus-level teiHeader in corpus mode.
       text.xml  any parsed text.xml of the corpus (the corpus header is read from it)
       target    URI of the file to be written -->
<xsl:template name="create_corpus_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>
  <!-- create the corpus-level header.xml file -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes">
    <!--doctype-public="{$publicDoctypeI5}"
        doctype-system="{$systemDoctypeI5}">
        these are, sadly, useless
    -->
    <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="corpus" select="$text.xml/tei:teiCorpus/tei:teiHeader/tei:*"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
<!-- containers re-created under their local name; all their children are processed in corpus mode -->
<xsl:template match="tei:fileDesc | tei:publicationStmt | tei:encodingDesc" mode="corpus">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates mode="corpus"/>
  </xsl:element>
</xsl:template>
<!-- containers re-created under their local name; only attributes and child elements are processed -->
<xsl:template mode="corpus"
  match="tei:availability | tei:classDecl | tei:taxonomy | tei:category | tei:taxonomy/tei:bibl">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates select="@* | *" mode="corpus"/>
  </xsl:element>
</xsl:template>
<!-- the corpus title becomes c.title: attributes first, then the content in header-text mode -->
<xsl:template match="tei:title" mode="corpus">
  <c.title>
    <xsl:apply-templates select="@*" mode="corpus"/>
    <xsl:apply-templates mode="header-text"/>
  </c.title>
</xsl:template>
<!-- titleStmt gets the corpus sigle in front of its processed children -->
<xsl:template match="tei:titleStmt" mode="corpus">
  <titleStmt>
    <korpusSigle>
      <xsl:value-of select="$corpusID"/>
    </korpusSigle>
    <xsl:apply-templates mode="corpus"/>
  </titleStmt>
</xsl:template>
<!-- text-bearing leaves: re-created under their local name, attributes in corpus mode,
     content in header-text mode -->
<xsl:template match="tei:bibl/tei:title | tei:edition | tei:desc" mode="corpus">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates select="@*" mode="corpus"/>
    <xsl:apply-templates mode="header-text"/>
  </xsl:element>
</xsl:template>
<!--
<xsl:template match="tei:textClass" mode="corpus">
<xsl:element name="{local-name()}">
<xsl:apply-templates mode="corpus" select="@* | *"/>
</xsl:element>
</xsl:template>
<xsl:template match="tei:catRef" mode="corpus">
<xsl:element name="{local-name()}">
<xsl:apply-templates mode="corpus" select="@* | *"/>
</xsl:element>
</xsl:template>
-->
<!-- this template can be called by the XSPEC test; TODO: find a way to call the main() template directly -->
<!-- I have not fully handled the param transmission, which would have to be kludged in just for the sake of XSpec;
I'm disabling this for now, due to XSpec design issues; relevant links, a.o.:
https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html
In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in their stylesheets. But I don't
want to be strangled by kludges at the beginning of work, I've already lost quite a bit of time on this investigation,
I will therefore "just code" and then can think of externalizing bits of templates if we want to play with tests. For now,
I don't want to have to handle context items in a special way inside variables, etc., because I'm not sure it's worth it.
-->
<!--<xsl:template name="test_full">
<xsl:param name="corpusID"/>
<xsl:param name="docID"/>
<xsl:param name="textID"/>
<xsl:call-template name="xsl:initial-template"/>
</xsl:template>-->
<!-- Returns the sum of the two comma-separated integers found in a @corresp value, between its
     first comma and its first closing parenthesis. The first of them is what the span templates
     use as start offset, so the result is presumably the end offset (start + length) rather
     than a length; confirm against the NKJP string-range format.
     Which @corresp is read depends on $node:
       text, body  last accepted seg of the last s of the last p
       p           last accepted seg of its last s
       s           its last accepted seg
       otherwise   the @corresp of $node itself
     'accepted' means: no @nkjp:rejected attribute, or one whose value is not 'true'. -->
<xsl:function name="f:calc_content_length" as="xs:integer">
  <xsl:param name="node" as="node()"/>
  <xsl:variable name="numbers" as="xs:string">
    <xsl:choose>
      <xsl:when test="$node/self::tei:text or $node/self::tei:body or $node/self::tei:p or $node/self::tei:s">
        <!-- the sentence in which the last seg is looked for -->
        <xsl:variable name="last_s" as="element()*"
          select="
            if ($node/self::tei:s) then
              $node
            else
              if ($node/self::tei:p) then
                $node/descendant::tei:s[last()]
              else
                $node/descendant::tei:p[last()]/descendant::tei:s[last()]"/>
        <!-- exactly one @corresp is required here -->
        <xsl:variable name="last_corresp" as="attribute(corresp)"
          select="$last_s/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/attribute::corresp"/>
        <xsl:sequence select="substring-after(substring-before($last_corresp, ')'), ',')"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:sequence select="substring-after(substring-before($node/@corresp, ')'), ',')"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>
  <xsl:if test="$node/self::tei:seg and count($node/@nkjp:rejected)">
    <!-- REMOVE THIS -->
    <xsl:message select="$numbers"/>
  </xsl:if>
  <xsl:sequence
    select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"/>
</xsl:function>
<!-- f:calc_offsets
     Returns a pair of integers (start, end) for $node, computed from the string-range
     arguments found on seg/@corresp. Callers index the pair with $OFFSET_START and
     $OFFSET_END, which are defined elsewhere in this stylesheet (presumably 1 and 2; confirm).

     $node        a tei:text, tei:body, tei:p, tei:s or tei:seg element
     $skip_start  when true, the start is forced to 0 and 1 is added to the end, so that
                  the end item equals the bare length value computed below
     returns      ($start, $start + $length - 1 + xs:integer($skip_start))

     A @corresp value is parsed as "...(ID,START,LENGTH)": the text between the first comma
     and the first closing parenthesis is taken and split at its comma.

     How $start is obtained (when $skip_start is false):
       * text, body: 0
       * p: 0 when there is no earlier p, otherwise the end item of
            f:calc_offsets(previous p, true())
       * s: (end item of f:calc_offsets(previous s in the same p, true()), or 0)
            + start item of f:calc_offsets(enclosing p, false())
       * seg: START from its own @corresp
            + start item of f:calc_offsets(enclosing p, false())
       * any other element: an error is raised

     How $length is obtained: START + LENGTH is computed for each selected @corresp and
     the results are summed. The selected attributes are:
       * text, body: for every descendant p, the last seg not marked nkjp:rejected="true"
                     inside the last s of that p
       * p: the last such seg inside its last s
       * s: its last such seg
       * anything else: the element's own @corresp (rejected segs included)

     Changes against the previous revision (numeric results are unchanged):
       * three diagnostic xsl:message calls were removed, one of which fired on every call
       * sum(X, 1) in the p branch was replaced by plain X: the second argument of fn:sum is
         the value returned for an empty sequence, not an addend, so it never added 1
       * an element that is none of text/body/p/s/seg now gets an explicit error instead of
         a type error on an empty start value
       * the fourfold copy of the @corresp parsing was folded into one expression
       * a commented-out, unmaintained non-recursive variant of the p branch (it summed the
         end positions of all earlier p elements) was dropped; it remains in version control

     NOTE(review): for p, only the immediately preceding p contributes to the start, because
     the recursive call passes skip_start = true and therefore restarts at 0. If @corresp
     positions restart in every paragraph, the third and later paragraphs get a start that is
     not cumulative. Confirm the intended behaviour.
     NOTE(review): for s and seg with skip_start = false, the length value is START + LENGTH
     (an end position), and it is added to a start that already contains a position inside
     the paragraph. This looks like double counting in the end item. Confirm.
-->
<xsl:function name="f:calc_offsets" as="xs:integer+">
  <xsl:param name="node" as="element()"/>
  <xsl:param name="skip_start" as="xs:boolean"/>

  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="$skip_start or $node/self::tei:text or $node/self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>

      <!-- p: continue from where the previous p (child of the ancestor body) ended -->
      <xsl:when test="$node/self::tei:p">
        <xsl:variable name="my_pos" as="xs:integer"
          select="count($node/preceding-sibling::tei:p) + 1"/>
        <xsl:variable name="previous_p" as="element()?"
          select="($node/ancestor::tei:body/tei:p[position() lt $my_pos])[last()]"/>
        <xsl:sequence
          select="if (exists($previous_p)) then f:calc_offsets($previous_p, true())[$OFFSET_END] else 0"/>
      </xsl:when>

      <!-- s: position reached by the previous s of the same p, shifted by the start of that p -->
      <xsl:when test="$node/self::tei:s">
        <xsl:variable name="enclosing_p" as="element()?" select="$node/ancestor::tei:p[1]"/>
        <xsl:variable name="my_pos" as="xs:integer"
          select="count($node/preceding-sibling::tei:s) + 1"/>
        <xsl:variable name="previous_s" as="element()?"
          select="($enclosing_p/tei:s[position() lt $my_pos])[last()]"/>
        <xsl:variable name="internal_start" as="xs:integer"
          select="if (exists($previous_s)) then f:calc_offsets($previous_s, true())[$OFFSET_END] else 0"/>
        <xsl:variable name="external_start" as="xs:integer"
          select="f:calc_offsets($enclosing_p, false())[$OFFSET_START]"/>
        <xsl:sequence select="$internal_start + $external_start"/>
      </xsl:when>

      <!-- seg: s elements are irrelevant here, the local start is read directly from @corresp -->
      <xsl:when test="$node/self::tei:seg">
        <xsl:variable name="numbers" as="xs:string"
          select="substring-after(substring-before($node/@corresp, ')'), ',')"/>
        <xsl:variable name="internal_start" as="xs:integer"
          select="xs:integer(substring-before($numbers, ','))"/>
        <xsl:variable name="external_start" as="xs:integer"
          select="f:calc_offsets($node/ancestor::tei:p[1], false())[$OFFSET_START]"/>
        <xsl:sequence select="$internal_start + $external_start"/>
      </xsl:when>

      <!-- no rule for this element: fail with a readable message -->
      <xsl:otherwise>
        <xsl:sequence
          select="error((), 'f:calc_offsets: cannot compute a start offset for element ' || name($node))"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- the @corresp attribute(s) whose START + LENGTH make up the length value -->
  <xsl:variable name="end_corresps" as="attribute(corresp)+">
    <xsl:choose>
      <xsl:when test="$node/self::tei:text or $node/self::tei:body">
        <xsl:sequence
          select="$node/descendant::tei:p/descendant::tei:s[last()]/(descendant::tei:seg[not(@nkjp:rejected = 'true')])[last()]/@corresp"/>
      </xsl:when>
      <xsl:when test="$node/self::tei:p">
        <xsl:sequence
          select="$node/descendant::tei:s[last()]/(descendant::tei:seg[not(@nkjp:rejected = 'true')])[last()]/@corresp"/>
      </xsl:when>
      <xsl:when test="$node/self::tei:s">
        <xsl:sequence
          select="$node/descendant::tei:seg[not(@nkjp:rejected = 'true')][last()]/@corresp"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:sequence select="$node/@corresp"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <xsl:variable name="length" as="xs:integer"
    select="
      sum(
        for $c in $end_corresps
        return
          (let $numbers := substring-after(substring-before($c, ')'), ',')
           return xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ',')))
      )"/>

  <!-- xs:integer(true()) is 1, xs:integer(false()) is 0 -->
  <xsl:sequence select="$start, $start + $length - 1 + xs:integer($skip_start)"/>
</xsl:function>
</xsl:stylesheet>