blob: 03f66f3558532a2b761f5f651738bfd61421905d [file] [log] [blame]
Akron9a8ee3e2022-01-31 13:51:49 +01001<?xml version="1.0" encoding="UTF-8"?>
2<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
bansp5e2d1c02022-03-10 04:51:40 +01003 xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
4 xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
5 xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f map nkjp tei"
6 version="3.0" expand-text="yes">
Akron9a8ee3e2022-01-31 13:51:49 +01007
banspe726b4a2022-03-28 05:47:45 +02008
9<!-- PARAMETERS -->
bansp5e2d1c02022-03-10 04:51:40 +010010
<!-- Directory holding the NKJP input, organised as a collection of text-level
     directories; the name of each such directory serves as the ID of its text. -->
<xsl:param name="sourceDir" as="xs:string" select="'test/resources/nkjp2korap_sample2'"/>

<!-- Root directory under which the corpus/document/text/annotations hierarchy is created. -->
<xsl:param name="targetDir" as="xs:string" select="'test/output'"/>

<!-- Comma-separated list of IDs (names of text-level directories) to be left out
     of processing, for example: HellerPodgladanie,KOT
     Entries are matched by plain string identity only; this is a testing aid. -->
<xsl:param name="skip_docID" as="xs:string"
  select="'HellerPodgladanie,IsakowiczZaleskiMoje,KolakowskiOco,MysliwskiKamien,WilkWilczy,ZycieWarszawy_Zycie'"/>
banspb5992532022-03-29 15:55:44 +020025
bansp8f6700b2022-03-27 05:27:09 +020026
bansp9dc10002022-05-17 22:33:34 +020027<!-- VARIABLES (= constants...) -->
banspe726b4a2022-03-28 05:47:45 +020028
<!-- Fixed identifiers of the target corpus and document; both are 'NKJP'. -->
<xsl:variable name="corpusID" static="yes" as="xs:string" select="'NKJP'"/>
<xsl:variable name="docID" static="yes" as="xs:string" select="'NKJP'"/>

<!-- Target corpus directory, with a trailing slash so that file names can be appended directly. -->
<xsl:variable name="targetCorpusDir_slashed" as="xs:string"
  select="concat($targetDir, '/', $corpusID, '/')"/>

<!-- System and public identifiers of the I5 DTD. -->
<xsl:variable name="systemDoctypeI5" static="yes" as="xs:string"
  select="'http://corpora.ids-mannheim.de/I5/DTD/i5.dtd'"/>

<xsl:variable name="publicDoctypeI5" static="yes" as="xs:string"
  select="'-//IDS//DTD I5 1.0//EN'"/>

<!-- Namespace of the KorAP-XML elements created by this stylesheet. -->
<xsl:variable name="KorAP_namespace" static="yes" as="xs:string"
  select="'http://ids-mannheim.de/ns/KorAP'"/>

<!-- Label written into the @version attribute of the generated layers. -->
<xsl:variable name="KorAP-XML_version" static="yes" as="xs:string" select="'KorAP-0.4'"/>

<!-- Query parameters appended to the collection URI; see
     https://www.saxonica.com/documentation11/index.html#!sourcedocs/collections/collection-directories -->
<xsl:variable name="collection_params" static="yes" as="xs:string"
  select="'recurse=yes;validation=strip;select=text.xml;content-type=application/xml;on-error=warning;xinclude=yes'"/>

<!-- Every text.xml found under $sourceDir; at least one document is required. -->
<xsl:variable name="collection_of_text" as="document-node()+"
  select="collection(concat($sourceDir, '?', $collection_params))"/>
banspd1bf1db2022-04-04 02:16:24 +020052
<!-- Readability aids: named positions within the (start, end) sequence that,
     according to the original note, is returned by the calc_offsets() function
     (that function is not visible in this part of the file).

     Remove these two variables together with that function! -->
<xsl:variable name="OFFSET_START" static="yes" as="xs:integer" select="1"/>
<xsl:variable name="OFFSET_END" static="yes" as="xs:integer" select="2"/>
banspb5992532022-03-29 15:55:44 +020062
63
banspe726b4a2022-03-28 05:47:45 +020064<!-- MODES -->
bansp5e2d1c02022-03-10 04:51:40 +010065
<!-- 'corpus' and 'text' skip unmatched nodes together with their subtrees;
     'header-text' falls back to copying text content only. -->
<xsl:mode on-no-match="deep-skip" name="corpus"/>
<xsl:mode on-no-match="deep-skip" name="text"/>
<xsl:mode on-no-match="text-only-copy" name="header-text"/>
bansp5e2d1c02022-03-10 04:51:40 +010069
banspe726b4a2022-03-28 05:47:45 +020070
71 <!-- FUNCTIONS -->
72
<!-- Nesting level of $node: the number of elements on its ancestor-or-self axis,
     not counting elements whose local name is TEI or teiCorpus. -->
<xsl:function name="f:compute_nesting" as="xs:integer">
  <xsl:param name="node" as="element()"/>
  <xsl:sequence
    select="count($node/ancestor-or-self::*[not(local-name() = ('TEI', 'teiCorpus'))])"/>
</xsl:function>
80
<!-- Tells whether whitespace is to be assumed in front of $node.

     $node must be a seg, s or p element (tested by local name):
       seg : true unless the segment carries @nkjp:nps, or it has no preceding seg
             sibling inside an s without preceding s siblings inside a p without
             preceding p siblings (i.e. it opens the first sentence of the first paragraph);
       s   : true if there is a preceding s sibling, or the nearest enclosing p
             has a preceding p sibling;
       p   : true if there is a preceding p sibling.
     Any other element terminates the transformation with an error message. -->
<xsl:function name="f:is_preceded_by_ws" as="xs:boolean">
  <xsl:param name="node" as="element()"/>
  <xsl:choose>
    <xsl:when test="local-name($node) eq 'seg'">
      <xsl:sequence
        select="not(exists($node/@nkjp:nps)) and not($node[count(preceding-sibling::tei:seg) eq 0]/ancestor::tei:s[count(preceding-sibling::tei:s) eq 0]/ancestor::tei:p[count(preceding-sibling::tei:p) eq 0])"/>
    </xsl:when>
    <xsl:when test="local-name($node) eq 's'">
      <!-- Diagnostic output. The operands of || must be single items, so the
           (possibly many) preceding siblings are joined explicitly; passing the
           bare node sequence fails as soon as there are two or more of them. -->
      <xsl:message select="'s - prec s: ' || string-join($node/preceding-sibling::tei:s, ' ')"/>
      <xsl:message select="'same s - prec p: ' || string-join($node/ancestor::tei:p[1]/preceding-sibling::tei:p, ' ') || '&#10;'"/>

      <xsl:sequence
        select="exists($node/preceding-sibling::tei:s) or exists($node/ancestor::tei:p[1]/preceding-sibling::tei:p)"/>
    </xsl:when>
    <xsl:when test="local-name($node) eq 'p'">
      <!-- Diagnostic output; joined for the same reason as above. -->
      <xsl:message select="'p : ' || string-join($node/preceding-sibling::tei:p, ' ')"/>
      <xsl:sequence select="exists($node/preceding-sibling::tei:p)"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:message terminate="yes"
        select="'Wrong argument passed to f:is_preceded_by_ws(): ' || local-name($node) || ' Only p, s, seg are allowed.'"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
banspd1bf1db2022-04-04 02:16:24 +0200106
Piotr Banski4f4c2d22022-05-19 01:44:32 +0200107
banspd1bf1db2022-04-04 02:16:24 +0200108
Piotr Banski4f4c2d22022-05-19 01:44:32 +0200109
bansp5e2d1c02022-03-10 04:51:40 +0100110
banspb5992532022-03-29 15:55:44 +0200111
112<!-- UTILITY TEMPLATES -->
113
<!-- Drop the auto-inserted @default attribute wherever it turns up, in every mode. -->
<xsl:template mode="#all" match="@default"/>
bansp9103aab2022-03-19 05:10:21 +0100116
bansp9dc10002022-05-17 22:33:34 +0200117 <!--<xsl:template match="tei:w" mode="#all"/> w is better than ab, now ... -->
<!-- NKJP-SGJP has apparently abandoned standoff representation by adding <w> everywhere;
     for the time being, we just stick to the standoff offsets, although that may need to
     be revisited, as the NKJP format has now begun to stray from its schemas and assumptions -->
bansp8f6700b2022-03-27 05:27:09 +0200121
<!-- THIS IS ONLY TEMPORARY: tei:choice is suppressed in every mode.
     An interesting challenge came up here: straightforward mapping will probably
     have to be abandoned because of TOKENIZATION alternatives.
     For now the aim is just to keep the stylesheet working, even if it eats an
     occasional token (which it currently does: 'komuś' and 'czym' vanish). -->
<xsl:template mode="#all" match="tei:choice"/>
bansp8f6700b2022-03-27 05:27:09 +0200129
banspb5992532022-03-29 15:55:44 +0200130
131 <!-- MAIN PROCESSING -->
132
133
<!-- Entry point.
     1. Creates the corpus-level header.xml once, from the first document of the collection.
     2. Walks over every text.xml of the collection and, unless the ID of the text
        is listed in $skip_docID, hands the text and its two annotation documents
        (ann_morphosyntax.xml, ann_segmentation.xml, expected in the same directory)
        to process_single_sample. -->
<xsl:template name="xsl:initial-template">
  <xsl:variable name="IDs_to_skip" select="tokenize($skip_docID, ',')" as="xs:string*"/>

  <!-- This template is to be called only once; an arbitrary NKJP corpus file is used
       for the purpose, because all that is needed is the main corpus header, which
       can (should) be reachable from any NKJP corpus document. -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param name="text.xml" select="$collection_of_text[1]" as="document-node()"/>
    <xsl:with-param name="target" select="$targetCorpusDir_slashed || 'header.xml'" as="xs:string"/>
  </xsl:call-template>

  <xsl:for-each select="$collection_of_text">
    <!-- Directory of the current text: its base URI minus the trailing file name.
         The pattern is anchored so that only the final '/text.xml' is removed. -->
    <xsl:variable name="my_dir" as="xs:string" select="replace(base-uri(), '/text\.xml$', '')"/>
    <!-- The last path step of that directory is used as the ID of the text. -->
    <xsl:variable name="my_textID" as="xs:string" select="tokenize($my_dir, '/')[last()]"/>
    <xsl:variable name="ann_morphosyntax.uri" as="xs:string"
      select="$my_dir || '/ann_morphosyntax.xml'"/>
    <xsl:variable name="ann_segmentation.uri" as="xs:string"
      select="$my_dir || '/ann_segmentation.xml'"/>

    <!-- Utility step: texts listed in $skip_docID are ignored
         (debugging, selective update). -->
    <xsl:if test="not($my_textID = $IDs_to_skip)">
      <xsl:call-template name="process_single_sample">
        <xsl:with-param name="text.xml" as="document-node()" select="."/>
        <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
          select="doc($ann_morphosyntax.uri)"/>
        <xsl:with-param name="ann_segmentation.xml" as="document-node()"
          select="doc($ann_segmentation.uri)"/>
        <xsl:with-param name="my_textID" select="$my_textID" as="xs:string"/>
      </xsl:call-template>
    </xsl:if>
  </xsl:for-each>
</xsl:template>
173
<!-- Produces the output files for one text: data.xml, struct/structure.xml and
     header.xml, all placed under {target corpus dir}/{docID}/{my_textID}.
     The morpho layer is currently switched off (see the commented-out call below),
     so $ann_morphosyntax.xml is accepted but not used at the moment.

     Parameters:
       text.xml              the text document
       ann_morphosyntax.xml  its morphosyntactic annotation
       ann_segmentation.xml  its segmentation annotation
       my_textID             ID of the text; the default '0-BAD_textID' should never
                             be needed, but if it is, it will be signalled at the top
                             of the output -->
<xsl:template name="process_single_sample">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="my_textID" as="xs:string" select="'0-BAD_textID'"/>

  <!-- Directory receiving all files of this text. -->
  <xsl:variable name="targetBaseDir" as="xs:string"
    select="concat($targetCorpusDir_slashed, $docID, '/', $my_textID)"/>

  <!-- This is what occurs in the text and data layers as @docid. -->
  <xsl:variable name="compoundID" as="xs:string"
    select="concat($corpusID, '_', $docID, '.', $my_textID)"/>

  <xsl:call-template name="create_data">
    <xsl:with-param name="target" as="xs:string" select="concat($targetBaseDir, '/data.xml')"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$compoundID"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()"
      select="$ann_segmentation.xml"/>
  </xsl:call-template>

  <xsl:call-template name="create_struct">
    <xsl:with-param name="target" as="xs:string"
      select="concat($targetBaseDir, '/struct/structure.xml')"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$compoundID"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()"
      select="$ann_segmentation.xml"/>
  </xsl:call-template>

  <!-- Switched off for now:
  <xsl:call-template name="create_morpho">
    <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
    <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
    <xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml"
      as="document-node()"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml"
      as="document-node()"/>
    <xsl:with-param name="target" select="$targetBaseDir || '/nkjp/morpho.xml'" as="xs:string"/>
  </xsl:call-template>
  -->

  <xsl:call-template name="create_text_header">
    <xsl:with-param name="target" as="xs:string" select="concat($targetBaseDir, '/header.xml')"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$compoundID"/>
    <xsl:with-param name="text.xml" as="document-node()" select="$text.xml"/>
  </xsl:call-template>
</xsl:template>
220
221 <!-- ************************** data.xml ******************* -->
222
<!-- ************************** data.xml ******************* -->
<!-- Writes the raw-text file (data.xml) of one text to $target.

     The text is rebuilt from the segmentation annotation: for every seg below
     teiCorpus/TEI/text/body/p/s that has no @nkjp:rejected attribute, a single
     space is emitted if f:is_preceded_by_ws() says so, followed by the content
     of the tei:w child(ren) of the segment.

     Parameters:
       ann_segmentation.xml  the segmentation annotation document
       compoundID            value of @docid on the root element
       target                URI of the file to be written -->
<xsl:template name="create_data">
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- xpath-default-namespace is not an attribute value template, so the namespace
       is spelled out literally here, in the same way as in the other templates of
       this stylesheet that write a result document. -->
  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">

    <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>

      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <!-- One optional separator plus the token text per accepted segment. -->
        <xsl:variable name="content" as="xs:string+">
          <xsl:for-each
            select="$ann_segmentation.xml/tei:teiCorpus/tei:TEI/tei:text/tei:body/tei:p/tei:s//tei:seg[count(@nkjp:rejected) eq 0]">
            <xsl:sequence select="
                if (f:is_preceded_by_ws(.)) then
                  ' '
                else
                  '', ./tei:w"/>
          </xsl:for-each>
        </xsl:variable>
        <xsl:value-of select="string-join($content)"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
254
bansp5f841732022-03-16 06:27:31 +0100255 <!-- ************************** struct ******************* -->
256
<!-- Writes the structure layer of one text to $target.

     At present only the frame of the layer is produced: the spanList element is
     left empty, because the call that would fill it is commented out. The lookup
     map built below is reported by size only.

     Parameters:
       compoundID            value of @docid on the layer element
       ann_segmentation.xml  the segmentation annotation document
       target                URI of the file to be written -->
<xsl:template name="create_struct">
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- The whole document is mapped up front, so that this processing happens only
       once and later lookups are fast. Each accepted segment (no @nkjp:rejected)
       is keyed by its @xml:id; the entry holds, in this order: the @xml:id of the
       enclosing p, the @xml:id of the enclosing s, the position of the segment
       within its sentence group, the result of f:is_preceded_by_ws(), and the
       whitespace-normalized content of tei:w. -->
  <xsl:variable name="map_w" as="map(xs:untypedAtomic,item()+)">
    <xsl:variable name="accepted_segs" as="element()+"
      select="$ann_segmentation.xml/tei:teiCorpus/tei:TEI/tei:text/tei:body/tei:p/tei:s//tei:seg[empty(@nkjp:rejected)]"/>
    <xsl:map>
      <xsl:for-each-group select="$accepted_segs" group-by="ancestor::tei:p[1]/@xml:id">
        <xsl:variable name="p_id" select="current-grouping-key()"/>
        <xsl:for-each-group select="current-group()" group-by="ancestor::tei:s[1]/@xml:id">
          <xsl:variable name="s_id" select="current-grouping-key()"/>
          <xsl:for-each select="current-group()">
            <xsl:map-entry key="@xml:id"
              select="$p_id, $s_id, position(), f:is_preceded_by_ws(.), normalize-space(tei:w)"/>
          </xsl:for-each>
        </xsl:for-each-group>
      </xsl:for-each-group>
    </xsl:map>
  </xsl:variable>

  <xsl:message select="concat('size: ', map:size($map_w))"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:attribute name="version" select="$KorAP-XML_version"/>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <!-- Switched off for now:
             <xsl:apply-templates select="$ann_segmentation.xml//tei:text" mode="struct"/> -->
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
294
<!-- Emits one KorAP span for the current TEI element and then recurses into its
     children in the same mode.

     Parameters (all optional):
       ini    start offset handed down by the parent; used as the start offset of
              elements that have no offset source of their own
       fin    end offset handed down by the parent (currently not consulted)
       index  running index handed down by the parent, from which the span ID is derived

     The computation is deliberately spread over many variables, so that the
     individual constituent values can be inspected should anything go wrong;
     optimization can follow once this has been run against a larger dataset. -->
<xsl:template match="tei:*" mode="struct">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>

  <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
  <!-- Preceding siblings carrying the same local name as the current element. -->
  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)"/>

  <!-- For s and p: the total number of element descendants of those preceding
       siblings (mind @nkjp:rejected); 0 for every other element. The sum over an
       empty sequence is 0, which covers the case of no preceding siblings. -->
  <xsl:variable name="outside-preceding-count" as="xs:integer"
    select="
      if (self::tei:s or self::tei:p) then
        sum(for $p in $preceding
          return
            count($p/descendant::*))
      else
        0"/>

  <xsl:variable name="my_index" as="xs:integer"
    select="$index + 1 + $preceding-count + $outside-preceding-count"/>

  <!-- Start offset. For p, s and seg it is read from a @corresp attribute: the
       text between the first and the second comma of the part in front of the
       first ')' is taken as an integer. -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p or self::tei:s">
        <!-- p: first seg of its first s; s: its first seg. -->
        <xsl:variable name="first_corresp" as="attribute(corresp)"
          select="(if (self::tei:p) then descendant::tei:s[1] else .)/descendant::tei:seg[1]/attribute::corresp"/>
        <xsl:variable name="numbers"
          select="substring-after(substring-before($first_corresp, ')'), ',')"/>
        <xsl:sequence select="xs:integer(substring-before($numbers, ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:seg">
        <xsl:variable name="numbers"
          select="substring-after(substring-before(@corresp, ')'), ',')"/>
        <xsl:sequence select="xs:integer(substring-before($numbers, ','))"/>
      </xsl:when>
      <xsl:otherwise>
        <!-- Any other TEI element (for example a w inside a seg) has no offset
             source of its own. Without this branch the variable would receive an
             empty sequence, which its declared type does not allow, and the
             transformation would stop; the start handed down by the parent is
             used instead. -->
        <xsl:sequence select="$ini"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- f:calc_content_length() is defined outside this part of the stylesheet. -->
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <!-- STRUCT vs. LEX -->
      <xsl:attribute name="type" select="'struct'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'name'"/>
        <xsl:value-of select="local-name()"/>
      </xsl:element>
      <!-- The attributes of the source element, if any, are listed by local name. -->
      <xsl:if test="count(@*)">
        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
          <xsl:attribute name="name" select="'attr'"/>
          <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="type" select="'attr'"/>
            <xsl:for-each select="@*">
              <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                <xsl:attribute name="name" select="local-name(.)"/>
                <xsl:value-of select="."/>
              </xsl:element>
            </xsl:for-each>
          </xsl:element>
        </xsl:element>
      </xsl:if>
    </xsl:element>
  </xsl:element>

  <xsl:apply-templates mode="struct">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
  </xsl:apply-templates>
</xsl:template>
395
396 <!-- ************************** morpho ******************* -->
397
<!-- Writes the morphosyntactic layer of one text to $target: a layer element
     whose spanList is filled by applying the 'morpho' mode to every tei:text of
     the segmentation annotation.

     Parameters:
       text.xml              the text document (not consulted by this template)
       compoundID            value of @docid on the layer element
       ann_segmentation.xml  the segmentation annotation document
       ann_morphosyntax.xml  the morphosyntactic annotation, handed on to the
                             'morpho' templates
       target                URI of the file to be written -->
<xsl:template name="create_morpho">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:attribute name="version" select="$KorAP-XML_version"/>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:apply-templates mode="morpho" select="$ann_segmentation.xml//tei:text">
          <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
            select="$ann_morphosyntax.xml"/>
        </xsl:apply-templates>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
420
<!-- Generic step of the 'morpho' mode: produces no output of its own; it only
     computes the running index and the offsets of the current element and hands
     them, together with the morphosyntax document, to its children. Segments
     (tei:seg) are handled by a dedicated template.

     Parameters:
       ini, fin              offsets handed down by the parent (optional); $ini is
                             used as the start offset of elements that have no
                             offset source of their own, $fin is not consulted
       index                 running index handed down by the parent (optional)
       ann_morphosyntax.xml  the morphosyntactic annotation document -->
<xsl:template match="tei:*" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>

  <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
  <!-- Preceding siblings carrying the same local name as the current element. -->
  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)"/>

  <!-- For s and p: the total number of element descendants of those preceding
       siblings; 0 for every other element. The sum over an empty sequence is 0,
       which covers the case of no preceding siblings. -->
  <xsl:variable name="outside-preceding-count" as="xs:integer"
    select="
      if (self::tei:s or self::tei:p) then
        sum(for $p in $preceding
          return
            count($p/descendant::*))
      else
        0"/>

  <xsl:variable name="my_index" as="xs:integer"
    select="$index + 1 + $preceding-count + $outside-preceding-count"/>

  <!-- Start offset. For p and s it is read from the @corresp attribute of their
       first segment: the text between the first and the second comma of the part
       in front of the first ')' is taken as an integer. -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p or self::tei:s">
        <!-- p: first seg of its first s; s: its first seg. -->
        <xsl:variable name="first_corresp" as="attribute(corresp)"
          select="(if (self::tei:p) then descendant::tei:s[1] else .)/descendant::tei:seg[1]/attribute::corresp"/>
        <xsl:variable name="numbers"
          select="substring-after(substring-before($first_corresp, ')'), ',')"/>
        <xsl:sequence select="xs:integer(substring-before($numbers, ','))"/>
      </xsl:when>
      <xsl:otherwise>
        <!-- Any other TEI element has no offset source of its own. Without this
             branch the variable would receive an empty sequence, which its
             declared type does not allow, and the transformation would stop;
             the start handed down by the parent is used instead. -->
        <xsl:sequence select="$ini"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- f:calc_content_length() is defined outside this part of the stylesheet. -->
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:apply-templates mode="morpho">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml"
      as="document-node()"/>
  </xsl:apply-templates>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100487
<!-- Index of tei:seg elements by the fragment part of their @corresp (the text
     after '#'). It is used below to find the morphosyntactic segment that points
     at a given segmentation segment without scanning the whole morphosyntax
     document once per token. -->
<xsl:key name="morph-seg-by-target-id" match="tei:seg" use="substring-after(@corresp, '#')"/>

<!-- Emits the KorAP span of one token (a segment of the segmentation layer),
     carrying the lemma, the part of speech and, where present, the
     morphosyntactic description chosen by the disambiguation, plus a
     'join = left' feature when the morphosyntactic segment has an 'nps' feature.

     Parameters:
       ini, fin              offsets handed down by the parent (optional, not consulted)
       index                 running index handed down by the parent (optional),
                             from which the span ID is derived
       ann_morphosyntax.xml  the morphosyntactic annotation document

     The lookup is deliberately spread over many variables, so that the
     individual constituent values can be inspected should anything go wrong. -->
<xsl:template match="tei:seg" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>

  <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
  <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>

  <!-- The morphosyntactic segment whose @corresp points at this segment;
       exactly one such segment is required. -->
  <xsl:variable name="my_morph-seg" as="node()"
    select="key('morph-seg-by-target-id', $my_id, $ann_morphosyntax.xml)"/>
  <!-- The disambiguation feature and the ID of the interpretation it selects. -->
  <xsl:variable name="my_disamb" as="node()"
    select="$my_morph-seg//tei:fs/tei:f[@name eq 'disamb']"/>
  <xsl:variable name="my_choice-id" as="xs:string"
    select="substring-after($my_disamb//tei:f[@name eq 'choice']/@fVal, '#')"/>
  <!-- The lexical interpretation that contains the selected symbol. -->
  <xsl:variable name="my_choice-lex" as="node()"
    select="$my_morph-seg//tei:f[@name eq 'interps']/tei:fs[@type eq 'lex'][descendant::tei:symbol[@xml:id eq $my_choice-id]]"/>
  <xsl:variable name="chosen-msd" as="xs:string"
    select="$my_choice-lex/descendant::tei:symbol[@xml:id eq $my_choice-id]/@value"/>

  <!-- Only same-named preceding siblings count towards the index of a segment. -->
  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)"/>
  <xsl:variable name="my_index" select="$index + 1 + $preceding-count" as="xs:integer"/>

  <!-- Start offset: the text between the first and the second comma of the part
       of @corresp in front of the first ')' is taken as an integer. -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:variable name="numbers"
      select="substring-after(substring-before(@corresp, ')'), ',')"/>
    <xsl:sequence select="xs:integer(substring-before($numbers, ','))"/>
  </xsl:variable>
  <!-- f:calc_content_length() is defined outside this part of the stylesheet. -->
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <xsl:attribute name="type" select="'lex'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'lex'"/>
        <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
          <!-- The orthographic form is recorded as a comment, for inspection. -->
          <xsl:comment select="$my_morph-seg//tei:fs/tei:f[@name eq 'orth']/tei:string"/>

          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'lemma'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'base']/tei:string"/>
          </xsl:element>
          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'pos'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'ctag']/tei:symbol/@value"/>
          </xsl:element>
          <xsl:if test="string-length($chosen-msd)">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'msd'"/>
              <xsl:value-of select="$chosen-msd"/>
            </xsl:element>
          </xsl:if>
          <xsl:if test="$my_morph-seg//tei:fs/tei:f[@name eq 'nps']">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'join'"/>
              <xsl:value-of select="'left'"/>
            </xsl:element>
          </xsl:if>
        </xsl:element>
      </xsl:element>
    </xsl:element>
  </xsl:element>

  <!-- A segment is the leaf of this layer, so there is no further recursion.
       The recursive call that used to stand here was followed by a stray
       comment terminator, which was written to the output as literal text; the
       call itself sent the children of the segment to the generic 'morpho'
       template, which has no offset source for them. -->
</xsl:template>
banspe726b4a2022-03-28 05:47:45 +0200577
bansp5f841732022-03-16 06:27:31 +0100578 <!-- ************************** TEXT header ******************* -->
579
<!-- Writes the text-level header document to $target.
     text.xml   : source document; the element children of its tei:TEI/tei:teiHeader
                  are transformed in mode "text"
     compoundID : handed down as a tunnel parameter of the same name
     target     : href of the result document -->
<xsl:template name="create_text_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- the header sections to transform (the path uses prefixed names only) -->
  <xsl:variable name="header-parts" as="element()*"
    select="$text.xml//tei:TEI/tei:teiHeader/tei:*"/>

  <!-- create the local header.xml file -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates select="$header-parts" mode="text">
        <xsl:with-param name="compoundID" select="$compoundID" as="xs:string" tunnel="yes"/>
      </xsl:apply-templates>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
596
<!-- Text header: emits a fileDesc element and processes the children in mode "text". -->
<xsl:template match="tei:fileDesc" mode="text">
  <fileDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </fileDesc>
</xsl:template>
602
<!-- Text header: wraps the title content in t.title.
     NOTE(review): the children are processed without a mode attribute, whereas the
     corpus-mode title template uses mode "header-text"; confirm this is intended. -->
<xsl:template match="tei:title" mode="text">
  <xsl:element name="t.title">
    <xsl:apply-templates select="node()"/>
  </xsl:element>
</xsl:template>
608
<!-- Text header: emits titleStmt, starting with a textSigle that holds the tunnelled
     $compoundID, followed by the children processed in mode "text". -->
<xsl:template match="tei:titleStmt" mode="text">
  <xsl:param name="compoundID" as="xs:string" tunnel="yes"/>
  <xsl:element name="titleStmt">
    <xsl:element name="textSigle">{$compoundID}</xsl:element>
    <xsl:apply-templates select="node()" mode="#current"/>
  </xsl:element>
</xsl:template>
618
<!-- Text header: emits publicationStmt and processes the children in mode "text". -->
<xsl:template match="tei:publicationStmt" mode="text">
  <publicationStmt>
    <xsl:apply-templates select="node()" mode="#current"/>
  </publicationStmt>
</xsl:template>
624
<!-- Text header: emits availability; only attributes and element children are processed
     (text nodes directly inside the source element are not selected). -->
<xsl:template match="tei:availability" mode="text">
  <availability>
    <xsl:apply-templates select="@*" mode="#current"/>
    <xsl:apply-templates select="*" mode="#current"/>
  </availability>
</xsl:template>
630
<!-- Text header: emits profileDesc and processes the children in mode "text". -->
<xsl:template match="tei:profileDesc" mode="text">
  <profileDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </profileDesc>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100636
<!-- Text header: emits textClass; attributes first, then element children, both in mode "text". -->
<xsl:template match="tei:textClass" mode="text">
  <textClass>
    <xsl:apply-templates select="@*" mode="#current"/>
    <xsl:apply-templates select="*" mode="#current"/>
  </textClass>
</xsl:template>
642
<!-- Text and corpus headers: emits catRef with its attributes and element children.
     NOTE(review): the content is always processed in mode "text", also when this template
     was reached in mode "corpus"; confirm that is intended. -->
<xsl:template match="tei:catRef" mode="text corpus">
  <catRef>
    <xsl:apply-templates select="@*" mode="text"/>
    <xsl:apply-templates select="*" mode="text"/>
  </catRef>
</xsl:template>
648
<!-- Text and corpus headers: the listed attributes are carried over unchanged;
     xml:id only when it sits somewhere below tei:classDecl. -->
<xsl:template mode="text corpus"
  match="@status | @scheme | @target | @type | @xml:id[ancestor::tei:classDecl] | @xml:lang">
  <xsl:copy/>
</xsl:template>
652
<!-- Text and corpus headers: emits p; its content is processed in mode "header-text". -->
<xsl:template match="tei:p" mode="text corpus">
  <p>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </p>
</xsl:template>
658
659
660 <!-- OPTIMIZATION has to take modes into account -->
bansp5e2d1c02022-03-10 04:51:40 +0100661 <!-- ************************** CORPUS header ******************* -->
<!-- Writes the corpus-level header document to $target.
     text.xml : source document; the element children of its tei:teiCorpus/tei:teiHeader
                are transformed in mode "corpus"
     target   : href of the result document -->
<xsl:template name="create_corpus_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:variable name="header-parts" as="element()*"
    select="$text.xml/tei:teiCorpus/tei:teiHeader/tei:*"/>

  <!-- create the corpus-level header.xml file;
       doctype-public="{$publicDoctypeI5}" and doctype-system="{$systemDoctypeI5}" were tried
       on the result document and are, sadly, useless -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes">
    <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates select="$header-parts" mode="corpus"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
679
<!-- Corpus header: emits fileDesc and processes the children in mode "corpus". -->
<xsl:template match="tei:fileDesc" mode="corpus">
  <fileDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </fileDesc>
</xsl:template>
bansp9103aab2022-03-19 05:10:21 +0100685
bansp5e2d1c02022-03-10 04:51:40 +0100686
<!-- Corpus header: wraps the title in c.title; attributes go through mode "corpus",
     the content through mode "header-text". -->
<xsl:template match="tei:title" mode="corpus">
  <xsl:element name="c.title">
    <xsl:apply-templates select="@*" mode="#current"/>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </xsl:element>
</xsl:template>
693
<!-- Corpus header: emits titleStmt, starting with a korpusSigle that holds the global
     $corpusID, followed by the children processed in mode "corpus". -->
<xsl:template match="tei:titleStmt" mode="corpus">
  <xsl:element name="titleStmt">
    <xsl:element name="korpusSigle">{$corpusID}</xsl:element>
    <xsl:apply-templates select="node()" mode="#current"/>
  </xsl:element>
</xsl:template>
702
<!-- Corpus header: emits publicationStmt and processes the children in mode "corpus". -->
<xsl:template match="tei:publicationStmt" mode="corpus">
  <publicationStmt>
    <xsl:apply-templates select="node()" mode="#current"/>
  </publicationStmt>
</xsl:template>
708
<!-- Corpus header: emits availability; attributes first, then element children,
     both in mode "corpus" (text nodes directly inside the source element are not selected). -->
<xsl:template match="tei:availability" mode="corpus">
  <availability>
    <xsl:apply-templates select="@*" mode="#current"/>
    <xsl:apply-templates select="*" mode="#current"/>
  </availability>
</xsl:template>
714
<!-- Corpus header: emits encodingDesc and processes the children in mode "corpus". -->
<xsl:template match="tei:encodingDesc" mode="corpus">
  <encodingDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </encodingDesc>
</xsl:template>
720
<!-- Corpus header: structural elements of the class declaration are re-created under their
     local name; attributes first, then element children, both in mode "corpus". -->
<xsl:template mode="corpus"
  match="tei:classDecl | tei:taxonomy | tei:category | tei:taxonomy/tei:bibl">
  <xsl:element name="{local-name(.)}">
    <xsl:apply-templates select="@*" mode="#current"/>
    <xsl:apply-templates select="*" mode="#current"/>
  </xsl:element>
</xsl:template>
726
<!-- Corpus header: text-bearing elements are re-created under their local name;
     attributes go through mode "corpus", the content through mode "header-text". -->
<xsl:template mode="corpus" match="tei:bibl/tei:title | tei:edition | tei:desc">
  <xsl:element name="{local-name(.)}">
    <xsl:apply-templates select="@*" mode="#current"/>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </xsl:element>
</xsl:template>
733<!--
734 <xsl:template match="tei:textClass" mode="corpus">
735 <xsl:element name="{local-name()}">
736 <xsl:apply-templates mode="corpus" select="@* | *"/>
737 </xsl:element>
738 </xsl:template>
739
740 <xsl:template match="tei:catRef" mode="corpus">
741 <xsl:element name="{local-name()}">
742 <xsl:apply-templates mode="corpus" select="@* | *"/>
743 </xsl:element>
744 </xsl:template>
745-->
bansp5e2d1c02022-03-10 04:51:40 +0100746
747
748
749 <!-- this template can be called by the XSPEC test; TODO: find a way to call the main() template directly -->
750 <!-- I have not fully handled the param transmission, which would have to be kludged in just for the sake of XSpec,
751 because I'm disabling this for now, due to XSpec design issues; relevant links, a.o.:
752
753 https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
754 https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html
755
756 In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in their stylesheets. But I don't
757 want to be strangled by kludges at the beginning of work, I've already lost quite a bit of time on this investigation,
758 I will therefore "just code" and then can think of externalizing bits of templates if we want to play with tests. For now,
759 I don't want to have to handle context items in a special way inside variables, etc., because I'm not sure it's worth it.
760
761 -->
762 <!--<xsl:template name="test_full">
763 <xsl:param name="corpusID"/>
764 <xsl:param name="docID"/>
765 <xsl:param name="textID"/>
766 <xsl:call-template name="xsl:initial-template"/>
767 </xsl:template>-->
768
<!-- f:calc_content_length($node) as xs:integer

     Returns START + LENGTH as read from one @corresp attribute. The attribute value is
     parsed by taking the text before the first ')' and, within that, the text after the
     first ','; the remainder is read as "START,LENGTH".

     Which @corresp is used depends on $node:
       tei:text, tei:body : the last tei:seg not marked nkjp:rejected="true" inside the last
                            tei:s of the last tei:p below $node
       tei:p              : the last such tei:seg inside the last tei:s below $node
       tei:s              : the last such tei:seg below $node
       anything else      : $node's own @corresp

     A type error is raised when no @corresp can be found (as="attribute(corresp)"), and a
     cast error when the two numbers cannot be read as integers. -->
<xsl:function name="f:calc_content_length" as="xs:integer">
  <xsl:param name="node" as="node()"/>

  <!-- the node that carries, or whose last accepted segment carries, the relevant @corresp -->
  <xsl:variable name="holder" as="node()?">
    <xsl:choose>
      <xsl:when test="$node/self::tei:text or $node/self::tei:body">
        <xsl:sequence select="$node/descendant::tei:p[last()]/descendant::tei:s[last()]"/>
      </xsl:when>
      <xsl:when test="$node/self::tei:p">
        <xsl:sequence select="$node/descendant::tei:s[last()]"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:sequence select="$node"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- for a sentence, look at its last non-rejected segment; otherwise at the node itself -->
  <xsl:variable name="corresp" as="attribute(corresp)"
    select="
      if ($holder/self::tei:s)
      then $holder/descendant::tei:seg[not(@nkjp:rejected = 'true')][last()]/@corresp
      else $holder/@corresp"/>

  <!-- "START,LENGTH" -->
  <xsl:variable name="range" as="xs:string"
    select="substring-after(substring-before($corresp, ')'), ',')"/>

  <xsl:sequence
    select="xs:integer(substring-before($range, ',')) + xs:integer(substring-after($range, ','))"/>
</xsl:function>
803
<!-- f:calc_offsets($node, $skip_start) as xs:integer+

     Returns two integers: $start, followed by $start + $length - 1 + xs:integer($skip_start).
     Inside this function the result of recursive calls is indexed with $OFFSET_START and
     $OFFSET_END (declared outside this function; presumably 1 and 2, to be confirmed).

     $node       : the element to measure; the $start computation has dedicated branches for
                   tei:text, tei:body, tei:p, tei:s and tei:seg
     $skip_start : when true, $start is 0 and 1 is added to the second returned value

     Numbers are parsed from @corresp: the text before the first ')' is taken and, within it,
     the text after the first ','; the remainder is read as "START,LENGTH".
     Segments with nkjp:rejected="true" are ignored when the last segment of a sentence is
     looked up.

     NOTE(review): the choose that computes $start has no xsl:otherwise, so for any other
     element (with $skip_start false) $start is empty and its as="xs:integer" declaration
     raises a type error.
     NOTE(review): the xsl:message instructions below look like debugging output; the last
     one is emitted on every call, including recursive ones. -->
804 <xsl:function name="f:calc_offsets" as="xs:integer+">
805 <xsl:param name="node" as="element()"/>
806 <xsl:param name="skip_start" as="xs:boolean" />
807
808 <xsl:variable name="start" as="xs:integer">
809 <xsl:choose>
810
811 <xsl:when test="$skip_start or $node/self::tei:text or $node/self::tei:body">
812 <xsl:sequence select="0"/>
813 </xsl:when>
814
815 <!-- handle p -->
816
817 <xsl:when test="$node/self::tei:p">
818 <xsl:variable name="my_pos" as="xs:integer" select="count($node/preceding-sibling::tei:p) + 1"/>
819 <xsl:variable name="preceding" as="node()*"
820 select="$node/ancestor::tei:body/tei:p[position() lt $my_pos]"/>
821
822 <xsl:choose>
823 <xsl:when test="count($preceding) eq 0">
824 <xsl:sequence select="0"/>
825 </xsl:when>
826 <xsl:otherwise>
827 <xsl:sequence select="sum(f:calc_offsets($preceding[last()],true())[$OFFSET_END],1)"/>
828
829 <!-- BUG danger: I am not sure if a "1" should rather be added after each p; let me try to handle that in the return value of the $length variable,
830 and make it sensitive to the skip_start parameter
831
832 I will then have to remove the ",1" from here!
833
834 -->
<!-- NOTE(review): in fn:sum($arg, $zero) the second argument is only the value returned
     when $arg is empty; it is not added to the total. With a non-empty first argument the
     ",1" above therefore has no effect.
     NOTE(review): the recursive call passes true(), which makes its own $start 0, so the
     value used here is derived from the immediately preceding p alone and not from all
     preceding p elements (assuming $OFFSET_END selects the second returned item; to be
     confirmed). -->
835
836 <!-- <xsl:variable name="last_corresps"
837 select="$preceding/descendant::tei:s[last()]/(descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'])[last()]/@corresp"
838 as="attribute(corresp)+"/>
839 <xsl:variable name="end_offsets" as="xs:integer+">
840 <xsl:for-each select="$last_corresps">
841 <xsl:variable name="numbers"
842 select="substring-after(substring-before(., ')'), ',')"/>
843 <xsl:sequence
844 select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"
845 />
846 </xsl:for-each>
847 </xsl:variable>
848 <xsl:sequence select="sum($end_offsets, 1)"/>
849
850 this is a non-recursive variant that may turn out to be much less cpu-intensive, not sure
851 - but if it's plugged in, it will have to be adjusted to the current form of the recursive variant,
852 because it hasn't been maintained since it got commented out
853 -->
854 </xsl:otherwise>
855 </xsl:choose>
856 </xsl:when>
857
858 <!-- handle s -->
859
860 <!-- the value for s gets counted since the start of the current p
861 - so we look at the preceding s's
862 + the preceding p's
863 -->
864 <xsl:when test="$node/self::tei:s">
865 <!--<xsl:variable name="last_corresp"
866 select="$node/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/attribute::corresp"
867 as="attribute(corresp)"/>
868 <xsl:variable name="numbers"
869 select="substring-after(substring-before($last_corresp, ')'), ',')"/>
870 <xsl:sequence
871 select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"
872 />
873 -->
874
<!-- internal_start: 0 for the first s of its p, otherwise the $OFFSET_END item computed for
     the immediately preceding sibling s with $skip_start = true -->
875 <xsl:variable name="internal_start" as="xs:integer">
876 <xsl:variable name="my_pos" as="xs:integer" select="count($node/preceding-sibling::tei:s) + 1"/>
877 <xsl:variable name="preceding" as="node()*"
878 select="$node/ancestor::tei:p[1]/tei:s[position() lt $my_pos]"/>
879
880 <xsl:choose>
881 <xsl:when test="count($preceding) eq 0">
882 <xsl:sequence select="0"/>
883 </xsl:when>
884 <xsl:otherwise>
885 <xsl:sequence select="f:calc_offsets($preceding[last()],true())[$OFFSET_END]"/>
886 <!--<xsl:sequence select="sum(f:calc_offsets($preceding[last()],true())[$OFFSET_END],1)"/>-->
887 <!-- again, CAREFUL ABOUT THE +1, it might need to vanish -->
888 </xsl:otherwise>
889 </xsl:choose>
890 </xsl:variable>
891
<!-- external_start: the $OFFSET_START item of the nearest enclosing p -->
892 <xsl:variable name="external_start" as="xs:integer" select="f:calc_offsets($node/ancestor::tei:p[1],false())[$OFFSET_START]"/>
893
894 <xsl:sequence select="$internal_start + $external_start"/>
895 </xsl:when>
896
897 <!-- handle seg -->
898
899 <xsl:when test="$node/self::tei:seg">
900 <!-- for segs, the s elements are irrelevant, and the local offset is immediately available on the @corresp -->
901
902 <xsl:variable name="numbers"
903 select="substring-after(substring-before($node/@corresp, ')'), ',')"/>
904
905 <xsl:variable name="internal_start" select="xs:integer(substring-before($numbers, ','))"
906 as="xs:integer"/>
907 <xsl:variable name="external_start" as="xs:integer"
908 select="f:calc_offsets($node/ancestor::tei:p[1], false())[$OFFSET_START]"/>
909
<!-- message for segments that carry an nkjp:rejected attribute (whatever its value) -->
910 <xsl:if test="$node/self::tei:seg and count($node/@nkjp:rejected)">
911
912 <xsl:message select="'numbers: ' || $numbers"/>
913 </xsl:if>
914 <xsl:sequence select="$internal_start + $external_start"/>
915 </xsl:when>
916 </xsl:choose>
917 </xsl:variable>
918
919 <xsl:variable name="length" as="xs:integer">
920 <xsl:choose>
921
<!-- text/body: for every descendant p, take START + LENGTH of the last accepted seg in its
     last s, and add these values up -->
922 <xsl:when test="$node/self::tei:text or $node/self::tei:body">
923 <xsl:variable name="last_corresps"
924 select="$node/descendant::tei:p/descendant::tei:s[last()]/(descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'])[last()]/@corresp"
925 as="attribute(corresp)+"/>
926
927 <xsl:variable name="end_offsets" as="xs:integer+">
928 <xsl:for-each select="$last_corresps">
929 <xsl:variable name="numbers" select="substring-after(substring-before(., ')'), ',')"/>
930 <xsl:sequence
931 select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"
932 />
933 </xsl:for-each>
934 </xsl:variable>
935
936 <xsl:sequence select="sum($end_offsets)"/>
937
938 </xsl:when>
<!-- p: START + LENGTH of the last accepted seg in the last s below this p -->
939 <xsl:when test="$node/self::tei:p">
940 <xsl:variable name="last_corresps"
941 select="$node/descendant::tei:s[last()]/(descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'])[last()]/@corresp"
942 as="attribute(corresp)+"/>
943 <xsl:variable name="end_offsets" as="xs:integer+">
944 <xsl:for-each select="$last_corresps">
945 <xsl:variable name="numbers" select="substring-after(substring-before(., ')'), ',')"/>
946 <xsl:sequence
947 select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"
948 />
949 </xsl:for-each>
950 </xsl:variable>
951 <xsl:sequence select="sum($end_offsets)"/>
952 </xsl:when>
953
954
955
956
<!-- s: START + LENGTH of the last accepted seg below this s -->
957 <xsl:when test="$node/self::tei:s">
958 <xsl:variable name="last_corresp"
959 select="$node/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/attribute::corresp"
960 as="attribute(corresp)"/>
961 <xsl:variable name="numbers"
962 select="substring-after(substring-before($last_corresp, ')'), ',')"/>
963 <xsl:sequence
964 select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"
965 />
966 </xsl:when>
<!-- any other element: START + LENGTH of the element's own @corresp -->
967 <xsl:otherwise>
968 <xsl:variable name="numbers"
969 select="substring-after(substring-before($node/@corresp, ')'), ',')"/>
970 <xsl:if test="$node/self::tei:seg and count($node/@nkjp:rejected)">
971 <!-- REMOVE THIS -->
972 <xsl:message select="'rejected: ' || $numbers"/>
973 </xsl:if>
974 <xsl:sequence
975 select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"
976 />
977 </xsl:otherwise>
978 </xsl:choose>
979 </xsl:variable>
980
<!-- trace output: element name, 1-based position among same-named preceding siblings, $length and $skip_start -->
981 <xsl:message select="local-name($node) || '[' || count($node/preceding-sibling::*[local-name() eq local-name($node)])+1 || '] length: ' || $length || ' skip_start: ' || $skip_start"/>
982
<!-- when $skip_start is true, $start is 0 and the second item therefore equals $length -->
983 <xsl:sequence select="$start, $start + $length -1 + xs:integer($skip_start)"/>
984 </xsl:function>
985
986
Akron9a8ee3e2022-01-31 13:51:49 +0100987</xsl:stylesheet>