blob: c70a01d6cfb9eecdf3ab906356d2435c50bed9ce [file] [log] [blame]
Akron9a8ee3e2022-01-31 13:51:49 +01001<?xml version="1.0" encoding="UTF-8"?>
2<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
bansp5e2d1c02022-03-10 04:51:40 +01003 xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
4 xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
5 xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f map nkjp tei"
6 version="3.0" expand-text="yes">
Akron9a8ee3e2022-01-31 13:51:49 +01007
<!-- Identifiers of the text being converted. These stylesheet parameters have no default. -->
<xsl:param name="corpusID" as="xs:string"/>
<xsl:param name="docID" as="xs:string"/>
<xsl:param name="textID" as="xs:string"/>

<!-- The directory containing NKJP files, in the form of a collection of text-level dirs. -->
<xsl:param name="sourceDir" as="xs:string" select="'test/resources/nkjp2korap_sample2'"/>

<!-- Root directory below which all result documents are written. -->
<xsl:param name="targetDir" as="xs:string" select="'test/output'"/>

<!-- targetDir/corpusID/ (with the trailing slash): location of the corpus-level header. -->
<xsl:variable name="targetCorpusDir_slashed" as="xs:string"
  select="concat($targetDir, '/', $corpusID, '/')"/>

<!-- targetDir/corpusID/docID/textID/ (with the trailing slash): location of the text-level files.
  The empty last item makes string-join() emit the trailing slash. -->
<xsl:variable name="targetTextDir_slashed" as="xs:string"
  select="string-join(($targetDir, $corpusID, $docID, $textID, ''), '/')"/>
bansp5e2d1c02022-03-10 04:51:40 +010021
<!-- Public and system identifiers of the I5 DTD. Within this stylesheet they are
  referenced only from a commented-out doctype declaration in create_corpus_header. -->
<xsl:variable name="publicDoctypeI5" static="true" as="xs:string"
  select="'-//IDS//DTD I5 1.0//EN'"/>

<xsl:variable name="systemDoctypeI5" static="true" as="xs:string"
  select="'http://corpora.ids-mannheim.de/I5/DTD/i5.dtd'"/>

<!-- Namespace of the generated raw_text, layer, spanList and span elements. -->
<xsl:variable name="KorAP_namespace" static="true" as="xs:string"
  select="'http://ids-mannheim.de/ns/KorAP'"/>

<!-- Value written to layer/@version (the original author found it "only a bit funny"). -->
<xsl:variable name="KorAP-XML_version" static="true" as="xs:string" select="'KorAP-0.4'"/>

<!-- corpusID_docID.textID: this is what occurs in the text and data layers as @docid -->
<xsl:variable name="compoundID" as="xs:string"
  select="concat($corpusID, '_', $docID, '.', $textID)"/>
bansp5e2d1c02022-03-10 04:51:40 +010038
<!-- header-text keeps only text content; the two header modes (text-level and
  corpus-level) drop every subtree that has no matching template. -->
<xsl:mode name="header-text" on-no-match="text-only-copy"/>
<xsl:mode name="text" on-no-match="deep-skip"/>
<xsl:mode name="corpus" on-no-match="deep-skip"/>
bansp5e2d1c02022-03-10 04:51:40 +010042
bansp5f841732022-03-16 06:27:31 +010043 <!-- <xsl:variable name="text_depth" as="xs:integer" select="xs:integer('2')" static="true"/>
44 <!-\- this magic number indicates the depth of the <TEI> element inside teiCorpus/TEI -\->
45-->
<!-- Returns the number of elements on the ancestor-or-self axis of $node, not
  counting elements whose local name is TEI or teiCorpus (in any namespace).
  The templates below write this value to span/@l. -->
<xsl:function name="f:compute_nesting" as="xs:integer">
  <xsl:param name="node" as="node()"/>
  <xsl:sequence
    select="count($node/ancestor-or-self::*[not(local-name() = ('TEI', 'teiCorpus'))])"/>
</xsl:function>
54
<!-- Returns the end offset of $node, computed as offset + length from a
  @corresp value of the form ...(id,offset,length).

  Which @corresp is read depends on $node:
    tei:text, tei:body : last seg of the last s of the last p below $node
    tei:p              : last seg of the last s below $node
    tei:s              : last seg below $node
    anything else      : the @corresp of $node itself

  Raises f:bad-corresp (namespace "func") when no such attribute exists or
  when its value does not yield two integers. Previously this case surfaced
  only as an opaque cast or type error. -->
<xsl:function name="f:calc_content_length" as="xs:integer">
  <xsl:param name="node" as="node()"/>

  <!-- pick the one attribute to parse; the parsing itself is shared below -->
  <xsl:variable name="last_corresp" as="attribute(corresp)?">
    <xsl:choose>
      <xsl:when test="$node/self::tei:text or $node/self::tei:body">
        <xsl:sequence
          select="$node/descendant::tei:p[last()]/descendant::tei:s[last()]/descendant::tei:seg[last()]/attribute::corresp"/>
      </xsl:when>
      <xsl:when test="$node/self::tei:p">
        <xsl:sequence
          select="$node/descendant::tei:s[last()]/descendant::tei:seg[last()]/attribute::corresp"/>
      </xsl:when>
      <xsl:when test="$node/self::tei:s">
        <xsl:sequence select="$node/descendant::tei:seg[last()]/attribute::corresp"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:sequence select="$node/@corresp"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- "offset,length": the text between the first comma and the first ')' -->
  <xsl:variable name="numbers" as="xs:string"
    select="substring-after(substring-before($last_corresp, ')'), ',')"/>
  <xsl:variable name="offset" as="xs:string" select="substring-before($numbers, ',')"/>
  <xsl:variable name="length" as="xs:string" select="substring-after($numbers, ',')"/>

  <xsl:sequence select="
      if ($offset castable as xs:integer and $length castable as xs:integer)
      then xs:integer($offset) + xs:integer($length)
      else error(QName('func', 'f:bad-corresp'),
                 concat('Cannot compute an end offset for node &quot;', name($node),
                        '&quot;: @corresp is missing or not of the form ...(id,offset,length); found &quot;',
                        string($last_corresp), '&quot;'))"/>
</xsl:function>
bansp5e2d1c02022-03-10 04:51:40 +010085
<!-- Produces nothing for any @default attribute, in every mode: this is to
  delete some auto-inserted attribute throughout. -->
<xsl:template match="@default" mode="#all">
  <xsl:sequence select="()"/>
</xsl:template>
bansp9103aab2022-03-19 05:10:21 +010088
<!-- Query part appended to the collection URI below. -->
<xsl:variable name="collection_params" static="yes" as="xs:string"
  select="'recurse=yes;validation=strip;select=text.xml;content-type=application/xml;on-error=warning;xinclude=yes'"/>

<!-- The documents returned by collection() for $sourceDir with the parameters above;
  at least one document is required. -->
<xsl:variable name="collection_of_text" as="document-node()+"
  select="collection(concat($sourceDir, '?', $collection_params))"/>
94
<!-- Entry point: writes the corpus-level header.xml only. -->
<xsl:template name="xsl:initial-template">
  <!-- We only want to call this once, and we process a random NKJP corpus file
    (here: the first document of the collection) for that purpose, because all we
    need is the main corpus header, and we can (should) get to that from any NKJP
    corpus document. -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param name="target" as="xs:string"
      select="concat($targetCorpusDir_slashed, 'header.xml')"/>
    <xsl:with-param name="text.xml" as="document-node()" select="$collection_of_text[1]"/>
  </xsl:call-template>
</xsl:template>
107
<!-- Converts a single text directory: loads text.xml, ann_segmentation.xml and
  ann_morphosyntax.xml directly from $sourceDir and writes data.xml,
  struct/structure.xml, nkjp/morpho.xml and header.xml below the text-level
  target directory. -->
<xsl:template name="process_single_sample">
  <xsl:variable name="text_doc" as="document-node()"
    select="doc(concat($sourceDir, '/text.xml'))"/>
  <xsl:variable name="segmentation_doc" as="document-node()"
    select="doc(concat($sourceDir, '/ann_segmentation.xml'))"/>
  <xsl:variable name="morphosyntax_doc" as="document-node()"
    select="doc(concat($sourceDir, '/ann_morphosyntax.xml'))"/>

  <!-- raw text -->
  <xsl:call-template name="create_data">
    <xsl:with-param name="text.xml" as="document-node()" select="$text_doc"/>
    <xsl:with-param name="target" as="xs:string"
      select="concat($targetTextDir_slashed, 'data.xml')"/>
  </xsl:call-template>

  <!-- structural spans -->
  <xsl:call-template name="create_struct">
    <xsl:with-param name="text.xml" as="document-node()" select="$text_doc"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()"
      select="$segmentation_doc"/>
    <xsl:with-param name="target" as="xs:string"
      select="concat($targetTextDir_slashed, 'struct/structure.xml')"/>
  </xsl:call-template>

  <!-- morphosyntactic spans -->
  <xsl:call-template name="create_morpho">
    <xsl:with-param name="text.xml" as="document-node()" select="$text_doc"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()"
      select="$segmentation_doc"/>
    <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
      select="$morphosyntax_doc"/>
    <xsl:with-param name="target" as="xs:string"
      select="concat($targetTextDir_slashed, 'nkjp/morpho.xml')"/>
  </xsl:call-template>

  <!-- text-level header -->
  <xsl:call-template name="create_text_header">
    <xsl:with-param name="text.xml" as="document-node()" select="$text_doc"/>
    <xsl:with-param name="target" as="xs:string"
      select="concat($targetTextDir_slashed, 'header.xml')"/>
  </xsl:call-template>
</xsl:template>
143
144 <!-- ************************** data.xml ******************* -->
145
<!-- Writes the data.xml file to $target: a KorAP raw_text element carrying
  @docid, a metadata reference and the text taken from text.xml.

  text.xml : the parsed NKJP text document
  target   : URI of the result document -->
<xsl:template name="create_data">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- xpath-default-namespace is not an attribute value template, so
    "{$KorAP_namespace}" would be taken as the literal namespace name; the URI
    is therefore spelled out, as in the other create_* templates. -->
  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">

    <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>

      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <!-- string values of all elements with local name "ab"; xsl:value-of
          separates several of them with a single space -->
        <xsl:value-of select="$text.xml//*[local-name() = 'ab']"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
166
bansp5f841732022-03-16 06:27:31 +0100167 <!-- ************************** struct ******************* -->
168
<!-- Writes the structure layer to $target: a KorAP layer/spanList whose spans
  are produced by processing every tei:text of the segmentation document in
  mode "struct".

  text.xml             : accepted but not read by this template
  ann_segmentation.xml : the parsed segmentation document
  target               : URI of the result document -->
<xsl:template name="create_struct">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model"
      select="'href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;'"/>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid">
        <xsl:value-of select="$compoundID"/>
      </xsl:attribute>
      <xsl:attribute name="version">
        <xsl:value-of select="$KorAP-XML_version"/>
      </xsl:attribute>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:variable name="text_elements" select="$ann_segmentation.xml//tei:text"/>
        <xsl:apply-templates select="$text_elements" mode="struct"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
187
<!-- Mode "struct": emits one KorAP span for the matched TEI element and then
  processes its children in the same mode.

  ini, fin : offsets handed down by the parent; not read here
  index    : index handed down by the parent; basis of this span's id

  The span id is 's' followed by: index + 1 + number of preceding siblings
  with the same local name + (for s and p only) the number of elements
  nested inside those siblings.

  The individual values are kept in separate variables on purpose, so that
  they can be looked up should anything go wrong; the original author planned
  to optimise once the stylesheet has been run against a larger dataset.

  Only text, body, p, s and seg yield a start offset; for any other TEI
  element the "start" variable is empty and its type check fails. -->
<xsl:template match="tei:*" mode="struct">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>

  <!-- earlier siblings carrying the same local name as this element -->
  <xsl:variable name="own_name" as="xs:string" select="local-name()"/>
  <xsl:variable name="namesakes_before"
    select="preceding-sibling::*[local-name(.) eq $own_name]"/>
  <xsl:variable name="namesake_count" select="count($namesakes_before)"/>

  <!-- for s and p only: how many elements are nested inside those siblings -->
  <xsl:variable name="nested_in_namesakes" as="xs:integer" select="
      if ((self::tei:s or self::tei:p) and $namesake_count ne 0)
      then sum(for $sibling in $namesakes_before
               return count($sibling/descendant::*))
      else 0"/>

  <xsl:variable name="span_index" as="xs:integer"
    select="$index + 1 + $namesake_count + $nested_in_namesakes"/>

  <!-- start offset: 0 for text and body, otherwise the offset field of the
    relevant @corresp, i.e. the text between its first and second comma -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p or self::tei:s">
        <!-- p: first seg of the first s; s: first seg -->
        <xsl:variable name="first_corresp" as="attribute(corresp)" select="
            if (self::tei:p)
            then descendant::tei:s[1]/descendant::tei:seg[1]/attribute::corresp
            else descendant::tei:seg[1]/attribute::corresp"/>
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before($first_corresp, ')'), ','), ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:seg">
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before(@corresp, ')'), ','), ','))"/>
      </xsl:when>
    </xsl:choose>
  </xsl:variable>

  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="concat('s', $span_index)"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <!-- STRUCT vs. LEX -->
      <xsl:attribute name="type" select="'struct'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'name'"/>
        <xsl:value-of select="$own_name"/>
      </xsl:element>
      <!-- the source attributes, if any, become a nested feature structure -->
      <xsl:if test="exists(@*)">
        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
          <xsl:attribute name="name" select="'attr'"/>
          <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="type" select="'attr'"/>
            <xsl:for-each select="@*">
              <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                <xsl:attribute name="name" select="local-name(.)"/>
                <xsl:value-of select="."/>
              </xsl:element>
            </xsl:for-each>
          </xsl:element>
        </xsl:element>
      </xsl:if>
    </xsl:element>
  </xsl:element>

  <xsl:apply-templates select="node()" mode="struct">
    <xsl:with-param name="ini" as="xs:integer" select="$start"/>
    <xsl:with-param name="fin" as="xs:integer" select="$end"/>
    <xsl:with-param name="index" select="$span_index"/>
  </xsl:apply-templates>
</xsl:template>
282
283 <!-- ************************** morpho ******************* -->
284
<!-- Writes the morphosyntactic layer to $target: a KorAP layer/spanList whose
  spans are produced by processing every tei:text of the segmentation
  document in mode "morpho", with the morphosyntax document passed along.

  text.xml             : accepted but not read by this template
  ann_segmentation.xml : the parsed segmentation document
  ann_morphosyntax.xml : the parsed morphosyntax document
  target               : URI of the result document -->
<xsl:template name="create_morpho">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model"
      select="'href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;'"/>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid">
        <xsl:value-of select="$compoundID"/>
      </xsl:attribute>
      <xsl:attribute name="version">
        <xsl:value-of select="$KorAP-XML_version"/>
      </xsl:attribute>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:variable name="text_elements" select="$ann_segmentation.xml//tei:text"/>
        <xsl:apply-templates select="$text_elements" mode="morpho">
          <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
            select="$ann_morphosyntax.xml"/>
        </xsl:apply-templates>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
306
<!-- Mode "morpho", generic TEI element: emits nothing itself; it only computes
  the running index and the offsets and hands them to its children. tei:seg is
  handled by its own, more specific template in this mode, which is why there
  is no seg branch for the start offset here.

  ini, fin             : offsets handed down by the parent; not read here
  index                : index handed down by the parent
  ann_morphosyntax.xml : passed through unchanged -->
<xsl:template match="tei:*" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>

  <!-- earlier siblings carrying the same local name as this element -->
  <xsl:variable name="own_name" as="xs:string" select="local-name()"/>
  <xsl:variable name="namesakes_before"
    select="preceding-sibling::*[local-name(.) eq $own_name]"/>
  <xsl:variable name="namesake_count" select="count($namesakes_before)"/>

  <!-- for s and p only: how many elements are nested inside those siblings -->
  <xsl:variable name="nested_in_namesakes" as="xs:integer" select="
      if ((self::tei:s or self::tei:p) and $namesake_count ne 0)
      then sum(for $sibling in $namesakes_before
               return count($sibling/descendant::*))
      else 0"/>

  <xsl:variable name="span_index" as="xs:integer"
    select="$index + 1 + $namesake_count + $nested_in_namesakes"/>

  <!-- start offset: 0 for text and body; for p and s the offset field of the
    first seg's @corresp; empty (and thus a type error) for anything else -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p or self::tei:s">
        <xsl:variable name="first_corresp" as="attribute(corresp)" select="
            if (self::tei:p)
            then descendant::tei:s[1]/descendant::tei:seg[1]/attribute::corresp
            else descendant::tei:seg[1]/attribute::corresp"/>
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before($first_corresp, ')'), ','), ','))"/>
      </xsl:when>
    </xsl:choose>
  </xsl:variable>

  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:apply-templates select="node()" mode="morpho">
    <xsl:with-param name="ini" as="xs:integer" select="$start"/>
    <xsl:with-param name="fin" as="xs:integer" select="$end"/>
    <xsl:with-param name="index" select="$span_index"/>
    <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
      select="$ann_morphosyntax.xml"/>
  </xsl:apply-templates>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100373
<!-- Index of tei:seg elements by the fragment part (the text after '#') of
  their @corresp. Used below to find the morphosyntax seg that points at a
  segmentation seg without rescanning the whole document for every segment. -->
<xsl:key name="morph-seg-by-corresp-fragment" match="tei:seg"
  use="substring-after(@corresp, '#')"/>

<!-- Mode "morpho", tei:seg: emits one KorAP span of type "lex" for the segment.

  ini, fin             : offsets handed down by the parent; not read here
  index                : index handed down by the parent; basis of the span id
  ann_morphosyntax.xml : document in which the matching morphosyntax seg is
                         looked up via @corresp

  Exactly one morphosyntax seg, one "disamb" feature and one chosen "lex"
  feature structure are required (enforced by the as="node()" declarations).
  The values are kept in separate variables on purpose, so that they can be
  looked up should anything go wrong. -->
<xsl:template match="tei:seg" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>

  <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
  <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>

  <!-- the morphosyntax seg whose @corresp fragment equals this seg's xml:id -->
  <xsl:variable name="my_morph-seg" as="node()"
    select="key('morph-seg-by-corresp-fragment', $my_id, $ann_morphosyntax.xml)"/>
  <xsl:variable name="my_disamb" as="node()"
    select="$my_morph-seg//tei:fs/tei:f[@name eq 'disamb']"/>
  <!-- id of the interpretation referenced by the "choice" feature -->
  <xsl:variable name="my_choice-id" as="xs:string"
    select="substring-after($my_disamb//tei:f[@name eq 'choice']/@fVal, '#')"/>
  <!-- the "lex" feature structure that contains the symbol with that id -->
  <xsl:variable name="my_choice-lex" as="node()"
    select="$my_morph-seg//tei:f[@name eq 'interps']/tei:fs[@type eq 'lex'][descendant::tei:symbol[@xml:id eq $my_choice-id]]"/>
  <xsl:variable name="chosen-msd" as="xs:string"
    select="$my_choice-lex/descendant::tei:symbol[@xml:id eq $my_choice-id]/@value"/>

  <!-- The extra descendant count applied to s and p elsewhere can never apply
    in a template that matches tei:seg only, so the index is simply the
    incoming index + 1 + the number of preceding siblings of the same name. -->
  <xsl:variable name="preceding-count"
    select="count(preceding-sibling::*[local-name(.) eq $my_name])"/>
  <xsl:variable name="my_index" as="xs:integer" select="$index + 1 + $preceding-count"/>

  <!-- start: the offset field of @corresp; end: offset + length -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:variable name="numbers" select="substring-after(substring-before(@corresp,')'),',')"/>
    <xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <xsl:attribute name="type" select="'lex'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'lex'"/>
        <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
          <!-- the "orth" string goes into the output as an XML comment -->
          <xsl:comment select="$my_morph-seg//tei:fs/tei:f[@name eq 'orth']/tei:string"/>

          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'lemma'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'base']/tei:string"/>
          </xsl:element>
          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'pos'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'ctag']/tei:symbol/@value"/>
          </xsl:element>
          <!-- msd is written only when the chosen symbol has a non-empty @value -->
          <xsl:if test="string-length($chosen-msd)">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'msd'"/>
              <xsl:value-of select="$chosen-msd"/>
            </xsl:element>
          </xsl:if>
          <!-- an "nps" feature on the morphosyntax seg becomes join=left -->
          <xsl:if test="$my_morph-seg//tei:fs/tei:f[@name eq 'nps']">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'join'"/>
              <xsl:value-of select="'left'"/>
            </xsl:element>
          </xsl:if>
        </xsl:element>
      </xsl:element>
    </xsl:element>
  </xsl:element>

  <xsl:apply-templates mode="morpho">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
  </xsl:apply-templates>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100464 <!-- ************************** TEXT header ******************* -->
465
<!-- Creates the local (text-level) header.xml file at $target: an idsHeader of
  type "document" filled by processing the children of TEI/teiHeader in mode
  "text".

  text.xml : the parsed NKJP text document
  target   : URI of the result document -->
<xsl:template name="create_text_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:variable name="header_parts" select="$text.xml//tei:TEI/tei:teiHeader/tei:*"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates select="$header_parts" mode="text"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
479
<!-- Text header: rebuilds fileDesc in no namespace and processes its children in the same mode. -->
<xsl:template match="tei:fileDesc" mode="text">
  <fileDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </fileDesc>
</xsl:template>
485
<!-- Text header: turns a title into t.title holding the text content of the title.
  The children are processed in the declared "header-text" mode (text-only-copy),
  as the corpus-level title and the p template already do, rather than falling
  into the unnamed default mode. -->
<xsl:template match="tei:title" mode="text">
  <t.title>
    <xsl:apply-templates mode="header-text"/>
  </t.title>
</xsl:template>
491
<!-- Text header: rebuilds titleStmt, putting a textSigle of the form
  corpusID/docID.textID in front of the processed children. -->
<xsl:template match="tei:titleStmt" mode="text">
  <titleStmt>
    <textSigle>
      <xsl:value-of select="concat($corpusID, '/', $docID, '.', $textID)"/>
    </textSigle>
    <xsl:apply-templates select="node()" mode="#current"/>
  </titleStmt>
</xsl:template>
500
<!-- Text header: rebuilds publicationStmt in no namespace and processes its children in the same mode. -->
<xsl:template match="tei:publicationStmt" mode="text">
  <publicationStmt>
    <xsl:apply-templates select="node()" mode="#current"/>
  </publicationStmt>
</xsl:template>
506
<!-- Text header: rebuilds availability in no namespace; only its attributes and
  child elements are processed, so text nodes directly inside it are not. -->
<xsl:template match="tei:availability" mode="text">
  <availability>
    <xsl:apply-templates select="@* | *" mode="#current"/>
  </availability>
</xsl:template>
512
<!-- Text header: rebuilds profileDesc in no namespace and processes its children in the same mode. -->
<xsl:template match="tei:profileDesc" mode="text">
  <profileDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </profileDesc>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100518
<!-- Text header: rebuilds textClass in no namespace; only its attributes and
  child elements are processed. -->
<xsl:template match="tei:textClass" mode="text">
  <textClass>
    <xsl:apply-templates select="@* | *" mode="#current"/>
  </textClass>
</xsl:template>
524
<!-- Text and corpus headers: rebuilds catRef in no namespace and processes its
  attributes and child elements. The content is processed in the mode the
  template was invoked in (#current), so a corpus-mode invocation no longer
  switches over to the text-mode templates. -->
<xsl:template match="tei:catRef" mode="text corpus">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates mode="#current" select="@* | *"/>
  </xsl:element>
</xsl:template>
530
<!-- Text and corpus headers: the listed attributes are copied to the output
  unchanged; xml:id only when it sits somewhere below a classDecl. -->
<xsl:template match="@status | @scheme | @target | @type | @xml:id[ancestor::tei:classDecl] | @xml:lang" mode="text corpus">
  <xsl:copy/>
</xsl:template>
534
<!-- Text and corpus headers: rebuilds p in no namespace, keeping only the text
  content of the paragraph (mode "header-text"). -->
<xsl:template match="tei:p" mode="text corpus">
  <p>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </p>
</xsl:template>
540
541
542 <!-- OPTIMIZATION has to take modes into account -->
bansp5e2d1c02022-03-10 04:51:40 +0100543 <!-- ************************** CORPUS header ******************* -->
<!-- Creates the corpus-level header.xml file at $target: an idsHeader of type
  "corpus" filled by processing the children of teiCorpus/teiHeader in mode
  "corpus".

  text.xml : a parsed NKJP text document whose root element is teiCorpus
  target   : URI of the result document -->
<xsl:template name="create_corpus_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:variable name="header_parts" select="$text.xml/tei:teiCorpus/tei:teiHeader/tei:*"/>

  <!-- The original author also tried
         doctype-public="{$publicDoctypeI5}"
         doctype-system="{$systemDoctypeI5}"
       on xsl:result-document and noted: these are, sadly, useless. -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes">
    <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates select="$header_parts" mode="corpus"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
561
<!-- Corpus header: rebuilds fileDesc in no namespace and processes its children in the same mode. -->
<xsl:template match="tei:fileDesc" mode="corpus">
  <fileDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </fileDesc>
</xsl:template>
bansp9103aab2022-03-19 05:10:21 +0100567
bansp5e2d1c02022-03-10 04:51:40 +0100568
<!-- Corpus header: turns a title into c.title; attributes go through the
  corpus-mode attribute templates, the content is reduced to its text. -->
<xsl:template match="tei:title" mode="corpus">
  <c.title>
    <xsl:apply-templates select="@*" mode="#current"/>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </c.title>
</xsl:template>
575
<!-- Corpus header: rebuilds titleStmt, putting a korpusSigle holding the corpus
  ID in front of the processed children. -->
<xsl:template match="tei:titleStmt" mode="corpus">
  <titleStmt>
    <korpusSigle>
      <xsl:value-of select="string($corpusID)"/>
    </korpusSigle>
    <xsl:apply-templates select="node()" mode="#current"/>
  </titleStmt>
</xsl:template>
584
<!-- Corpus header: rebuilds publicationStmt in no namespace and processes its children in the same mode. -->
<xsl:template match="tei:publicationStmt" mode="corpus">
  <publicationStmt>
    <xsl:apply-templates select="node()" mode="#current"/>
  </publicationStmt>
</xsl:template>
590
<!-- Corpus header: rebuilds availability in no namespace; only its attributes
  and child elements are processed. -->
<xsl:template match="tei:availability" mode="corpus">
  <availability>
    <xsl:apply-templates select="@* | *" mode="#current"/>
  </availability>
</xsl:template>
596
<!-- Corpus header: rebuilds encodingDesc in no namespace and processes its children in the same mode. -->
<xsl:template match="tei:encodingDesc" mode="corpus">
  <encodingDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </encodingDesc>
</xsl:template>
602
<!-- Corpus header: classDecl, taxonomy, category and a bibl directly inside a
  taxonomy are rebuilt under their own local name in no namespace; their
  attributes and child elements are processed in the same mode. -->
<xsl:template match="tei:classDecl | tei:taxonomy | tei:category | tei:taxonomy/tei:bibl" mode="corpus">
  <xsl:variable name="element_name" select="local-name(.)"/>
  <xsl:element name="{$element_name}">
    <xsl:apply-templates select="@* | *" mode="#current"/>
  </xsl:element>
</xsl:template>
608
<!-- Corpus header: a title inside bibl, an edition and a desc are rebuilt under
  their own local name in no namespace; attributes go through the corpus-mode
  attribute templates, the content is reduced to its text. -->
<xsl:template match="tei:bibl/tei:title | tei:edition | tei:desc" mode="corpus">
  <xsl:variable name="element_name" select="local-name(.)"/>
  <xsl:element name="{$element_name}">
    <xsl:apply-templates select="@*" mode="#current"/>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </xsl:element>
</xsl:template>
615<!--
616 <xsl:template match="tei:textClass" mode="corpus">
617 <xsl:element name="{local-name()}">
618 <xsl:apply-templates mode="corpus" select="@* | *"/>
619 </xsl:element>
620 </xsl:template>
621
622 <xsl:template match="tei:catRef" mode="corpus">
623 <xsl:element name="{local-name()}">
624 <xsl:apply-templates mode="corpus" select="@* | *"/>
625 </xsl:element>
626 </xsl:template>
627-->
bansp5e2d1c02022-03-10 04:51:40 +0100628
629
630
631 <!-- this template can be called by the XSPEC test; TODO: find a way to call the main() template directly -->
632 <!-- I have not fully handled the param transmission, which would have to be kludged in just for the sake of XSpec,
633 because I'm disabling this for now, due to XSpec design issues; relevant links, a.o.:
634
635 https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
636 https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html
637
638 In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in their stylesheets. But I don't
639 want to be strangled by kludges at the beginning of work, I've already lost quite a bit of time on this investigation,
640 I will therefore "just code" and then can think of externalizing bits of templates if we want to play with tests. For now,
641 I don't want to have to handle context items in a special way inside variables, etc., because I'm not sure it's worth it.
642
643 -->
644 <!--<xsl:template name="test_full">
645 <xsl:param name="corpusID"/>
646 <xsl:param name="docID"/>
647 <xsl:param name="textID"/>
648 <xsl:call-template name="xsl:initial-template"/>
649 </xsl:template>-->
650
Akron9a8ee3e2022-01-31 13:51:49 +0100651</xsl:stylesheet>