blob: d8babb589493d3bc126a74a9b8a47715b5a70b89 [file] [log] [blame]
Akron9a8ee3e2022-01-31 13:51:49 +01001<?xml version="1.0" encoding="UTF-8"?>
2<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
bansp5e2d1c02022-03-10 04:51:40 +01003 xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
4 xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
5 xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f map nkjp tei"
6 version="3.0" expand-text="yes">
Akron9a8ee3e2022-01-31 13:51:49 +01007
banspe726b4a2022-03-28 05:47:45 +02008
9<!-- PARAMETERS -->
bansp5e2d1c02022-03-10 04:51:40 +010010
<!-- Directory containing the NKJP files, laid out as a collection of text-level
     directories (that is how we know both the $corpusID and the $docID). -->
<xsl:param name="sourceDir" as="xs:string" select="'test/resources/nkjp2korap_sample2'"/>

<!-- Directory in which the corpus/document/text/annotations hierarchy is created. -->
<xsl:param name="targetDir" as="xs:string" select="'test/output'"/>

<!-- Comma-separated list of document IDs to be skipped during processing,
     for example: HellerPodgladanie,KOT
     Nothing beyond exact string identity is supported (this is just for testing). -->
<xsl:param name="skip_docID" as="xs:string"
  select="'HellerPodgladanie,IsakowiczZaleskiMoje,KolakowskiOco,MysliwskiKamien,WilkWilczy,ZycieWarszawy_Zycie'"/>
banspb5992532022-03-29 15:55:44 +020025
bansp8f6700b2022-03-27 05:27:09 +020026
banspe726b4a2022-03-28 05:47:45 +020027<!-- VARIABLES -->
28
<!-- Identifiers of the target corpus and document; both are fixed to 'NKJP'. -->
<xsl:variable name="corpusID" as="xs:string" static="yes" select="'NKJP'"/>
<xsl:variable name="docID" as="xs:string" static="yes" select="'NKJP'"/>

<!-- Root of the generated corpus directory, with a trailing slash. -->
<xsl:variable name="targetCorpusDir_slashed" as="xs:string"
  select="$targetDir || '/' || $corpusID || '/'"/>

<!-- System and public identifiers of the I5 DTD. -->
<xsl:variable name="systemDoctypeI5" as="xs:string" static="yes"
  select="'http://corpora.ids-mannheim.de/I5/DTD/i5.dtd'"/>

<xsl:variable name="publicDoctypeI5" as="xs:string" static="yes"
  select="'-//IDS//DTD I5 1.0//EN'"/>

<!-- Namespace of the generated KorAP-XML elements. -->
<xsl:variable name="KorAP_namespace" as="xs:string" static="yes"
  select="'http://ids-mannheim.de/ns/KorAP'"/>

<!-- Value written to the @version attribute of generated layers
     (this is only a bit funny). -->
<xsl:variable name="KorAP-XML_version" as="xs:string" static="yes" select="'KorAP-0.4'"/>

<!-- Query parameters appended to the collection URI; see
     https://www.saxonica.com/documentation11/index.html#!sourcedocs/collections/collection-directories -->
<xsl:variable name="collection_params" as="xs:string" static="yes"
  select="'recurse=yes;validation=strip;select=text.xml;content-type=application/xml;on-error=warning;xinclude=yes'"/>

<!-- Every text.xml document found below $sourceDir. -->
<xsl:variable name="collection_of_text" as="document-node()+"
  select="collection($sourceDir || '?' || $collection_params)"/>

<!-- These two 'flags' are meant to increase the readability of the code:
     they index the (start, end) sequence returned by f:calc_offsets(). -->
<xsl:variable name="OFFSET_START" as="xs:integer" static="yes" select="1"/>
<xsl:variable name="OFFSET_END" as="xs:integer" static="yes" select="2"/>
banspb5992532022-03-29 15:55:44 +020058
59
banspe726b4a2022-03-28 05:47:45 +020060<!-- MODES -->
bansp5e2d1c02022-03-10 04:51:40 +010061
<!-- Modes 'corpus' and 'text' ignore anything without an explicit rule;
     'header-text' falls back to copying text content only. -->
<xsl:mode name="corpus" on-no-match="deep-skip"/>
<xsl:mode name="text" on-no-match="deep-skip"/>
<xsl:mode name="header-text" on-no-match="text-only-copy"/>
bansp5e2d1c02022-03-10 04:51:40 +010065
banspe726b4a2022-03-28 05:47:45 +020066
67 <!-- FUNCTIONS -->
68
<!-- Returns the nesting level of $node: the number of elements on its
     ancestor-or-self axis, not counting elements whose local name is
     'TEI' or 'teiCorpus'. -->
<xsl:function name="f:compute_nesting" as="xs:integer">
  <xsl:param name="node" as="element()"/>
  <xsl:sequence
    select="count($node/ancestor-or-self::*[not(local-name(.) = ('TEI', 'teiCorpus'))])"/>
</xsl:function>
76
banspd1bf1db2022-04-04 02:16:24 +020077
<!-- Helper of f:calc_offsets: parses a @corresp value of the form
     ...(target,OFFSET,LENGTH) and returns the pair (OFFSET, LENGTH). -->
<xsl:function name="f:corresp_range" as="xs:integer+">
  <xsl:param name="corresp" as="xs:string"/>
  <xsl:variable name="numbers" as="xs:string"
    select="substring-after(substring-before($corresp, ')'), ',')"/>
  <xsl:sequence
    select="xs:integer(substring-before($numbers, ',')), xs:integer(substring-after($numbers, ','))"/>
</xsl:function>

<!-- Helper of f:calc_offsets: the exclusive end offset of $node, taken from
     the last non-rejected seg it contains.
       text / body : sum of the end offsets of all contained paragraphs
       p           : end offset of the last accepted seg of its last s
       s           : end offset of its last accepted seg
       otherwise   : OFFSET + LENGTH of the node's own @corresp
     A node without any qualifying @corresp contributes 0. -->
<xsl:function name="f:relative_end" as="xs:integer">
  <xsl:param name="node" as="element()"/>
  <xsl:variable name="closing_corresps" as="attribute(corresp)*" select="
      if ($node/self::tei:text or $node/self::tei:body) then
        $node/descendant::tei:p/descendant::tei:s[last()]/(descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'])[last()]/@corresp
      else if ($node/self::tei:p) then
        $node/descendant::tei:s[last()]/(descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'])[last()]/@corresp
      else if ($node/self::tei:s) then
        $node/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/@corresp
      else
        $node/@corresp"/>
  <xsl:sequence select="sum($closing_corresps ! sum(f:corresp_range(.)))"/>
</xsl:function>

<!-- Returns the sequence (start, end) for $node; index the result with
     $OFFSET_START / $OFFSET_END. The end offset is inclusive.

     $skip_start = true(), or $node is text/body:
       start is 0 and end is the node's relative end offset minus 1.
     $skip_start = false():
       p   : start is the sum of the end offsets of all preceding paragraphs
       s   : paragraph start + end offset of the preceding s in the same p
       seg : paragraph start + OFFSET from the seg's own @corresp
       end is always paragraph start + relative end offset - 1.

     NOTE(review): this treats @corresp offsets as relative to the enclosing
     paragraph and assumes that paragraphs follow each other without any
     separator character; please confirm against the data.

     Changes with respect to the previous, recursive implementation:
       * the start of the enclosing paragraph was bound to a single xs:integer
         although f:calc_offsets returns two items, which raised a type error
         for every s/seg call with $skip_start = false();
       * sum(X, 1) was used as if it added 1, but the second argument of sum()
         is only the value returned for an empty sequence;
       * only the immediately preceding paragraph was taken into account;
       * the debugging xsl:message calls are gone. -->
<xsl:function name="f:calc_offsets" as="xs:integer+">
  <xsl:param name="node" as="element()"/>
  <xsl:param name="skip_start" as="xs:boolean"/>

  <!-- true when the result must not be shifted by anything that precedes $node -->
  <xsl:variable name="relative_only" as="xs:boolean"
    select="$skip_start or exists($node/self::tei:text) or exists($node/self::tei:body)"/>

  <!-- $node itself when it is a p, otherwise its nearest p ancestor -->
  <xsl:variable name="paragraph" as="element(tei:p)?"
    select="($node/self::tei:p, $node/ancestor::tei:p[1])[1]"/>

  <xsl:variable name="paragraph_start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="$relative_only">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:variable name="para_pos" as="xs:integer"
          select="count($paragraph/preceding-sibling::tei:p) + 1"/>
        <xsl:sequence
          select="sum($paragraph/ancestor::tei:body/tei:p[position() lt $para_pos] ! f:relative_end(.))"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- start of $node counted from the beginning of its paragraph -->
  <xsl:variable name="local_start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="$relative_only or $node/self::tei:p">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="$node/self::tei:s">
        <xsl:variable name="s_pos" as="xs:integer"
          select="count($node/preceding-sibling::tei:s) + 1"/>
        <xsl:variable name="previous_s" as="element(tei:s)?"
          select="$node/ancestor::tei:p[1]/tei:s[position() lt $s_pos][last()]"/>
        <xsl:sequence
          select="if (exists($previous_s)) then f:relative_end($previous_s) else 0"/>
      </xsl:when>
      <xsl:otherwise>
        <!-- for segs the s elements are irrelevant: the local offset is on @corresp -->
        <xsl:sequence select="f:corresp_range($node/@corresp)[1]"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <xsl:sequence
    select="$paragraph_start + $local_start, $paragraph_start + f:relative_end($node) - 1"/>
</xsl:function>
258
<!-- Returns OFFSET + LENGTH read from one @corresp attribute of the form
     ...(target,OFFSET,LENGTH). Which attribute is read depends on $node:
       text / body : last accepted seg of the last s of the last p
       p           : last accepted seg of its last s
       s           : its last accepted seg
       otherwise   : the node's own @corresp
     "Accepted" means the seg has no @nkjp:rejected or its value is not 'true'.
     A type error is raised when no such attribute exists.

     NOTE(review): for text/body only the last paragraph is consulted, so the
     result is not accumulated over preceding paragraphs; confirm that this
     is intended for multi-paragraph texts. -->
<xsl:function name="f:calc_content_length" as="xs:integer">
  <xsl:param name="node" as="node()"/>

  <xsl:variable name="last_corresp" as="attribute(corresp)" select="
      if ($node/self::tei:text or $node/self::tei:body) then
        $node/descendant::tei:p[last()]/descendant::tei:s[last()]/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/@corresp
      else if ($node/self::tei:p) then
        $node/descendant::tei:s[last()]/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/@corresp
      else if ($node/self::tei:s) then
        $node/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/@corresp
      else
        $node/@corresp"/>

  <!-- the 'OFFSET,LENGTH' tail between the first comma and the closing parenthesis -->
  <xsl:variable name="numbers" as="xs:string"
    select="substring-after(substring-before($last_corresp, ')'), ',')"/>

  <xsl:sequence
    select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"/>
</xsl:function>
bansp5e2d1c02022-03-10 04:51:40 +0100293
banspb5992532022-03-29 15:55:44 +0200294
295<!-- UTILITY TEMPLATES -->
296
<!-- Delete an auto-inserted attribute throughout, in every mode. -->
<xsl:template match="@default" mode="#all"/>

<!-- NKJP-SGJP has apparently abandoned standoff representation by adding <w>
     everywhere; for the time being we just stick to the standoff offsets,
     although that may need to be revisited, as the NKJP format has now begun
     to stray from its schemas and assumptions. -->
<xsl:template match="tei:w" mode="#all"/>

<!-- THIS IS ONLY TEMPORARY: an interesting challenge came up where the
     straightforward mapping will probably have to be abandoned because of
     TOKENIZATION alternatives. For now the aim is just to make this
     stylesheet work, even if it eats an occasional token (which it currently
     does: 'komuś' and 'czym' vanish). -->
<xsl:template match="tei:choice" mode="#all"/>
bansp8f6700b2022-03-27 05:27:09 +0200312
banspb5992532022-03-29 15:55:44 +0200313
314 <!-- MAIN PROCESSING -->
315
316
<!-- Entry point. Writes the corpus header once, then visits every text.xml of
     the collection whose directory name is not listed in $skip_docID.
     At present each visited text only gets a diagnostic message with the
     offsets of the fourth paragraph of its ann_segmentation.xml; the real
     per-text processing is commented out below. -->
<xsl:template name="xsl:initial-template">
  <xsl:variable name="IDs_to_skip" as="xs:string*" select="tokenize($skip_docID, ',')"/>

  <!-- We only want to call the template below once, and any NKJP corpus file
       will do for that purpose: all we need is the main corpus header, which
       we can (should) get to from any NKJP corpus document. -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param name="text.xml" select="$collection_of_text[1]" as="document-node()"/>
    <xsl:with-param name="target" select="$targetCorpusDir_slashed || 'header.xml'" as="xs:string"/>
  </xsl:call-template>

  <xsl:for-each select="$collection_of_text">
    <!-- the directory of the current text.xml; its last path step is the text ID -->
    <xsl:variable name="my_dir" as="xs:string" select="replace(base-uri(), '/text\.xml', '')"/>
    <xsl:variable name="my_textID" as="xs:string" select="tokenize($my_dir, '/')[last()]"/>
    <xsl:variable name="ann_morphosyntax.uri" as="xs:string" select="$my_dir || '/ann_morphosyntax.xml'"/>
    <xsl:variable name="ann_segmentation.uri" as="xs:string" select="$my_dir || '/ann_segmentation.xml'"/>

    <xsl:if test="not($my_textID = $IDs_to_skip)">

      <!-- iterate instead of passing the node directly, so that a text with
           fewer than four paragraphs is skipped rather than causing a type error -->
      <xsl:for-each select="doc($ann_segmentation.uri)//tei:body/tei:p[4]">
        <xsl:message select="f:calc_offsets(., false())"/>
      </xsl:for-each>

      <!--<xsl:call-template name="process_single_sample">
        <xsl:with-param name="text.xml" as="document-node()" select="."/>
        <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
          select="doc($ann_morphosyntax.uri)"/>
        <xsl:with-param name="ann_segmentation.xml" as="document-node()"
          select="doc($ann_segmentation.uri)"/>
        <xsl:with-param name="my_textID" select="$my_textID" as="xs:string"/>
      </xsl:call-template>-->
    </xsl:if>
  </xsl:for-each>
</xsl:template>
351
<!-- Produces all output files for one text: data.xml, struct/structure.xml,
     nkjp/morpho.xml and header.xml, below
     $targetCorpusDir_slashed/$docID/$my_textID.

     text.xml             : the text document
     ann_morphosyntax.xml : the morphosyntactic annotation document
     ann_segmentation.xml : the segmentation annotation document
     my_textID            : ID of the text; defaults to '0-BAD_textID' -->
<xsl:template name="process_single_sample">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="my_textID" as="xs:string" select="'0-BAD_textID'"/>

  <xsl:variable name="targetBaseDir" as="xs:string"
    select="$targetCorpusDir_slashed || $docID || '/' || $my_textID"/>

  <!-- this is what occurs in the text and data layers as @docid -->
  <xsl:variable name="compoundID" as="xs:string"
    select="$corpusID || '_' || $docID || '.' || $my_textID"/>

  <xsl:call-template name="create_data">
    <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
    <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
    <xsl:with-param name="target" select="$targetBaseDir || '/data.xml'" as="xs:string"/>
  </xsl:call-template>

  <xsl:call-template name="create_struct">
    <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
    <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
    <xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml" as="document-node()"/>
    <xsl:with-param name="target" select="$targetBaseDir || '/struct/structure.xml'" as="xs:string"/>
  </xsl:call-template>

  <xsl:call-template name="create_morpho">
    <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
    <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
    <xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml" as="document-node()"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
    <xsl:with-param name="target" select="$targetBaseDir || '/nkjp/morpho.xml'" as="xs:string"/>
  </xsl:call-template>

  <xsl:call-template name="create_text_header">
    <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
    <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
    <xsl:with-param name="target" select="$targetBaseDir || '/header.xml'" as="xs:string"/>
  </xsl:call-template>
</xsl:template>
397
398 <!-- ************************** data.xml ******************* -->
399
<!-- Writes the data.xml file to $target: a raw_text element carrying
     @docid = $compoundID, a metadata reference, and a text element filled by
     applying templates to every element of $text.xml whose local name is 'ab'. -->
<xsl:template name="create_data">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- xpath-default-namespace is not an attribute value template, so the URI
       is spelled out here, as in create_struct and create_morpho -->
  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">

    <xsl:processing-instruction name="xml-model">href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>

      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <!-- templates are applied (rather than taking the string value) so
             that the suppressing utility templates take effect -->
        <xsl:apply-templates select="$text.xml//*[local-name() = 'ab']"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
422
bansp5f841732022-03-16 06:27:31 +0100423 <!-- ************************** struct ******************* -->
424
<!-- Writes the structure layer to $target: a layer element with
     @docid = $compoundID and @version = $KorAP-XML_version, whose spanList is
     filled by processing every tei:text of $ann_segmentation.xml in mode 'struct'.
     $text.xml is accepted but not used here. -->
<xsl:template name="create_struct">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">
    <xsl:processing-instruction name="xml-model">href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:attribute name="version" select="$KorAP-XML_version"/>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:apply-templates select="$ann_segmentation.xml//tei:text" mode="struct"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
444
<!-- Emits one KorAP span for the current TEI element and then descends into
     its element children.

     ini, fin : start/end values handed down by the parent; accepted but not
                used by this template
     index    : index of the parent span (default 1)

     The span gets @id ('s' followed by the computed index), @from, @to and
     @l (nesting level), plus a TEI feature structure holding the element name
     and, if present, its attributes.
     Elements other than text, body, p, s and seg have no start rule and raise
     a type error. -->
<xsl:template match="tei:*" mode="struct">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>

  <!-- The index is spread over several variables on purpose, so that the
       individual constituent values can be looked up should anything go wrong. -->
  <xsl:variable name="my_name" as="xs:string" select="local-name()"/>
  <xsl:variable name="preceding" as="element()*"
    select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <!-- for p and s, everything nested in the preceding same-named siblings
       has consumed an index as well -->
  <xsl:variable name="outside-preceding-count" as="xs:integer"
    select="if (self::tei:s or self::tei:p) then count($preceding/descendant::*) else 0"/>
  <xsl:variable name="my_index" as="xs:integer"
    select="$index + 1 + count($preceding) + $outside-preceding-count"/>

  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:otherwise>
        <!-- the @corresp of the first seg determines where the element starts -->
        <xsl:variable name="first_corresp" as="attribute(corresp)" select="
            if (self::tei:p) then descendant::tei:s[1]/descendant::tei:seg[1]/@corresp
            else if (self::tei:s) then descendant::tei:seg[1]/@corresp
            else if (self::tei:seg) then @corresp
            else ()"/>
        <xsl:variable name="numbers" as="xs:string"
          select="substring-after(substring-before($first_corresp, ')'), ',')"/>
        <xsl:sequence select="xs:integer(substring-before($numbers, ','))"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <!-- STRUCT vs. LEX -->
      <xsl:attribute name="type" select="'struct'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'name'"/>
        <xsl:value-of select="local-name()"/>
      </xsl:element>
      <xsl:if test="exists(@*)">
        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
          <xsl:attribute name="name" select="'attr'"/>
          <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="type" select="'attr'"/>
            <xsl:for-each select="@*">
              <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                <xsl:attribute name="name" select="local-name(.)"/>
                <xsl:value-of select="."/>
              </xsl:element>
            </xsl:for-each>
          </xsl:element>
        </xsl:element>
      </xsl:if>
    </xsl:element>
  </xsl:element>

  <!-- only element children are visited, so that whitespace between the source
       elements is not copied into the span list by the built-in rules -->
  <xsl:apply-templates select="*" mode="struct">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
  </xsl:apply-templates>
</xsl:template>
542
543 <!-- ************************** morpho ******************* -->
544
<!-- Writes the morphosyntactic layer to $target: a layer element with
     @docid = $compoundID and @version = $KorAP-XML_version, whose spanList is
     filled by processing every tei:text of $ann_segmentation.xml in mode
     'morpho', with $ann_morphosyntax.xml handed down for lookups.
     $text.xml is accepted but not used here. -->
<xsl:template name="create_morpho">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">
    <xsl:processing-instruction name="xml-model">href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:attribute name="version" select="$KorAP-XML_version"/>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:apply-templates select="$ann_segmentation.xml//tei:text" mode="morpho">
          <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
        </xsl:apply-templates>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
567
<!-- Structural pass-through for mode 'morpho': produces no output of its own,
     only computes the index and the start/end values of the current element
     and hands them down to its element children.

     ini, fin             : values handed down by the parent; accepted but not
                            used by this template
     index                : index of the parent (default 1)
     ann_morphosyntax.xml : morphosyntactic annotation document, passed on as is

     Only text, body, p and s have a start rule here; any other element that
     reaches this template raises a type error. -->
<xsl:template match="tei:*" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>

  <xsl:variable name="my_name" as="xs:string" select="local-name()"/>
  <xsl:variable name="preceding" as="element()*"
    select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <!-- for p and s, everything nested in the preceding same-named siblings
       has consumed an index as well -->
  <xsl:variable name="outside-preceding-count" as="xs:integer"
    select="if (self::tei:s or self::tei:p) then count($preceding/descendant::*) else 0"/>
  <xsl:variable name="my_index" as="xs:integer"
    select="$index + 1 + count($preceding) + $outside-preceding-count"/>

  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:otherwise>
        <!-- the @corresp of the first seg determines where the element starts -->
        <xsl:variable name="first_corresp" as="attribute(corresp)" select="
            if (self::tei:p) then descendant::tei:s[1]/descendant::tei:seg[1]/@corresp
            else if (self::tei:s) then descendant::tei:seg[1]/@corresp
            else ()"/>
        <xsl:variable name="numbers" as="xs:string"
          select="substring-after(substring-before($first_corresp, ')'), ',')"/>
        <xsl:sequence select="xs:integer(substring-before($numbers, ','))"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <!-- only element children are visited, so that whitespace between the source
       elements is not copied into the span list by the built-in rules -->
  <xsl:apply-templates select="*" mode="morpho">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
  </xsl:apply-templates>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100634
<!-- Morphosyntactic layer, token level.
     For one tei:seg of the segmentation layer, this template looks up the matching seg in
     $ann_morphosyntax.xml (the one whose @corresp, after '#', equals this seg's @xml:id) and
     writes a single span element in the KorAP namespace. The span carries the disambiguated
     reading: lemma, pos, msd (only when non-empty) and a 'join' feature when the source seg
     has an 'nps' feature.

     Parameters:
       ini, fin              passed in by the calling "morpho" template; not read in this template
       index                 base value for the numeric part of the generated span id
       ann_morphosyntax.xml  the document that is searched for the morphosyntactic description
-->
<xsl:template match="tei:seg" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>

  <!-- The lookup is deliberately spread over several variables, so that every intermediate
       value can be inspected individually should anything go wrong. -->
  <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
  <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
  <!-- the morphosyntactic seg that points back at this segmentation seg -->
  <xsl:variable name="my_morph-seg" as="node()"
    select="$ann_morphosyntax.xml//tei:seg[substring-after(@corresp, '#') eq $my_id]"/>
  <!-- the 'disamb' feature; its 'choice' feature references the selected symbol by id -->
  <xsl:variable name="my_disamb" as="node()"
    select="$my_morph-seg//tei:fs/tei:f[@name eq 'disamb']"/>
  <xsl:variable name="my_choice-id" as="xs:string"
    select="substring-after($my_disamb//tei:f[@name eq 'choice']/@fVal, '#')"/>
  <!-- the 'lex' feature structure that contains the selected symbol -->
  <xsl:variable name="my_choice-lex" as="node()"
    select="$my_morph-seg//tei:f[@name eq 'interps']/tei:fs[@type eq 'lex'][descendant::tei:symbol[@xml:id eq $my_choice-id]]"/>
  <xsl:variable name="chosen-msd" as="xs:string"
    select="$my_choice-lex/descendant::tei:symbol[@xml:id eq $my_choice-id]/@value"/>

  <!-- The span number is derived from the incoming index and the number of preceding
       siblings with the same local name. The additional "outside preceding" offset that
       used to be computed for tei:s / tei:p is switched off here for now and needs to be
       revisited once this template has become more specific. -->
  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)"/>
  <xsl:variable name="my_index" select="$index + 1 + $preceding-count" as="xs:integer"/>

  <!-- @corresp is read as '...(' id ',' start ',' length ')': the value between the first
       and the second comma inside the parentheses is taken as the start offset -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:variable name="numbers" select="substring-after(substring-before(@corresp, ')'), ',')"/>
    <xsl:sequence select="xs:integer(substring-before($numbers, ','))"/>
  </xsl:variable>
  <!-- f:calc_content_length() is defined elsewhere in this stylesheet -->
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <xsl:attribute name="type" select="'lex'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'lex'"/>
        <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
          <!-- the orthographic form is only recorded as a comment, for human readers -->
          <xsl:comment select="$my_morph-seg//tei:fs/tei:f[@name eq 'orth']/tei:string"/>

          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'lemma'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'base']/tei:string"/>
          </xsl:element>
          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'pos'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'ctag']/tei:symbol/@value"/>
          </xsl:element>
          <xsl:if test="string-length($chosen-msd)">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'msd'"/>
              <xsl:value-of select="$chosen-msd"/>
            </xsl:element>
          </xsl:if>
          <xsl:if test="$my_morph-seg//tei:fs/tei:f[@name eq 'nps']">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'join'"/>
              <xsl:value-of select="'left'"/>
            </xsl:element>
          </xsl:if>
        </xsl:element>
      </xsl:element>
    </xsl:element>
  </xsl:element>

  <!-- Descend into the children of this seg. An unbalanced comment-closing delimiter used to
       follow this instruction; as literal text it was written to the output after every span,
       so it has been removed. NOTE(review): if the intention was to disable this instruction
       altogether, wrap it in a complete comment instead. -->
  <xsl:apply-templates mode="morpho">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
  </xsl:apply-templates>
</xsl:template>
banspe726b4a2022-03-28 05:47:45 +0200724
bansp5f841732022-03-16 06:27:31 +0100725 <!-- ************************** TEXT header ******************* -->
726
<!-- Writes the text-level header file.
     Parameters:
       text.xml    the source document whose tei:TEI/tei:teiHeader children are transformed
       compoundID  handed down as a tunnel parameter to the templates of mode "text"
       target      the href of the result document to be created
-->
<xsl:template name="create_text_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- everything directly below the TEI header that lives in the TEI namespace -->
  <xsl:variable name="header_parts" select="$text.xml//tei:TEI/tei:teiHeader/tei:*"/>

  <!-- create the local header.xml file -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates select="$header_parts" mode="text">
        <xsl:with-param name="compoundID" select="$compoundID" as="xs:string" tunnel="yes"/>
      </xsl:apply-templates>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
743
<!-- text header: re-create fileDesc under its local name and process its children in mode "text" -->
<xsl:template match="tei:fileDesc" mode="text">
  <fileDesc>
    <xsl:apply-templates select="node()" mode="text"/>
  </fileDesc>
</xsl:template>
749
<!-- text header: a title becomes t.title.
     The content is processed in mode "header-text", like the content of the other header
     elements (c.title, p, edition, desc); without an explicit mode the children would be
     handed to the unnamed default mode of the stylesheet. -->
<xsl:template match="tei:title" mode="text">
  <t.title>
    <xsl:apply-templates mode="header-text"/>
  </t.title>
</xsl:template>
755
<!-- text header: titleStmt starts with a textSigle holding the tunnelled $compoundID,
     followed by the transformed children of the source titleStmt -->
<xsl:template match="tei:titleStmt" mode="text">
  <xsl:param name="compoundID" as="xs:string" tunnel="yes"/>
  <xsl:element name="titleStmt">
    <xsl:element name="textSigle">{$compoundID}</xsl:element>
    <xsl:apply-templates select="node()" mode="text"/>
  </xsl:element>
</xsl:template>
765
<!-- text header: re-create publicationStmt under its local name; children go through mode "text" -->
<xsl:template match="tei:publicationStmt" mode="text">
  <publicationStmt>
    <xsl:apply-templates select="node()" mode="text"/>
  </publicationStmt>
</xsl:template>
771
<!-- text header: re-create availability; only its attributes and child elements are
     processed (text nodes directly inside it are not selected) -->
<xsl:template match="tei:availability" mode="text">
  <availability>
    <xsl:apply-templates select="(@*, *)" mode="text"/>
  </availability>
</xsl:template>
777
<!-- text header: re-create profileDesc under its local name; children go through mode "text" -->
<xsl:template match="tei:profileDesc" mode="text">
  <profileDesc>
    <xsl:apply-templates select="node()" mode="text"/>
  </profileDesc>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100783
<!-- text header: re-create textClass; only its attributes and child elements are processed -->
<xsl:template match="tei:textClass" mode="text">
  <textClass>
    <xsl:apply-templates select="(@*, *)" mode="text"/>
  </textClass>
</xsl:template>
789
<!-- text and corpus header: re-create catRef with its attributes and child elements.
     The template serves two modes, so the content is processed in whichever mode is
     current, instead of always switching to mode "text". -->
<xsl:template match="tei:catRef" mode="text corpus">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates mode="#current" select="@* | *"/>
  </xsl:element>
</xsl:template>
795
<!-- text and corpus header: the listed attributes are carried over unchanged;
     xml:id is only carried over when it sits somewhere inside a classDecl -->
<xsl:template mode="text corpus"
  match="@status | @scheme | @target | @type | @xml:id[ancestor::tei:classDecl] | @xml:lang">
  <xsl:copy/>
</xsl:template>
799
<!-- text and corpus header: a paragraph is re-created as p; its content is processed
     in mode "header-text" -->
<xsl:template match="tei:p" mode="text corpus">
  <p>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </p>
</xsl:template>
805
806
807 <!-- OPTIMIZATION has to take modes into account -->
bansp5e2d1c02022-03-10 04:51:40 +0100808 <!-- ************************** CORPUS header ******************* -->
<!-- Writes the corpus-level header file.
     Parameters:
       text.xml  the source document whose tei:teiCorpus/tei:teiHeader children are transformed
       target    the href of the result document to be created
-->
<xsl:template name="create_corpus_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- everything directly below the corpus header that lives in the TEI namespace -->
  <xsl:variable name="header_parts" select="$text.xml/tei:teiCorpus/tei:teiHeader/tei:*"/>

  <!-- create the corpus-level header.xml file;
       doctype-public="{$publicDoctypeI5}" and doctype-system="{$systemDoctypeI5}"
       are left out on purpose: these are, sadly, useless -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes">
    <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates select="$header_parts" mode="corpus"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
826
<!-- corpus header: re-create fileDesc under its local name; children go through mode "corpus" -->
<xsl:template match="tei:fileDesc" mode="corpus">
  <fileDesc>
    <xsl:apply-templates select="node()" mode="corpus"/>
  </fileDesc>
</xsl:template>
bansp9103aab2022-03-19 05:10:21 +0100832
bansp5e2d1c02022-03-10 04:51:40 +0100833
<!-- corpus header: a title becomes c.title; attributes are processed in mode "corpus"
     first, then the content in mode "header-text" -->
<xsl:template match="tei:title" mode="corpus">
  <xsl:element name="c.title">
    <xsl:apply-templates select="attribute::*" mode="corpus"/>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </xsl:element>
</xsl:template>
840
<!-- corpus header: titleStmt starts with a korpusSigle holding $corpusID,
     followed by the transformed children of the source titleStmt -->
<xsl:template match="tei:titleStmt" mode="corpus">
  <xsl:element name="titleStmt">
    <xsl:element name="korpusSigle">{$corpusID}</xsl:element>
    <xsl:apply-templates select="node()" mode="corpus"/>
  </xsl:element>
</xsl:template>
849
<!-- corpus header: re-create publicationStmt under its local name; children go through mode "corpus" -->
<xsl:template match="tei:publicationStmt" mode="corpus">
  <publicationStmt>
    <xsl:apply-templates select="node()" mode="corpus"/>
  </publicationStmt>
</xsl:template>
855
<!-- corpus header: re-create availability; only its attributes and child elements are processed -->
<xsl:template match="tei:availability" mode="corpus">
  <availability>
    <xsl:apply-templates select="(@*, *)" mode="corpus"/>
  </availability>
</xsl:template>
861
<!-- corpus header: re-create encodingDesc under its local name; children go through mode "corpus" -->
<xsl:template match="tei:encodingDesc" mode="corpus">
  <encodingDesc>
    <xsl:apply-templates select="node()" mode="corpus"/>
  </encodingDesc>
</xsl:template>
867
<!-- corpus header: the structural elements of the class declaration are re-created under
     their own local name; only attributes and child elements are processed -->
<xsl:template mode="corpus"
  match="tei:classDecl | tei:taxonomy | tei:category | tei:taxonomy/tei:bibl">
  <xsl:variable name="tag" select="local-name(.)"/>
  <xsl:element name="{$tag}">
    <xsl:apply-templates select="(@*, *)" mode="corpus"/>
  </xsl:element>
</xsl:template>
873
<!-- corpus header: text-bearing elements are re-created under their own local name;
     attributes are processed in mode "corpus", the content in mode "header-text" -->
<xsl:template mode="corpus" match="tei:bibl/tei:title | tei:edition | tei:desc">
  <xsl:variable name="tag" select="local-name(.)"/>
  <xsl:element name="{$tag}">
    <xsl:apply-templates select="attribute::*" mode="corpus"/>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </xsl:element>
</xsl:template>
880<!--
881 <xsl:template match="tei:textClass" mode="corpus">
882 <xsl:element name="{local-name()}">
883 <xsl:apply-templates mode="corpus" select="@* | *"/>
884 </xsl:element>
885 </xsl:template>
886
887 <xsl:template match="tei:catRef" mode="corpus">
888 <xsl:element name="{local-name()}">
889 <xsl:apply-templates mode="corpus" select="@* | *"/>
890 </xsl:element>
891 </xsl:template>
892-->
bansp5e2d1c02022-03-10 04:51:40 +0100893
894
895
<!-- This template can be called by the XSpec test; TODO: find a way to call the main() template directly. -->
<!-- I have not fully handled the parameter transmission, which would have to be kludged in just for the sake of XSpec,
     because I am disabling this for now, due to XSpec design issues; relevant links, among others:

     https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
     https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html

     In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in a stylesheet.
     I do not want to be strangled by kludges at the beginning of the work, and I have already lost quite a bit of time
     on this investigation. I will therefore "just code", and we can think about externalizing bits of templates later,
     if we want to play with tests. For now, I do not want to have to handle context items in a special way inside
     variables, etc., because I am not sure it is worth it.

-->
909 <!--<xsl:template name="test_full">
910 <xsl:param name="corpusID"/>
911 <xsl:param name="docID"/>
912 <xsl:param name="textID"/>
913 <xsl:call-template name="xsl:initial-template"/>
914 </xsl:template>-->
915
Akron9a8ee3e2022-01-31 13:51:49 +0100916</xsl:stylesheet>