blob: 77e070a07a54879ad9a454766185e839ddf386a6 [file] [log] [blame]
Akron9a8ee3e2022-01-31 13:51:49 +01001<?xml version="1.0" encoding="UTF-8"?>
2<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
bansp5e2d1c02022-03-10 04:51:40 +01003 xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
4 xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
5 xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f map nkjp tei"
6 version="3.0" expand-text="yes">
Akron9a8ee3e2022-01-31 13:51:49 +01007
banspe726b4a2022-03-28 05:47:45 +02008
9<!-- PARAMETERS -->
bansp5e2d1c02022-03-10 04:51:40 +010010
<!-- The directory containing the NKJP files, in the form of a collection of text-level
     directories (that is how we know both the $corpusID and the $docID). -->
<xsl:param name="sourceDir" as="xs:string" select="'test/resources/nkjp2korap_sample2'"/>

<!-- The directory in which the corpus/document/text/annotations hierarchy is created. -->
<xsl:param name="targetDir" as="xs:string" select="'test/output'"/>

<!-- Comma-separated list of document IDs that are skipped during processing,
     for example: HellerPodgladanie,KOT
     Nothing beyond plain string identity is supported (this is just for testing). -->
<xsl:param name="skip_docID" as="xs:string"
  select="'HellerPodgladanie,IsakowiczZaleskiMoje,KolakowskiOco,MysliwskiKamien,WilkWilczy,ZycieWarszawy_Zycie'"/>
banspb5992532022-03-29 15:55:44 +020025
bansp8f6700b2022-03-27 05:27:09 +020026
bansp9dc10002022-05-17 22:33:34 +020027<!-- VARIABLES (= constants...) -->
banspe726b4a2022-03-28 05:47:45 +020028
<!-- Fixed identifiers of the corpus level and the document level of the target hierarchy. -->
<xsl:variable name="corpusID" as="xs:string" static="yes" select="'NKJP'"/>
<xsl:variable name="docID" as="xs:string" static="yes" select="'NKJP'"/>

<!-- Target directory of the corpus, with a trailing slash. -->
<xsl:variable name="targetCorpusDir_slashed" as="xs:string"
  select="concat($targetDir, '/', $corpusID, '/')"/>

<!-- System and public identifiers of the I5 DTD. -->
<xsl:variable name="systemDoctypeI5" as="xs:string" static="yes"
  select="'http://corpora.ids-mannheim.de/I5/DTD/i5.dtd'"/>
<xsl:variable name="publicDoctypeI5" as="xs:string" static="yes"
  select="'-//IDS//DTD I5 1.0//EN'"/>

<!-- Namespace of the KorAP-XML elements created by this stylesheet. -->
<xsl:variable name="KorAP_namespace" as="xs:string" static="yes"
  select="'http://ids-mannheim.de/ns/KorAP'"/>

<!-- Version label (this is only a bit funny). -->
<xsl:variable name="KorAP-XML_version" as="xs:string" static="yes" select="'KorAP-0.4'"/>

<!-- Query parameters for collection(); see
     https://www.saxonica.com/documentation11/index.html#!sourcedocs/collections/collection-directories -->
<xsl:variable name="collection_params" as="xs:string" static="yes"
  select="'recurse=yes;validation=strip;select=text.xml;content-type=application/xml;on-error=warning;xinclude=yes'"/>

<!-- Every text.xml document found below $sourceDir. -->
<xsl:variable name="collection_of_text" as="document-node()+"
  select="collection(concat($sourceDir, '?', $collection_params))"/>

<!-- These two 'flags' are meant to increase the readability of the code: they index the
     output of the calc_offsets() function, whose returned value is a sequence (start, end). -->
<xsl:variable name="OFFSET_START" as="xs:integer" static="yes" select="1"/>
<xsl:variable name="OFFSET_END" as="xs:integer" static="yes" select="2"/>
banspb5992532022-03-29 15:55:44 +020058
59
banspe726b4a2022-03-28 05:47:45 +020060<!-- MODES -->
bansp5e2d1c02022-03-10 04:51:40 +010061
<!-- Named modes: in 'corpus' and 'text' a node without a matching template is skipped together
     with its subtree; in 'header-text' only the text of such a node is copied. -->
<xsl:mode on-no-match="deep-skip" name="corpus"/>
<xsl:mode on-no-match="deep-skip" name="text"/>
<xsl:mode on-no-match="text-only-copy" name="header-text"/>
bansp5e2d1c02022-03-10 04:51:40 +010065
banspe726b4a2022-03-28 05:47:45 +020066
67 <!-- FUNCTIONS -->
68
<!-- f:compute_nesting($node): nesting level of $node, i.e. the number of elements on its
     ancestor-or-self axis, not counting elements whose local name is TEI or teiCorpus. -->
<xsl:function name="f:compute_nesting" as="xs:integer">
  <xsl:param name="node" as="element()"/>
  <xsl:sequence
    select="count($node/ancestor-or-self::*[not(local-name() = ('TEI', 'teiCorpus'))])"/>
</xsl:function>
76
<!-- f:is_preceded_by_ws($node): tells whether whitespace is to be put in front of $node.
     $node is dispatched on its local name:
       seg: true unless the seg carries an @nkjp:nps attribute
            (presumably "no preceding space"; confirm against the NKJP documentation);
       s:   true if the s has a preceding sibling tei:s, or if its nearest tei:p ancestor
            has a preceding sibling tei:p;
       p:   true if the p has a preceding sibling tei:p.
     Any other element terminates the transformation with an error message.
     The s and p branches also emit diagnostic messages listing the preceding siblings. -->
<xsl:function name="f:is_preceded_by_ws" as="xs:boolean">
  <xsl:param name="node" as="element()"/>
  <xsl:choose>
    <xsl:when test="local-name($node) eq 'seg'">
      <xsl:sequence select="not(exists($node/@nkjp:nps))"/>
    </xsl:when>
    <xsl:when test="local-name($node) eq 's'">
      <xsl:variable name="earlier_s" as="element()*" select="$node/preceding-sibling::tei:s"/>
      <xsl:variable name="earlier_p" as="element()*"
        select="$node/ancestor::tei:p[1]/preceding-sibling::tei:p"/>
      <!-- string-join() is needed here: the || operator accepts at most one item per operand,
           so concatenating the node sequence directly fails once there are two or more nodes -->
      <xsl:message select="'s - prec s: ' || string-join($earlier_s, ' ')"/>
      <xsl:message select="'same s - prec p: ' || string-join($earlier_p, ' ') || '&#10;'"/>
      <xsl:sequence select="exists($earlier_s) or exists($earlier_p)"/>
    </xsl:when>
    <xsl:when test="local-name($node) eq 'p'">
      <xsl:variable name="earlier_p" as="element()*" select="$node/preceding-sibling::tei:p"/>
      <!-- same reason for string-join() as in the s branch -->
      <xsl:message select="'p : ' || string-join($earlier_p, ' ')"/>
      <xsl:sequence select="exists($earlier_p)"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:message terminate="yes"
        select="'Wrong argument passed to f:is_preceded_by_ws(): ' || local-name($node) || ' Only p, s, seg are allowed.'"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
banspd1bf1db2022-04-04 02:16:24 +020098
<!-- f:calc_offsets($node, $skip_start): returns a two-item integer sequence (start, end), meant to be
     addressed with $OFFSET_START and $OFFSET_END.
     $node: the element to measure; the branches below handle tei:text, tei:body, tei:p, tei:s and tei:seg.
     $skip_start: when true, the start is forced to 0 and 1 is added back to the end, so that the end
     equals the computed $length.
     The function calls itself: a p consults the last tei:p child of its enclosing body that is positioned
     before it; an s consults the last tei:s child of its enclosing p that is positioned before it, plus
     the enclosing p itself; a seg consults its enclosing p.
     Numbers are read from @corresp by taking the two comma-separated values in front of the closing ')'
     (assumed shape: ...string-range(id,start,length), TODO confirm against the NKJP data).
     NOTE(review): the choose inside $start has no otherwise branch, so for any other element $start is
     empty, which does not satisfy as="xs:integer".
     NOTE(review): for a seg, $start already contains the first @corresp number and $length is the first
     plus the second @corresp number, so the returned end contains the first number twice; confirm that
     this is intended. -->
99 <xsl:function name="f:calc_offsets" as="xs:integer+">
100 <xsl:param name="node" as="element()"/>
101 <xsl:param name="skip_start" as="xs:boolean" />
102
103 <xsl:variable name="start" as="xs:integer">
104 <xsl:choose>
105
106 <xsl:when test="$skip_start or $node/self::tei:text or $node/self::tei:body">
107 <xsl:sequence select="0"/>
108 </xsl:when>
109
110 <!-- handle p -->
111
112 <xsl:when test="$node/self::tei:p">
113 <xsl:variable name="my_pos" as="xs:integer" select="count($node/preceding-sibling::tei:p) + 1"/>
114 <xsl:variable name="preceding" as="node()*"
115 select="$node/ancestor::tei:body/tei:p[position() lt $my_pos]"/>
116
117 <xsl:choose>
118 <xsl:when test="count($preceding) eq 0">
119 <xsl:sequence select="0"/>
120 </xsl:when>
121 <xsl:otherwise>
122 <xsl:sequence select="sum(f:calc_offsets($preceding[last()],true())[$OFFSET_END],1)"/>
<!-- NOTE(review): the second argument of fn:sum() is the value returned when the first argument is the
     empty sequence; it is not added to the total. f:calc_offsets() always returns two integers, so this
     expression evaluates to the end offset of the preceding p, with no 1 added. -->
123
124<!-- BUG danger: I am not sure if a "1" should rather be added after each p; let me try to handle that in the return value of the $length variable,
125 and make it sensitive to the skip_start parameter
126
127 I will then have to remove the ",1" from here!
128
129 -->
130
131<!-- <xsl:variable name="last_corresps"
132 select="$preceding/descendant::tei:s[last()]/(descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'])[last()]/@corresp"
133 as="attribute(corresp)+"/>
134 <xsl:variable name="end_offsets" as="xs:integer+">
135 <xsl:for-each select="$last_corresps">
136 <xsl:variable name="numbers"
137 select="substring-after(substring-before(., ')'), ',')"/>
138 <xsl:sequence
139 select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"
140 />
141 </xsl:for-each>
142 </xsl:variable>
143 <xsl:sequence select="sum($end_offsets, 1)"/>
144
145 this is a non-recursive variant that may turn out to be much less cpu-intensive, not sure
146 - but if it's plugged in, it will have to be adjusted to the current form of the recursive variant,
147 because it hasn't been maintained since it got commented out
148 -->
149 </xsl:otherwise>
150 </xsl:choose>
151 </xsl:when>
152
153 <!-- handle s -->
154
155<!-- the value for s gets counted since the start of the current p
156 - so we look at the preceding s's
157 + the preceding p's
158 -->
159 <xsl:when test="$node/self::tei:s">
160 <!--<xsl:variable name="last_corresp"
161 select="$node/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/attribute::corresp"
162 as="attribute(corresp)"/>
163 <xsl:variable name="numbers"
164 select="substring-after(substring-before($last_corresp, ')'), ',')"/>
165 <xsl:sequence
166 select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"
167 />
168 -->
169
<!-- internal_start: 0 for the first s of the p, otherwise the end offset (computed with skip_start = true) of the s just before this one -->
170 <xsl:variable name="internal_start" as="xs:integer">
171 <xsl:variable name="my_pos" as="xs:integer" select="count($node/preceding-sibling::tei:s) + 1"/>
172 <xsl:variable name="preceding" as="node()*"
173 select="$node/ancestor::tei:p[1]/tei:s[position() lt $my_pos]"/>
174
175 <xsl:choose>
176 <xsl:when test="count($preceding) eq 0">
177 <xsl:sequence select="0"/>
178 </xsl:when>
179 <xsl:otherwise>
bansp9dc10002022-05-17 22:33:34 +0200180 <xsl:sequence select="f:calc_offsets($preceding[last()],true())[$OFFSET_END]"/>
181 <!--<xsl:sequence select="sum(f:calc_offsets($preceding[last()],true())[$OFFSET_END],1)"/>-->
banspd1bf1db2022-04-04 02:16:24 +0200182 <!-- again, CAREFUL ABOUT THE +1, it might need to vanish -->
183 </xsl:otherwise>
184 </xsl:choose>
185 </xsl:variable>
186
<!-- external_start: the start offset of the enclosing p -->
bansp9dc10002022-05-17 22:33:34 +0200187 <xsl:variable name="external_start" as="xs:integer" select="f:calc_offsets($node/ancestor::tei:p[1],false())[$OFFSET_START]"/>
banspd1bf1db2022-04-04 02:16:24 +0200188
189 <xsl:sequence select="$internal_start + $external_start"/>
190 </xsl:when>
191
192 <!-- handle seg -->
193
194 <xsl:when test="$node/self::tei:seg">
195 <!-- for segs, the s elements are irrelevant, and the local offset is immediately available on the @corresp -->
196
197 <xsl:variable name="numbers"
198 select="substring-after(substring-before($node/@corresp, ')'), ',')"/>
199
200 <xsl:variable name="internal_start" select="xs:integer(substring-before($numbers, ','))"
201 as="xs:integer"/>
202 <xsl:variable name="external_start" as="xs:integer"
bansp9dc10002022-05-17 22:33:34 +0200203 select="f:calc_offsets($node/ancestor::tei:p[1], false())[$OFFSET_START]"/>
banspd1bf1db2022-04-04 02:16:24 +0200204
<!-- diagnostic output only: reports the numbers of every seg that carries an @nkjp:rejected attribute -->
205 <xsl:if test="$node/self::tei:seg and count($node/@nkjp:rejected)">
206
207 <xsl:message select="'numbers: ' || $numbers"/>
208 </xsl:if>
209 <xsl:sequence select="$internal_start + $external_start"/>
210 </xsl:when>
211 </xsl:choose>
212 </xsl:variable>
213
<!-- $length: sum of the two @corresp numbers of the last seg that is not rejected, taken from every p
     (text, body), from the last s of the p (p), from the s itself (s), or from the node's own @corresp (otherwise) -->
214 <xsl:variable name="length" as="xs:integer">
215 <xsl:choose>
216
217 <xsl:when test="$node/self::tei:text or $node/self::tei:body">
218 <xsl:variable name="last_corresps"
219 select="$node/descendant::tei:p/descendant::tei:s[last()]/(descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'])[last()]/@corresp"
220 as="attribute(corresp)+"/>
221
222 <xsl:variable name="end_offsets" as="xs:integer+">
223 <xsl:for-each select="$last_corresps">
224 <xsl:variable name="numbers" select="substring-after(substring-before(., ')'), ',')"/>
225 <xsl:sequence
226 select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"
227 />
228 </xsl:for-each>
229 </xsl:variable>
230
231 <xsl:sequence select="sum($end_offsets)"/>
232
233 </xsl:when>
234 <xsl:when test="$node/self::tei:p">
235 <xsl:variable name="last_corresps"
236 select="$node/descendant::tei:s[last()]/(descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'])[last()]/@corresp"
237 as="attribute(corresp)+"/>
238 <xsl:variable name="end_offsets" as="xs:integer+">
239 <xsl:for-each select="$last_corresps">
240 <xsl:variable name="numbers" select="substring-after(substring-before(., ')'), ',')"/>
241 <xsl:sequence
242 select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"
243 />
244 </xsl:for-each>
245 </xsl:variable>
246 <xsl:sequence select="sum($end_offsets)"/>
247 </xsl:when>
248
249
250
251
252 <xsl:when test="$node/self::tei:s">
253 <xsl:variable name="last_corresp"
254 select="$node/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/attribute::corresp"
255 as="attribute(corresp)"/>
256 <xsl:variable name="numbers"
257 select="substring-after(substring-before($last_corresp, ')'), ',')"/>
258 <xsl:sequence
259 select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"
260 />
261 </xsl:when>
262 <xsl:otherwise>
263 <xsl:variable name="numbers"
264 select="substring-after(substring-before($node/@corresp, ')'), ',')"/>
265 <xsl:if test="$node/self::tei:seg and count($node/@nkjp:rejected)">
266 <!-- REMOVE THIS -->
bansp9dc10002022-05-17 22:33:34 +0200267 <xsl:message select="'rejected: ' || $numbers"/>
banspd1bf1db2022-04-04 02:16:24 +0200268 </xsl:if>
269 <xsl:sequence
270 select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"
271 />
272 </xsl:otherwise>
273 </xsl:choose>
274 </xsl:variable>
275
<!-- trace output, emitted on every call (recursive calls included) -->
bansp9dc10002022-05-17 22:33:34 +0200276 <xsl:message select="local-name($node) || '[' || count($node/preceding-sibling::*[local-name() eq local-name($node)])+1 || '] length: ' || $length || ' skip_start: ' || $skip_start"/>
banspd1bf1db2022-04-04 02:16:24 +0200277
<!-- result: (start, start + length - 1); with skip_start = true the start is 0 and 1 is added back, giving (0, $length) -->
bansp9dc10002022-05-17 22:33:34 +0200278 <xsl:sequence select="$start, $start + $length -1 + xs:integer($skip_start)"/>
banspd1bf1db2022-04-04 02:16:24 +0200279 </xsl:function>
280
<!-- f:calc_content_length($node): returns the sum of the two comma-separated numbers found in front of
     the closing ')' of a @corresp attribute (assumed shape: ...string-range(id,start,length), TODO
     confirm against the NKJP data), i.e. start + length.
     Which @corresp is read depends on $node:
       tei:text, tei:body: last non-rejected seg of the last s of the last p below $node;
       tei:p:              last non-rejected seg of the last s below $node;
       tei:s:              last non-rejected seg below $node;
       anything else:      the @corresp of $node itself.
     A seg counts as non-rejected when it has no @nkjp:rejected or its value is not 'true'.
     A type error is raised when no such @corresp exists, a conversion error when the numbers
     cannot be read as integers. -->
<xsl:function name="f:calc_content_length" as="xs:integer">
  <xsl:param name="node" as="node()"/>

  <xsl:variable name="is_container" as="xs:boolean"
    select="exists($node/(self::tei:text, self::tei:body, self::tei:p, self::tei:s))"/>

  <!-- the s element whose last non-rejected seg closes the span of a container -->
  <xsl:variable name="closing_s" as="element()?" select="
      if ($node/self::tei:text or $node/self::tei:body) then
        $node/descendant::tei:p[last()]/descendant::tei:s[last()]
      else if ($node/self::tei:p) then
        $node/descendant::tei:s[last()]
      else
        $node/self::tei:s"/>

  <!-- exactly one attribute is required; the declared type turns a missing one into an error -->
  <xsl:variable name="last_corresp" as="attribute(corresp)" select="
      if ($is_container) then
        $closing_s/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/@corresp
      else
        $node/@corresp"/>

  <xsl:variable name="numbers" as="xs:string"
    select="substring-after(substring-before($last_corresp, ')'), ',')"/>

  <xsl:sequence
    select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"/>
</xsl:function>
bansp5e2d1c02022-03-10 04:51:40 +0100315
banspb5992532022-03-29 15:55:44 +0200316
317<!-- UTILITY TEMPLATES -->
318
<!-- Deletes an auto-inserted attribute (@default) throughout, in every mode. -->
<xsl:template mode="#all" match="@default"/>
bansp9103aab2022-03-19 05:10:21 +0100321
bansp9dc10002022-05-17 22:33:34 +0200322 <!--<xsl:template match="tei:w" mode="#all"/> w is better than ab, now ... -->
<!-- NKJP-SGJP has apparently abandoned standoff representations by adding <w> everywhere;
     for the time being, we'll just stick to the standoff offsets, although that may need to
     be revisited, as the NKJP format has now begun to stray from its schemas and assumptions -->
bansp8f6700b2022-03-27 05:27:09 +0200326
<!-- THIS IS ONLY TEMPORARY: tei:choice is dropped in every mode.
     An interesting challenge came up here, where straightforward mapping will probably have to
     be abandoned because of TOKENIZATION alternatives.
     For now, the point is just to have this stylesheet work, even if it eats an occasional token
     (which it now does: 'komuś' and 'czym' vanish). -->
<xsl:template mode="#all" match="tei:choice"/>
333 -->
bansp8f6700b2022-03-27 05:27:09 +0200334
banspb5992532022-03-29 15:55:44 +0200335
336 <!-- MAIN PROCESSING -->
337
338
<!-- Entry point of the transformation.
     Writes the corpus-level header once, then walks over every text.xml document in
     $collection_of_text and calls process_single_sample for each text whose ID (the name of the
     directory holding text.xml) is not listed in $skip_docID. -->
<xsl:template name="xsl:initial-template">
  <xsl:variable name="IDs_to_skip" as="xs:string*" select="tokenize($skip_docID, ',')"/>

  <!-- We only want to call the template below once, and we process a random NKJP corpus file for
       that purpose, because all we need is the main corpus header, and we can (should) get to
       that from any NKJP corpus document. -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param name="text.xml" select="$collection_of_text[1]" as="document-node()"/>
    <xsl:with-param name="target" select="$targetCorpusDir_slashed || 'header.xml'" as="xs:string"/>
  </xsl:call-template>

  <xsl:for-each select="$collection_of_text">
    <!-- The pattern is anchored to the end of the URI, so that only the trailing file name is
         removed, and not some other '/text.xml' occurring earlier in the path. -->
    <xsl:variable name="my_dir" as="xs:string" select="replace(base-uri(), '/text\.xml$', '')"/>
    <xsl:variable name="my_textID" as="xs:string" select="tokenize($my_dir, '/')[last()]"/>
    <xsl:variable name="ann_morphosyntax.uri" as="xs:string"
      select="$my_dir || '/ann_morphosyntax.xml'"/>
    <xsl:variable name="ann_segmentation.uri" as="xs:string"
      select="$my_dir || '/ann_segmentation.xml'"/>

    <!-- The skip list is a utility, for when we want to ignore some texts for any reason
         (debugging, selective update). -->
    <xsl:if test="not($my_textID = $IDs_to_skip)">

      <!-- debugging aid:
      <xsl:message select="f:calc_offsets(doc($ann_segmentation.uri)//tei:body/tei:p[1]/tei:s[2]/tei:seg[1],false())"/>
      <xsl:message select="doc($ann_segmentation.uri)//tei:body/tei:p[1]/tei:s[1]/tei:seg[1] || f:is_preceded_by_ws(doc($ann_segmentation.uri)//tei:body/tei:p[1]/tei:s[1]/tei:seg[1])"/>
      <xsl:message select="doc($ann_segmentation.uri)//tei:body/tei:p[1]/tei:s[1]/tei:seg[3] || f:is_preceded_by_ws(doc($ann_segmentation.uri)//tei:body/tei:p[1]/tei:s[1]/tei:seg[3])"/>
      -->

      <xsl:call-template name="process_single_sample">
        <xsl:with-param name="text.xml" as="document-node()" select="."/>
        <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
          select="doc($ann_morphosyntax.uri)"/>
        <xsl:with-param name="ann_segmentation.xml" as="document-node()"
          select="doc($ann_segmentation.uri)"/>
        <xsl:with-param name="my_textID" as="xs:string" select="$my_textID"/>
      </xsl:call-template>
    </xsl:if>
  </xsl:for-each>
</xsl:template>
378
<!-- Produces the output files of a single text: data.xml and header.xml are written below
     $targetCorpusDir_slashed/$docID/$my_textID. The calls that create the struct and the
     morpho layer are currently commented out. -->
<xsl:template name="process_single_sample">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <!-- An empty textID should never happen, but if it does, this default signals it at the top
       of the output. -->
  <xsl:param name="my_textID" as="xs:string" select="'0-BAD_textID'"/>

  <!-- this is what occurs in the text and data layers as @docid -->
  <xsl:variable name="compoundID" as="xs:string"
    select="concat($corpusID, '_', $docID, '.', $my_textID)"/>

  <xsl:variable name="targetBaseDir" as="xs:string"
    select="concat($targetCorpusDir_slashed, $docID, '/', $my_textID)"/>

  <!-- raw text; the text.xml document is not passed on at the moment:
       <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/> -->
  <xsl:call-template name="create_data">
    <xsl:with-param name="target" as="xs:string" select="concat($targetBaseDir, '/data.xml')"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$compoundID"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()"
      select="$ann_segmentation.xml"/>
  </xsl:call-template>

  <!-- disabled for now:
  <xsl:call-template name="create_struct">
    <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
    <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
    <xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml" as="document-node()"/>
    <xsl:with-param name="target" select="$targetBaseDir || '/struct/structure.xml'" as="xs:string"/>
  </xsl:call-template>
  -->

  <!-- disabled for now:
  <xsl:call-template name="create_morpho">
    <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
    <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
    <xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml" as="document-node()"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
    <xsl:with-param name="target" select="$targetBaseDir || '/nkjp/morpho.xml'" as="xs:string"/>
  </xsl:call-template>
  -->

  <!-- text-level header -->
  <xsl:call-template name="create_text_header">
    <xsl:with-param name="target" as="xs:string" select="concat($targetBaseDir, '/header.xml')"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$compoundID"/>
    <xsl:with-param name="text.xml" as="document-node()" select="$text.xml"/>
  </xsl:call-template>
</xsl:template>
426
427 <!-- ************************** data.xml ******************* -->
428
<!-- Creates the data.xml file at $target: a KorAP raw_text element carrying @docid = $compoundID,
     a metadata pointer, and a text element holding the concatenated content of the tei:w children
     of all segs without an @nkjp:rejected attribute.
     $ann_segmentation.xml: the segmentation document the segs are read from.
     $compoundID:           value of the @docid attribute.
     $target:               URI of the result document.
     (The text.xml document used to be a parameter as well; that parameter is currently disabled:
      <xsl:param name="text.xml" as="document-node()"/>) -->
<xsl:template name="create_data">
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- xpath-default-namespace is not an attribute value template, so a variable reference in
       curly braces would be taken as a literal namespace name; the URI is therefore spelled
       out, as in create_struct and create_morpho -->
  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">

    <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>

      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <!-- earlier variants, reading from text.xml:
             <xsl:value-of select="$text.xml//*[local-name() = 'ab']"/> preserves more whitespace
             <xsl:apply-templates select="$text.xml//*[local-name() = 'ab']"/> -->
        <xsl:variable name="content" as="xs:string+">
          <xsl:variable name="segs" as="element()+"
            select="$ann_segmentation.xml/tei:teiCorpus/tei:TEI/tei:text/tei:body/tei:p/tei:s//tei:seg[count(@nkjp:rejected) eq 0]"/>
          <!-- the first seg never gets a separator in front of it -->
          <xsl:sequence select="head($segs)/tei:w"/>
          <!-- every further seg contributes a separator (a space, or nothing when the seg is
               flagged as not preceded by whitespace) followed by its word -->
          <xsl:for-each select="tail($segs)">
            <xsl:sequence select="
                if (f:is_preceded_by_ws(.)) then
                  ' '
                else
                  '', ./tei:w"/>
          </xsl:for-each>
        </xsl:variable>
        <xsl:value-of select="string-join($content)"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
464
bansp5f841732022-03-16 06:27:31 +0100465 <!-- ************************** struct ******************* -->
466
<!-- Creates the structure layer at $target: a KorAP layer element (with @docid and @version)
     wrapping a spanList that is filled by applying the 'struct' mode to every tei:text element
     of the segmentation document.
     $text.xml is accepted but not used here. -->
<xsl:template name="create_struct">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element namespace="{$KorAP_namespace}" name="layer">
      <xsl:attribute name="docid">
        <xsl:value-of select="$compoundID"/>
      </xsl:attribute>
      <xsl:attribute name="version">
        <xsl:value-of select="$KorAP-XML_version"/>
      </xsl:attribute>

      <xsl:element namespace="{$KorAP_namespace}" name="spanList">
        <xsl:apply-templates mode="struct" select="$ann_segmentation.xml/descendant::tei:text"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
486
<!-- Emits one KorAP span for the current TEI element and then descends into its children.
     $ini, $fin: accepted and handed down to the children, but not read in this template.
     $index:     running index received from the parent; the id of the span is derived from it.
     The span carries @id, @from (first number of the relevant @corresp), @to (the value of
     f:calc_content_length()) and @l (the value of f:compute_nesting()), plus a TEI feature
     structure naming the element and listing its attributes.
     The individual values are kept in separate variables on purpose, so that they can be
     looked up should anything go wrong; optimization can wait for a larger dataset. -->
<xsl:template match="tei:*" mode="struct">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>

  <xsl:variable name="tei_ns" as="xs:string" select="'http://www.tei-c.org/ns/1.0'"/>

  <!-- preceding siblings that share the local name of the current element -->
  <xsl:variable name="same_name_before"
    select="preceding-sibling::*[local-name() eq local-name(current())]"/>

  <!-- for s and p, the elements nested inside those siblings are counted as well -->
  <xsl:variable name="nested_before" as="xs:integer" select="
      if (self::tei:s or self::tei:p) then
        sum($same_name_before ! count(descendant::*))
      else
        0"/>

  <xsl:variable name="my_index" as="xs:integer"
    select="$index + 1 + count($same_name_before) + $nested_before"/>

  <!--<xsl:copy select="//tei:seg[count(@nkjp:rejected) ne 0 and @nkjp:rejected ne 'true']"></xsl:copy>-->

  <!-- text and body start at 0; p and s start where their first seg starts; a seg starts at the
       first number of its own @corresp; no value is produced for any other element -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p or self::tei:s">
        <xsl:variable name="first_corresp" as="attribute(corresp)" select="
            if (self::tei:p) then
              descendant::tei:s[1]/descendant::tei:seg[1]/@corresp
            else
              descendant::tei:seg[1]/@corresp"/>
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before($first_corresp, ')'), ','), ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:seg">
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before(@corresp, ')'), ','), ','))"/>
      </xsl:when>
    </xsl:choose>
  </xsl:variable>

  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element namespace="{$KorAP_namespace}" name="span">
    <xsl:attribute name="id" select="concat('s', $my_index)"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element namespace="{$tei_ns}" name="fs">
      <!-- STRUCT vs. LEX -->
      <xsl:attribute name="type" select="'struct'"/>
      <xsl:element namespace="{$tei_ns}" name="f">
        <xsl:attribute name="name" select="'name'"/>
        <xsl:value-of select="local-name()"/>
      </xsl:element>
      <xsl:if test="exists(@*)">
        <xsl:element namespace="{$tei_ns}" name="f">
          <xsl:attribute name="name" select="'attr'"/>
          <xsl:element namespace="{$tei_ns}" name="fs">
            <xsl:attribute name="type" select="'attr'"/>
            <xsl:for-each select="@*">
              <xsl:element namespace="{$tei_ns}" name="f">
                <xsl:attribute name="name" select="local-name(.)"/>
                <xsl:value-of select="."/>
              </xsl:element>
            </xsl:for-each>
          </xsl:element>
        </xsl:element>
      </xsl:if>
    </xsl:element>
  </xsl:element>

  <xsl:apply-templates mode="struct">
    <xsl:with-param name="ini" as="xs:integer" select="$start"/>
    <xsl:with-param name="fin" as="xs:integer" select="$end"/>
    <xsl:with-param name="index" select="$my_index"/>
  </xsl:apply-templates>
</xsl:template>
585
586 <!-- ************************** morpho ******************* -->
587
<!-- create_morpho: serialises the morphosyntactic layer of one text to $target.
     Parameters:
       text.xml             document node; declared for the caller's sake, not read here
       compoundID           written to layer/@docid
       ann_segmentation.xml document whose tei:text elements are walked in mode 'morpho'
       ann_morphosyntax.xml handed down unchanged to the 'morpho' templates
       target               href of the result document
     Output: a KorAP 'layer' element wrapping a single 'spanList', preceded by an
     xml-model processing instruction that points at span.rng. -->
<xsl:template name="create_morpho">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <!-- attribute values supplied through text value templates (expand-text is on) -->
      <xsl:attribute name="docid">{$compoundID}</xsl:attribute>
      <xsl:attribute name="version">{$KorAP-XML_version}</xsl:attribute>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <!-- the spans themselves are produced by the 'morpho' mode templates -->
        <xsl:apply-templates mode="morpho" select="$ann_segmentation.xml//tei:text">
          <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
            select="$ann_morphosyntax.xml"/>
        </xsl:apply-templates>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
610
<!-- Generic walker for TEI elements in mode 'morpho'.
     Produces no output of its own; it only computes the running index and the
     character offsets that are handed down to the children.
     Parameters:
       ini    start offset inherited from the parent (default 0)
       fin    end offset inherited from the parent (default 999999999); not read here
       index  index inherited from the parent (default 1)
       ann_morphosyntax.xml  passed through unchanged
     Values passed on: ini = $start, fin = $end, index = $my_index. -->
<xsl:template match="tei:*" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>

  <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
  <!-- preceding siblings that carry the same local name as the current element -->
  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)"/>
  <!-- for s and p only: total number of element descendants of those siblings;
       sum() over an empty sequence is already 0, so no special case is needed -->
  <xsl:variable name="outside-preceding-count" as="xs:integer"
    select="
      if (self::tei:s or self::tei:p)
      then xs:integer(sum($preceding ! count(descendant::*)))
      else 0"/>
  <xsl:variable name="my_index" as="xs:integer"
    select="$index + 1 + $preceding-count + $outside-preceding-count"/>

  <!-- start offset: 0 for text/body; for p and s it is the text between the first
       and the second comma of the first seg's @corresp (the part before ')') -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p">
        <xsl:variable name="first_corresp"
          select="descendant::tei:s[1]/descendant::tei:seg[1]/attribute::corresp"
          as="attribute(corresp)"/>
        <xsl:variable name="numbers"
          select="substring-after(substring-before($first_corresp, ')'), ',')"/>
        <xsl:sequence select="xs:integer(substring-before($numbers, ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:s">
        <xsl:variable name="first_corresp"
          select="descendant::tei:seg[1]/attribute::corresp"
          as="attribute(corresp)"/>
        <xsl:variable name="numbers"
          select="substring-after(substring-before($first_corresp, ')'), ',')"/>
        <xsl:sequence select="xs:integer(substring-before($numbers, ','))"/>
      </xsl:when>
      <!-- tei:seg is handled by its own, more specific template -->
      <xsl:otherwise>
        <!-- any other TEI element: without this branch the variable would be an
             empty sequence, which violates as="xs:integer"; inherit the parent's
             start offset instead -->
        <xsl:sequence select="$ini"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:apply-templates mode="morpho">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml"
      as="document-node()"/>
  </xsl:apply-templates>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100677
<!-- Emits one KorAP 'span' (fs type 'lex') for a seg of the segmentation layer.
     The matching seg of the morphosyntax document is the one whose @corresp,
     after the '#', equals this seg's @xml:id; the disambiguated interpretation
     is located through the 'choice' feature inside the 'disamb' feature.
     Parameters:
       ini, fin  offsets inherited from the parent; declared but not read here
       index     index inherited from the parent (default 1)
       ann_morphosyntax.xml  the morphosyntax document to look the seg up in
     Features written: lemma, pos, msd (only when non-empty) and join='left'
     (only when the morphosyntax seg has an f named 'nps').
     The recursion into children at the end is commented out: the source carried
     a stray comment terminator after the closing tag, which leaked into the
     output as literal text; the comment opener is restored here.
     NOTE(review): this assumes seg elements have no children that need
     processing in this mode; please confirm. -->
<xsl:template match="tei:seg" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <!-- I have made a major mess here, but it works... it's so spread out
       because I wanted to make sure to be able to look up the individual
       constituent values, should anything go wrong -->
  <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
  <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
  <xsl:variable name="my_morph-seg" as="node()" select="$ann_morphosyntax.xml//tei:seg[substring-after(@corresp,'#') eq $my_id]"/>
  <xsl:variable name="my_disamb" select="$my_morph-seg//tei:fs/tei:f[@name eq 'disamb']" as="node()"/>
  <xsl:variable name="my_choice-id" select="substring-after($my_disamb//tei:f[@name eq 'choice']/@fVal,'#')" as="xs:string"/>
  <xsl:variable name="my_choice-lex" select="$my_morph-seg//tei:f[@name eq 'interps']/tei:fs[@type eq 'lex'][descendant::tei:symbol[@xml:id eq $my_choice-id]]" as="node()"/>
  <xsl:variable name="chosen-msd" as="xs:string" select="$my_choice-lex/descendant::tei:symbol[@xml:id eq $my_choice-id]/@value"/>
  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)"/>
  <!--<xsl:variable name="outside-preceding-count" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:s or self::tei:p"> <!-\- THIS NEEDS TO BE REVISITED AFTER THIS TEMPLATE HAS BECOME MORE SPECIFIC -\->
        <xsl:choose>
          <xsl:when test="$preceding-count"> commented out for now
            <xsl:sequence select="
              sum(for $p in $preceding
              return
              count($p/descendant::*))"/>
          </xsl:when>
          <xsl:otherwise>
            <xsl:sequence select="0"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:when>
      <xsl:otherwise>
        <xsl:sequence select="0"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>-->
  <xsl:variable name="my_index" select="$index + 1 + $preceding-count" as="xs:integer"/>

  <!-- start offset: the text between the first and the second comma of @corresp
       (only the part before ')' is considered) -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:variable name="numbers" select="substring-after(substring-before(@corresp,')'),',')"/>
    <xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <xsl:attribute name="type" select="'lex'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'lex'"/>
        <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
          <!-- the orthographic form is recorded as an XML comment in the output -->
          <xsl:comment select="$my_morph-seg//tei:fs/tei:f[@name eq 'orth']/tei:string"/>

          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'lemma'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'base']/tei:string"/>
          </xsl:element>
          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'pos'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'ctag']/tei:symbol/@value"/>
          </xsl:element>
          <xsl:if test="string-length($chosen-msd)">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'msd'"/>
              <xsl:value-of select="$chosen-msd"/>
            </xsl:element>
          </xsl:if>
          <xsl:if test="$my_morph-seg//tei:fs/tei:f[@name eq 'nps']">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'join'"/>
              <xsl:value-of select="'left'"/>
            </xsl:element>
          </xsl:if>
        </xsl:element>
      </xsl:element>
    </xsl:element>
  </xsl:element>
  <!--<xsl:apply-templates mode="morpho">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
  </xsl:apply-templates>-->
</xsl:template>
banspe726b4a2022-03-28 05:47:45 +0200767
bansp5f841732022-03-16 06:27:31 +0100768 <!-- ************************** TEXT header ******************* -->
769
<!-- create_text_header: writes the text-level header file to $target.
     Parameters:
       text.xml    document whose TEI/teiHeader children are transformed in mode 'text'
       compoundID  tunnelled down to the 'text' mode templates
       target      href of the result document -->
<xsl:template name="create_text_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- the local header.xml file -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="text" select="$text.xml//tei:TEI/tei:teiHeader/tei:*">
        <xsl:with-param name="compoundID" select="$compoundID" as="xs:string" tunnel="yes"/>
      </xsl:apply-templates>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
786
<!-- fileDesc (text header): re-created under the same local name; all child
     nodes are processed in mode 'text' -->
<xsl:template match="tei:fileDesc" mode="text">
  <xsl:element name="fileDesc">
    <xsl:apply-templates select="node()" mode="text"/>
  </xsl:element>
</xsl:template>
792
<!-- title (text header): rendered as t.title.
     The content is processed in mode 'header-text', the mode that the corpus-level
     title template and the tei:p template use for header prose; a mode-less
     apply-templates would drop the children into the default mode instead. -->
<xsl:template match="tei:title" mode="text">
  <t.title>
    <xsl:apply-templates mode="header-text"/>
  </t.title>
</xsl:template>
798
<!-- titleStmt (text header): a textSigle holding the tunnelled $compoundID is
     written first, followed by the children processed in mode 'text' -->
<xsl:template match="tei:titleStmt" mode="text">
  <xsl:param name="compoundID" as="xs:string" tunnel="yes"/>
  <titleStmt>
    <textSigle>{$compoundID}</textSigle>
    <xsl:apply-templates select="node()" mode="text"/>
  </titleStmt>
</xsl:template>
808
<!-- publicationStmt (text header): re-created under the same local name; all
     child nodes are processed in mode 'text' -->
<xsl:template match="tei:publicationStmt" mode="text">
  <xsl:element name="publicationStmt">
    <xsl:apply-templates select="node()" mode="text"/>
  </xsl:element>
</xsl:template>
814
<!-- availability (text header): re-created under the same local name; only its
     attributes and element children are processed (text nodes are not selected) -->
<xsl:template match="tei:availability" mode="text">
  <xsl:element name="availability">
    <xsl:apply-templates select="* | @*" mode="text"/>
  </xsl:element>
</xsl:template>
820
<!-- profileDesc (text header): re-created under the same local name; all child
     nodes are processed in mode 'text' -->
<xsl:template match="tei:profileDesc" mode="text">
  <xsl:element name="profileDesc">
    <xsl:apply-templates select="node()" mode="text"/>
  </xsl:element>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100826
<!-- textClass (text header): re-created under the same local name; only its
     attributes and element children are processed (text nodes are not selected) -->
<xsl:template match="tei:textClass" mode="text">
  <xsl:element name="textClass">
    <xsl:apply-templates select="* | @*" mode="text"/>
  </xsl:element>
</xsl:template>
832
<!-- catRef (text and corpus headers): re-created under the same local name; its
     attributes and element children are processed in whichever of the two modes
     this template was invoked in, so a corpus-mode call no longer switches to
     mode 'text' half-way through -->
<xsl:template match="tei:catRef" mode="text corpus">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates mode="#current" select="@* | *"/>
  </xsl:element>
</xsl:template>
838
<!-- the attributes that are carried over verbatim into the headers; xml:id is
     copied only when it sits somewhere below a classDecl -->
<xsl:template mode="text corpus"
  match="@status | @scheme | @target | @type | @xml:id[ancestor::tei:classDecl] | @xml:lang">
  <xsl:copy/>
</xsl:template>
842
<!-- p (text and corpus headers): re-created as p; the content is handed over to
     mode 'header-text' -->
<xsl:template match="tei:p" mode="text corpus">
  <xsl:element name="p">
    <xsl:apply-templates select="node()" mode="header-text"/>
  </xsl:element>
</xsl:template>
848
849
850 <!-- OPTIMIZATION has to take modes into account -->
bansp5e2d1c02022-03-10 04:51:40 +0100851 <!-- ************************** CORPUS header ******************* -->
<!-- create_corpus_header: writes the corpus-level header file to $target.
     Parameters:
       text.xml  document whose teiCorpus/teiHeader children are transformed in mode 'corpus'
       target    href of the result document -->
<xsl:template name="create_corpus_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- the corpus-level header.xml file -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes">

    <!--doctype-public="{$publicDoctypeI5}"
        doctype-system="{$systemDoctypeI5}">
        these are, sadly, useless
    -->

    <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="corpus" select="$text.xml/tei:teiCorpus/tei:teiHeader/tei:*"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
869
<!-- fileDesc (corpus header): re-created under the same local name; all child
     nodes are processed in mode 'corpus' -->
<xsl:template match="tei:fileDesc" mode="corpus">
  <xsl:element name="fileDesc">
    <xsl:apply-templates select="node()" mode="corpus"/>
  </xsl:element>
</xsl:template>
bansp9103aab2022-03-19 05:10:21 +0100875
bansp5e2d1c02022-03-10 04:51:40 +0100876
<!-- title (corpus header): rendered as c.title; attributes go through mode
     'corpus' first, then the content through mode 'header-text' -->
<xsl:template match="tei:title" mode="corpus">
  <c.title>
    <xsl:apply-templates select="attribute::*" mode="corpus"/>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </c.title>
</xsl:template>
883
<!-- titleStmt (corpus header): a korpusSigle holding $corpusID is written first,
     followed by the children processed in mode 'corpus' -->
<xsl:template match="tei:titleStmt" mode="corpus">
  <titleStmt>
    <korpusSigle>{$corpusID}</korpusSigle>
    <xsl:apply-templates select="node()" mode="corpus"/>
  </titleStmt>
</xsl:template>
892
<!-- publicationStmt (corpus header): re-created under the same local name; all
     child nodes are processed in mode 'corpus' -->
<xsl:template match="tei:publicationStmt" mode="corpus">
  <xsl:element name="publicationStmt">
    <xsl:apply-templates select="node()" mode="corpus"/>
  </xsl:element>
</xsl:template>
898
<!-- availability (corpus header): re-created under the same local name; only its
     attributes and element children are processed (text nodes are not selected) -->
<xsl:template match="tei:availability" mode="corpus">
  <xsl:element name="availability">
    <xsl:apply-templates select="* | @*" mode="corpus"/>
  </xsl:element>
</xsl:template>
904
<!-- encodingDesc (corpus header): re-created under the same local name; all
     child nodes are processed in mode 'corpus' -->
<xsl:template match="tei:encodingDesc" mode="corpus">
  <xsl:element name="encodingDesc">
    <xsl:apply-templates select="node()" mode="corpus"/>
  </xsl:element>
</xsl:template>
910
<!-- classification scaffolding (corpus header): each of these elements is
     re-created under its own local name; only attributes and element children
     are processed (text nodes are not selected) -->
<xsl:template mode="corpus"
  match="tei:classDecl | tei:taxonomy | tei:category | tei:taxonomy/tei:bibl">
  <xsl:element name="{local-name(.)}">
    <xsl:apply-templates select="* | @*" mode="corpus"/>
  </xsl:element>
</xsl:template>
916
<!-- prose-bearing leaves (corpus header): re-created under their own local name;
     attributes go through mode 'corpus', the content through mode 'header-text' -->
<xsl:template mode="corpus" match="tei:bibl/tei:title | tei:edition | tei:desc">
  <xsl:element name="{local-name(.)}">
    <xsl:apply-templates select="attribute::*" mode="corpus"/>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </xsl:element>
</xsl:template>
923<!--
924 <xsl:template match="tei:textClass" mode="corpus">
925 <xsl:element name="{local-name()}">
926 <xsl:apply-templates mode="corpus" select="@* | *"/>
927 </xsl:element>
928 </xsl:template>
929
930 <xsl:template match="tei:catRef" mode="corpus">
931 <xsl:element name="{local-name()}">
932 <xsl:apply-templates mode="corpus" select="@* | *"/>
933 </xsl:element>
934 </xsl:template>
935-->
bansp5e2d1c02022-03-10 04:51:40 +0100936
937
938
  <!-- this template can be called by the XSpec test; TODO: find a way to call the main() template directly -->
  <!-- I have not fully handled the param transmission, which would have to be kludged in just for the sake of XSpec,
       because I'm disabling this for now, due to XSpec design issues; relevant links, a.o.:

       https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
       https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html

       In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in their stylesheets. But I don't
       want to be strangled by kludges at the beginning of work, I've already lost quite a bit of time on this investigation,
       I will therefore "just code" and then can think of externalizing bits of templates if we want to play with tests. For now,
       I don't want to have to handle context items in a special way inside variables, etc., because I'm not sure it's worth it.

  -->
952 <!--<xsl:template name="test_full">
953 <xsl:param name="corpusID"/>
954 <xsl:param name="docID"/>
955 <xsl:param name="textID"/>
956 <xsl:call-template name="xsl:initial-template"/>
957 </xsl:template>-->
958
Akron9a8ee3e2022-01-31 13:51:49 +0100959</xsl:stylesheet>