blob: 4a377df363523f7a863544c2fd67555960af386e [file] [log] [blame]
Akron9a8ee3e2022-01-31 13:51:49 +01001<?xml version="1.0" encoding="UTF-8"?>
2<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
bansp5e2d1c02022-03-10 04:51:40 +01003 xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
4 xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
5 xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f map nkjp tei"
6 version="3.0" expand-text="yes">
Akron9a8ee3e2022-01-31 13:51:49 +01007
banspe726b4a2022-03-28 05:47:45 +02008
9<!-- PARAMETERS -->
bansp5e2d1c02022-03-10 04:51:40 +010010
<!-- The directory containing the NKJP files, in the form of a collection of text-level directories. -->
<xsl:param name="sourceDir" as="xs:string" select="'test/resources/nkjp2korap_sample2'"/>

<!-- The directory below which the converted corpus is written. -->
<xsl:param name="targetDir" as="xs:string" select="'test/output'"/>

<!-- Comma-separated list of document IDs to be skipped from processing,
     for example: HellerPodgladanie,KOT
     No functionality beyond string identity is supported. -->
<xsl:param name="skip_docID" as="xs:string" select="''"/>
bansp8f6700b2022-03-27 05:27:09 +020022
banspe726b4a2022-03-28 05:47:45 +020023<!-- VARIABLES -->
24
<!-- Corpus and document identifiers used to build target paths and compound IDs. -->
<xsl:variable name="corpusID" static="yes" as="xs:string" select="'NKJP'"/>
<xsl:variable name="docID" static="yes" as="xs:string" select="'NKJP'"/>

<!-- Corpus-level output directory, always ending with a slash. -->
<xsl:variable name="targetCorpusDir_slashed" as="xs:string"
              select="concat($targetDir, '/', $corpusID, '/')"/>

<!-- System and public identifiers of the I5 DTD. -->
<xsl:variable name="systemDoctypeI5" static="yes" as="xs:string"
              select="'http://corpora.ids-mannheim.de/I5/DTD/i5.dtd'"/>
<xsl:variable name="publicDoctypeI5" static="yes" as="xs:string"
              select="'-//IDS//DTD I5 1.0//EN'"/>

<!-- Namespace of the generated KorAP-XML elements. -->
<xsl:variable name="KorAP_namespace" static="yes" as="xs:string"
              select="'http://ids-mannheim.de/ns/KorAP'"/>

<!-- Value written to the @version attribute of each generated layer. -->
<xsl:variable name="KorAP-XML_version" static="yes" as="xs:string" select="'KorAP-0.4'"/>

<!-- Query parameters appended to the collection URI;
     see https://www.saxonica.com/documentation11/index.html#!sourcedocs/collections/collection-directories -->
<xsl:variable name="collection_params" static="yes" as="xs:string"
              select="'recurse=yes;validation=strip;select=text.xml;content-type=application/xml;on-error=warning;xinclude=yes'"/>

<!-- All text.xml documents found below $sourceDir; at least one is required. -->
<xsl:variable name="collection_of_text" as="document-node()+"
              select="collection(concat($sourceDir, '?', $collection_params))"/>
49
50<!-- MODES -->
bansp5e2d1c02022-03-10 04:51:40 +010051
<!-- Header modes: nodes without an explicit rule are dropped together with their subtree. -->
<xsl:mode on-no-match="deep-skip" name="corpus"/>
<xsl:mode on-no-match="deep-skip" name="text"/>
<!-- Header text content: nodes without an explicit rule contribute only their character data. -->
<xsl:mode on-no-match="text-only-copy" name="header-text"/>
bansp5e2d1c02022-03-10 04:51:40 +010055
banspe726b4a2022-03-28 05:47:45 +020056
57 <!-- FUNCTIONS -->
58
<!-- Returns the nesting level of $node: the number of elements on its
     ancestor-or-self axis whose local name is neither 'TEI' nor 'teiCorpus'. -->
<xsl:function name="f:compute_nesting" as="xs:integer">
  <xsl:param name="node" as="node()"/>
  <xsl:sequence
    select="count($node/ancestor-or-self::*[not(local-name() = ('TEI', 'teiCorpus'))])"/>
</xsl:function>
67
<!-- Parses a @corresp value whose parenthesised part ends in two comma-separated
     integers, as in "...(ID,START,LENGTH)", and returns START + LENGTH.
     Raises a dynamic error when either number is missing or not an integer. -->
<xsl:function name="f:corresp_end" as="xs:integer">
  <xsl:param name="corresp" as="xs:string?"/>
  <!-- everything between the first comma and the closing parenthesis, i.e. "START,LENGTH" -->
  <xsl:variable name="numbers" select="substring-after(substring-before($corresp, ')'), ',')"/>
  <xsl:sequence
    select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"/>
</xsl:function>

<!-- Returns the end offset (start + length) covered by $node.
     For tei:text, tei:body, tei:p and tei:s the value is taken from the @corresp of the
     last tei:seg reached through the last tei:p / last tei:s chain; exactly one such
     attribute must exist, otherwise a type error is raised.
     For any other node the node's own @corresp is used. -->
<xsl:function name="f:calc_content_length" as="xs:integer">
  <xsl:param name="node" as="node()"/>
  <xsl:choose>
    <xsl:when test="$node/self::tei:text or $node/self::tei:body">
      <xsl:variable name="last_corresp" as="attribute(corresp)"
        select="$node/descendant::tei:p[last()]/descendant::tei:s[last()]/descendant::tei:seg[last()]/attribute::corresp"/>
      <xsl:sequence select="f:corresp_end($last_corresp)"/>
    </xsl:when>
    <xsl:when test="$node/self::tei:p">
      <xsl:variable name="last_corresp" as="attribute(corresp)"
        select="$node/descendant::tei:s[last()]/descendant::tei:seg[last()]/attribute::corresp"/>
      <xsl:sequence select="f:corresp_end($last_corresp)"/>
    </xsl:when>
    <xsl:when test="$node/self::tei:s">
      <xsl:variable name="last_corresp" as="attribute(corresp)"
        select="$node/descendant::tei:seg[last()]/attribute::corresp"/>
      <xsl:sequence select="f:corresp_end($last_corresp)"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:sequence select="f:corresp_end($node/@corresp)"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
bansp5e2d1c02022-03-10 04:51:40 +010098
<!-- Nodes that are dropped in every mode:

     @default    an auto-inserted attribute, deleted throughout.

     tei:w       NKJP-SGJP has apparently resigned from standoff representations by adding
                 <w> everywhere; for the time being, we just stick to the standoff offsets,
                 although that may need to be revisited as the NKJP format has now begun to
                 stray from its schemas and assumptions.

     tei:choice  THIS IS ONLY TEMPORARY, because an interesting challenge came up where
                 straightforward mapping will probably have to be abandoned because of
                 TOKENIZATION alternatives; for now the stylesheet just has to work, even
                 if it eats some occasional token (which it now does: 'komuś' and 'czym'
                 vanish). -->
<xsl:template match="@default | tei:w | tei:choice" mode="#all"/>
bansp8f6700b2022-03-27 05:27:09 +0200114
<!-- Entry point: writes the corpus-level header once, then walks over every text.xml
     in the collection and reports the ID of each text that is not on the skip list.
     The per-text conversion call is currently disabled (see the comment below). -->
<xsl:template name="xsl:initial-template">
  <!-- entries are trimmed so that "A, B" and "A,B" name the same two IDs -->
  <xsl:variable name="IDs_to_skip" as="xs:string*"
                select="tokenize($skip_docID, ',') ! normalize-space(.)"/>

  <!-- we only want to call the template below once, and we process a random NKJP corpus file
       for that purpose, because all we need is the main corpus header, and we can (should)
       get to that from any NKJP corpus document -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param name="text.xml" select="$collection_of_text[1]" as="document-node()"/>
    <xsl:with-param name="target" select="$targetCorpusDir_slashed || 'header.xml'" as="xs:string"/>
  </xsl:call-template>

  <xsl:for-each select="$collection_of_text">
    <!-- strip only the trailing file name; the pattern is anchored so that an earlier
         '/text.xml' path step can never be removed by accident -->
    <xsl:variable name="my_dir" as="xs:string" select="replace(base-uri(), '/text\.xml$', '')"/>
    <!-- the text ID is the name of the directory that holds text.xml -->
    <xsl:variable name="my_textID" as="xs:string" select="tokenize($my_dir, '/')[last()]"/>
    <!-- sibling annotation files; only referenced by the disabled call below -->
    <xsl:variable name="ann_morphosyntax.uri" select="$my_dir || '/ann_morphosyntax.xml'" as="xs:string"/>
    <xsl:variable name="ann_segmentation.uri" select="$my_dir || '/ann_segmentation.xml'" as="xs:string"/>

    <xsl:if test="not($my_textID = $IDs_to_skip)">
      <xsl:message select="$my_textID"/>
      <!--<xsl:call-template name="process_single_sample">
        <xsl:with-param name="text.xml" as="document-node()" select="."/>
        <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
          select="doc($ann_morphosyntax.uri)"/>
        <xsl:with-param name="ann_segmentation.xml" as="document-node()"
          select="doc($ann_segmentation.uri)"/>
        <xsl:with-param name="my_textID" select="$my_textID" as="xs:string"/>
      </xsl:call-template>-->
    </xsl:if>
  </xsl:for-each>
</xsl:template>
147
<!-- Converts one NKJP text: writes data.xml, struct/structure.xml, nkjp/morpho.xml and
     header.xml below <targetCorpusDir>/<docID>/<my_textID>.
     Parameters:
       text.xml              the text document
       ann_morphosyntax.xml  its morphosyntactic annotation
       ann_segmentation.xml  its segmentation annotation
       my_textID             the text ID (defaults to the marker value '0BAD_textID') -->
<xsl:template name="process_single_sample">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="my_textID" as="xs:string" select="'0BAD_textID'"/>

  <!-- output directory of this text -->
  <xsl:variable name="targetBaseDir" as="xs:string"
                select="concat($targetCorpusDir_slashed, $docID, '/', $my_textID)"/>

  <!-- this is what occurs in the text and data layers as @docid -->
  <xsl:variable name="compoundID" as="xs:string"
                select="concat($corpusID, '_', $docID, '.', $my_textID)"/>

  <xsl:call-template name="create_data">
    <xsl:with-param name="target" as="xs:string" select="concat($targetBaseDir, '/data.xml')"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$compoundID"/>
    <xsl:with-param name="text.xml" as="document-node()" select="$text.xml"/>
  </xsl:call-template>

  <xsl:call-template name="create_struct">
    <xsl:with-param name="target" as="xs:string"
                    select="concat($targetBaseDir, '/struct/structure.xml')"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$compoundID"/>
    <xsl:with-param name="text.xml" as="document-node()" select="$text.xml"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()"
                    select="$ann_segmentation.xml"/>
  </xsl:call-template>

  <xsl:call-template name="create_morpho">
    <xsl:with-param name="target" as="xs:string"
                    select="concat($targetBaseDir, '/nkjp/morpho.xml')"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$compoundID"/>
    <xsl:with-param name="text.xml" as="document-node()" select="$text.xml"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()"
                    select="$ann_segmentation.xml"/>
    <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
                    select="$ann_morphosyntax.xml"/>
  </xsl:call-template>

  <xsl:call-template name="create_text_header">
    <xsl:with-param name="target" as="xs:string" select="concat($targetBaseDir, '/header.xml')"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$compoundID"/>
    <xsl:with-param name="text.xml" as="document-node()" select="$text.xml"/>
  </xsl:call-template>
</xsl:template>
193
194 <!-- ************************** data.xml ******************* -->
195
<!-- Writes the data.xml file of one text to $target.
     Parameters:
       text.xml    the source text document
       compoundID  value of raw_text/@docid
       target      URI of the file to be written
     The output is a KorAP raw_text element with a metadata pointer and a text element
     holding the string value of every element whose local name is 'ab'.
     NOTE(review): when several such elements exist, xsl:value-of joins them with a single
     space; confirm that this agrees with the offsets used by the annotation layers. -->
<xsl:template name="create_data">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>
  <!-- xpath-default-namespace is not an attribute value template, so the URI is spelled
       out literally here, exactly as in the other result-document instructions -->
  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
                       xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">

    <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>

      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <xsl:value-of select="$text.xml//*[local-name() = 'ab']"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
217
bansp5f841732022-03-16 06:27:31 +0100218 <!-- ************************** struct ******************* -->
219
<!-- Writes the structure layer of one text to $target: a KorAP layer element whose
     spanList is filled by processing every tei:text of the segmentation document
     in "struct" mode.
     Parameters:
       text.xml              the source text document (not read here)
       compoundID            value of layer/@docid
       ann_segmentation.xml  the segmentation annotation
       target                URI of the file to be written -->
<xsl:template name="create_struct">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
                       xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid">{$compoundID}</xsl:attribute>
      <xsl:attribute name="version">{$KorAP-XML_version}</xsl:attribute>
      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:apply-templates mode="struct" select="$ann_segmentation.xml//tei:text"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
239
<!-- Structure layer: emits one KorAP span for the current TEI element and then
     descends into its children.
     Parameters (all optional):
       ini    start offset of the parent (passed along, not read here)
       fin    end offset of the parent (passed along, not read here)
       index  span index of the parent; this element's own index is derived from it
     The span carries @id, @from, @to and @l (nesting level) plus a TEI feature structure
     naming the element and listing its attributes. -->
<xsl:template match="tei:*" mode="struct">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>

  <xsl:variable name="tag" as="xs:string" select="local-name()"/>
  <!-- preceding siblings carrying the same local name -->
  <xsl:variable name="earlier_twins" select="preceding-sibling::*[local-name() eq $tag]"/>
  <!-- for s and p only: all descendants of those siblings also count towards the index -->
  <xsl:variable name="earlier_descendants" as="xs:integer"
    select="if (self::tei:s or self::tei:p)
            then sum($earlier_twins ! count(descendant::*))
            else 0"/>
  <xsl:variable name="my_index" as="xs:integer"
    select="$index + 1 + count($earlier_twins) + $earlier_descendants"/>

  <!-- start offset: 0 for text/body, otherwise the START number of the (first) @corresp -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p or self::tei:s">
        <xsl:variable name="first_corresp" as="attribute(corresp)"
          select="if (self::tei:p)
                  then descendant::tei:s[1]/descendant::tei:seg[1]/attribute::corresp
                  else descendant::tei:seg[1]/attribute::corresp"/>
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before($first_corresp, ')'), ','), ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:seg">
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before(@corresp, ')'), ','), ','))"/>
      </xsl:when>
    </xsl:choose>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id">{'s' || $my_index}</xsl:attribute>
    <xsl:attribute name="from">{$start}</xsl:attribute>
    <xsl:attribute name="to">{$end}</xsl:attribute>
    <xsl:attribute name="l">{f:compute_nesting(.)}</xsl:attribute>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <!-- STRUCT vs. LEX -->
      <xsl:attribute name="type">struct</xsl:attribute>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name">name</xsl:attribute>
        <xsl:value-of select="$tag"/>
      </xsl:element>
      <!-- attributes of the source element, one feature per attribute -->
      <xsl:if test="@*">
        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
          <xsl:attribute name="name">attr</xsl:attribute>
          <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="type">attr</xsl:attribute>
            <xsl:for-each select="@*">
              <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                <xsl:attribute name="name">{local-name()}</xsl:attribute>
                <xsl:value-of select="."/>
              </xsl:element>
            </xsl:for-each>
          </xsl:element>
        </xsl:element>
      </xsl:if>
    </xsl:element>
  </xsl:element>

  <xsl:apply-templates mode="struct">
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ini" as="xs:integer" select="$start"/>
    <xsl:with-param name="fin" as="xs:integer" select="$end"/>
  </xsl:apply-templates>
</xsl:template>
334
335 <!-- ************************** morpho ******************* -->
336
<!-- Writes the morphosyntactic layer of one text to $target: a KorAP layer element whose
     spanList is filled by processing every tei:text of the segmentation document in
     "morpho" mode, with the morphosyntax document passed along as a parameter.
     Parameters:
       text.xml              the source text document (not read here)
       compoundID            value of layer/@docid
       ann_segmentation.xml  the segmentation annotation
       ann_morphosyntax.xml  the morphosyntactic annotation
       target                URI of the file to be written -->
<xsl:template name="create_morpho">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
                       xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid">{$compoundID}</xsl:attribute>
      <xsl:attribute name="version">{$KorAP-XML_version}</xsl:attribute>
      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:apply-templates mode="morpho" select="$ann_segmentation.xml//tei:text">
          <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
                          select="$ann_morphosyntax.xml"/>
        </xsl:apply-templates>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
359
<!-- Morphosyntactic layer, generic rule: produces no output of its own; it only computes
     the index and offsets of the current TEI element and hands them down to its children.
     Parameters:
       ini, fin              offsets of the parent (optional, not read here)
       index                 span index of the parent (optional)
       ann_morphosyntax.xml  the morphosyntactic annotation, passed through unchanged
     The start offset has branches for text, body, p and s only; tei:seg is matched by a
     more specific template in this mode. -->
<xsl:template match="tei:*" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>

  <xsl:variable name="tag" as="xs:string" select="local-name()"/>
  <!-- preceding siblings carrying the same local name -->
  <xsl:variable name="earlier_twins" select="preceding-sibling::*[local-name() eq $tag]"/>
  <!-- for s and p only: all descendants of those siblings also count towards the index -->
  <xsl:variable name="earlier_descendants" as="xs:integer"
    select="if (self::tei:s or self::tei:p)
            then sum($earlier_twins ! count(descendant::*))
            else 0"/>
  <xsl:variable name="my_index" as="xs:integer"
    select="$index + 1 + count($earlier_twins) + $earlier_descendants"/>

  <!-- start offset: 0 for text/body, otherwise the START number of the first @corresp -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p or self::tei:s">
        <xsl:variable name="first_corresp" as="attribute(corresp)"
          select="if (self::tei:p)
                  then descendant::tei:s[1]/descendant::tei:seg[1]/attribute::corresp
                  else descendant::tei:seg[1]/attribute::corresp"/>
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before($first_corresp, ')'), ','), ','))"/>
      </xsl:when>
    </xsl:choose>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:apply-templates mode="morpho">
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ini" as="xs:integer" select="$start"/>
    <xsl:with-param name="fin" as="xs:integer" select="$end"/>
    <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
                    select="$ann_morphosyntax.xml"/>
  </xsl:apply-templates>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100426
<!-- Morphosyntactic layer, segment rule: emits one KorAP span per tei:seg of the
     segmentation document, carrying lemma, pos and (when present) msd of the
     interpretation chosen in the morphosyntax document.
     Parameters:
       ini, fin              offsets of the parent (optional, not read here)
       index                 span index of the parent (optional)
       ann_morphosyntax.xml  the morphosyntactic annotation
     Exactly one morphosyntax segment, one 'disamb' feature and one chosen 'lex' structure
     must be found for the segment; otherwise a type error is raised. -->
<xsl:template match="tei:seg" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <!-- the lookups are deliberately spread over individual variables so that each
       constituent value can be inspected, should anything go wrong -->
  <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
  <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
  <!-- the morphosyntax segment whose @corresp points (after '#') at this segment's ID -->
  <xsl:variable name="my_morph-seg" as="node()" select="$ann_morphosyntax.xml//tei:seg[substring-after(@corresp,'#') eq $my_id]"/>
  <xsl:variable name="my_disamb" select="$my_morph-seg//tei:fs/tei:f[@name eq 'disamb']" as="node()"/>
  <!-- ID of the interpretation selected by disambiguation -->
  <xsl:variable name="my_choice-id" select="substring-after($my_disamb//tei:f[@name eq 'choice']/@fVal,'#')" as="xs:string"/>
  <!-- the 'lex' feature structure that contains the selected interpretation -->
  <xsl:variable name="my_choice-lex" select="$my_morph-seg//tei:f[@name eq 'interps']/tei:fs[@type eq 'lex'][descendant::tei:symbol[@xml:id eq $my_choice-id]]" as="node()"/>
  <xsl:variable name="chosen-msd" as="xs:string" select="$my_choice-lex/descendant::tei:symbol[@xml:id eq $my_choice-id]/@value"/>
  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)"/>
  <!-- the s/p-specific descendant correction used by the generic rule is not applied
       here; this needs to be revisited once the indexing scheme has settled -->
  <xsl:variable name="my_index" select="$index + 1 + $preceding-count" as="xs:integer"/>

  <!-- START number of this segment's own @corresp -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:variable name="numbers" select="substring-after(substring-before(@corresp,')'),',')"/>
    <xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <xsl:attribute name="type" select="'lex'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'lex'"/>
        <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
          <!-- the orthographic form is recorded as a comment only -->
          <xsl:comment select="$my_morph-seg//tei:fs/tei:f[@name eq 'orth']/tei:string"/>

          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'lemma'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'base']/tei:string"/>
          </xsl:element>
          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'pos'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'ctag']/tei:symbol/@value"/>
          </xsl:element>
          <!-- msd is written only when the chosen interpretation has a non-empty value -->
          <xsl:if test="string-length($chosen-msd)">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'msd'"/>
              <xsl:value-of select="$chosen-msd"/>
            </xsl:element>
          </xsl:if>
          <!-- an 'nps' feature on the morphosyntax segment is rendered as join=left -->
          <xsl:if test="$my_morph-seg//tei:fs/tei:f[@name eq 'nps']">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'join'"/>
              <xsl:value-of select="'left'"/>
            </xsl:element>
          </xsl:if>
        </xsl:element>
      </xsl:element>
    </xsl:element>
  </xsl:element>
  <!-- a stray comment terminator used to follow this instruction and was copied into the
       output as literal text after every span; it has been removed -->
  <xsl:apply-templates mode="morpho">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
  </xsl:apply-templates>
</xsl:template>
banspe726b4a2022-03-28 05:47:45 +0200516
bansp5f841732022-03-16 06:27:31 +0100517 <!-- ************************** TEXT header ******************* -->
518
<!-- Writes the text-level header.xml to $target: an idsHeader element filled by
     processing the children of TEI/teiHeader in "text" mode.
     Parameters:
       text.xml    the source text document
       compoundID  handed to the "text" mode rules as a tunnel parameter
       target      URI of the file to be written -->
<xsl:template name="create_text_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
                       xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="text" select="$text.xml//tei:TEI/tei:teiHeader/tei:*">
        <xsl:with-param name="compoundID" tunnel="yes" as="xs:string" select="$compoundID"/>
      </xsl:apply-templates>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
535
<!-- Text header: re-creates fileDesc without a namespace and processes its children. -->
<xsl:template match="tei:fileDesc" mode="text">
  <fileDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </fileDesc>
</xsl:template>
541
<!-- Text header: renders a title as t.title.
     The content is processed in the declared "header-text" mode, as the corpus-level
     title rule does, instead of falling into the unnamed default mode. -->
<xsl:template match="tei:title" mode="text">
  <t.title>
    <xsl:apply-templates mode="header-text"/>
  </t.title>
</xsl:template>
547
<!-- Text header: titleStmt starts with a textSigle holding the tunnelled compound ID,
     followed by the converted children. -->
<xsl:template match="tei:titleStmt" mode="text">
  <xsl:param name="compoundID" tunnel="yes" as="xs:string"/>
  <titleStmt>
    <textSigle>{$compoundID}</textSigle>
    <xsl:apply-templates select="node()" mode="#current"/>
  </titleStmt>
</xsl:template>
557
<!-- Text header: re-creates publicationStmt without a namespace and processes its children. -->
<xsl:template match="tei:publicationStmt" mode="text">
  <publicationStmt>
    <xsl:apply-templates select="node()" mode="#current"/>
  </publicationStmt>
</xsl:template>
563
<!-- Text header: re-creates availability; attributes and child elements are processed,
     text nodes are not selected. -->
<xsl:template match="tei:availability" mode="text">
  <availability>
    <xsl:apply-templates select="@* | *" mode="#current"/>
  </availability>
</xsl:template>
569
<!-- Text header: re-creates profileDesc without a namespace and processes its children. -->
<xsl:template match="tei:profileDesc" mode="text">
  <profileDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </profileDesc>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100575
<!-- Text header: re-creates textClass; attributes and child elements are processed,
     text nodes are not selected. -->
<xsl:template match="tei:textClass" mode="text">
  <textClass>
    <xsl:apply-templates select="@* | *" mode="#current"/>
  </textClass>
</xsl:template>
581
<!-- Text and corpus headers: re-creates catRef with its attributes and child elements.
     Processing continues in the mode the rule was invoked in, so a catRef reached in
     "corpus" mode no longer switches to "text" mode. -->
<xsl:template match="tei:catRef" mode="text corpus">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates mode="#current" select="@* | *"/>
  </xsl:element>
</xsl:template>
587
<!-- Text and corpus headers: the listed attributes are copied unchanged;
     xml:id is copied only below a classDecl. -->
<xsl:template mode="text corpus"
              match="@status | @scheme | @target | @type | @xml:id[ancestor::tei:classDecl] | @xml:lang">
  <xsl:copy/>
</xsl:template>
591
<!-- Text and corpus headers: re-creates p and processes its content in "header-text" mode. -->
<xsl:template match="tei:p" mode="text corpus">
  <p>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </p>
</xsl:template>
597
598
599 <!-- OPTIMIZATION has to take modes into account -->
bansp5e2d1c02022-03-10 04:51:40 +0100600 <!-- ************************** CORPUS header ******************* -->
<!-- Writes the corpus-level header.xml to $target: an idsHeader element filled by
     processing the children of teiCorpus/teiHeader in "corpus" mode.
     Parameters:
       text.xml  a text document whose root element is teiCorpus
       target    URI of the file to be written -->
<xsl:template name="create_corpus_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- doctype-public="{$publicDoctypeI5}" and doctype-system="{$systemDoctypeI5}"
       are not set on the instruction below: these are, sadly, useless -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes">
    <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="corpus" select="$text.xml/tei:teiCorpus/tei:teiHeader/tei:*"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
618
<!-- Corpus-level header: rebuild tei:fileDesc as an unprefixed fileDesc element and
     pass every child node on to the "corpus" mode templates. -->
<xsl:template mode="corpus" match="tei:fileDesc">
  <fileDesc>
    <xsl:apply-templates select="node()" mode="corpus"/>
  </fileDesc>
</xsl:template>
bansp9103aab2022-03-19 05:10:21 +0100624
bansp5e2d1c02022-03-10 04:51:40 +0100625
<!-- Corpus-level header: a tei:title becomes a c.title element. Its attributes go
     through the "corpus" mode templates, its child nodes through "header-text" mode. -->
<xsl:template mode="corpus" match="tei:title">
  <xsl:element name="c.title">
    <xsl:apply-templates select="@*" mode="corpus"/>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </xsl:element>
</xsl:template>
632
<!-- Corpus-level header: emit titleStmt, starting with a korpusSigle element that holds
     the value of $corpusID, followed by whatever the "corpus" mode templates produce
     for the child nodes of tei:titleStmt. -->
<xsl:template mode="corpus" match="tei:titleStmt">
  <titleStmt>
    <!-- text value template; the stylesheet element sets expand-text="yes" -->
    <korpusSigle>{$corpusID}</korpusSigle>
    <xsl:apply-templates select="node()" mode="corpus"/>
  </titleStmt>
</xsl:template>
641
<!-- Corpus-level header: rebuild tei:publicationStmt as an unprefixed publicationStmt
     element and pass every child node on to the "corpus" mode templates. -->
<xsl:template mode="corpus" match="tei:publicationStmt">
  <publicationStmt>
    <xsl:apply-templates select="node()" mode="corpus"/>
  </publicationStmt>
</xsl:template>
647
<!-- Corpus-level header: rebuild tei:availability as an unprefixed availability element.
     Only its attributes and child elements are processed further (in "corpus" mode);
     text nodes directly inside tei:availability are not selected. -->
<xsl:template mode="corpus" match="tei:availability">
  <availability>
    <xsl:apply-templates select="(@*, *)" mode="corpus"/>
  </availability>
</xsl:template>
653
<!-- Corpus-level header: rebuild tei:encodingDesc as an unprefixed encodingDesc element
     and pass every child node on to the "corpus" mode templates. -->
<xsl:template mode="corpus" match="tei:encodingDesc">
  <encodingDesc>
    <xsl:apply-templates select="node()" mode="corpus"/>
  </encodingDesc>
</xsl:template>
659
<!-- Corpus-level header: structural elements of the class declaration (classDecl,
     taxonomy, category, and bibl directly inside taxonomy) are rebuilt under their own
     local names. Attributes and child elements are processed in "corpus" mode; text
     nodes directly inside these elements are not selected. -->
<xsl:template mode="corpus"
              match="tei:classDecl
                   | tei:taxonomy
                   | tei:category
                   | tei:taxonomy/tei:bibl">
  <xsl:element name="{local-name(.)}">
    <xsl:apply-templates select="(@*, *)" mode="corpus"/>
  </xsl:element>
</xsl:template>
665
<!-- Corpus-level header: text-bearing elements (a title directly inside bibl, edition,
     desc) are rebuilt under their own local names. Attributes go through the "corpus"
     mode templates, child nodes through "header-text" mode. -->
<xsl:template mode="corpus"
              match="tei:bibl/tei:title
                   | tei:edition
                   | tei:desc">
  <xsl:element name="{local-name(.)}">
    <xsl:apply-templates select="@*" mode="corpus"/>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </xsl:element>
</xsl:template>
672<!--
673 <xsl:template match="tei:textClass" mode="corpus">
674 <xsl:element name="{local-name()}">
675 <xsl:apply-templates mode="corpus" select="@* | *"/>
676 </xsl:element>
677 </xsl:template>
678
679 <xsl:template match="tei:catRef" mode="corpus">
680 <xsl:element name="{local-name()}">
681 <xsl:apply-templates mode="corpus" select="@* | *"/>
682 </xsl:element>
683 </xsl:template>
684-->
bansp5e2d1c02022-03-10 04:51:40 +0100685
686
687
<!-- This template can be called by the XSpec test; TODO: find a way to call the main() template directly. -->
<!-- I have not fully handled the parameter transmission, which would have to be kludged in just for the sake of XSpec.
     I am disabling this template for now because of XSpec design issues; relevant links include:

     https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
     https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html

     In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in a stylesheet. I don't
     want to be strangled by kludges at the beginning of the work, and I have already lost quite a bit of time on this investigation.
     I will therefore "just code", and we can think about externalizing bits of templates later if we want to play with tests. For now,
     I don't want to have to handle context items in a special way inside variables, etc., because I'm not sure it's worth it.

-->
701 <!--<xsl:template name="test_full">
702 <xsl:param name="corpusID"/>
703 <xsl:param name="docID"/>
704 <xsl:param name="textID"/>
705 <xsl:call-template name="xsl:initial-template"/>
706 </xsl:template>-->
707
Akron9a8ee3e2022-01-31 13:51:49 +0100708</xsl:stylesheet>