blob: 4bfdb7ac1f27c8c697821c4b67f303199c7aff20 [file] [log] [blame]
Akron9a8ee3e2022-01-31 13:51:49 +01001<?xml version="1.0" encoding="UTF-8"?>
2<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
bansp5e2d1c02022-03-10 04:51:40 +01003 xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
4 xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
5 xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f map nkjp tei"
6 version="3.0" expand-text="yes">
Akron9a8ee3e2022-01-31 13:51:49 +01007
banspe726b4a2022-03-28 05:47:45 +02008
9<!-- PARAMETERS -->
bansp5e2d1c02022-03-10 04:51:40 +010010
<!-- Directory holding the NKJP input, organised as a collection of text-level
     sub-directories; it is handed to collection() to locate the text.xml files. -->
<xsl:param name="sourceDir" as="xs:string" select="'test/resources/nkjp2korap_sample2'"/>

<!-- Root directory under which all result documents are written. -->
<xsl:param name="targetDir" as="xs:string" select="'test/output'"/>
15
banspe726b4a2022-03-28 05:47:45 +020016<!-- VARIABLES -->
17
<!-- Corpus-level and document-level identifiers; both are fixed to 'NKJP'. -->
<xsl:variable name="corpusID" static="yes" as="xs:string" select="'NKJP'"/>
<xsl:variable name="docID" static="yes" as="xs:string" select="'NKJP'"/>

<!-- Output directory of the corpus, always terminated with a slash. -->
<xsl:variable name="targetCorpusDir_slashed" as="xs:string"
  select="$targetDir || '/' || $corpusID || '/'"/>

<!-- System and public identifiers of the I5 DTD; no active instruction in this
     stylesheet references them at present. -->
<xsl:variable name="systemDoctypeI5" static="true" as="xs:string"
  select="'http://corpora.ids-mannheim.de/I5/DTD/i5.dtd'"/>

<xsl:variable name="publicDoctypeI5" static="true" as="xs:string"
  select="'-//IDS//DTD I5 1.0//EN'"/>

<!-- Namespace of the generated KorAP elements. -->
<xsl:variable name="KorAP_namespace" static="true" as="xs:string"
  select="'http://ids-mannheim.de/ns/KorAP'"/>

<!-- Value written to the @version attribute of the generated layer elements. -->
<xsl:variable name="KorAP-XML_version" static="true" as="xs:string" select="'KorAP-0.4'"/>

<!-- Query parameters appended to the collection URI; see
     https://www.saxonica.com/documentation11/index.html#!sourcedocs/collections/collection-directories -->
<xsl:variable name="collection_params" static="yes" as="xs:string"
  select="'recurse=yes;validation=strip;select=text.xml;content-type=application/xml;on-error=warning;xinclude=yes'"/>

<!-- Every text.xml document found below $sourceDir. -->
<xsl:variable name="collection_of_text" as="document-node()+"
  select="collection($sourceDir || '?' || $collection_params)"/>
42
43<!-- MODES -->
bansp5e2d1c02022-03-10 04:51:40 +010044
<!-- Header modes: anything without an explicit rule is dropped together with its subtree. -->
<xsl:mode on-no-match="deep-skip" name="corpus"/>
<xsl:mode on-no-match="deep-skip" name="text"/>
<!-- Mode for header prose: unmatched nodes contribute only their text. -->
<xsl:mode on-no-match="text-only-copy" name="header-text"/>
bansp5e2d1c02022-03-10 04:51:40 +010048
banspe726b4a2022-03-28 05:47:45 +020049
50 <!-- FUNCTIONS -->
51
<!-- Returns the nesting level of $node: the number of elements on its
     ancestor-or-self axis, leaving out elements whose local name is
     'TEI' or 'teiCorpus'. -->
<xsl:function name="f:compute_nesting" as="xs:integer">
  <xsl:param name="node" as="node()"/>
  <xsl:sequence
    select="count($node/ancestor-or-self::*[not(local-name() = ('TEI', 'teiCorpus'))])"/>
</xsl:function>
60
<!-- Returns the sum of the two trailing numeric arguments of a string-range
     @corresp value (for example '...string-range(id,10,5)' gives 15); callers
     use the result as the @to of a span. NOTE(review): the two numbers are
     presumably start offset and length; confirm against the NKJP format.

     The @corresp that is read depends on $node:
       tei:text, tei:body : last seg of the last s of the last p below $node
       tei:p              : last seg of the last s below $node
       tei:s              : last seg below $node
       anything else      : the @corresp of $node itself
     A type error is raised when no such attribute exists. -->
<xsl:function name="f:calc_content_length" as="xs:integer">
  <xsl:param name="node" as="node()"/>

  <!-- One selection step instead of four copies of the same parsing code. -->
  <xsl:variable name="last_corresp" as="attribute(corresp)" select="
      if ($node/self::tei:text or $node/self::tei:body) then
        $node/descendant::tei:p[last()]/descendant::tei:s[last()]/descendant::tei:seg[last()]/@corresp
      else if ($node/self::tei:p) then
        $node/descendant::tei:s[last()]/descendant::tei:seg[last()]/@corresp
      else if ($node/self::tei:s) then
        $node/descendant::tei:seg[last()]/@corresp
      else
        $node/@corresp"/>

  <!-- Keep what stands between the first comma and the closing parenthesis: 'n1,n2'. -->
  <xsl:variable name="numbers" as="xs:string"
    select="substring-after(substring-before($last_corresp, ')'), ',')"/>

  <xsl:sequence
    select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"/>
</xsl:function>
bansp5e2d1c02022-03-10 04:51:40 +010091
<!-- Drop the auto-inserted @default attribute wherever templates are applied to it. -->
<xsl:template mode="#all" match="@default"/>

<!-- NKJP-SGJP has apparently abandoned standoff representations by adding <w>
     everywhere; for the time being we stick to the standoff offsets and ignore <w>.
     This may need to be revisited, as the NKJP format has now begun to stray from
     its schemas and assumptions. -->
<xsl:template mode="#all" match="tei:w"/>

<!-- TEMPORARY: <choice> carries TOKENIZATION alternatives, which will probably
     force us to abandon the straightforward mapping. For now the stylesheet
     simply has to work, even if it eats an occasional token (which it currently
     does: 'komuś' and 'czym' vanish). -->
<xsl:template mode="#all" match="tei:choice"/>
bansp8f6700b2022-03-27 05:27:09 +0200107
<!-- Entry point. Writes the corpus-level header once, then converts every
     text.xml of the input collection together with its sibling annotation files. -->
<xsl:template name="xsl:initial-template">
  <!-- The corpus header is needed only once; the first document of the collection
       is used for it, because the main corpus header can (should) be reachable
       from any NKJP corpus document. -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param name="text.xml" select="$collection_of_text[1]" as="document-node()"/>
    <xsl:with-param name="target" select="$targetCorpusDir_slashed || 'header.xml'" as="xs:string"/>
  </xsl:call-template>

  <xsl:for-each select="$collection_of_text">
    <!-- Directory of the current text: its base URI without the trailing '/text.xml'.
         The pattern is anchored so that only the final path segment is removed. -->
    <xsl:variable name="my_dir" as="xs:string" select="replace(base-uri(), '/text\.xml$', '')"/>
    <!-- The last path segment of that directory serves as the text ID. -->
    <xsl:variable name="my_textID" as="xs:string" select="tokenize($my_dir, '/')[last()]"/>
    <xsl:variable name="ann_morphosyntax.uri" as="xs:string" select="$my_dir || '/ann_morphosyntax.xml'"/>
    <xsl:variable name="ann_segmentation.uri" as="xs:string" select="$my_dir || '/ann_segmentation.xml'"/>

    <xsl:call-template name="process_single_sample">
      <xsl:with-param name="text.xml" as="document-node()" select="."/>
      <xsl:with-param name="ann_morphosyntax.xml" as="document-node()" select="doc($ann_morphosyntax.uri)"/>
      <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="doc($ann_segmentation.uri)"/>
      <xsl:with-param name="my_textID" select="$my_textID" as="xs:string"/>
    </xsl:call-template>
  </xsl:for-each>
</xsl:template>
130
<!-- Converts one NKJP text: writes data.xml, struct/structure.xml, nkjp/morpho.xml
     and header.xml below <targetDir>/<corpusID>/<docID>/<my_textID>.
     Parameters:
       text.xml              the source text document
       ann_morphosyntax.xml  its morphosyntactic annotation document
       ann_segmentation.xml  its segmentation annotation document
       my_textID             ID of the text; defaults to the marker '0BAD_textID' -->
<xsl:template name="process_single_sample">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="my_textID" as="xs:string" select="'0BAD_textID'"/>

  <!-- Directory that receives all files of this text. -->
  <xsl:variable name="textOutDir" as="xs:string"
    select="concat($targetCorpusDir_slashed, $docID, '/', $my_textID)"/>

  <!-- The identifier that appears as @docid in the text and data layers. -->
  <xsl:variable name="sigle" as="xs:string"
    select="concat($corpusID, '_', $docID, '.', $my_textID)"/>

  <xsl:call-template name="create_data">
    <xsl:with-param name="text.xml" as="document-node()" select="$text.xml"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$sigle"/>
    <xsl:with-param name="target" as="xs:string" select="$textOutDir || '/data.xml'"/>
  </xsl:call-template>

  <xsl:call-template name="create_struct">
    <xsl:with-param name="text.xml" as="document-node()" select="$text.xml"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$sigle"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
    <xsl:with-param name="target" as="xs:string" select="$textOutDir || '/struct/structure.xml'"/>
  </xsl:call-template>

  <xsl:call-template name="create_morpho">
    <xsl:with-param name="text.xml" as="document-node()" select="$text.xml"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$sigle"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
    <xsl:with-param name="ann_morphosyntax.xml" as="document-node()" select="$ann_morphosyntax.xml"/>
    <xsl:with-param name="target" as="xs:string" select="$textOutDir || '/nkjp/morpho.xml'"/>
  </xsl:call-template>

  <xsl:call-template name="create_text_header">
    <xsl:with-param name="text.xml" as="document-node()" select="$text.xml"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$sigle"/>
    <xsl:with-param name="target" as="xs:string" select="$textOutDir || '/header.xml'"/>
  </xsl:call-template>
</xsl:template>
176
177 <!-- ************************** data.xml ******************* -->
178
<!-- Writes the data.xml file of one text to $target.
     Parameters:
       text.xml    the source text document
       compoundID  value of the @docid attribute
       target      URI of the result document -->
<xsl:template name="create_data">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- xpath-default-namespace is a static standard attribute, not an attribute
       value template, so curly braces in it are taken literally. The shadow
       attribute form below is the XSLT 3.0 way to fill it from a static variable. -->
  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
    _xpath-default-namespace="{$KorAP_namespace}" href="{$target}">

    <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>

      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <!-- String value of every element whose local name is 'ab'; xsl:value-of
             joins multiple items with a single space. -->
        <xsl:value-of select="$text.xml//*[local-name() = 'ab']"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
200
bansp5f841732022-03-16 06:27:31 +0100201 <!-- ************************** struct ******************* -->
202
<!-- Writes the structure layer of one text to $target: a KorAP <layer> whose
     <spanList> is produced by the 'struct' mode templates applied to every
     tei:text of the segmentation document.
     Parameters:
       text.xml              the source text document (not read here)
       compoundID            value of the @docid attribute
       ann_segmentation.xml  the segmentation annotation document
       target                URI of the result document -->
<xsl:template name="create_struct">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element namespace="{$KorAP_namespace}" name="layer">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:attribute name="version" select="$KorAP-XML_version"/>
      <xsl:element namespace="{$KorAP_namespace}" name="spanList">
        <xsl:apply-templates mode="struct" select="$ann_segmentation.xml//tei:text"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
222
<!-- Emits one KorAP <span> for the current TEI element of the segmentation
     document and then recurses into its children.
     Parameters (all optional):
       ini, fin  offsets handed down by the parent; not read in this template
       index     running index of the parent, used to derive this span's id
     The computation is deliberately spread over several variables so that the
     individual values can be inspected if anything goes wrong. -->
<xsl:template match="tei:*" mode="struct">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>

  <xsl:variable name="own_name" as="xs:string" select="local-name()"/>
  <!-- Preceding siblings that share the local name of the current element. -->
  <xsl:variable name="earlier_twins" select="preceding-sibling::*[local-name() eq $own_name]"/>
  <!-- For s and p only: the number of elements nested inside those siblings. -->
  <xsl:variable name="nested_in_twins" as="xs:integer"
    select="if (self::tei:s or self::tei:p)
            then sum($earlier_twins ! count(descendant::*))
            else 0"/>
  <xsl:variable name="my_index" as="xs:integer"
    select="$index + 1 + count($earlier_twins) + $nested_in_twins"/>

  <!-- Start offset: 0 for text and body; otherwise the first number of the
       string-range in the first relevant @corresp. No branch exists for other
       elements, in which case the typed variable raises an error. -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text | self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p">
        <xsl:variable name="first_corresp" as="attribute(corresp)"
          select="descendant::tei:s[1]/descendant::tei:seg[1]/@corresp"/>
        <xsl:variable name="range_args"
          select="substring-after(substring-before($first_corresp, ')'), ',')"/>
        <xsl:sequence select="xs:integer(substring-before($range_args, ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:s">
        <xsl:variable name="first_corresp" as="attribute(corresp)"
          select="descendant::tei:seg[1]/@corresp"/>
        <xsl:variable name="range_args"
          select="substring-after(substring-before($first_corresp, ')'), ',')"/>
        <xsl:sequence select="xs:integer(substring-before($range_args, ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:seg">
        <xsl:variable name="range_args"
          select="substring-after(substring-before(@corresp, ')'), ',')"/>
        <xsl:sequence select="xs:integer(substring-before($range_args, ','))"/>
      </xsl:when>
    </xsl:choose>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element namespace="{$KorAP_namespace}" name="span">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element namespace="http://www.tei-c.org/ns/1.0" name="fs">
      <!-- 'struct' here, as opposed to 'lex' in the morpho layer -->
      <xsl:attribute name="type" select="'struct'"/>
      <xsl:element namespace="http://www.tei-c.org/ns/1.0" name="f">
        <xsl:attribute name="name" select="'name'"/>
        <xsl:value-of select="local-name()"/>
      </xsl:element>
      <!-- The attributes of the source element, if any, become a nested feature structure. -->
      <xsl:if test="exists(@*)">
        <xsl:element namespace="http://www.tei-c.org/ns/1.0" name="f">
          <xsl:attribute name="name" select="'attr'"/>
          <xsl:element namespace="http://www.tei-c.org/ns/1.0" name="fs">
            <xsl:attribute name="type" select="'attr'"/>
            <xsl:for-each select="@*">
              <xsl:element namespace="http://www.tei-c.org/ns/1.0" name="f">
                <xsl:attribute name="name" select="local-name()"/>
                <xsl:value-of select="."/>
              </xsl:element>
            </xsl:for-each>
          </xsl:element>
        </xsl:element>
      </xsl:if>
    </xsl:element>
  </xsl:element>

  <xsl:apply-templates mode="struct">
    <xsl:with-param name="ini" as="xs:integer" select="$start"/>
    <xsl:with-param name="fin" as="xs:integer" select="$end"/>
    <xsl:with-param name="index" select="$my_index"/>
  </xsl:apply-templates>
</xsl:template>
317
318 <!-- ************************** morpho ******************* -->
319
<!-- Writes the morphosyntactic layer of one text to $target: a KorAP <layer>
     whose <spanList> is produced by the 'morpho' mode templates applied to every
     tei:text of the segmentation document.
     Parameters:
       text.xml              the source text document (not read here)
       compoundID            value of the @docid attribute
       ann_segmentation.xml  the segmentation annotation document
       ann_morphosyntax.xml  the morphosyntactic annotation document, handed on
                             to the 'morpho' templates
       target                URI of the result document -->
<xsl:template name="create_morpho">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element namespace="{$KorAP_namespace}" name="layer">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:attribute name="version" select="$KorAP-XML_version"/>
      <xsl:element namespace="{$KorAP_namespace}" name="spanList">
        <xsl:apply-templates mode="morpho" select="$ann_segmentation.xml//tei:text">
          <xsl:with-param name="ann_morphosyntax.xml" as="document-node()" select="$ann_morphosyntax.xml"/>
        </xsl:apply-templates>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
342
<!-- Walks the structural TEI elements of the segmentation document in 'morpho'
     mode. Nothing is written here; the template only computes the running index
     and the offsets and hands them to its children. tei:seg has its own, more
     specific template in this mode.
     Parameters:
       ini, fin              offsets handed down by the parent; not read here
       index                 running index of the parent
       ann_morphosyntax.xml  the morphosyntactic annotation document -->
<xsl:template match="tei:*" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>

  <xsl:variable name="own_name" as="xs:string" select="local-name()"/>
  <!-- Preceding siblings that share the local name of the current element. -->
  <xsl:variable name="earlier_twins" select="preceding-sibling::*[local-name() eq $own_name]"/>
  <!-- For s and p only: the number of elements nested inside those siblings. -->
  <xsl:variable name="nested_in_twins" as="xs:integer"
    select="if (self::tei:s or self::tei:p)
            then sum($earlier_twins ! count(descendant::*))
            else 0"/>
  <xsl:variable name="my_index" as="xs:integer"
    select="$index + 1 + count($earlier_twins) + $nested_in_twins"/>

  <!-- Start offset: 0 for text and body, otherwise the first number of the
       string-range in the first relevant @corresp. No branch exists for other
       elements, in which case the typed variable raises an error. -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text | self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p">
        <xsl:variable name="first_corresp" as="attribute(corresp)"
          select="descendant::tei:s[1]/descendant::tei:seg[1]/@corresp"/>
        <xsl:variable name="range_args"
          select="substring-after(substring-before($first_corresp, ')'), ',')"/>
        <xsl:sequence select="xs:integer(substring-before($range_args, ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:s">
        <xsl:variable name="first_corresp" as="attribute(corresp)"
          select="descendant::tei:seg[1]/@corresp"/>
        <xsl:variable name="range_args"
          select="substring-after(substring-before($first_corresp, ')'), ',')"/>
        <xsl:sequence select="xs:integer(substring-before($range_args, ','))"/>
      </xsl:when>
    </xsl:choose>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:apply-templates mode="morpho">
    <xsl:with-param name="ini" as="xs:integer" select="$start"/>
    <xsl:with-param name="fin" as="xs:integer" select="$end"/>
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ann_morphosyntax.xml" as="document-node()" select="$ann_morphosyntax.xml"/>
  </xsl:apply-templates>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100409
<!-- Emits one KorAP <span> with the disambiguated lexical reading of the current
     segment and then recurses into its children.
     Parameters:
       ini, fin              offsets handed down by the parent; not read here
       index                 running index of the parent, used to derive the span id
       ann_morphosyntax.xml  the morphosyntactic annotation document
     The typed variables below require exactly one matching node each; a type
     error is raised otherwise.
     The computation is deliberately spread over several variables so that the
     individual values can be inspected if anything goes wrong. -->
<xsl:template match="tei:seg" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>

  <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
  <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
  <!-- The morphosyntax segment whose @corresp points at this segmentation segment. -->
  <xsl:variable name="my_morph-seg" as="node()"
    select="$ann_morphosyntax.xml//tei:seg[substring-after(@corresp, '#') eq $my_id]"/>
  <xsl:variable name="my_disamb" as="node()"
    select="$my_morph-seg//tei:fs/tei:f[@name eq 'disamb']"/>
  <!-- ID of the symbol selected by the 'choice' feature of the disambiguation. -->
  <xsl:variable name="my_choice-id" as="xs:string"
    select="substring-after($my_disamb//tei:f[@name eq 'choice']/@fVal, '#')"/>
  <!-- The 'lex' feature structure among the interpretations that contains that symbol. -->
  <xsl:variable name="my_choice-lex" as="node()"
    select="$my_morph-seg//tei:f[@name eq 'interps']/tei:fs[@type eq 'lex'][descendant::tei:symbol[@xml:id eq $my_choice-id]]"/>
  <xsl:variable name="chosen-msd" as="xs:string"
    select="$my_choice-lex/descendant::tei:symbol[@xml:id eq $my_choice-id]/@value"/>

  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)"/>
  <xsl:variable name="my_index" select="$index + 1 + $preceding-count" as="xs:integer"/>

  <!-- First number of the string-range in @corresp. -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:variable name="numbers" select="substring-after(substring-before(@corresp, ')'), ',')"/>
    <xsl:sequence select="xs:integer(substring-before($numbers, ','))"/>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <xsl:attribute name="type" select="'lex'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'lex'"/>
        <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
          <!-- The 'orth' string is written as an XML comment into the output. -->
          <xsl:comment select="$my_morph-seg//tei:fs/tei:f[@name eq 'orth']/tei:string"/>

          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'lemma'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'base']/tei:string"/>
          </xsl:element>
          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'pos'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'ctag']/tei:symbol/@value"/>
          </xsl:element>
          <xsl:if test="string-length($chosen-msd)">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'msd'"/>
              <xsl:value-of select="$chosen-msd"/>
            </xsl:element>
          </xsl:if>
          <!-- An 'nps' feature on the morphosyntax segment yields join='left'. -->
          <xsl:if test="$my_morph-seg//tei:fs/tei:f[@name eq 'nps']">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'join'"/>
              <xsl:value-of select="'left'"/>
            </xsl:element>
          </xsl:if>
        </xsl:element>
      </xsl:element>
    </xsl:element>
  </xsl:element>

  <!-- A leftover comment terminator used to follow this instruction; being a
       literal text node, it was copied into the output after every span. -->
  <xsl:apply-templates mode="morpho">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
  </xsl:apply-templates>
</xsl:template>
banspe726b4a2022-03-28 05:47:45 +0200499
bansp5f841732022-03-16 06:27:31 +0100500 <!-- ************************** TEXT header ******************* -->
501
<!-- Writes the text-level header.xml to $target: an <idsHeader> filled by the
     'text' mode templates applied to the children of tei:TEI/tei:teiHeader.
     Parameters:
       text.xml    the source text document
       compoundID  the text sigle; tunnelled down to the 'text' mode templates
       target      URI of the result document -->
<xsl:template name="create_text_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="text" select="$text.xml//tei:TEI/tei:teiHeader/tei:*">
        <xsl:with-param name="compoundID" tunnel="yes" as="xs:string" select="$compoundID"/>
      </xsl:apply-templates>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
518
<!-- Text header: rebuilds fileDesc as a no-namespace element and processes its children. -->
<xsl:template mode="text" match="tei:fileDesc">
  <fileDesc>
    <xsl:apply-templates mode="text"/>
  </fileDesc>
</xsl:template>
524
<!-- Text header: a TEI title becomes <t.title> holding the text content of the
     title. The mode is named explicitly (previously the unnamed mode was used
     implicitly), in line with the corpus-mode title template. -->
<xsl:template match="tei:title" mode="text">
  <t.title>
    <xsl:apply-templates mode="header-text"/>
  </t.title>
</xsl:template>
530
<!-- Text header: titleStmt opens with the text sigle (the tunnelled compoundID),
     followed by the converted children. -->
<xsl:template mode="text" match="tei:titleStmt">
  <xsl:param name="compoundID" tunnel="yes" as="xs:string"/>
  <titleStmt>
    <textSigle>{$compoundID}</textSigle>
    <xsl:apply-templates mode="text"/>
  </titleStmt>
</xsl:template>
540
<!-- Text header: rebuilds publicationStmt without namespace and processes its children. -->
<xsl:template mode="text" match="tei:publicationStmt">
  <publicationStmt>
    <xsl:apply-templates mode="text"/>
  </publicationStmt>
</xsl:template>
546
<!-- Text header: rebuilds availability without namespace; only its attributes
     and child elements are processed. -->
<xsl:template mode="text" match="tei:availability">
  <availability>
    <xsl:apply-templates select="@* | *" mode="text"/>
  </availability>
</xsl:template>
552
<!-- Text header: rebuilds profileDesc without namespace and processes its children. -->
<xsl:template mode="text" match="tei:profileDesc">
  <profileDesc>
    <xsl:apply-templates mode="text"/>
  </profileDesc>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100558
<!-- Text header: rebuilds textClass without namespace; only its attributes and
     child elements are processed. -->
<xsl:template mode="text" match="tei:textClass">
  <textClass>
    <xsl:apply-templates select="@* | *" mode="text"/>
  </textClass>
</xsl:template>
564
<!-- Text and corpus headers: rebuilds catRef without namespace and processes its
     attributes and child elements in the mode this template was invoked in
     (it used to switch to 'text' mode even when running in 'corpus' mode). -->
<xsl:template match="tei:catRef" mode="text corpus">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates mode="#current" select="@* | *"/>
  </xsl:element>
</xsl:template>
570
<!-- Text and corpus headers: the attributes listed here are carried over
     unchanged; @xml:id only when it sits somewhere inside a classDecl. -->
<xsl:template mode="text corpus"
  match="@status | @scheme | @target | @type | @xml:id[ancestor::tei:classDecl] | @xml:lang">
  <xsl:copy/>
</xsl:template>
574
<!-- Text and corpus headers: a paragraph becomes a no-namespace <p> whose
     content is produced in 'header-text' mode. -->
<xsl:template mode="text corpus" match="tei:p">
  <p>
    <xsl:apply-templates mode="header-text"/>
  </p>
</xsl:template>
580
581
582 <!-- OPTIMIZATION has to take modes into account -->
bansp5e2d1c02022-03-10 04:51:40 +0100583 <!-- ************************** CORPUS header ******************* -->
<!-- Writes the corpus-level header.xml to $target: an <idsHeader> filled by the
     'corpus' mode templates applied to the children of tei:teiCorpus/tei:teiHeader.
     Parameters:
       text.xml  a source text document that contains the corpus header
       target    URI of the result document -->
<xsl:template name="create_corpus_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- doctype-public="{$publicDoctypeI5}" and doctype-system="{$systemDoctypeI5}"
       were tried on this instruction and found to be of no use. -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes">
    <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="corpus" select="$text.xml/tei:teiCorpus/tei:teiHeader/tei:*"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
601
<!-- Corpus header: rebuilds fileDesc without namespace and processes its children. -->
<xsl:template mode="corpus" match="tei:fileDesc">
  <fileDesc>
    <xsl:apply-templates mode="corpus"/>
  </fileDesc>
</xsl:template>
bansp9103aab2022-03-19 05:10:21 +0100607
bansp5e2d1c02022-03-10 04:51:40 +0100608
<!-- Corpus header: a TEI title becomes <c.title>; its attributes go through the
     'corpus' mode rules, its content through 'header-text'. -->
<xsl:template mode="corpus" match="tei:title">
  <c.title>
    <xsl:apply-templates select="@*" mode="corpus"/>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </c.title>
</xsl:template>
615
<!-- Corpus header: titleStmt opens with the corpus sigle ($corpusID), followed
     by the converted children. -->
<xsl:template mode="corpus" match="tei:titleStmt">
  <titleStmt>
    <korpusSigle>{$corpusID}</korpusSigle>
    <xsl:apply-templates mode="corpus"/>
  </titleStmt>
</xsl:template>
624
<!-- Corpus header: rebuilds publicationStmt without namespace and processes its children. -->
<xsl:template mode="corpus" match="tei:publicationStmt">
  <publicationStmt>
    <xsl:apply-templates mode="corpus"/>
  </publicationStmt>
</xsl:template>
630
<!-- Corpus header: rebuilds availability without namespace; only its attributes
     and child elements are processed. -->
<xsl:template mode="corpus" match="tei:availability">
  <availability>
    <xsl:apply-templates select="@* | *" mode="corpus"/>
  </availability>
</xsl:template>
636
<!-- Corpus header: rebuilds encodingDesc without namespace and processes its children. -->
<xsl:template mode="corpus" match="tei:encodingDesc">
  <encodingDesc>
    <xsl:apply-templates mode="corpus"/>
  </encodingDesc>
</xsl:template>
642
<!-- Corpus header: classification containers (classDecl, taxonomy, category, and
     bibl directly inside taxonomy) are rebuilt under their own local name, without
     namespace; only attributes and child elements are processed. -->
<xsl:template mode="corpus"
  match="tei:classDecl | tei:taxonomy | tei:category | tei:taxonomy/tei:bibl">
  <xsl:element name="{local-name(.)}">
    <xsl:apply-templates select="@* | *" mode="corpus"/>
  </xsl:element>
</xsl:template>
648
<!-- Corpus header: a title inside bibl, an edition and a desc are rebuilt under
     their own local name, without namespace; attributes go through the 'corpus'
     mode rules, content through 'header-text'. -->
<xsl:template mode="corpus" match="tei:bibl/tei:title | tei:edition | tei:desc">
  <xsl:element name="{local-name(.)}">
    <xsl:apply-templates select="@*" mode="corpus"/>
    <xsl:apply-templates select="node()" mode="header-text"/>
  </xsl:element>
</xsl:template>
655<!--
656 <xsl:template match="tei:textClass" mode="corpus">
657 <xsl:element name="{local-name()}">
658 <xsl:apply-templates mode="corpus" select="@* | *"/>
659 </xsl:element>
660 </xsl:template>
661
662 <xsl:template match="tei:catRef" mode="corpus">
663 <xsl:element name="{local-name()}">
664 <xsl:apply-templates mode="corpus" select="@* | *"/>
665 </xsl:element>
666 </xsl:template>
667-->
bansp5e2d1c02022-03-10 04:51:40 +0100668
669
670
671 <!-- this template can be called by the XSPEC test; TODO: find a way to call the main() template directly -->
672 <!-- I have not fully handled the param transmission, which would have to be kludged in just for the sake of XSPec,
673 because I'm disabling this for now, due to XSpec design issues; relevant links, a.o.:
674
675 https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
676 https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html
677
678 In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in their stylesheets. But I don't
679 want to be strangled by kludges at the beginning of work, I've already lost quite a bit of time on this investigation,
680 I will therefore "just code" and then can think of externalizing bits of templates if we want to play with tests. For now,
    I don't want to have to handle context items in a special way inside variables, etc., because I'm not sure it's worth it.
682
683 -->
684 <!--<xsl:template name="test_full">
685 <xsl:param name="corpusID"/>
686 <xsl:param name="docID"/>
687 <xsl:param name="textID"/>
688 <xsl:call-template name="xsl:initial-template"/>
689 </xsl:template>-->
690
Akron9a8ee3e2022-01-31 13:51:49 +0100691</xsl:stylesheet>