blob: 46b4bb625474e25f1be556bb9ba040c646f2478f [file] [log] [blame]
Akron9a8ee3e2022-01-31 13:51:49 +01001<?xml version="1.0" encoding="UTF-8"?>
2<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
bansp5e2d1c02022-03-10 04:51:40 +01003 xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
4 xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
5 xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f map nkjp tei"
6 version="3.0" expand-text="yes">
Akron9a8ee3e2022-01-31 13:51:49 +01007
<!-- Identifiers of the text being converted. -->
<xsl:param name="corpusID" as="xs:string"/>
<xsl:param name="docID" as="xs:string"/>
<xsl:param name="textID" as="xs:string"/>

<!-- Directory from which the initial template loads text.xml and the ann_*.xml files.
     The default is a fallback that also shows what kind of value is passed in. -->
<xsl:param name="sourceDir" as="xs:string" select="'test/resources/nkjp2korap_sample1/KOT/'"/>

<!-- Output directories: the corpus directory, and the text directory nested below it
     as corpusID/docID/textID/. -->
<xsl:variable name="targetCorpusDir" as="xs:string"
  select="concat('test/output/', $corpusID, '/')"/>
<xsl:variable name="targetTextDir" as="xs:string"
  select="concat($targetCorpusDir, $docID, '/', $textID, '/')"/>

<!-- System and public identifiers of the I5 doctype (static variables). -->
<xsl:variable name="systemDoctypeI5" static="true" as="xs:string"
  select="'http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd'"/>
<xsl:variable name="publicDoctypeI5" static="true" as="xs:string"
  select="'-//IDS//DTD IDS-XCES 1.0//EN'"/>

<!-- Namespace of the KorAP-XML elements that the templates create. -->
<xsl:variable name="KorAP_namespace" static="true" as="xs:string"
  select="'http://ids-mannheim.de/ns/KorAP'"/>

<!-- Value written to layer/@version (the variable name is only a bit funny). -->
<xsl:variable name="KorAP-XML_version" static="true" as="xs:string" select="'KorAP-0.4'"/>

<!-- corpusID_docID.textID: the value written as @docid in the data and annotation layers. -->
<xsl:variable name="compoundID" as="xs:string"
  select="concat($corpusID, '_', $docID, '.', $textID)"/>

<!-- In both header modes, nodes without a matching template are dropped together with their subtree. -->
<xsl:mode name="corpus" on-no-match="deep-skip"/>
<xsl:mode name="text" on-no-match="deep-skip"/>
38
bansp5f841732022-03-16 06:27:31 +010039 <!-- <xsl:variable name="text_depth" as="xs:integer" select="xs:integer('2')" static="true"/>
40 <!-\- this magic number indicates the depth of the <TEI> element inside teiCorpus/TEI -\->
41-->
<!--
  Returns the nesting level of $node: the number of elements on its
  ancestor-or-self axis whose local name is neither 'TEI' nor 'teiCorpus'.
-->
<xsl:function name="f:compute_nesting" as="xs:integer">
  <xsl:param name="node" as="node()"/>
  <xsl:sequence
    select="count($node/ancestor-or-self::*[not(local-name() = ('TEI', 'teiCorpus'))])"/>
</xsl:function>
50
<!--
  Returns the end offset (start offset + length) of the span covered by $node.

  The offsets are read from a @corresp value whose bracketed part has the form
  (id,offset,length). Which @corresp is used depends on $node:
    * tei:text / tei:body : last tei:seg of the last tei:s of the last tei:p below $node
    * tei:p               : last tei:seg of the last tei:s below $node
    * tei:s               : last tei:seg below $node
    * anything else       : $node's own @corresp

  Despite its name, the function returns offset + length, not the length alone.
  A dynamic error (f:bad-corresp) is raised when no usable @corresp is found.
-->
<xsl:function name="f:calc_content_length" as="xs:integer">
  <xsl:param name="node" as="node()"/>

  <!-- the one attribute that carries the offsets of the last segment under $node -->
  <xsl:variable name="last_corresp" as="attribute(corresp)?"
    select="
      if ($node/self::tei:text or $node/self::tei:body) then
        $node/descendant::tei:p[last()]/descendant::tei:s[last()]/descendant::tei:seg[last()]/attribute::corresp
      else if ($node/self::tei:p) then
        $node/descendant::tei:s[last()]/descendant::tei:seg[last()]/attribute::corresp
      else if ($node/self::tei:s) then
        $node/descendant::tei:seg[last()]/attribute::corresp
      else
        $node/attribute::corresp"/>

  <!-- 'offset,length': the text between the first comma and the first closing bracket -->
  <xsl:variable name="numbers" as="xs:string"
    select="substring-after(substring-before($last_corresp, ')'), ',')"/>
  <xsl:variable name="offset" as="xs:string" select="substring-before($numbers, ',')"/>
  <xsl:variable name="length" as="xs:string" select="substring-after($numbers, ',')"/>

  <xsl:sequence
    select="
      if ($offset castable as xs:integer and $length castable as xs:integer) then
        xs:integer($offset) + xs:integer($length)
      else
        error(xs:QName('f:bad-corresp'),
          concat('f:calc_content_length: no usable @corresp for node &quot;', name($node),
                 '&quot; (found: &quot;', string($last_corresp), '&quot;)'))"/>
</xsl:function>
bansp5e2d1c02022-03-10 04:51:40 +010081
<!--
  Entry point. Loads the three source documents from $sourceDir and runs the
  five writers in turn: data.xml, struct/structure.xml, nkjp/morpho.xml and
  header.xml below $targetTextDir, then header.xml below $targetCorpusDir.
-->
<xsl:template name="xsl:initial-template">
  <xsl:variable name="rawTextDoc" as="document-node()"
    select="doc(concat($sourceDir, 'text.xml'))"/>
  <xsl:variable name="morphoDoc" as="document-node()"
    select="doc(concat($sourceDir, 'ann_morphosyntax.xml'))"/>
  <xsl:variable name="segmentationDoc" as="document-node()"
    select="doc(concat($sourceDir, 'ann_segmentation.xml'))"/>

  <!-- primary data -->
  <xsl:call-template name="create_data">
    <xsl:with-param name="text.xml" as="document-node()" select="$rawTextDoc"/>
    <xsl:with-param name="target" as="xs:string" select="concat($targetTextDir, 'data.xml')"/>
  </xsl:call-template>

  <!-- structural layer -->
  <xsl:call-template name="create_struct">
    <xsl:with-param name="text.xml" as="document-node()" select="$rawTextDoc"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$segmentationDoc"/>
    <xsl:with-param name="target" as="xs:string"
      select="concat($targetTextDir, 'struct/structure.xml')"/>
  </xsl:call-template>

  <!-- morphosyntactic layer -->
  <xsl:call-template name="create_morpho">
    <xsl:with-param name="text.xml" as="document-node()" select="$rawTextDoc"/>
    <xsl:with-param name="ann_morphosyntax.xml" as="document-node()" select="$morphoDoc"/>
    <xsl:with-param name="target" as="xs:string"
      select="concat($targetTextDir, 'nkjp/morpho.xml')"/>
  </xsl:call-template>

  <!-- text-level header -->
  <xsl:call-template name="create_text_header">
    <xsl:with-param name="text.xml" as="document-node()" select="$rawTextDoc"/>
    <xsl:with-param name="target" as="xs:string" select="concat($targetTextDir, 'header.xml')"/>
  </xsl:call-template>

  <!-- corpus-level header -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param name="text.xml" as="document-node()" select="$rawTextDoc"/>
    <xsl:with-param name="target" as="xs:string" select="concat($targetCorpusDir, 'header.xml')"/>
  </xsl:call-template>
</xsl:template>
119
120 <!-- ************************** data.xml ******************* -->
121
<!--
  Writes the primary-data file (data.xml) to $target.

  Parameters:
    text.xml : the parsed text.xml source document
    target   : href of the result document

  Output: an xml-model processing instruction followed by a KorAP raw_text element
  (with @docid = $compoundID) that holds a metadata pointer and the text itself.
-->
<xsl:template name="create_data">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- No xpath-default-namespace here: that attribute is not an attribute value template,
       so a value such as "{$KorAP_namespace}" would be taken literally as the namespace name.
       None of the expressions below uses an unprefixed element name, so nothing depends on it. -->
  <xsl:result-document encoding="UTF-8" method="xml" indent="yes" href="{$target}">

    <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>

      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <!-- NOTE(review): when text.xml holds several ab elements, xsl:value-of joins their
             string values with its default single-space separator; confirm that this is the
             text the offsets in the annotation layers are meant to address. -->
        <xsl:value-of select="$text.xml//*[local-name() = 'ab']"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
142
bansp5f841732022-03-16 06:27:31 +0100143 <!-- ************************** struct ******************* -->
144
<!--
  Writes the structural layer to $target: a KorAP layer element (with @docid and
  @version) wrapping a spanList, which is filled by applying the "struct" mode
  to every tei:text found in the segmentation document.

  Parameters:
    text.xml             : the parsed text.xml (accepted but not read here)
    ann_segmentation.xml : the parsed ann_segmentation.xml
    target               : href of the result document
-->
<xsl:template name="create_struct">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid">{$compoundID}</xsl:attribute>
      <xsl:attribute name="version">{$KorAP-XML_version}</xsl:attribute>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:apply-templates mode="struct"
          select="$ann_segmentation.xml/descendant::tei:text"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
163
<!--
  Emits one KorAP span for the current TEI element, then processes its children
  in the same mode.

  Parameters (all optional):
    ini   : start offset of the parent span; handed down, currently not read here
    fin   : end offset of the parent span; handed down, currently not read here
    index : numeric id of the parent span, used as the base for this span's id

  Span attributes:
    id   : 's' followed by the computed index
    from : 0 for tei:text / tei:body, otherwise the offset found in the @corresp
           of the first tei:seg of the element (or of the tei:seg itself)
    to   : f:calc_content_length(.)
    l    : f:compute_nesting(.)

  Supported elements are tei:text, tei:body, tei:p, tei:s and tei:seg. For any
  other TEI element no start offset can be derived, and the transformation stops
  with the dynamic error f:no-start-offset instead of an anonymous type error.
-->
<xsl:template match="tei:*" mode="struct">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>

  <!-- The id computation is kept in separate variables so that each constituent
       value can be inspected individually, should anything go wrong. -->
  <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)" as="xs:integer"/>
  <!-- for tei:s and tei:p, every element inside the preceding same-name siblings also
       consumes an id; sibling subtrees are disjoint, so one count over all of them
       equals the sum of the per-sibling counts (and is 0 when there are no siblings) -->
  <xsl:variable name="outside-preceding-count" as="xs:integer"
    select="
      if (self::tei:s or self::tei:p) then
        count($preceding/descendant::*)
      else
        0"/>
  <xsl:variable name="my_index" as="xs:integer"
    select="$index + 1 + $preceding-count + $outside-preceding-count"/>

  <!-- the segment whose @corresp carries this element's start offset -->
  <xsl:variable name="first_seg" as="element(tei:seg)?"
    select="
      if (self::tei:p) then
        descendant::tei:s[1]/descendant::tei:seg[1]
      else if (self::tei:s) then
        descendant::tei:seg[1]
      else
        self::tei:seg"/>
  <!-- @corresp ends in (id,offset,length): take the text between the first two commas -->
  <xsl:variable name="start_text" as="xs:string"
    select="substring-before(substring-after(substring-before($first_seg/@corresp, ')'), ','), ',')"/>

  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="$start_text castable as xs:integer">
        <xsl:sequence select="xs:integer($start_text)"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:sequence
          select="
            error(xs:QName('f:no-start-offset'),
              concat('struct: cannot derive a start offset for element &quot;', name(),
                     '&quot; (no tei:seg with a usable @corresp)'))"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <!-- the element name -->
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="local-name()"/>
      </xsl:element>
      <!-- the element's attributes, one f per attribute, only when there are any -->
      <xsl:if test="exists(@*)">
        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
          <xsl:attribute name="name" select="'attr'"/>
          <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="type" select="'attr'"/>
            <xsl:for-each select="@*">
              <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                <xsl:attribute name="name" select="local-name(.)"/>
                <xsl:value-of select="."/>
              </xsl:element>
            </xsl:for-each>
          </xsl:element>
        </xsl:element>
      </xsl:if>
    </xsl:element>
  </xsl:element>

  <xsl:apply-templates mode="struct">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
  </xsl:apply-templates>
</xsl:template>

<!-- Text nodes of the source are not part of the span list; without this rule the
     built-in template would copy them (including indentation whitespace) to the output. -->
<xsl:template match="text()" mode="struct"/>
255
256 <!-- ************************** morpho ******************* -->
257
<!--
  Writes the morphosyntactic layer to $target. At present the result document
  contains only the xml-model processing instruction; neither source document
  passed in is read yet.

  Parameters:
    text.xml             : the parsed text.xml
    ann_morphosyntax.xml : the parsed ann_morphosyntax.xml
    target               : href of the result document
-->
<xsl:template name="create_morpho">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model"
      select="'href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;'"/>
  </xsl:result-document>
</xsl:template>
269
270
271 <!-- ************************** TEXT header ******************* -->
272
<!--
  Writes the local (text-level) header.xml to $target: an idsHeader element
  whose content is produced by processing the source fileDesc in mode "text".

  Parameters:
    text.xml : the parsed text.xml, whose teiHeader supplies the fileDesc
    target   : href of the result document
-->
<xsl:template name="create_text_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:variable name="sourceFileDesc"
      select="$text.xml/descendant::tei:TEI/tei:teiHeader/tei:fileDesc"/>

    <!-- NOTE(review): the header is typed "document" although the text-mode templates
         fill it with text-level elements (textSigle, t.title); confirm the intended
         @type against the I5 DTD. -->
    <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates select="$sourceFileDesc" mode="text"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
286
<!-- Text header: reproduces fileDesc as an element in no namespace and processes
     its children in the same mode. -->
<xsl:template match="tei:fileDesc" mode="text">
  <fileDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </fileDesc>
</xsl:template>
292
<!-- Text header: a TEI title becomes t.title. The children are processed in the
     default mode (not in "text"), which is what lets the title's text through. -->
<xsl:template match="tei:title" mode="text">
  <t.title>
    <xsl:apply-templates select="node()" mode="#default"/>
  </t.title>
</xsl:template>
298
<!--
  Text header: reproduces titleStmt, putting the text sigle first and then
  processing the source children in mode "text".

  The sigle is built from all three identifiers, as corpusID/docID.textID, so that
  it names the same text as $compoundID (corpusID_docID.textID) and stays unique
  when two documents of one corpus contain texts with the same textID.
-->
<xsl:template match="tei:titleStmt" mode="text">
  <titleStmt>
    <textSigle>
      <xsl:value-of select="$corpusID || '/' || $docID || '.' || $textID"/>
    </textSigle>
    <xsl:apply-templates mode="text"/>
  </titleStmt>
</xsl:template>
307
308
bansp5e2d1c02022-03-10 04:51:40 +0100309 <!-- ************************** CORPUS header ******************* -->
<!--
  Writes the corpus-level header.xml to $target: an idsHeader of type "corpus"
  whose content is produced by processing the source fileDesc in mode "corpus".

  Parameters:
    text.xml : the parsed text.xml, whose teiHeader supplies the fileDesc
    target   : href of the result document
-->
<xsl:template name="create_corpus_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- doctype-public (from $publicDoctypeI5) and doctype-system (from $systemDoctypeI5)
       were tried on this result document and left out: they proved, sadly, useless. -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes">
    <xsl:variable name="sourceFileDesc"
      select="$text.xml/descendant::tei:TEI/tei:teiHeader/tei:fileDesc"/>

    <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates select="$sourceFileDesc" mode="corpus"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
327
<!-- Corpus header: reproduces fileDesc as an element in no namespace and processes
     its children in the same mode. -->
<xsl:template match="tei:fileDesc" mode="corpus">
  <fileDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </fileDesc>
</xsl:template>
333
<!-- Corpus header: a TEI title becomes c.title. The children are processed in the
     default mode (not in "corpus"), which is what lets the title's text through. -->
<xsl:template match="tei:title" mode="corpus">
  <c.title>
    <xsl:apply-templates select="child::node()"/>
  </c.title>
</xsl:template>
339
<!-- Corpus header: reproduces titleStmt, putting the corpus sigle ($corpusID) first
     and then processing the source children in mode "corpus". -->
<xsl:template match="tei:titleStmt" mode="corpus">
  <titleStmt>
    <korpusSigle>{$corpusID}</korpusSigle>
    <xsl:apply-templates select="node()" mode="#current"/>
  </titleStmt>
</xsl:template>
348
349
bansp5e2d1c02022-03-10 04:51:40 +0100350
351
352
<!-- this template can be called by the XSpec test; TODO: find a way to call the main() template directly -->
<!-- I have not fully handled the param transmission, which would have to be kludged in just for the sake of XSpec.
     I'm disabling this for now, due to XSpec design issues; relevant links, among others:

     https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
     https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html

     In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in their stylesheets. But I don't
     want to be strangled by kludges at the beginning of work, and I've already lost quite a bit of time on this investigation.
     I will therefore "just code" and can then think of externalizing bits of templates if we want to play with tests. For now,
     I don't want to have to handle context items in a special way inside variables, etc., because I'm not sure it's worth it.

-->
366 <!--<xsl:template name="test_full">
367 <xsl:param name="corpusID"/>
368 <xsl:param name="docID"/>
369 <xsl:param name="textID"/>
370 <xsl:call-template name="xsl:initial-template"/>
371 </xsl:template>-->
372
Akron9a8ee3e2022-01-31 13:51:49 +0100373</xsl:stylesheet>