blob: d10ea89f17ae3ed26140cfc25e55059d5d4c3a6b [file] [log] [blame]
Akron9a8ee3e2022-01-31 13:51:49 +01001<?xml version="1.0" encoding="UTF-8"?>
2<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
bansp5e2d1c02022-03-10 04:51:40 +01003 xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
4 xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
Piotr Banskif8af3a92022-05-23 03:20:10 +02005 xmlns:fn="http://www.w3.org/2005/xpath-functions"
Piotr Banski6a4a2522022-05-24 01:16:47 +02006 xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f fn map nkjp tei"
bansp5e2d1c02022-03-10 04:51:40 +01007 version="3.0" expand-text="yes">
Akron9a8ee3e2022-01-31 13:51:49 +01008
banspe726b4a2022-03-28 05:47:45 +02009
10<!-- PARAMETERS -->
bansp5e2d1c02022-03-10 04:51:40 +010011
<!-- Directory containing the NKJP files, laid out as a collection of text-level
     directories (that layout is how both $corpusID and $docID are known). -->
<xsl:param name="sourceDir" as="xs:string" select="'test/resources/nkjp2korap_sample2'"/>

<!-- Directory under which the corpus/document/text/annotations hierarchy is created. -->
<xsl:param name="targetDir" as="xs:string" select="'test/output'"/>

<!-- Comma-separated list of document IDs to be skipped from processing,
     for example: HellerPodgladanie,KOT
     Nothing beyond exact string identity is supported (this is just for testing). -->
<xsl:param name="skip_docID" as="xs:string"
  select="'HellerPodgladanie,IsakowiczZaleskiMoje,KolakowskiOco,MysliwskiKamien,WilkWilczy,ZycieWarszawy_Zycie'"/>

<!-- Debugging aid for structure.xml production: when true, the struct-mode template
     writes the normalized text of every seg as a comment inside its span. -->
<xsl:param name="SHOW_ORTH_IN_STRUCT" as="xs:boolean" select="true()"/>
29
bansp8f6700b2022-03-27 05:27:09 +020030
bansp9dc10002022-05-17 22:33:34 +020031<!-- VARIABLES (= constants...) -->
banspe726b4a2022-03-28 05:47:45 +020032
<!-- Corpus and document identifiers; both are fixed to 'NKJP'. -->
<xsl:variable name="corpusID" static="yes" as="xs:string" select="'NKJP'"/>
<xsl:variable name="docID" static="yes" as="xs:string" select="'NKJP'"/>

<!-- Output directory of the corpus, with a trailing slash. -->
<xsl:variable name="targetCorpusDir_slashed" as="xs:string"
  select="concat($targetDir, '/', $corpusID, '/')"/>

<!-- System and public identifiers of the I5 DTD. -->
<xsl:variable name="systemDoctypeI5" static="yes" as="xs:string"
  select="'http://corpora.ids-mannheim.de/I5/DTD/i5.dtd'"/>

<xsl:variable name="publicDoctypeI5" static="yes" as="xs:string"
  select="'-//IDS//DTD I5 1.0//EN'"/>

<!-- Namespace of the KorAP-XML result elements. -->
<xsl:variable name="KorAP_namespace" static="yes" as="xs:string"
  select="'http://ids-mannheim.de/ns/KorAP'"/>

<!-- this is only a bit funny -->
<xsl:variable name="KorAP-XML_version" static="yes" as="xs:string" select="'KorAP-0.4'"/>

<!-- Query parameters appended to the collection URI;
     see https://www.saxonica.com/documentation11/index.html#!sourcedocs/collections/collection-directories -->
<xsl:variable name="collection_params" static="yes" as="xs:string"
  select="'recurse=yes;validation=strip;select=text.xml;content-type=application/xml;on-error=warning;xinclude=yes'"/>

<!-- All text.xml documents found below $sourceDir. -->
<xsl:variable name="collection_of_text" as="document-node()+"
  select="collection(concat($sourceDir, '?', $collection_params))"/>
banspd1bf1db2022-04-04 02:16:24 +020056
banspe726b4a2022-03-28 05:47:45 +020057<!-- MODES -->
bansp5e2d1c02022-03-10 04:51:40 +010058
<!-- 'corpus' and 'text': nodes without a matching template are skipped together
     with their descendants. -->
<xsl:mode on-no-match="deep-skip" name="corpus"/>
<xsl:mode on-no-match="deep-skip" name="text"/>
<!-- 'header-text': nodes without a matching template contribute only their text. -->
<xsl:mode on-no-match="text-only-copy" name="header-text"/>
<!-- Unnamed mode: every accumulator declared in the stylesheet is applicable. -->
<xsl:mode use-accumulators="#all"/>
Piotr Banski09096ee2022-05-25 13:41:03 +020063
<!-- Running count of TEI elements located at or below tei:text; the counter is
     incremented on entering each such element (start phase). -->
<xsl:accumulator name="element-index" initial-value="0" as="xs:integer">
  <xsl:accumulator-rule phase="start"
    match="tei:*[ancestor-or-self::tei:text]"
    select="$value + 1"/>
</xsl:accumulator>
Piotr Banskifdc858a2022-05-25 02:40:32 +020067
<!-- Character-offset bookkeeping.
     The accumulator value is a growing sequence of single-entry maps, each of the form
     xml:id -> (first integer, second integer). Every rule reads the second integer of the
     most recently appended map ("previous end") and appends one new map:
       * p / s, start phase: (previous end, base for the element)
       * w inside a non-rejected seg, end phase: keyed by the parent seg's xml:id,
         (base, base + string length of the w)
       * p / s, end phase: (base recorded at the element's start, previous end)
       * body / text, end phase: (0, previous end)
     The initial 'null' entry only provides a previous end of 0 for the first rule that fires. -->
<xsl:accumulator name="elem-offset-seq" initial-value="(map{'null':(0,0)})" as="map(xs:string, item()+)+">

  <!-- paragraph start: the base is advanced by one unless nothing has been counted yet -->
  <xsl:accumulator-rule phase="start" match="tei:body/tei:p">
    <xsl:variable name="latest" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="prev_end" as="xs:integer"
      select="map:get($latest, map:keys($latest)[1])[2]"/>
    <xsl:variable name="base" as="xs:integer"
      select="if ($prev_end eq 0) then $prev_end else $prev_end + 1"/>

    <xsl:sequence select="$value, map { string(@xml:id): ($prev_end, $base) }"/>
  </xsl:accumulator-rule>

  <!-- sentence start: the base is advanced by one when f:is_preceded_by_ws() says so,
       unless nothing has been counted yet -->
  <xsl:accumulator-rule phase="start" match="tei:s">
    <xsl:variable name="latest" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="prev_end" as="xs:integer"
      select="map:get($latest, map:keys($latest)[1])[2]"/>
    <xsl:variable name="base" as="xs:integer"
      select="if ($prev_end eq 0)
              then $prev_end
              else $prev_end + xs:integer(f:is_preceded_by_ws(., true()))"/>

    <xsl:sequence select="$value, map { string(@xml:id): ($prev_end, $base) }"/>
  </xsl:accumulator-rule>

  <!-- word end: the entry is stored under the xml:id of the enclosing seg -->
  <xsl:accumulator-rule phase="end" match="tei:w[parent::tei:seg[count(@nkjp:rejected) eq 0]]">
    <xsl:variable name="latest" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="prev_end" as="xs:integer"
      select="map:get($latest, map:keys($latest)[1])[2]"/>
    <xsl:variable name="base" as="xs:integer"
      select="$prev_end + xs:integer(f:is_preceded_by_ws(parent::tei:seg, true()))"/>

    <xsl:sequence
      select="$value, map { string(parent::tei:seg/@xml:id): ($base, $base + string-length()) }"/>
  </xsl:accumulator-rule>

  <!-- sentence end / paragraph end: pair the base stored at the element's start
       (second integer of the first entry found under this xml:id) with the previous end -->
  <xsl:accumulator-rule phase="end" match="tei:s | tei:body/tei:p">
    <xsl:variable name="latest" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="prev_end" as="xs:integer"
      select="map:get($latest, map:keys($latest)[1])[2]"/>
    <xsl:variable name="start_base" as="xs:integer"
      select="map:find($value, string(@xml:id))(1)[2]"/>

    <xsl:sequence select="$value, map { string(@xml:id): ($start_base, $prev_end) }"/>
  </xsl:accumulator-rule>

  <!-- body end / text end: the span runs from 0 to the previous end -->
  <xsl:accumulator-rule phase="end" match="tei:body | tei:text">
    <xsl:variable name="latest" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="prev_end" as="xs:integer"
      select="map:get($latest, map:keys($latest)[1])[2]"/>

    <xsl:sequence select="$value, map { string(@xml:id): (0, $prev_end) }"/>
  </xsl:accumulator-rule>
</xsl:accumulator>
bansp5e2d1c02022-03-10 04:51:40 +0100176
banspe726b4a2022-03-28 05:47:45 +0200177
178 <!-- FUNCTIONS -->
179
<!-- Nesting level of $node: the number of elements on its ancestor-or-self axis,
     not counting elements whose local name is 'TEI' or 'teiCorpus'. -->
<xsl:function name="f:compute_nesting" as="xs:integer">
  <xsl:param name="node" as="element()"/>
  <xsl:sequence
    select="count($node/ancestor-or-self::*[not(local-name() = ('TEI', 'teiCorpus'))])"/>
</xsl:function>
187
<!-- Decides whether a whitespace character is to be assumed in front of $node.

     $node             a seg, s or p element; any other element terminates the transformation
     $suppress_initial when true, the first seg of a sentence and the first s of a paragraph
                       always yield false (their separator is accounted for one level up)

     seg: false if it carries @nkjp:nps; true if a non-rejected seg precedes it within the
          same tei:s; otherwise true only when $suppress_initial is false and the seg is not
          the first seg of the first s of the first p.
     s:   true if it has a preceding sibling s; otherwise true only when $suppress_initial is
          false and its nearest ancestor p has a preceding sibling p.
     p:   true if it has a preceding sibling p. -->
<xsl:function name="f:is_preceded_by_ws" as="xs:boolean">
  <xsl:param name="node" as="element()"/>
  <xsl:param name="suppress_initial" as="xs:boolean"/>
  <xsl:choose>
    <xsl:when test="local-name($node) eq 'seg'">
      <xsl:choose>
        <xsl:when test="$node/@nkjp:nps">
          <xsl:sequence select="fn:false()"/>
        </xsl:when>
        <!-- Is there a preceding non-rejected seg under the same tei:s (modulo choice/paren)?
             The shared sentence is established by node identity (intersect). A general
             comparison such as "descendant::tei:seg = $node" would compare string values and
             so would also succeed for an earlier sentence that merely contains a token with
             the same text as $node. -->
        <!-- THIS CHECK should be streamlined for efficiency, maybe try comparing positions in the descendant axis -->
        <xsl:when test="$node/preceding::tei:seg[count(@nkjp:rejected) eq 0][ancestor::tei:s intersect $node/ancestor::tei:s]">
          <xsl:sequence select="fn:true()"/>
        </xsl:when>
        <xsl:otherwise>
          <xsl:sequence
            select="not($suppress_initial) and not($node[count(preceding-sibling::tei:seg) eq 0]/ancestor::tei:s[count(preceding-sibling::tei:s) eq 0]/ancestor::tei:p[count(preceding-sibling::tei:p) eq 0])"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <xsl:when test="local-name($node) eq 's'">
      <xsl:choose>
        <xsl:when test="exists($node/preceding-sibling::tei:s)">
          <xsl:sequence select="fn:true()"/>
        </xsl:when>
        <xsl:otherwise>
          <xsl:sequence
            select="not($suppress_initial) and exists($node/ancestor::tei:p[1]/preceding-sibling::tei:p)"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <xsl:when test="local-name($node) eq 'p'">
      <xsl:sequence select="exists($node/preceding-sibling::tei:p)"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:message terminate="yes" select="'Wrong argument passed to f:is_preceded_by_ws(): ' || local-name($node) || ' Only p, s, seg are allowed.'"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
banspd1bf1db2022-04-04 02:16:24 +0200223
banspb5992532022-03-29 15:55:44 +0200224
225<!-- UTILITY TEMPLATES -->
226
<!-- Empty template, active in every mode, that drops two kinds of nodes:
     * @default: this is to delete some auto-inserted attribute throughout;
     * tei:w:    NKJP-SGJP has apparently resigned from standoff representations by adding
                 <w> everywhere; we reach for them, but from the level of <seg>, so we
                 don't need to process <w> separately. -->
<xsl:template mode="#all" match="@default | tei:w"/>
bansp8f6700b2022-03-27 05:27:09 +0200233
<!-- tei:choice produces no span of its own: processing falls through to its descendant
     segs (skipping a potential <paren> in between), and segs carrying @nkjp:rejected
     are filtered out. -->
<xsl:template mode="struct" match="tei:choice">
  <xsl:apply-templates mode="struct" select="descendant::tei:seg[not(@nkjp:rejected)]"/>
</xsl:template>
banspb5992532022-03-29 15:55:44 +0200238
239 <!-- MAIN PROCESSING -->
240
241
<!-- Entry point: writes the corpus header once, then converts every text.xml of the
     collection whose text ID is not listed in $skip_docID. -->
<xsl:template name="xsl:initial-template">
  <xsl:variable name="skip_list" as="xs:string*" select="tokenize($skip_docID, ',')"/>

  <!-- The corpus header is created only once. Any NKJP corpus document will do for that
       purpose, because all that is needed is the main corpus header, which can (should)
       be reachable from each of them; the first document of the collection is used. -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param name="text.xml" as="document-node()" select="$collection_of_text[1]"/>
    <xsl:with-param name="target" as="xs:string" select="$targetCorpusDir_slashed || 'header.xml'"/>
  </xsl:call-template>

  <xsl:for-each select="$collection_of_text">
    <!-- URI of the directory holding this text.xml; its last path step is the text ID -->
    <xsl:variable name="text_dir" as="xs:string" select="replace(base-uri(),'/text\.xml','')"/>
    <xsl:variable name="text_id" as="xs:string" select="tokenize($text_dir,'/')[last()]"/>

    <!-- Utility step: texts named in $skip_docID are ignored
         (useful for debugging or a selective update). -->
    <xsl:if test="not($text_id = $skip_list)">
      <xsl:call-template name="process_single_sample">
        <xsl:with-param name="text.xml" as="document-node()" select="."/>
        <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
          select="doc($text_dir || '/ann_morphosyntax.xml')"/>
        <xsl:with-param name="ann_segmentation.xml" as="document-node()"
          select="doc($text_dir || '/ann_segmentation.xml')"/>
        <xsl:with-param name="my_textID" as="xs:string" select="$text_id"/>
      </xsl:call-template>
    </xsl:if>
  </xsl:for-each>
</xsl:template>
275
<!-- Converts one NKJP text: calls create_data (data.xml), create_struct
     (struct/structure.xml) and create_text_header (header.xml); the call that would
     produce nkjp/morpho.xml is currently commented out.

     text.xml              the text.xml document of the sample
     ann_morphosyntax.xml  its morphosyntactic annotation document
     ann_segmentation.xml  its segmentation annotation document
     my_textID             the text ID; an empty textID should never happen, but if it does,
                           the default '0-BAD_textID' signals it at the top of the output -->
<xsl:template name="process_single_sample">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="my_textID" as="xs:string" select="'0-BAD_textID'"/>

  <!-- output directory of this text -->
  <xsl:variable name="text_out_dir" as="xs:string"
    select="concat($targetCorpusDir_slashed, $docID, '/', $my_textID)"/>

  <!-- this is what occurs in the text and data layers as @docid -->
  <xsl:variable name="doc_ref" as="xs:string"
    select="concat($corpusID, '_', $docID, '.', $my_textID)"/>

  <xsl:call-template name="create_data">
    <!--<xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>-->
    <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$doc_ref"/>
    <xsl:with-param name="target" as="xs:string" select="concat($text_out_dir, '/data.xml')"/>
  </xsl:call-template>

  <xsl:call-template name="create_struct">
    <xsl:with-param name="compoundID" as="xs:string" select="$doc_ref"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
    <xsl:with-param name="target" as="xs:string" select="concat($text_out_dir, '/struct/structure.xml')"/>
  </xsl:call-template>

  <!-- <xsl:call-template name="create_morpho">
    <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
    <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
    <xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml"
      as="document-node()"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml"
      as="document-node()"/>
    <xsl:with-param name="target" select="$targetBaseDir || '/nkjp/morpho.xml'" as="xs:string"/>
  </xsl:call-template>
  -->

  <xsl:call-template name="create_text_header">
    <xsl:with-param name="text.xml" as="document-node()" select="$text.xml"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$doc_ref"/>
    <xsl:with-param name="target" as="xs:string" select="concat($text_out_dir, '/header.xml')"/>
  </xsl:call-template>
</xsl:template>
321
322 <!-- ************************** data.xml ******************* -->
323
<!-- Writes data.xml: a raw_text element (KorAP namespace) carrying @docid, a metadata
     pointer and the primary text.

     ann_segmentation.xml  segmentation document whose non-rejected segs supply the text
     compoundID            value written to raw_text/@docid
     target                URI of the result document -->
<xsl:template name="create_data">
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- xpath-default-namespace is not an attribute value template, so the namespace is
       spelled out literally here (as in the other result-document templates) instead of
       "{$KorAP_namespace}", which would be taken verbatim as the namespace URI. -->
  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">

    <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>

      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <!-- For every non-rejected seg: a single space when f:is_preceded_by_ws() reports
             one (initial positions are not suppressed here), otherwise an empty string,
             followed by the seg's w child(ren). -->
        <xsl:variable name="content" as="xs:string+">
          <xsl:for-each select="$ann_segmentation.xml/tei:teiCorpus/tei:TEI/tei:text/tei:body/tei:p/tei:s//tei:seg[count(@nkjp:rejected) eq 0]">
            <xsl:sequence select="
                if (f:is_preceded_by_ws(., false())) then
                  ' '
                else
                  '', ./tei:w"/>
          </xsl:for-each>
        </xsl:variable>
        <!-- the pieces are concatenated without any separator -->
        <xsl:value-of select="string-join($content)"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
354
bansp5f841732022-03-16 06:27:31 +0100355 <!-- ************************** struct ******************* -->
356
<!-- Writes structure.xml: a layer element (KorAP namespace) with @docid and @version,
     wrapping a spanList that is filled by applying the struct-mode templates to every
     tei:text of the segmentation document.

     compoundID            value written to layer/@docid
     ann_segmentation.xml  segmentation document to be traversed
     target                URI of the result document -->
<xsl:template name="create_struct">
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model"
      select="'href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;'"/>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:attribute name="version" select="$KorAP-XML_version"/>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:apply-templates mode="struct" select="$ann_segmentation.xml//tei:text"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
375
<!-- Generic struct-mode template: emits one KorAP span for the current TEI element and
     then processes its children in the same mode.
     span/@id    's' followed by the element-index accumulator value before the element
     span/@from, span/@to
                 the two integers stored for the element's xml:id in the elem-offset-seq
                 accumulator as it stands after the element
     span/@l     nesting level computed by f:compute_nesting()
     The span holds a TEI fs with the element name and, if the element has attributes,
     a nested fs listing them by local name. -->
<xsl:template match="tei:*" mode="struct">
  <xsl:variable name="elem_name" as="xs:string" select="local-name()"/>
  <xsl:variable name="span_no" as="xs:integer" select="fn:accumulator-before('element-index')"/>

  <xsl:variable name="span_bounds" as="xs:integer+">
    <!-- The leading 'null' entry is dropped; when an xml:id occurs more than once, the
         most recent entry wins.
         test later if the merger is really needed here, given how accus work -->
    <xsl:variable name="by_id" as="map(xs:string,xs:integer+)"
      select="map:merge(tail(fn:accumulator-after('elem-offset-seq')), map {'duplicates': 'use-last'})"/>
    <xsl:sequence select="$by_id(string(@xml:id))"/>
  </xsl:variable>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="concat('s', $span_no)"/>
    <xsl:attribute name="from" select="$span_bounds[1]"/>
    <xsl:attribute name="to" select="$span_bounds[2]"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <!-- debugging aid: show the text of a seg as a comment -->
    <xsl:if test="$SHOW_ORTH_IN_STRUCT and $elem_name eq 'seg'">
      <xsl:comment select="fn:normalize-space(.)"/>
    </xsl:if>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <!-- STRUCT vs. LEX for morpho -->
      <xsl:attribute name="type" select="'struct'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'name'"/>
        <xsl:value-of select="$elem_name"/>
      </xsl:element>
      <xsl:if test="exists(@*)">
        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
          <xsl:attribute name="name" select="'attr'"/>
          <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="type" select="'attr'"/>
            <!-- one f per attribute: local name in @name, value as content -->
            <xsl:for-each select="@*">
              <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                <xsl:attribute name="name" select="local-name(.)"/>
                <xsl:value-of select="."/>
              </xsl:element>
            </xsl:for-each>
          </xsl:element>
        </xsl:element>
      </xsl:if>
    </xsl:element>
  </xsl:element>

  <xsl:apply-templates mode="struct">
    <!--<xsl:with-param name="index" select="$my_index"/>-->
  </xsl:apply-templates>
</xsl:template>
421
422 <!-- ************************** morpho ******************* -->
423
<!-- Writes the morpho layer document: a layer element (KorAP namespace) with @docid and
     @version, wrapping a spanList that is filled by applying the morpho-mode templates to
     every tei:text of the segmentation document.

     text.xml              accepted but not referenced inside this template
     compoundID            value written to layer/@docid
     ann_segmentation.xml  segmentation document to be traversed
     ann_morphosyntax.xml  morphosyntax document, handed on to the morpho-mode templates
     target                URI of the result document -->
<xsl:template name="create_morpho">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model"
      select="'href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;'"/>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:attribute name="version" select="$KorAP-XML_version"/>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:apply-templates mode="morpho" select="$ann_segmentation.xml//tei:text">
          <xsl:with-param name="ann_morphosyntax.xml" as="document-node()" select="$ann_morphosyntax.xml"/>
        </xsl:apply-templates>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
446
<!-- Generic morpho-mode template: produces no output of its own; it computes a start and
     an end value for the current element and hands them, together with the morphosyntax
     document, to the templates applied to its children.

     ini, fin              optional integers (defaults 0 and 999999999); not referenced
                           inside this template
     ann_morphosyntax.xml  morphosyntax document, passed on unchanged

     NOTE(review): $my_name, $preceding, $preceding-count, $outside-preceding-count and
     $my_index are declared but only feed one another within this template. -->
<xsl:template match="tei:*" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>

  <xsl:variable name="my_name" as="xs:string" select="local-name()"/>
  <!-- preceding siblings bearing the same local name as the current element -->
  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)"/>
  <!-- for s and p with same-named preceding siblings: total number of element
       descendants of those siblings; 0 in every other case -->
  <xsl:variable name="outside-preceding-count" as="xs:integer"
    select="if ((self::tei:s or self::tei:p) and $preceding-count)
            then sum($preceding ! count(descendant::*))
            else 0"/>
  <xsl:variable name="my_index" as="xs:integer" select="fn:accumulator-before('element-index')"/>

  <!-- text and body start at 0; p and s take the integer found between the first and the
       second comma inside the parentheses of the @corresp of their first seg
       (for p: the first seg of its first s) -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p">
        <xsl:variable name="anchor" as="attribute(corresp)"
          select="descendant::tei:s[1]/descendant::tei:seg[1]/@corresp"/>
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before($anchor, ')'), ','), ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:s">
        <xsl:variable name="anchor" as="attribute(corresp)"
          select="descendant::tei:seg[1]/@corresp"/>
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before($anchor, ')'), ','), ','))"/>
      </xsl:when>
      <!--<xsl:when test="self::tei:seg">
        <xsl:variable name="numbers" select="substring-after(substring-before(@corresp,')'),',')"/>
        <xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
      </xsl:when>-->
    </xsl:choose>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:apply-templates mode="morpho">
    <xsl:with-param name="ini" as="xs:integer" select="$start"/>
    <xsl:with-param name="fin" as="xs:integer" select="$end"/>
    <xsl:with-param name="ann_morphosyntax.xml" as="document-node()" select="$ann_morphosyntax.xml"/>
  </xsl:apply-templates>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100510
<!-- Mode 'morpho': turns one segmentation-level tei:seg into a KorAP span carrying
     the disambiguated morphosyntactic reading (lemma, pos, optional msd, optional join).

     Parameters:
       ini, fin  - accepted for signature compatibility and forwarded to children; not read here
       index     - base counter used to build the span id (default 1)
       ann_morphosyntax.xml - document in which the matching morphosyntactic seg is looked up

     The lookup is deliberately spread over several variables so that each intermediate
     value can be inspected individually when something goes wrong. -->
<xsl:template match="tei:seg" mode="morpho">
    <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
    <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
    <xsl:param name="index" as="xs:integer" required="no" select="1"/>
    <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>

    <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
    <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
    <!-- the morphosyntactic seg whose @corresp points (after '#') at this segment's id -->
    <xsl:variable name="my_morph-seg" as="node()" select="$ann_morphosyntax.xml//tei:seg[substring-after(@corresp,'#') eq $my_id]"/>
    <xsl:variable name="my_disamb" select="$my_morph-seg//tei:fs/tei:f[@name eq 'disamb']" as="node()"/>
    <!-- id of the interpretation selected by the 'choice' feature of the disamb structure -->
    <xsl:variable name="my_choice-id" select="substring-after($my_disamb//tei:f[@name eq 'choice']/@fVal,'#')" as="xs:string"/>
    <!-- the 'lex' feature structure that contains the selected symbol -->
    <xsl:variable name="my_choice-lex" select="$my_morph-seg//tei:f[@name eq 'interps']/tei:fs[@type eq 'lex'][descendant::tei:symbol[@xml:id eq $my_choice-id]]" as="node()"/>
    <xsl:variable name="chosen-msd" as="xs:string" select="$my_choice-lex/descendant::tei:symbol[@xml:id eq $my_choice-id]/@value"/>
    <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
    <xsl:variable name="preceding-count" select="count($preceding)"/>
    <!--<xsl:variable name="outside-preceding-count" as="xs:integer">
      <xsl:choose>
        <xsl:when test="self::tei:s or self::tei:p">   <!-\- THIS NEEDS TO BE REVISITED AFTER THIS TEMPLATE HAS BECOME MORE SPECIFIC -\->
          <xsl:choose>
            <xsl:when test="$preceding-count">    commented out for now
              <xsl:sequence select="
                sum(for $p in $preceding
                return
                count($p/descendant::*))"/>
            </xsl:when>
            <xsl:otherwise>
              <xsl:sequence select="0"/>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:when>
        <xsl:otherwise>
          <xsl:sequence select="0"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:variable>-->
    <xsl:variable name="my_index" select="$index + 1 + $preceding-count" as="xs:integer"/>

    <!-- start offset: the first of the two comma-separated numbers that follow the
         first comma inside the parenthesised part of @corresp -->
    <xsl:variable name="start" as="xs:integer">
        <xsl:variable name="numbers" select="substring-after(substring-before(@corresp,')'),',')"/>
        <xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
    </xsl:variable>
    <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

    <xsl:element name="span" namespace="{$KorAP_namespace}">
        <xsl:attribute name="id" select="'s' || $my_index"/>
        <xsl:attribute name="from" select="$start"/>
        <xsl:attribute name="to" select="$end"/>
        <xsl:attribute name="l" select="f:compute_nesting(.)"/>
        <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="type" select="'lex'"/>
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                <xsl:attribute name="name" select="'lex'"/>
                <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
                    <!-- the orthographic form is emitted as a comment, as a debugging aid -->
                    <xsl:comment select="$my_morph-seg//tei:fs/tei:f[@name eq 'orth']/tei:string"/>

                    <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                        <xsl:attribute name="name" select="'lemma'"/>
                        <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'base']/tei:string"/>
                    </xsl:element>
                    <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                        <xsl:attribute name="name" select="'pos'"/>
                        <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'ctag']/tei:symbol/@value"/>
                    </xsl:element>
                    <!-- msd is only written when the selected symbol has a non-empty value -->
                    <xsl:if test="string-length($chosen-msd)">
                        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                            <xsl:attribute name="name" select="'msd'"/>
                            <xsl:value-of select="$chosen-msd"/>
                        </xsl:element>
                    </xsl:if>
                    <!-- presence of an 'nps' feature is rendered as join=left -->
                    <xsl:if test="$my_morph-seg//tei:fs/tei:f[@name eq 'nps']">
                        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                            <xsl:attribute name="name" select="'join'"/>
                            <xsl:value-of select="'left'"/>
                        </xsl:element>
                    </xsl:if>
                </xsl:element>
            </xsl:element>
        </xsl:element>
    </xsl:element>

    <!-- A stray comment terminator used to follow this instruction; being literal template
         text, it was written to the output after every span. It has been removed. -->
    <xsl:apply-templates mode="morpho">
        <xsl:with-param name="ini" select="$start" as="xs:integer"/>
        <xsl:with-param name="fin" select="$end" as="xs:integer"/>
        <xsl:with-param name="index" select="$my_index"/>
        <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
    </xsl:apply-templates>
</xsl:template>
banspe726b4a2022-03-28 05:47:45 +0200600
bansp5f841732022-03-16 06:27:31 +0100601 <!-- ************************** TEXT header ******************* -->
602
<!-- Writes the text-level header file to $target.
       text.xml   - source document whose tei:TEI/tei:teiHeader children are transformed in mode 'text'
       compoundID - passed on as a tunnel parameter to the 'text' mode templates
       target     - href of the result document -->
<xsl:template name="create_text_header">
    <xsl:param name="text.xml" as="document-node()"/>
    <xsl:param name="compoundID" as="xs:string"/>
    <xsl:param name="target" as="xs:string"/>

    <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
        xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
        <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
            <!-- every element child of the TEI header is handed to the 'text' mode -->
            <xsl:apply-templates mode="text" select="$text.xml//tei:TEI/tei:teiHeader/tei:*">
                <xsl:with-param name="compoundID" select="$compoundID" tunnel="yes" as="xs:string"/>
            </xsl:apply-templates>
        </idsHeader>
    </xsl:result-document>
</xsl:template>
619
<!-- text header: re-creates fileDesc as an unprefixed element and processes its children in mode 'text' -->
<xsl:template mode="text" match="tei:fileDesc">
    <fileDesc>
        <xsl:apply-templates select="node()" mode="text"/>
    </fileDesc>
</xsl:template>
625
<!-- text header: a TEI title becomes t.title.
     The content is dispatched in mode 'header-text', the same mode the other header
     templates in this stylesheet use for textual content (c.title, p, edition, desc),
     instead of dropping into the default mode. -->
<xsl:template match="tei:title" mode="text">
    <t.title>
        <xsl:apply-templates mode="header-text"/>
    </t.title>
</xsl:template>
631
<!-- text header: titleStmt opens with a textSigle holding the tunnelled $compoundID,
     followed by the transformed children of the source titleStmt -->
<xsl:template mode="text" match="tei:titleStmt">
    <xsl:param name="compoundID" tunnel="yes" as="xs:string"/>
    <titleStmt>
        <textSigle>{$compoundID}</textSigle>
        <xsl:apply-templates select="node()" mode="text"/>
    </titleStmt>
</xsl:template>
641
<!-- text header: re-creates publicationStmt as an unprefixed element; children continue in mode 'text' -->
<xsl:template mode="text" match="tei:publicationStmt">
    <publicationStmt>
        <xsl:apply-templates select="node()" mode="text"/>
    </publicationStmt>
</xsl:template>
647
<!-- text header: re-creates availability; only its attributes and element children
     (no text nodes) are processed further, in mode 'text' -->
<xsl:template mode="text" match="tei:availability">
    <availability>
        <xsl:apply-templates select="@* | *" mode="text"/>
    </availability>
</xsl:template>
653
<!-- text header: re-creates profileDesc as an unprefixed element; children continue in mode 'text' -->
<xsl:template mode="text" match="tei:profileDesc">
    <profileDesc>
        <xsl:apply-templates select="node()" mode="text"/>
    </profileDesc>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100659
<!-- text header: re-creates textClass; its attributes and element children are processed in mode 'text' -->
<xsl:template mode="text" match="tei:textClass">
    <textClass>
        <xsl:apply-templates select="@* | *" mode="text"/>
    </textClass>
</xsl:template>
665
<!-- text and corpus headers: re-creates catRef with its attributes and element children.
     The content is dispatched in the mode this template was invoked in (#current),
     so a corpus-mode invocation no longer switches to the 'text' mode templates. -->
<xsl:template match="tei:catRef" mode="text corpus">
    <xsl:element name="{local-name()}">
        <xsl:apply-templates mode="#current" select="@* | *"/>
    </xsl:element>
</xsl:template>
671
<!-- text and corpus headers: the listed attributes are carried over unchanged;
     xml:id is matched only when it sits somewhere below a tei:classDecl -->
<xsl:template mode="text corpus"
    match="@status | @scheme | @target | @type | @xml:id[ancestor::tei:classDecl] | @xml:lang">
    <xsl:copy/>
</xsl:template>
675
<!-- text and corpus headers: a paragraph is re-created as an unprefixed p,
     and its content is handed to mode 'header-text' -->
<xsl:template mode="text corpus" match="tei:p">
    <p>
        <xsl:apply-templates select="node()" mode="header-text"/>
    </p>
</xsl:template>
681
682
683 <!-- OPTIMIZATION has to take modes into account -->
bansp5e2d1c02022-03-10 04:51:40 +0100684 <!-- ************************** CORPUS header ******************* -->
<!-- Writes the corpus-level header file to $target.
       text.xml - source document whose tei:teiCorpus/tei:teiHeader children are transformed in mode 'corpus'
       target   - href of the result document -->
<xsl:template name="create_corpus_header">
    <xsl:param name="text.xml" as="document-node()"/>
    <xsl:param name="target" as="xs:string"/>

    <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes">

        <!--doctype-public="{$publicDoctypeI5}"
          doctype-system="{$systemDoctypeI5}">
          these are, sadly, useless
        -->

        <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
            <xsl:apply-templates mode="corpus" select="$text.xml/tei:teiCorpus/tei:teiHeader/tei:*"/>
        </idsHeader>
    </xsl:result-document>
</xsl:template>
702
<!-- corpus header: re-creates fileDesc as an unprefixed element; children continue in mode 'corpus' -->
<xsl:template mode="corpus" match="tei:fileDesc">
    <fileDesc>
        <xsl:apply-templates select="node()" mode="corpus"/>
    </fileDesc>
</xsl:template>
bansp9103aab2022-03-19 05:10:21 +0100708
bansp5e2d1c02022-03-10 04:51:40 +0100709
<!-- corpus header: a TEI title becomes c.title; attributes go through mode 'corpus'
     first, then the content is processed in mode 'header-text' -->
<xsl:template mode="corpus" match="tei:title">
    <c.title>
        <xsl:apply-templates select="@*" mode="corpus"/>
        <xsl:apply-templates select="node()" mode="header-text"/>
    </c.title>
</xsl:template>
716
<!-- corpus header: titleStmt opens with a korpusSigle holding $corpusID
     (declared outside this template), followed by the transformed children -->
<xsl:template mode="corpus" match="tei:titleStmt">
    <titleStmt>
        <korpusSigle>{$corpusID}</korpusSigle>
        <xsl:apply-templates select="node()" mode="corpus"/>
    </titleStmt>
</xsl:template>
725
<!-- corpus header: re-creates publicationStmt as an unprefixed element; children continue in mode 'corpus' -->
<xsl:template mode="corpus" match="tei:publicationStmt">
    <publicationStmt>
        <xsl:apply-templates select="node()" mode="corpus"/>
    </publicationStmt>
</xsl:template>
731
<!-- corpus header: re-creates availability; only its attributes and element children
     (no text nodes) are processed further, in mode 'corpus' -->
<xsl:template mode="corpus" match="tei:availability">
    <availability>
        <xsl:apply-templates select="@* | *" mode="corpus"/>
    </availability>
</xsl:template>
737
<!-- corpus header: re-creates encodingDesc as an unprefixed element; children continue in mode 'corpus' -->
<xsl:template mode="corpus" match="tei:encodingDesc">
    <encodingDesc>
        <xsl:apply-templates select="node()" mode="corpus"/>
    </encodingDesc>
</xsl:template>
743
<!-- corpus header: structural taxonomy elements are re-created under their own local name;
     their attributes and element children (no text nodes) continue in mode 'corpus' -->
<xsl:template mode="corpus"
    match="tei:classDecl | tei:taxonomy | tei:category | tei:taxonomy/tei:bibl">
    <xsl:element name="{local-name(.)}">
        <xsl:apply-templates select="@* | *" mode="corpus"/>
    </xsl:element>
</xsl:template>
749
<!-- corpus header: text-bearing elements are re-created under their own local name;
     attributes go through mode 'corpus', the content through mode 'header-text' -->
<xsl:template mode="corpus" match="tei:bibl/tei:title | tei:edition | tei:desc">
    <xsl:element name="{local-name(.)}">
        <xsl:apply-templates select="@*" mode="corpus"/>
        <xsl:apply-templates select="node()" mode="header-text"/>
    </xsl:element>
</xsl:template>
756<!--
757 <xsl:template match="tei:textClass" mode="corpus">
758 <xsl:element name="{local-name()}">
759 <xsl:apply-templates mode="corpus" select="@* | *"/>
760 </xsl:element>
761 </xsl:template>
762
763 <xsl:template match="tei:catRef" mode="corpus">
764 <xsl:element name="{local-name()}">
765 <xsl:apply-templates mode="corpus" select="@* | *"/>
766 </xsl:element>
767 </xsl:template>
768-->
bansp5e2d1c02022-03-10 04:51:40 +0100769
770
771
772 <!-- this template can be called by the XSPEC test; TODO: find a way to call the main() template directly -->
<!-- I have not fully handled the param transmission, which would have to be kludged in just for the sake of XSpec,
  because I'm disabling this for now, due to XSpec design issues; relevant links, among others:

  https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
  https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html

  In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in their stylesheets. But I don't
  want to be strangled by kludges at the beginning of work, I've already lost quite a bit of time on this investigation,
  I will therefore "just code" and then can think of externalizing bits of templates if we want to play with tests. For now,
  I don't want to have to handle context items in a special way inside variables, etc., because I'm not sure it's worth it.

-->
785 <!--<xsl:template name="test_full">
786 <xsl:param name="corpusID"/>
787 <xsl:param name="docID"/>
788 <xsl:param name="textID"/>
789 <xsl:call-template name="xsl:initial-template"/>
790 </xsl:template>-->
791
<!-- Returns the sum of the two trailing numbers found in a @corresp value, i.e. of the
     two comma-separated integers that follow the first comma inside its parentheses.
     (Presumably these are a start offset and a length, which would make the result an
     end offset; confirm against the string-range format of the source files.)

     Which @corresp is used depends on $node:
       tei:text / tei:body - last non-rejected seg in the last s of the last p
       tei:p               - last non-rejected seg in the last s
       tei:s               - last non-rejected seg
       anything else       - the node's own @corresp
     A seg counts as rejected only when its nkjp:rejected attribute equals 'true'.

     Exactly one @corresp must be found; otherwise the function fails with a type error. -->
<xsl:function name="f:calc_content_length" as="xs:integer">
    <xsl:param name="node" as="node()"/>

    <xsl:variable name="corresp" as="attribute(corresp)">
        <xsl:choose>
            <xsl:when test="$node/self::tei:text or $node/self::tei:body">
                <xsl:sequence
                    select="$node/descendant::tei:p[last()]/descendant::tei:s[last()]/descendant::tei:seg[not(@nkjp:rejected eq 'true')][last()]/@corresp"/>
            </xsl:when>
            <xsl:when test="$node/self::tei:p">
                <xsl:sequence
                    select="$node/descendant::tei:s[last()]/descendant::tei:seg[not(@nkjp:rejected eq 'true')][last()]/@corresp"/>
            </xsl:when>
            <xsl:when test="$node/self::tei:s">
                <xsl:sequence
                    select="$node/descendant::tei:seg[not(@nkjp:rejected eq 'true')][last()]/@corresp"/>
            </xsl:when>
            <xsl:otherwise>
                <xsl:sequence select="$node/@corresp"/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:variable>

    <!-- the part between the first comma and the closing parenthesis: two integers separated by a comma -->
    <xsl:variable name="numbers" select="substring-after(substring-before($corresp, ')'), ',')"/>
    <xsl:sequence
        select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"/>
</xsl:function>
826
Akron9a8ee3e2022-01-31 13:51:49 +0100827</xsl:stylesheet>
Piotr Banski6a4a2522022-05-24 01:16:47 +0200828
Piotr Banskifdc858a2022-05-25 02:40:32 +0200829<!-- template for serializing maps in messages <xsl:message select="('map:',serialize($map, map{'method':'adaptive'}))"/> -->