blob: c0012032303eafa5184e6a55bb650dcb064e94cb [file] [log] [blame]
Akron9a8ee3e2022-01-31 13:51:49 +01001<?xml version="1.0" encoding="UTF-8"?>
2<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
bansp5e2d1c02022-03-10 04:51:40 +01003 xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
4 xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
Piotr Banskif8af3a92022-05-23 03:20:10 +02005 xmlns:fn="http://www.w3.org/2005/xpath-functions"
Piotr Banski6a4a2522022-05-24 01:16:47 +02006 xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f fn map nkjp tei"
bansp5e2d1c02022-03-10 04:51:40 +01007 version="3.0" expand-text="yes">
Akron9a8ee3e2022-01-31 13:51:49 +01008
banspe726b4a2022-03-28 05:47:45 +02009
10<!-- PARAMETERS -->
bansp5e2d1c02022-03-10 04:51:40 +010011
<!-- sourceDir: the directory containing the NKJP files, in the form of a collection of
     text-level directories (that is how we know both the $corpusID and the $docID) -->
<xsl:param name="sourceDir" as="xs:string" select="'test/resources/nkjp2korap_sample2'"/>

<!-- targetDir: where the corpus/document/text/annotations hierarchy is going to be created -->
<xsl:param name="targetDir" as="xs:string" select="'test/output'"/>

<!-- skip_docID: comma-separated list of document IDs to be left out of processing
       example: HellerPodgladanie,KOT
     The list is split on ',' and compared by plain string identity, nothing more
     (this is just for testing) -->
<xsl:param name="skip_docID" as="xs:string"
  select="'HellerPodgladanie,IsakowiczZaleskiMoje,KolakowskiOco,MysliwskiKamien,WilkWilczy,ZycieWarszawy_Zycie'"/>
banspb5992532022-03-29 15:55:44 +020026
bansp8f6700b2022-03-27 05:27:09 +020027
bansp9dc10002022-05-17 22:33:34 +020028<!-- VARIABLES (= constants...) -->
banspe726b4a2022-03-28 05:47:45 +020029
<!-- identifiers used to build output paths and the compound @docid -->
<xsl:variable name="corpusID" static="yes" as="xs:string" select="'NKJP'"/>
<xsl:variable name="docID" static="yes" as="xs:string" select="'NKJP'"/>

<!-- $targetDir/$corpusID/ (note the trailing slash) -->
<xsl:variable name="targetCorpusDir_slashed" as="xs:string"
  select="concat($targetDir, '/', $corpusID, '/')"/>

<!-- system and public identifiers of the I5 DTD -->
<xsl:variable name="systemDoctypeI5" static="yes" as="xs:string"
  select="'http://corpora.ids-mannheim.de/I5/DTD/i5.dtd'"/>
<xsl:variable name="publicDoctypeI5" static="yes" as="xs:string"
  select="'-//IDS//DTD I5 1.0//EN'"/>

<!-- namespace of the KorAP-XML output elements -->
<xsl:variable name="KorAP_namespace" static="yes" as="xs:string"
  select="'http://ids-mannheim.de/ns/KorAP'"/>

<!-- value written to layer/@version; this is only a bit funny -->
<xsl:variable name="KorAP-XML_version" static="yes" as="xs:string" select="'KorAP-0.4'"/>

<!-- query part appended to the collection URI;
     see https://www.saxonica.com/documentation11/index.html#!sourcedocs/collections/collection-directories -->
<xsl:variable name="collection_params" static="yes" as="xs:string"
  select="'recurse=yes;validation=strip;select=text.xml;content-type=application/xml;on-error=warning;xinclude=yes'"/>

<!-- every text.xml found below $sourceDir; at least one document is required -->
<xsl:variable name="collection_of_text" as="document-node()+"
  select="collection($sourceDir || '?' || $collection_params)"/>

<!-- These two 'flags' are meant to increase the readability of the code:
     they index into the output of the calc_offsets() function, whose
     returned value is a sequence, (start, end).

     Remove together with the function! -->
<xsl:variable name="OFFSET_START" static="yes" as="xs:integer" select="1"/>
<xsl:variable name="OFFSET_END" static="yes" as="xs:integer" select="2"/>

<!-- this is for traversing the accumulator sequences, and skipping incomplete maps -->
<xsl:variable name="INCOMPLETE_MAP" static="yes" as="xs:integer" select="-1"/>
66
banspb5992532022-03-29 15:55:44 +020067
banspe726b4a2022-03-28 05:47:45 +020068<!-- MODES -->
bansp5e2d1c02022-03-10 04:51:40 +010069
<!-- 'corpus' and 'text': a node with no matching template rule is skipped together with its subtree -->
<xsl:mode on-no-match="deep-skip" name="corpus"/>
<xsl:mode on-no-match="deep-skip" name="text"/>
<!-- 'header-text': a node with no matching template rule contributes only its text content -->
<xsl:mode on-no-match="text-only-copy" name="header-text"/>
<!-- the unnamed mode: all accumulators of the stylesheet are declared applicable -->
<xsl:mode use-accumulators="#all"/>
74
Piotr Banskifdc858a2022-05-25 02:40:32 +020075 <!--<xsl:function name="f:grab_the_tail" as="map(*)">
76 <xsl:param name="seq" as="map(*)+"/>
77 <xsl:choose>
78 <xsl:when test="map:get(head($seq),map:keys(head($seq))[1])[2] eq $INCOMPLETE_MAP">
79 <xsl:sequence select="f:grab_the_tail(tail($seq))"/>
80 <xsl:message select="'skip ' || map:keys(head($seq))[1]"/>
81 </xsl:when>
82 <xsl:otherwise><xsl:sequence select="$seq"/></xsl:otherwise>
83 </xsl:choose>
84 </xsl:function>-->
85
<!-- elem-offset-seq
     Grows a sequence of single-entry maps while a document is traversed. Each map is keyed
     by an @xml:id (as a string) and holds a pair of integers:
       * p/s at phase="start": (index recorded by the previous entry, computed base);
         this is a provisional entry, superseded at phase="end"
       * w at phase="end" (only inside a <seg> without @nkjp:rejected): keyed by the
         parent seg's id, (base, base + length of the w's string value)
       * p/s at phase="end": (base taken from the element's own start entry,
         second number of the latest entry)
       * body/text at phase="end": (0, second number of the latest entry)
     Every rule reads the second number of the most recently appended map as its
     "preceding index". The sequence is seeded with map{'null':(0,0)}. -->
<xsl:accumulator name="elem-offset-seq" as="map(xs:string, item()+)+" initial-value="(map{'null':(0,0)})">

  <xsl:accumulator-rule match="tei:body/tei:p" phase="start">
    <xsl:variable name="last_entry" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="last_key" select="map:keys($last_entry)[1]"/>
    <xsl:variable name="preceding_index" as="xs:integer" select="map:get($last_entry, $last_key)[2]"/>
    <xsl:message select="local-name() || ' below ' || parent::node()/@xml:id ||' start, previous element: ' || $last_key"/>

    <!-- for paragraphs, the base depends only on being initial (index still 0) or not -->
    <xsl:variable name="our_base" as="xs:integer"
      select="if ($preceding_index eq 0) then 0 else $preceding_index + 1"/>

    <xsl:message select="@xml:id || ' start, preceding_index: ' || $preceding_index || ', our_base: ' || $our_base"/>

    <xsl:sequence select="$value, map { string(@xml:id): ($preceding_index, $our_base) }"/>
  </xsl:accumulator-rule>

  <xsl:accumulator-rule match="tei:s" phase="start">
    <xsl:variable name="last_entry" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="last_key" select="map:keys($last_entry)[1]"/>
    <xsl:variable name="preceding_index" as="xs:integer" select="map:get($last_entry, $last_key)[2]"/>
    <xsl:message select="'s start, previous element: ' || $last_key"/>

    <!-- f:is_preceded_by_ws() is consulted only when the index has already moved past 0 -->
    <xsl:variable name="our_base" as="xs:integer"
      select="if ($preceding_index eq 0)
              then 0
              else $preceding_index + xs:integer(f:is_preceded_by_ws(., true()))"/>
    <xsl:message select="@xml:id || ' start, preceding_index: ' || $preceding_index || ', our_base: ' || $our_base"/>

    <xsl:sequence select="$value, map { string(@xml:id): ($preceding_index, $our_base) }"/>
  </xsl:accumulator-rule>

  <xsl:accumulator-rule match="tei:w[parent::tei:seg[count(@nkjp:rejected) eq 0]]" phase="end">
    <xsl:variable name="last_entry" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="last_key" select="map:keys($last_entry)[1]"/>
    <xsl:variable name="preceding_index" as="xs:integer" select="map:get($last_entry, $last_key)[2]"/>
    <xsl:message select="'w, previous element: ' || $last_key"/>

    <!-- the entry is recorded under the id of the enclosing seg, not of the w itself -->
    <xsl:variable name="seg" select="parent::tei:seg"/>
    <xsl:variable name="our_base" as="xs:integer"
      select="$preceding_index + xs:integer(f:is_preceded_by_ws($seg, true()))"/>

    <xsl:message select="'w, preceding_index: ' || $preceding_index || ', our_base: ' || $our_base"/>
    <!--<xsl:message select="('VALUE at w-end:',serialize($value, map{'method':'adaptive'}))"/>-->

    <xsl:sequence select="$value, map { string($seg/@xml:id): ($our_base, $our_base + string-length(.)) }"/>
  </xsl:accumulator-rule>

  <xsl:accumulator-rule match="tei:s" phase="end">
    <xsl:variable name="last_entry" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="preceding_index" as="xs:integer"
      select="map:get($last_entry, map:keys($last_entry)[1])[2]"/>
    <!--<xsl:message select="('VALUE at s-end:',serialize($value, map{'method':'adaptive'}))"/>-->

    <!-- the first value found under our own id is the provisional start entry;
         its second number is the base computed when the element was opened -->
    <xsl:variable name="incomplete" as="xs:integer+" select="map:find($value, string(@xml:id))(1)"/>
    <xsl:variable name="our_base" as="xs:integer" select="$incomplete[2]"/>

    <xsl:message select="'s end, preceding_index: ' || $preceding_index || ', our_base: ' || $our_base"/>

    <xsl:sequence select="$value, map { string(@xml:id): ($our_base, $preceding_index) }"/>
  </xsl:accumulator-rule>

  <xsl:accumulator-rule match="tei:body/tei:p" phase="end">
    <xsl:variable name="last_entry" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="preceding_index" as="xs:integer"
      select="map:get($last_entry, map:keys($last_entry)[1])[2]"/>
    <!--<xsl:message select="('VALUE at p-end:',serialize($value, map{'method':'adaptive'}))"/>-->

    <!-- same lookup as for s: base comes from this paragraph's own start entry -->
    <xsl:variable name="incomplete" as="xs:integer+" select="map:find($value, string(@xml:id))(1)"/>
    <xsl:variable name="our_base" as="xs:integer" select="$incomplete[2]"/>

    <xsl:message select="'p end, preceding_index: ' || $preceding_index || ', our_base: ' || $our_base"/>

    <xsl:sequence select="$value, map { string(@xml:id): ($our_base, $preceding_index) }"/>
  </xsl:accumulator-rule>

  <xsl:accumulator-rule match="tei:body" phase="end">
    <xsl:variable name="last_entry" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="preceding_index" as="xs:integer"
      select="map:get($last_entry, map:keys($last_entry)[1])[2]"/>

    <xsl:sequence select="$value, map { string(@xml:id): (0, $preceding_index) }"/>
  </xsl:accumulator-rule>

  <xsl:accumulator-rule match="tei:text" phase="end">
    <xsl:variable name="last_entry" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="preceding_index" as="xs:integer"
      select="map:get($last_entry, map:keys($last_entry)[1])[2]"/>

    <xsl:sequence select="$value, map { string(@xml:id): (0, $preceding_index) }"/>
  </xsl:accumulator-rule>
</xsl:accumulator>
bansp5e2d1c02022-03-10 04:51:40 +0100211
banspe726b4a2022-03-28 05:47:45 +0200212
213 <!-- FUNCTIONS -->
214
<!-- f:compute_nesting($node)
     Returns the number of elements on $node's ancestor-or-self axis, leaving out
     any element whose local name is 'TEI' or 'teiCorpus'. -->
<xsl:function name="f:compute_nesting" as="xs:integer">
  <xsl:param name="node" as="element()"/>
  <xsl:sequence
    select="count($node/ancestor-or-self::*[not(local-name() = ('TEI', 'teiCorpus'))])"/>
</xsl:function>
222
<!-- f:is_preceded_by_ws($node, $suppress_initial)
     Tells whether one whitespace character is to be assumed in front of $node, which
     must be a seg, s or p element (any other element terminates the transformation).

     $suppress_initial = false(): the answer for the element taken on its own
       (used when the raw text is assembled).
     $suppress_initial = true(): the caller has already accounted for whitespace at the
       level of the enclosing element, so an element that is initial in its container
       reports false (used when offsets are accumulated).

     seg: never preceded by whitespace when it carries @nkjp:nps.
          Otherwise, with $suppress_initial, true exactly when a non-rejected seg occurs
          earlier in the same sentence. Previously this case was constantly false, which
          left the whitespace between words of a sentence uncounted.
          Without $suppress_initial, false only for a seg with no preceding sibling seg
          in the first s of the first p.
     s:   true when it has a preceding sibling s; otherwise true only without
          $suppress_initial and when the enclosing p has a preceding sibling p.
     p:   true when it has a preceding sibling p ($suppress_initial is not consulted). -->
<xsl:function name="f:is_preceded_by_ws" as="xs:boolean">
  <xsl:param name="node" as="element()"/>
  <xsl:param name="suppress_initial" as="xs:boolean"/>
  <xsl:choose>
    <xsl:when test="local-name($node) eq 'seg'">
      <xsl:choose>
        <xsl:when test="$node/@nkjp:nps">
          <xsl:sequence select="false()"/>
        </xsl:when>
        <xsl:when test="$suppress_initial">
          <!-- only the first counted seg of a sentence is "initial"; rejected segs are
               ignored because they contribute neither text nor offsets -->
          <xsl:sequence
            select="exists($node/ancestor::tei:s[1]//tei:seg[count(@nkjp:rejected) eq 0][. &lt;&lt; $node])"/>
        </xsl:when>
        <xsl:otherwise>
          <xsl:sequence
            select="not($node[count(preceding-sibling::tei:seg) eq 0]/ancestor::tei:s[count(preceding-sibling::tei:s) eq 0]/ancestor::tei:p[count(preceding-sibling::tei:p) eq 0])"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <xsl:when test="local-name($node) eq 's'">
      <xsl:choose>
        <xsl:when test="exists($node/preceding-sibling::tei:s)">
          <xsl:sequence select="true()"/>
        </xsl:when>
        <xsl:otherwise>
          <!-- first sentence of its paragraph: whitespace comes from the paragraph break, if any -->
          <xsl:variable name="indented" as="xs:boolean"
            select="not($suppress_initial) and exists($node/ancestor::tei:p[1]/preceding-sibling::tei:p)"/>
          <xsl:message select="'. s indented: ' || xs:string($indented)"/>
          <xsl:sequence select="$indented"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <xsl:when test="local-name($node) eq 'p'">
      <xsl:sequence select="exists($node/preceding-sibling::tei:p)"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:message terminate="yes" select="'Wrong argument passed to f:is_preceded_by_ws(): ' || local-name($node) || ' Only p, s, seg are allowed.'"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
banspd1bf1db2022-04-04 02:16:24 +0200261
banspb5992532022-03-29 15:55:44 +0200262
263<!-- UTILITY TEMPLATES -->
264
<!-- this is to delete some auto-inserted attribute throughout: @default yields nothing, in any mode -->
<xsl:template mode="#all" match="@default"/>

<!-- NKJP-SGJP has apparently abandoned standoff representations by adding <w> everywhere;
     we reach for them, but from the level of <seg>, so <w> needs no processing of its own -->
<xsl:template mode="#all" match="tei:w"/>
bansp8f6700b2022-03-27 05:27:09 +0200271
<!-- Fall-through for <choice> in the struct layer: the element itself (and a potential
     <paren> wrapper inside it) yields no span; processing continues with the seg
     elements found anywhere below.

     The segs are processed in the current (struct) mode. Without an explicit mode they
     would be handed to the unnamed mode and never reach the span-building template.
     ini/fin/index are declared with the same defaults as in the tei:* struct template
     and passed on unchanged, so a <choice> is transparent for them as well. -->
<xsl:template match="tei:choice" mode="struct">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>

  <xsl:apply-templates select="descendant::tei:seg" mode="#current">
    <xsl:with-param name="ini" select="$ini" as="xs:integer"/>
    <xsl:with-param name="fin" select="$fin" as="xs:integer"/>
    <xsl:with-param name="index" select="$index" as="xs:integer"/>
  </xsl:apply-templates>
</xsl:template>
banspb5992532022-03-29 15:55:44 +0200276
277 <!-- MAIN PROCESSING -->
278
279
<!-- Entry point: writes the corpus header once, then converts every text of the
     collection whose directory name is not listed in $skip_docID. -->
<xsl:template name="xsl:initial-template">
  <xsl:variable name="IDs_to_skip" as="xs:string*" select="tokenize($skip_docID, ',')"/>

  <!-- we only want to call the template below once, and we hand it an arbitrary NKJP corpus file
       (simply the first one), because all we need is the main corpus header, and we can (should)
       get to that from any NKJP corpus document -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param name="text.xml" as="document-node()" select="$collection_of_text[1]"/>
    <xsl:with-param name="target" as="xs:string" select="$targetCorpusDir_slashed || 'header.xml'"/>
  </xsl:call-template>

  <xsl:for-each select="$collection_of_text">
    <!-- the directory holding this text.xml, and its last path step, which serves as the text ID -->
    <xsl:variable name="my_dir" as="xs:string" select="replace(base-uri(), '/text\.xml', '')"/>
    <xsl:variable name="my_textID" as="xs:string" select="tokenize($my_dir, '/')[last()]"/>

    <!-- utility step, for when we want to ignore some texts for any reason
         (debugging, selective update): listed IDs produce nothing at all -->
    <xsl:if test="not($my_textID = $IDs_to_skip)">
      <xsl:call-template name="process_single_sample">
        <xsl:with-param name="text.xml" as="document-node()" select="."/>
        <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
          select="doc($my_dir || '/ann_morphosyntax.xml')"/>
        <xsl:with-param name="ann_segmentation.xml" as="document-node()"
          select="doc($my_dir || '/ann_segmentation.xml')"/>
        <xsl:with-param name="my_textID" as="xs:string" select="$my_textID"/>
      </xsl:call-template>
    </xsl:if>
  </xsl:for-each>
</xsl:template>
313
<!-- Converts one NKJP text: currently writes data.xml and struct/structure.xml below
     $targetCorpusDir_slashed/$docID/$my_textID. The morpho layer and the text header
     are switched off (see the commented-out calls at the end). -->
<xsl:template name="process_single_sample">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <!-- a missing textID should never happen, but if it does, the default makes it
       show up at the top of the output -->
  <xsl:param name="my_textID" as="xs:string" select="'0-BAD_textID'"/>

  <!-- output directory of this text (no trailing slash) -->
  <xsl:variable name="targetBaseDir" as="xs:string"
    select="concat($targetCorpusDir_slashed, $docID, '/', $my_textID)"/>

  <!-- this is what occurs in the text and data layers as @docid -->
  <xsl:variable name="compoundID" as="xs:string"
    select="concat($corpusID, '_', $docID, '.', $my_textID)"/>

  <xsl:call-template name="create_data">
    <!--<xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>-->
    <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$compoundID"/>
    <xsl:with-param name="target" as="xs:string" select="$targetBaseDir || '/data.xml'"/>
  </xsl:call-template>

  <xsl:call-template name="create_struct">
    <xsl:with-param name="compoundID" as="xs:string" select="$compoundID"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
    <xsl:with-param name="target" as="xs:string" select="$targetBaseDir || '/struct/structure.xml'"/>
  </xsl:call-template>

  <!-- currently disabled:
  <xsl:call-template name="create_morpho">
    <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
    <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
    <xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml"
      as="document-node()"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml"
      as="document-node()"/>
    <xsl:with-param name="target" select="$targetBaseDir || '/nkjp/morpho.xml'" as="xs:string"/>
  </xsl:call-template>
  -->
  <!-- currently disabled:
  <xsl:call-template name="create_text_header">
    <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
    <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
    <xsl:with-param name="target" select="$targetBaseDir || '/header.xml'" as="xs:string"/>
  </xsl:call-template>
  -->
</xsl:template>
359
360 <!-- ************************** data.xml ******************* -->
361
<!-- create_data: writes the data.xml file (raw text layer) to $target.

     ann_segmentation.xml  the segmentation document the text is rebuilt from
     compoundID            value of raw_text/@docid
     target                URI of the result document

     The text is the concatenation, in document order, of the <w> content of every seg
     below body/p/s that has no @nkjp:rejected, each one prefixed with a single space
     when f:is_preceded_by_ws(., false()) says so.

     xpath-default-namespace is given as a literal URI, as in the sibling templates:
     it is not an attribute value template, so "{$KorAP_namespace}" would be taken
     verbatim, braces included. -->
<xsl:template name="create_data">
  <!--<xsl:param name="text.xml" as="document-node()"/>-->
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">

    <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>

      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <!-- one separator string ('' or ' ') followed by the word, per accepted seg -->
        <xsl:variable name="content" as="xs:string+">
          <xsl:for-each select="$ann_segmentation.xml/tei:teiCorpus/tei:TEI/tei:text/tei:body/tei:p/tei:s//tei:seg[count(@nkjp:rejected) eq 0]">
            <xsl:sequence select="
                if (f:is_preceded_by_ws(., false())) then
                  ' '
                else
                  '', ./tei:w"/>
          </xsl:for-each>
        </xsl:variable>
        <xsl:value-of select="string-join($content)"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
393
bansp5f841732022-03-16 06:27:31 +0100394 <!-- ************************** struct ******************* -->
395
<!-- create_struct: writes the structure layer (struct/structure.xml) to $target.

     compoundID            value of layer/@docid
     ann_segmentation.xml  the segmentation document whose <text> element(s) are
                           processed in mode 'struct' to fill the spanList
     target                URI of the result document -->
<xsl:template name="create_struct">
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:attribute name="version" select="$KorAP-XML_version"/>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:apply-templates mode="struct" select="$ann_segmentation.xml//tei:text">
          <!-- abandoned alternative: hand the offsets down as a tunnel parameter
          <xsl:with-param name="offsets" as="map(xs:string, xs:integer+)" tunnel="yes">
            <xsl:map>
              <xsl:for-each select="tail(fn:accumulator-after('elem-offset-seq'))">
                <xsl:map-entry key="map:get(., 'id')" select="map:get(., 'start'), map:get(., 'end')"/>
              </xsl:for-each>
            </xsl:map>
          </xsl:with-param>
          -->
        </xsl:apply-templates>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
422
<!-- Span builder for the struct layer: emits one KorAP <span> for the current TEI
     element, then processes its children in the same mode.

     index  running number received from the parent; the span id is built from it
     ini    accepted but not read here; children receive this element's $start
     fin    accepted but not read here; children receive this element's $end

     @from / @to come from the elem-offset-seq accumulator, looked up by @xml:id.
     $start is parsed from @corresp (second of the comma-separated values inside the
     parentheses) and $end comes from f:calc_content_length(), which is defined outside
     this template; both are only handed down to the children. -->
<xsl:template match="tei:*" mode="struct">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>

  <!-- id -> (start, end); where an id was recorded more than once, the last entry wins -->
  <xsl:variable name="offsets" as="map(xs:string, xs:integer+)"
    select="map:merge(tail(accumulator-after('elem-offset-seq')), map{'duplicates':'use-last'})"/>
  <xsl:variable name="own_offsets" select="map:get($offsets, string(@xml:id))"/>

  <xsl:variable name="my_name" as="xs:string" select="local-name()"/>
  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)"/>

  <!-- for s and p only: all elements nested in the like-named preceding siblings
       (mind @nkjp:rejected) -->
  <xsl:variable name="outside-preceding-count" as="xs:integer"
    select="if (self::tei:s or self::tei:p) then count($preceding/descendant::*) else 0"/>

  <xsl:variable name="my_index" as="xs:integer"
    select="$index + 1 + $preceding-count + $outside-preceding-count"/>

  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p">
        <!-- first seg of the first sentence -->
        <xsl:variable name="first_corresp" as="attribute(corresp)"
          select="descendant::tei:s[1]/descendant::tei:seg[1]/attribute::corresp"/>
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before($first_corresp, ')'), ','), ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:s">
        <xsl:variable name="first_corresp" as="attribute(corresp)"
          select="descendant::tei:seg[1]/attribute::corresp"/>
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before($first_corresp, ')'), ','), ','))"/>
      </xsl:when>
      <xsl:when test="self::tei:seg">
        <xsl:sequence
          select="xs:integer(substring-before(substring-after(substring-before(@corresp, ')'), ','), ','))"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:message terminate="yes" select="'Element not handled: ' || fn:local-name()"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$own_offsets[1]"/>
    <xsl:attribute name="to" select="$own_offsets[2]"/>
    <!-- test aid: <xsl:attribute name="accumulator" select="string-join(map:get($offsets,string(@xml:id)),',')"/> -->
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <!-- STRUCT vs. LEX for morpho -->
      <xsl:attribute name="type" select="'struct'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'name'"/>
        <xsl:value-of select="$my_name"/>
      </xsl:element>
      <!-- test: segs additionally show their (normalized) text -->
      <xsl:if test="$my_name eq 'seg'">
        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
          <xsl:attribute name="name" select="'orth'"/>
          <xsl:value-of select="normalize-space(.)"/>
        </xsl:element>
      </xsl:if>
      <!-- the element's own attributes, one <f> each, wrapped in an 'attr' feature structure -->
      <xsl:if test="exists(@*)">
        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
          <xsl:attribute name="name" select="'attr'"/>
          <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="type" select="'attr'"/>
            <xsl:for-each select="@*">
              <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                <xsl:attribute name="name" select="local-name(.)"/>
                <xsl:value-of select="."/>
              </xsl:element>
            </xsl:for-each>
          </xsl:element>
        </xsl:element>
      </xsl:if>
    </xsl:element>
  </xsl:element>

  <xsl:apply-templates mode="struct">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
  </xsl:apply-templates>
</xsl:template>
531
532 <!-- ************************** morpho ******************* -->
533
<!-- create_morpho: writes the morphosyntactic layer to $target.

     text.xml              accepted, not read in this template
     compoundID            value of layer/@docid
     ann_segmentation.xml  its <text> element(s) are processed in mode 'morpho'
     ann_morphosyntax.xml  handed on to the 'morpho' templates as a parameter
     target                URI of the result document -->
<xsl:template name="create_morpho">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:attribute name="version" select="$KorAP-XML_version"/>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:apply-templates mode="morpho" select="$ann_segmentation.xml//tei:text">
          <xsl:with-param name="ann_morphosyntax.xml" as="document-node()" select="$ann_morphosyntax.xml"/>
        </xsl:apply-templates>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
556
<!-- Generic container rule for mode "morpho": emits nothing itself, it only computes
     index/offset bookkeeping and recurses into its children (tei:seg has its own,
     higher-priority rule).
     Params:
       ini, fin  - offsets inherited from the parent; used as the fallback when this
                   element carries no offset information of its own
       index     - running index received from the parent
       ann_morphosyntax.xml - morphosyntax document, handed down unchanged -->
<xsl:template match="tei:*" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)"/>
  <!-- for s and p: number of elements nested inside the same-named preceding siblings;
       sum() over an empty sequence is already 0, so no separate "no siblings" case is needed -->
  <xsl:variable name="outside-preceding-count" as="xs:integer"
    select="
      if (self::tei:s or self::tei:p)
      then sum(for $p in $preceding return count($p/descendant::*))
      else 0"/>
  <xsl:variable name="my_index" select="$index + 1 + $preceding-count + $outside-preceding-count"
    as="xs:integer"/>

  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>
      <xsl:when test="self::tei:text or self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>
      <xsl:when test="self::tei:p">
        <xsl:variable name="first_corresp"
          select="descendant::tei:s[1]/descendant::tei:seg[1]/attribute::corresp"
          as="attribute(corresp)"/>
        <xsl:variable name="numbers" select="substring-after(substring-before($first_corresp,')'),',')"/>
        <xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
      </xsl:when>
      <xsl:when test="self::tei:s">
        <xsl:variable name="first_corresp"
          select="descendant::tei:seg[1]/attribute::corresp"
          as="attribute(corresp)"/>
        <xsl:variable name="numbers" select="substring-after(substring-before($first_corresp,')'),',')"/>
        <xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
      </xsl:when>
      <xsl:otherwise>
        <!-- any other TEI element: previously no branch matched, leaving an empty
             sequence that violates as="xs:integer"; inherit the parent's start instead -->
        <xsl:sequence select="$ini"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>
  <!-- f:calc_content_length() needs text/body/p/s or an element with @corresp; for anything
       else it would end up in xs:integer(''), so inherit the parent's end offset there -->
  <xsl:variable name="end" as="xs:integer"
    select="
      if (self::tei:text or self::tei:body or self::tei:p or self::tei:s or exists(@corresp))
      then f:calc_content_length(.)
      else $fin"/>

  <xsl:apply-templates mode="morpho">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
  </xsl:apply-templates>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100623
<!-- Emits one KorAP <span> per segmentation tei:seg, carrying the disambiguated
     lemma / pos / msd (and join="left" when an 'nps' feature is present) looked up in the
     morphosyntax document.
     Params:
       ini, fin  - offsets handed down by the parent (not read here)
       index     - running index from the parent; the span id is built from it
       ann_morphosyntax.xml - document searched for the seg whose @corresp points at this seg -->
<xsl:template match="tei:seg" mode="morpho">
  <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
  <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
  <xsl:param name="index" as="xs:integer" required="no" select="1"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <!-- The lookup is deliberately spread over several variables so that each
       intermediate value can be inspected individually should anything go wrong. -->
  <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
  <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
  <!-- the morphosyntax seg whose @corresp fragment (after '#') equals this seg's xml:id -->
  <xsl:variable name="my_morph-seg" as="node()" select="$ann_morphosyntax.xml//tei:seg[substring-after(@corresp,'#') eq $my_id]"/>
  <xsl:variable name="my_disamb" select="$my_morph-seg//tei:fs/tei:f[@name eq 'disamb']" as="node()"/>
  <!-- id of the interpretation selected by the disambiguation ('choice' feature, '#' stripped) -->
  <xsl:variable name="my_choice-id" select="substring-after($my_disamb//tei:f[@name eq 'choice']/@fVal,'#')" as="xs:string"/>
  <!-- the 'lex' feature structure that contains the chosen symbol -->
  <xsl:variable name="my_choice-lex" select="$my_morph-seg//tei:f[@name eq 'interps']/tei:fs[@type eq 'lex'][descendant::tei:symbol[@xml:id eq $my_choice-id]]" as="node()"/>
  <xsl:variable name="chosen-msd" as="xs:string" select="$my_choice-lex/descendant::tei:symbol[@xml:id eq $my_choice-id]/@value"/>
  <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
  <xsl:variable name="preceding-count" select="count($preceding)"/>
  <!-- TODO (from the original author): the "outside-preceding-count" correction used by the
       generic tei:* rule (descendant counts of preceding s/p siblings) was disabled when this
       rule became seg-specific; it needs to be revisited. -->
  <xsl:variable name="my_index" select="$index + 1 + $preceding-count" as="xs:integer"/>

  <!-- @corresp is parsed as "...(<id>,<start>,<length>)": start is the first number -->
  <xsl:variable name="start" as="xs:integer">
    <xsl:variable name="numbers" select="substring-after(substring-before(@corresp,')'),',')"/>
    <xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
  </xsl:variable>
  <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$start"/>
    <xsl:attribute name="to" select="$end"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <xsl:attribute name="type" select="'lex'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'lex'"/>
        <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
          <!-- the orthographic form is written as an XML comment, for human readers only -->
          <xsl:comment select="$my_morph-seg//tei:fs/tei:f[@name eq 'orth']/tei:string"/>

          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'lemma'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'base']/tei:string"/>
          </xsl:element>
          <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="name" select="'pos'"/>
            <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'ctag']/tei:symbol/@value"/>
          </xsl:element>
          <xsl:if test="string-length($chosen-msd)">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'msd'"/>
              <xsl:value-of select="$chosen-msd"/>
            </xsl:element>
          </xsl:if>
          <xsl:if test="$my_morph-seg//tei:fs/tei:f[@name eq 'nps']">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'join'"/>
              <xsl:value-of select="'left'"/>
            </xsl:element>
          </xsl:if>
        </xsl:element>
      </xsl:element>
    </xsl:element>
  </xsl:element>
  <!-- NOTE(review): a stray "-\->" used to follow this instruction with no opening comment
       delimiter; as a literal text node it was copied into the output after every span.
       It has been removed. The instruction itself is kept active, as it was at runtime;
       if the intent was to disable recursion below seg, comment out the whole instruction. -->
  <xsl:apply-templates mode="morpho">
    <xsl:with-param name="ini" select="$start" as="xs:integer"/>
    <xsl:with-param name="fin" select="$end" as="xs:integer"/>
    <xsl:with-param name="index" select="$my_index"/>
    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
  </xsl:apply-templates>
</xsl:template>
banspe726b4a2022-03-28 05:47:45 +0200713
bansp5f841732022-03-16 06:27:31 +0100714 <!-- ************************** TEXT header ******************* -->
715
<!-- Writes the text-level header to $target: an <idsHeader> whose content is produced by
     running mode "text" over the children of TEI/teiHeader in $text.xml.
     $compoundID is passed on as a tunnel parameter. -->
<xsl:template name="create_text_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- the local header.xml file -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">

    <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="text" select="$text.xml//tei:TEI/tei:teiHeader/tei:*">
        <xsl:with-param name="compoundID" tunnel="yes" select="$compoundID" as="xs:string"/>
      </xsl:apply-templates>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
732
<!-- text header: re-create fileDesc without a namespace and process its children in mode "text" -->
<xsl:template match="tei:fileDesc" mode="text">
  <fileDesc>
    <xsl:apply-templates mode="text"/>
  </fileDesc>
</xsl:template>
738
<!-- text header: a TEI title becomes <t.title>.
     The content is processed in mode "header-text", as the corpus-level title and tei:p
     already do; previously the mode was omitted, so processing silently fell back to the
     default (unnamed) mode. -->
<xsl:template match="tei:title" mode="text">
  <t.title>
    <xsl:apply-templates mode="header-text"/>
  </t.title>
</xsl:template>
744
<!-- text header: titleStmt starts with a <textSigle> holding the tunnelled $compoundID,
     followed by the children processed in mode "text" -->
<xsl:template match="tei:titleStmt" mode="text">
  <xsl:param name="compoundID" tunnel="yes" as="xs:string"/>
  <titleStmt>
    <textSigle>{$compoundID}</textSigle>
    <xsl:apply-templates mode="text"/>
  </titleStmt>
</xsl:template>
754
<!-- text header: re-create publicationStmt without a namespace; children go through mode "text" -->
<xsl:template match="tei:publicationStmt" mode="text">
  <publicationStmt>
    <xsl:apply-templates mode="text"/>
  </publicationStmt>
</xsl:template>
760
<!-- text header: re-create availability; only attributes and child elements are processed
     (text nodes directly inside it are not selected) -->
<xsl:template match="tei:availability" mode="text">
  <availability>
    <xsl:apply-templates select="@*, *" mode="text"/>
  </availability>
</xsl:template>
766
<!-- text header: re-create profileDesc without a namespace; children go through mode "text" -->
<xsl:template match="tei:profileDesc" mode="text">
  <profileDesc>
    <xsl:apply-templates mode="text"/>
  </profileDesc>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100772
<!-- text header: re-create textClass; only attributes and child elements are processed -->
<xsl:template match="tei:textClass" mode="text">
  <textClass>
    <xsl:apply-templates select="@*, *" mode="text"/>
  </textClass>
</xsl:template>
778
<!-- shared by the text and corpus headers: re-create catRef with its attributes and child
     elements. The children are processed in the mode this rule was invoked in
     (#current); previously "text" was hard-coded, so an invocation in mode "corpus"
     silently switched modes. -->
<xsl:template match="tei:catRef" mode="text corpus">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates mode="#current" select="@* | *"/>
  </xsl:element>
</xsl:template>
784
<!-- attributes that are carried over unchanged into both headers;
     xml:id is copied only when it sits somewhere inside a classDecl -->
<xsl:template mode="text corpus"
  match="@status | @scheme | @target | @type | @xml:id[ancestor::tei:classDecl] | @xml:lang">
  <xsl:copy/>
</xsl:template>
788
<!-- both headers: a paragraph becomes a namespace-less <p>; its content is handled by mode "header-text" -->
<xsl:template match="tei:p" mode="text corpus">
  <p>
    <xsl:apply-templates mode="header-text"/>
  </p>
</xsl:template>
794
795
796 <!-- OPTIMIZATION has to take modes into account -->
bansp5e2d1c02022-03-10 04:51:40 +0100797 <!-- ************************** CORPUS header ******************* -->
<!-- Writes the corpus-level header to $target: an <idsHeader type="corpus"> filled by running
     mode "corpus" over the children of teiCorpus/teiHeader in $text.xml. -->
<xsl:template name="create_corpus_header">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- the corpus-level header.xml file;
       doctype-public="{$publicDoctypeI5}" / doctype-system="{$systemDoctypeI5}" are
       intentionally not set here (the original author found them useless) -->
  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes">

    <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="corpus" select="$text.xml/tei:teiCorpus/tei:teiHeader/tei:*"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
815
<!-- corpus header: re-create fileDesc without a namespace; children go through mode "corpus" -->
<xsl:template match="tei:fileDesc" mode="corpus">
  <fileDesc>
    <xsl:apply-templates mode="corpus"/>
  </fileDesc>
</xsl:template>
bansp9103aab2022-03-19 05:10:21 +0100821
bansp5e2d1c02022-03-10 04:51:40 +0100822
<!-- corpus header: a TEI title becomes <c.title>; attributes are filtered through mode
     "corpus", the content through mode "header-text" -->
<xsl:template match="tei:title" mode="corpus">
  <xsl:element name="c.title">
    <xsl:apply-templates select="@*" mode="corpus"/>
    <xsl:apply-templates mode="header-text"/>
  </xsl:element>
</xsl:template>
829
<!-- corpus header: titleStmt starts with a <korpusSigle> holding the global $corpusID,
     followed by the children processed in mode "corpus" -->
<xsl:template match="tei:titleStmt" mode="corpus">
  <titleStmt>
    <korpusSigle>{$corpusID}</korpusSigle>
    <xsl:apply-templates mode="corpus"/>
  </titleStmt>
</xsl:template>
838
<!-- corpus header: re-create publicationStmt without a namespace; children go through mode "corpus" -->
<xsl:template match="tei:publicationStmt" mode="corpus">
  <publicationStmt>
    <xsl:apply-templates mode="corpus"/>
  </publicationStmt>
</xsl:template>
844
<!-- corpus header: re-create availability; only attributes and child elements are processed -->
<xsl:template match="tei:availability" mode="corpus">
  <availability>
    <xsl:apply-templates select="@*, *" mode="corpus"/>
  </availability>
</xsl:template>
850
<!-- corpus header: re-create encodingDesc without a namespace; children go through mode "corpus" -->
<xsl:template match="tei:encodingDesc" mode="corpus">
  <encodingDesc>
    <xsl:apply-templates mode="corpus"/>
  </encodingDesc>
</xsl:template>
856
<!-- corpus header: structural taxonomy elements are re-created under their own local name
     (no namespace); attributes and child elements continue in mode "corpus" -->
<xsl:template mode="corpus"
  match="tei:classDecl | tei:taxonomy | tei:category | tei:taxonomy/tei:bibl">
  <xsl:element name="{local-name(.)}">
    <xsl:apply-templates select="@*, *" mode="corpus"/>
  </xsl:element>
</xsl:template>
862
<!-- corpus header: text-bearing leaf elements keep their local name (no namespace);
     attributes are filtered through mode "corpus", content goes through mode "header-text" -->
<xsl:template mode="corpus" match="tei:bibl/tei:title | tei:edition | tei:desc">
  <xsl:element name="{local-name(.)}">
    <xsl:apply-templates select="@*" mode="corpus"/>
    <xsl:apply-templates mode="header-text"/>
  </xsl:element>
</xsl:template>
869<!--
870 <xsl:template match="tei:textClass" mode="corpus">
871 <xsl:element name="{local-name()}">
872 <xsl:apply-templates mode="corpus" select="@* | *"/>
873 </xsl:element>
874 </xsl:template>
875
876 <xsl:template match="tei:catRef" mode="corpus">
877 <xsl:element name="{local-name()}">
878 <xsl:apply-templates mode="corpus" select="@* | *"/>
879 </xsl:element>
880 </xsl:template>
881-->
bansp5e2d1c02022-03-10 04:51:40 +0100882
883
884
885 <!-- this template can be called by the XSPEC test; TODO: find a way to call the main() template directly -->
886 <!-- I have not fully handled the param transmission, which would have to be kludged in just for the sake of XSpec,
887 because I'm disabling this for now, due to XSpec design issues; relevant links, a.o.:
888
889 https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
890 https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html
891
892 In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in their stylesheets. But I don't
893 want to be strangled by kludges at the beginning of work, I've already lost quite a bit of time on this investigation,
894 I will therefore "just code" and then can think of externalizing bits of templates if we want to play with tests. For now,
895 I don't want to have to handle context items in a special way inside variables, etc., because I'm not sure it's worth it.
896
897 -->
898 <!--<xsl:template name="test_full">
899 <xsl:param name="corpusID"/>
900 <xsl:param name="docID"/>
901 <xsl:param name="textID"/>
902 <xsl:call-template name="xsl:initial-template"/>
903 </xsl:template>-->
904
<!-- Returns start + length as read from a single @corresp value of the form
     "...(<id>,<start>,<length>)".
     Which @corresp is read depends on $node:
       tei:text / tei:body - last non-rejected seg of the last s of the last p
       tei:p               - last non-rejected seg of the last s
       tei:s               - last non-rejected seg
       anything else       - the node's own @corresp
     "Non-rejected" means: no @nkjp:rejected attribute, or one whose value is not 'true'.
     Exactly one @corresp must be found, otherwise a type error is raised.
     (The leftover debugging xsl:message for rejected segs has been removed.) -->
<xsl:function name="f:calc_content_length" as="xs:integer">
  <xsl:param name="node" as="node()"/>

  <xsl:variable name="last_corresp" as="attribute(corresp)">
    <xsl:choose>
      <xsl:when test="$node/self::tei:text or $node/self::tei:body">
        <xsl:sequence
          select="$node/descendant::tei:p[last()]/descendant::tei:s[last()]/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/attribute::corresp"/>
      </xsl:when>
      <xsl:when test="$node/self::tei:p">
        <xsl:sequence
          select="$node/descendant::tei:s[last()]/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/attribute::corresp"/>
      </xsl:when>
      <xsl:when test="$node/self::tei:s">
        <xsl:sequence
          select="$node/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/attribute::corresp"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:sequence select="$node/@corresp"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- "<start>,<length>": everything between the first comma and the closing parenthesis -->
  <xsl:variable name="numbers" as="xs:string"
    select="substring-after(substring-before($last_corresp, ')'), ',')"/>
  <xsl:sequence
    select="xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ','))"/>
</xsl:function>
939
<!-- Returns a pair of integers for $node: (start, start + length - 1 + skip_start-as-0/1).
     Callers in this function index the pair with $OFFSET_START / $OFFSET_END
     (declared elsewhere; presumably 1 and 2 - confirm).

     $skip_start = true() forces start to 0 and adds 1 to the second item, so the call
     effectively returns the node's own length as its "end".

     start, when not skipped:
       tei:text / tei:body - 0
       tei:p   - 0 for the first p; otherwise the skip_start "end" of the immediately preceding p
       tei:s   - skip_start "end" of the immediately preceding s in the same p (0 for the first s),
                 plus the start of the enclosing p
       tei:seg - the start number of its own @corresp, plus the start of the enclosing p
     NOTE(review): any other element leaves $start empty and raises a type error.

     length is summed from the "start + length" of @corresp values ("...(<id>,<start>,<length>)"),
     considering only segs without @nkjp:rejected or with a value other than 'true':
       tei:text / tei:body - for every p: last such seg of the last s
       tei:p               - last such seg of the last s
       tei:s               - last such seg
       anything else       - the node's own @corresp

     Changes against the previous revision: the debugging message flagged "REMOVE THIS" is
     gone; "sum(X, 1)" was replaced by plain X, because the second argument of fn:sum is only
     the value returned for an EMPTY sequence and never added 1 (the extra 1 is contributed by
     xs:integer($skip_start) in the return value, as the original TODO intended).
     A non-recursive variant for the p case used to live here as commented-out code; it was
     unmaintained and remains available in version control. -->
<xsl:function name="f:calc_offsets" as="xs:integer+">
  <xsl:param name="node" as="element()"/>
  <xsl:param name="skip_start" as="xs:boolean"/>

  <xsl:variable name="start" as="xs:integer">
    <xsl:choose>

      <xsl:when test="$skip_start or $node/self::tei:text or $node/self::tei:body">
        <xsl:sequence select="0"/>
      </xsl:when>

      <!-- handle p -->
      <xsl:when test="$node/self::tei:p">
        <xsl:variable name="my_pos" as="xs:integer" select="count($node/preceding-sibling::tei:p) + 1"/>
        <xsl:variable name="preceding" as="node()*"
          select="$node/ancestor::tei:body/tei:p[position() lt $my_pos]"/>

        <xsl:choose>
          <xsl:when test="count($preceding) eq 0">
            <xsl:sequence select="0"/>
          </xsl:when>
          <xsl:otherwise>
            <!-- BUG danger (original author): it is still open whether a "1" has to be added
                 after each p; that is currently handled via $skip_start in the return value -->
            <xsl:sequence select="f:calc_offsets($preceding[last()], true())[$OFFSET_END]"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:when>

      <!-- handle s: counted from the start of the current p (preceding s's),
           plus the offset contributed by the preceding p's -->
      <xsl:when test="$node/self::tei:s">
        <xsl:variable name="internal_start" as="xs:integer">
          <xsl:variable name="my_pos" as="xs:integer" select="count($node/preceding-sibling::tei:s) + 1"/>
          <xsl:variable name="preceding" as="node()*"
            select="$node/ancestor::tei:p[1]/tei:s[position() lt $my_pos]"/>

          <xsl:choose>
            <xsl:when test="count($preceding) eq 0">
              <xsl:sequence select="0"/>
            </xsl:when>
            <xsl:otherwise>
              <!-- CAREFUL (original author): whether a +1 belongs here is still undecided -->
              <xsl:sequence select="f:calc_offsets($preceding[last()], true())[$OFFSET_END]"/>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:variable>

        <xsl:variable name="external_start" as="xs:integer"
          select="f:calc_offsets($node/ancestor::tei:p[1], false())[$OFFSET_START]"/>

        <xsl:sequence select="$internal_start + $external_start"/>
      </xsl:when>

      <!-- handle seg: the s level is irrelevant, the local offset is directly available on @corresp -->
      <xsl:when test="$node/self::tei:seg">
        <xsl:variable name="numbers"
          select="substring-after(substring-before($node/@corresp, ')'), ',')"/>

        <xsl:variable name="internal_start" select="xs:integer(substring-before($numbers, ','))"
          as="xs:integer"/>
        <xsl:variable name="external_start" as="xs:integer"
          select="f:calc_offsets($node/ancestor::tei:p[1], false())[$OFFSET_START]"/>

        <xsl:if test="$node/self::tei:seg and count($node/@nkjp:rejected)">
          <xsl:message select="'numbers: ' || $numbers"/>
        </xsl:if>
        <xsl:sequence select="$internal_start + $external_start"/>
      </xsl:when>
    </xsl:choose>
  </xsl:variable>

  <!-- the @corresp attribute(s) whose "start + length" make up this node's length -->
  <xsl:variable name="end_corresps" as="attribute(corresp)+">
    <xsl:choose>
      <xsl:when test="$node/self::tei:text or $node/self::tei:body">
        <xsl:sequence
          select="$node/descendant::tei:p/descendant::tei:s[last()]/(descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'])[last()]/@corresp"/>
      </xsl:when>
      <xsl:when test="$node/self::tei:p">
        <xsl:sequence
          select="$node/descendant::tei:s[last()]/(descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'])[last()]/@corresp"/>
      </xsl:when>
      <xsl:when test="$node/self::tei:s">
        <xsl:sequence
          select="$node/descendant::tei:seg[count(@nkjp:rejected) eq 0 or @nkjp:rejected ne 'true'][last()]/attribute::corresp"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:sequence select="$node/@corresp"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <xsl:variable name="length" as="xs:integer"
    select="
      sum(for $c in $end_corresps
          return
            let $numbers := substring-after(substring-before($c, ')'), ',')
            return xs:integer(substring-before($numbers, ',')) + xs:integer(substring-after($numbers, ',')))"/>

  <!-- per-call trace, kept from the original -->
  <xsl:message select="local-name($node) || '[' || count($node/preceding-sibling::*[local-name() eq local-name($node)])+1 || '] length: ' || $length || ' skip_start: ' || $skip_start"/>

  <xsl:sequence select="$start, $start + $length -1 + xs:integer($skip_start)"/>
</xsl:function>
1121
1122
Akron9a8ee3e2022-01-31 13:51:49 +01001123</xsl:stylesheet>
Piotr Banski6a4a2522022-05-24 01:16:47 +02001124
Piotr Banskifdc858a2022-05-25 02:40:32 +02001125<!-- template for serializing maps in messages <xsl:message select="('map:',serialize($map, map{'method':'adaptive'}))"/> -->