blob: 05dec9d55f55724e58a1089cd944505cf944316b [file] [log] [blame]
Akron9a8ee3e2022-01-31 13:51:49 +01001<?xml version="1.0" encoding="UTF-8"?>
2<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
bansp5e2d1c02022-03-10 04:51:40 +01003 xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
4 xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
Piotr Banskif8af3a92022-05-23 03:20:10 +02005 xmlns:fn="http://www.w3.org/2005/xpath-functions"
Piotr Banski6a4a2522022-05-24 01:16:47 +02006 xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f fn map nkjp tei"
bansp5e2d1c02022-03-10 04:51:40 +01007 version="3.0" expand-text="yes">
Akron9a8ee3e2022-01-31 13:51:49 +01008
banspe726b4a2022-03-28 05:47:45 +02009
10<!-- PARAMETERS -->
bansp5e2d1c02022-03-10 04:51:40 +010011
<!-- Root directory of the NKJP input: a collection of text-level directories, one per text.
     The name of each such directory is used as the text ID. -->
<xsl:param name="sourceDir" as="xs:string" select="'test/resources/nkjp2korap_sample2'"/>

<!-- Directory below which the corpus/document/text/annotations hierarchy is created. -->
<xsl:param name="targetDir" as="xs:string" select="'test/output'"/>

<!-- Comma-separated list of text IDs that are left out of processing,
     for example: HellerPodgladanie,KOT
     Entries are matched by plain string identity and nothing more; pass '' to process every text.
     (This exists for testing only.) -->
<xsl:param name="skip_docID" as="xs:string"
  select="'HellerPodgladanie,IsakowiczZaleskiMoje,KolakowskiOco,MysliwskiKamien,WilkWilczy,ZycieWarszawy_Zycie'"/>

<!-- Debugging aid for structure.xml: when true, each span created for a seg
     carries an XML comment with the normalized text of that seg. -->
<xsl:param name="SHOW_ORTH_IN_STRUCT" select="true()" as="xs:boolean"/>
30
bansp8f6700b2022-03-27 05:27:09 +020031
bansp9dc10002022-05-17 22:33:34 +020032<!-- VARIABLES (= constants...) -->
banspe726b4a2022-03-28 05:47:45 +020033
<!-- Corpus-level and document-level IDs; both are the constant 'NKJP'. -->
<xsl:variable name="corpusID" static="yes" select="'NKJP'" as="xs:string"/>
<xsl:variable name="docID" static="yes" select="'NKJP'" as="xs:string"/>

<!-- Output directory of the corpus, including the trailing slash. -->
<xsl:variable name="targetCorpusDir_slashed" as="xs:string"
  select="concat($targetDir, '/', $corpusID, '/')"/>

<!-- System and public identifiers of the I5 DTD. -->
<xsl:variable name="systemDoctypeI5" static="yes" as="xs:string"
  select="'http://corpora.ids-mannheim.de/I5/DTD/i5.dtd'"/>
<xsl:variable name="publicDoctypeI5" static="yes" as="xs:string"
  select="'-//IDS//DTD I5 1.0//EN'"/>

<!-- Namespace of the KorAP-XML elements that this stylesheet generates. -->
<xsl:variable name="KorAP_namespace" static="yes" as="xs:string"
  select="'http://ids-mannheim.de/ns/KorAP'"/>

<!-- Version label written to the @version attribute of the generated layers. -->
<xsl:variable name="KorAP-XML_version" static="yes" as="xs:string" select="'KorAP-0.4'"/>

<!-- Query parameters appended to the collection URI; for their meaning see
     https://www.saxonica.com/documentation11/index.html#!sourcedocs/collections/collection-directories -->
<xsl:variable name="collection_params" static="yes" as="xs:string"
  select="'recurse=yes;validation=strip;select=text.xml;content-type=application/xml;on-error=warning;xinclude=yes'"/>

<!-- Every text.xml document found below $sourceDir; at least one is required. -->
<xsl:variable name="collection_of_text" as="document-node()+"
  select="collection(concat($sourceDir, '?', $collection_params))"/>
banspd1bf1db2022-04-04 02:16:24 +020057
banspe726b4a2022-03-28 05:47:45 +020058<!-- MODES -->
bansp5e2d1c02022-03-10 04:51:40 +010059
<!-- 'corpus' and 'text': a node without a matching template is skipped together with its subtree. -->
<xsl:mode on-no-match="deep-skip" name="corpus"/>
<xsl:mode on-no-match="deep-skip" name="text"/>
<!-- 'header-text': a node without a matching template contributes only its text content. -->
<xsl:mode on-no-match="text-only-copy" name="header-text"/>
<!-- The unnamed mode declares all accumulators as applicable. -->
<xsl:mode use-accumulators="#all"/>
Piotr Banski09096ee2022-05-25 13:41:03 +020064
<!-- Running counter over the TEI elements at or below tei:text.
     The value is always a single-entry map: its key is the xml:id of the element entered
     most recently ('' when that element has no xml:id), its value is the number of such
     elements entered so far, so the first one gets 1. -->
<xsl:accumulator name="element-index" initial-value="map{'':0}" as="map(xs:string, xs:integer)">
  <xsl:accumulator-rule phase="start" match="tei:*[ancestor-or-self::tei:text]">
    <!-- the map never holds more than one entry, so the lookup yields the previous count -->
    <xsl:variable name="so_far" as="xs:integer" select="$value?*"/>
    <xsl:sequence select="map:entry(string(@xml:id), $so_far + 1)"/>
  </xsl:accumulator-rule>
</xsl:accumulator>
Piotr Banskifdc858a2022-05-25 02:40:32 +020072
<!-- Offsets of structural elements, collected as an ever-growing sequence of single-entry maps
     of the form  xml:id : (integer, integer).
     An entry added when p or s opens holds (offset reached so far, start offset of the element);
     an entry added when an element closes holds (start offset, end offset).
     Every rule reads the second integer of the most recent entry as the offset reached so far. -->
<xsl:accumulator name="elem-offset-seq" initial-value="(map{'null':(0,0)})" as="map(xs:string, item()+)+">

  <!-- A paragraph of the body opens: it starts one position after the previous material,
       unless nothing has been seen yet (offset 0). -->
  <xsl:accumulator-rule phase="start" match="tei:body/tei:p">
    <xsl:variable name="reached" as="xs:integer" select="$value[last()]?*[2]"/>
    <xsl:variable name="starts_at" as="xs:integer"
      select="if ($reached eq 0) then $reached else $reached + 1"/>
    <xsl:sequence select="$value, map:entry(string(@xml:id), ($reached, $starts_at))"/>
  </xsl:accumulator-rule>

  <!-- A sentence opens: same as above, but whether a position is added is decided by
       f:is_preceded_by_ws(), called with suppress_initial set to true. -->
  <xsl:accumulator-rule phase="start" match="tei:s">
    <xsl:variable name="reached" as="xs:integer" select="$value[last()]?*[2]"/>
    <xsl:variable name="starts_at" as="xs:integer"
      select="if ($reached eq 0) then $reached else $reached + xs:integer(f:is_preceded_by_ws(., true()))"/>
    <xsl:sequence select="$value, map:entry(string(@xml:id), ($reached, $starts_at))"/>
  </xsl:accumulator-rule>

  <!-- A w inside a non-rejected seg closes: record the span under the xml:id of the seg.
       The span begins after optional whitespace and is as long as the string value of the w. -->
  <xsl:accumulator-rule phase="end" match="tei:w[parent::tei:seg[count(@nkjp:rejected) eq 0]]">
    <xsl:variable name="reached" as="xs:integer" select="$value[last()]?*[2]"/>
    <xsl:variable name="starts_at" as="xs:integer"
      select="$reached + xs:integer(f:is_preceded_by_ws(parent::tei:seg, true()))"/>
    <xsl:sequence select="
      $value,
      map:entry(string(parent::tei:seg/@xml:id), ($starts_at, $starts_at + string-length()))"/>
  </xsl:accumulator-rule>

  <!-- A sentence or a paragraph of the body closes: look up the entry written under the same
       xml:id when the element opened, take its start offset, and pair it with the offset reached. -->
  <xsl:accumulator-rule phase="end" match="tei:s | tei:body/tei:p">
    <xsl:variable name="reached" as="xs:integer" select="$value[last()]?*[2]"/>
    <xsl:variable name="opening_entry" as="xs:integer+" select="map:find($value, string(@xml:id))(1)"/>
    <xsl:variable name="starts_at" as="xs:integer" select="$opening_entry[2]"/>
    <xsl:sequence select="$value, map:entry(string(@xml:id), ($starts_at, $reached))"/>
  </xsl:accumulator-rule>

  <!-- body or text closes: the span runs from 0 to the offset reached. -->
  <xsl:accumulator-rule phase="end" match="tei:body | tei:text">
    <xsl:variable name="reached" as="xs:integer" select="$value[last()]?*[2]"/>
    <xsl:sequence select="$value, map:entry(string(@xml:id), (0, $reached))"/>
  </xsl:accumulator-rule>
</xsl:accumulator>
bansp5e2d1c02022-03-10 04:51:40 +0100181
banspe726b4a2022-03-28 05:47:45 +0200182 <!-- FUNCTIONS -->
183
<!-- Nesting level of $node: the number of elements on its ancestor-or-self axis,
     leaving out any element whose local name is TEI or teiCorpus. -->
<xsl:function name="f:compute_nesting" as="xs:integer">
  <xsl:param name="node" as="element()"/>
  <xsl:sequence
    select="count($node/ancestor-or-self::*[not(local-name() = ('TEI', 'teiCorpus'))])"/>
</xsl:function>
191
<!-- Tells whether a whitespace character is counted in front of $node.
     Only seg, s and p (by local name) are accepted; anything else terminates the transformation.

     seg: false when it has an @nkjp:nps attribute; false when it is the very first non-rejected
          seg (inside a first-sibling s and a first-sibling p, with no non-rejected seg before it);
          false when $suppress_initial is set and it is the first non-rejected seg of its sentence
          (compared by xml:id); true otherwise.
     s:   true when it has a preceding sibling s; otherwise true only when $suppress_initial is
          not set and its nearest p ancestor has a preceding sibling p.
     p:   true when it has a preceding sibling p. -->
<xsl:function name="f:is_preceded_by_ws" as="xs:boolean">
  <xsl:param name="node" as="element()"/>
  <xsl:param name="suppress_initial" as="xs:boolean"/>

  <xsl:variable name="kind" as="xs:string" select="local-name($node)"/>

  <xsl:choose>
    <xsl:when test="$kind eq 'seg'">
      <!-- the tests are tried in this order; the search over preceding segs is written last
           in its test, after the two cheap checks on the ancestors -->
      <xsl:sequence select="
        if ($node/@nkjp:nps)
        then false()
        else if ($node/ancestor::tei:s[count(preceding-sibling::tei:s) eq 0] and $node/ancestor::tei:p[count(preceding-sibling::tei:p) eq 0] and not($node/preceding::tei:seg[count(@nkjp:rejected) eq 0]))
        then false()
        else if ($suppress_initial and $node/ancestor::tei:s/descendant::tei:seg[count(@nkjp:rejected) eq 0][1]/@xml:id eq $node/@xml:id)
        then false()
        else true()"/>
    </xsl:when>
    <xsl:when test="$kind eq 's'">
      <xsl:sequence select="
        if (exists($node/preceding-sibling::tei:s))
        then true()
        else not($suppress_initial) and exists($node/ancestor::tei:p[1]/preceding-sibling::tei:p)"/>
    </xsl:when>
    <xsl:when test="$kind eq 'p'">
      <xsl:sequence select="exists($node/preceding-sibling::tei:p)"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:message terminate="yes"
        select="'Wrong argument passed to f:is_preceded_by_ws(): ' || local-name($node) || ' Only p, s, seg are allowed.'"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
banspd1bf1db2022-04-04 02:16:24 +0200240
banspb5992532022-03-29 15:55:44 +0200241<!-- UTILITY TEMPLATES -->
242
<!-- Two kinds of nodes produce no output in any mode:
     * @default attributes, to get rid of an attribute that is inserted automatically throughout;
     * tei:w elements: NKJP-SGJP has apparently given up on standoff representation by adding w
       everywhere; their content is reached from the level of seg, so w needs no processing
       of its own. -->
<xsl:template mode="#all" match="@default | tei:w"/>
bansp8f6700b2022-03-27 05:27:09 +0200249
<!-- tei:choice is passed through in the 'struct' and 'morpho' modes: processing continues, in the
     same mode, with the seg descendants that have no @nkjp:rejected attribute. Any element in
     between (such as a potential paren) is thereby skipped, and rejected segs are filtered out. -->
<xsl:template match="tei:choice" mode="struct morpho">
  <xsl:apply-templates mode="#current" select="descendant::tei:seg[empty(@nkjp:rejected)]"/>
</xsl:template>
banspb5992532022-03-29 15:55:44 +0200257
258 <!-- MAIN PROCESSING -->
259
<!-- Entry point of the transformation.
     Writes the corpus header once, then walks over every text.xml in $collection_of_text and
     converts the text together with its annotation files, which are expected in the same
     directory. Texts whose ID is listed in $skip_docID are left out. -->
<xsl:template name="xsl:initial-template">
  <!-- whitespace around the listed IDs is removed, so that 'A, B' skips both A and B -->
  <xsl:variable name="IDs_to_skip" as="xs:string*"
    select="tokenize($skip_docID, ',') ! replace(., '^\s+|\s+$', '')"/>

  <!-- The corpus header is created only once. Any NKJP document will do for that purpose,
       because the main corpus header can (should) be reached from each of them,
       so the first document of the collection is used. -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param name="text.xml" select="$collection_of_text[1]" as="document-node()"/>
    <xsl:with-param name="target" select="$targetCorpusDir_slashed || 'header.xml'" as="xs:string"/>
  </xsl:call-template>

  <xsl:for-each select="$collection_of_text">
    <!-- only the final '/text.xml' is cut off; the rest of the URI is left alone -->
    <xsl:variable name="my_dir" as="xs:string" select="replace(base-uri(), '/text\.xml$', '')"/>
    <!-- the name of the directory holding text.xml is the text ID -->
    <xsl:variable name="my_textID" as="xs:string" select="tokenize($my_dir, '/')[last()]"/>
    <xsl:variable name="ann_morphosyntax.uri" select="$my_dir || '/ann_morphosyntax.xml'" as="xs:string"/>
    <xsl:variable name="ann_segmentation.uri" select="$my_dir || '/ann_segmentation.xml'" as="xs:string"/>
    <xsl:variable name="ann_named.uri" select="$my_dir || '/ann_named.xml'" as="xs:string"/>
    <xsl:variable name="ann_groups.uri" select="$my_dir || '/ann_groups.xml'" as="xs:string"/>
    <xsl:variable name="ann_words.uri" select="$my_dir || '/ann_words.xml'" as="xs:string"/>

    <xsl:choose>
      <!-- a utility step, for when some texts are to be ignored for any reason
           (debugging, selective update) -->
      <xsl:when test="$my_textID = $IDs_to_skip"/>
      <xsl:otherwise>
        <xsl:call-template name="process_single_sample">
          <xsl:with-param name="text.xml" as="document-node()" select="."/>
          <!-- morphosyntax and segmentation are mandatory: doc() fails if either is missing -->
          <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
            select="doc($ann_morphosyntax.uri)"/>
          <xsl:with-param name="ann_segmentation.xml" as="document-node()"
            select="doc($ann_segmentation.uri)"/>
          <xsl:with-param name="my_textID" select="$my_textID" as="xs:string"/>
          <!-- the remaining layers are optional and are passed as empty when not available -->
          <xsl:with-param name="ann_named.xml" as="document-node()*"
            select="if (fn:doc-available($ann_named.uri)) then doc($ann_named.uri) else ()"/>
          <xsl:with-param name="ann_groups.xml" as="document-node()*"
            select="if (fn:doc-available($ann_groups.uri)) then doc($ann_groups.uri) else ()"/>
          <xsl:with-param name="ann_words.xml" as="document-node()*"
            select="if (fn:doc-available($ann_words.uri)) then doc($ann_words.uri) else ()"/>
        </xsl:call-template>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:for-each>
</xsl:template>
304
<!-- Converts a single text: calls, in this order, the templates that create data.xml,
     struct/structure.xml, nkjp/morpho.xml and header.xml below the target directory of the text,
     followed by nkjp/named.xml and nkjp/groups.xml when the matching annotations were supplied. -->
<xsl:template name="process_single_sample">
  <xsl:param name="text.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <!-- a missing text ID should never happen; if it does, this conspicuous default is used -->
  <xsl:param name="my_textID" select="'0-BAD_textID'" as="xs:string"/>
  <!-- optional layers; each of them may be empty -->
  <xsl:param name="ann_named.xml" as="document-node()*"/>
  <xsl:param name="ann_groups.xml" as="document-node()*"/>
  <xsl:param name="ann_words.xml" as="document-node()*"/>

  <!-- directory that receives all the files of this text -->
  <xsl:variable name="text_dir" as="xs:string"
    select="concat($targetCorpusDir_slashed, $docID, '/', $my_textID)"/>

  <!-- the value that appears as @docid in the text and data layers -->
  <xsl:variable name="text_sigle" as="xs:string"
    select="concat($corpusID, '_', $docID, '.', $my_textID)"/>

  <xsl:call-template name="create_data">
    <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$text_sigle"/>
    <xsl:with-param name="target" as="xs:string" select="concat($text_dir, '/data.xml')"/>
  </xsl:call-template>

  <xsl:call-template name="create_struct">
    <xsl:with-param name="compoundID" as="xs:string" select="$text_sigle"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
    <xsl:with-param name="target" as="xs:string" select="concat($text_dir, '/struct/structure.xml')"/>
  </xsl:call-template>

  <xsl:call-template name="create_morpho">
    <xsl:with-param name="compoundID" as="xs:string" select="$text_sigle"/>
    <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
    <xsl:with-param name="ann_morphosyntax.xml" as="document-node()" select="$ann_morphosyntax.xml"/>
    <xsl:with-param name="target" as="xs:string" select="concat($text_dir, '/nkjp/morpho.xml')"/>
  </xsl:call-template>

  <xsl:call-template name="create_text_header">
    <xsl:with-param name="text.xml" as="document-node()" select="$text.xml"/>
    <xsl:with-param name="compoundID" as="xs:string" select="$text_sigle"/>
    <xsl:with-param name="target" as="xs:string" select="concat($text_dir, '/header.xml')"/>
  </xsl:call-template>

  <!-- named entities: only when ann_named.xml was found -->
  <xsl:if test="exists($ann_named.xml)">
    <xsl:call-template name="create_named">
      <xsl:with-param name="compoundID" as="xs:string" select="$text_sigle"/>
      <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
      <xsl:with-param name="ann_morphosyntax.xml" as="document-node()" select="$ann_morphosyntax.xml"/>
      <xsl:with-param name="ann_named.xml" as="document-node()" select="$ann_named.xml"/>
      <xsl:with-param name="target" as="xs:string" select="concat($text_dir, '/nkjp/named.xml')"/>
    </xsl:call-template>
  </xsl:if>

  <!-- syntactic groups: only when both ann_words.xml and ann_groups.xml were found -->
  <xsl:if test="exists($ann_words.xml) and exists($ann_groups.xml)">
    <xsl:call-template name="create_groups">
      <xsl:with-param name="compoundID" as="xs:string" select="$text_sigle"/>
      <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="$ann_segmentation.xml"/>
      <xsl:with-param name="ann_morphosyntax.xml" as="document-node()" select="$ann_morphosyntax.xml"/>
      <xsl:with-param name="ann_words.xml" as="document-node()" select="$ann_words.xml"/>
      <xsl:with-param name="ann_groups.xml" as="document-node()" select="$ann_groups.xml"/>
      <xsl:with-param name="target" as="xs:string" select="concat($text_dir, '/nkjp/groups.xml')"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
379
380 <!-- ************************** data.xml ******************* -->
381
<!-- Creates the data.xml file of one text at $target.
     ann_segmentation.xml : the segmentation layer from which the raw text is rebuilt
     compoundID           : the value written to @docid
     target               : URI of the file to write
     The text is the concatenation of the w content of every non-rejected seg, each preceded
     by a single space whenever f:is_preceded_by_ws() (with suppress_initial = false) says so. -->
<xsl:template name="create_data">
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- xpath-default-namespace is not an attribute value template, so a reference to
       $KorAP_namespace in curly braces would be taken literally; the URI is spelled out -->
  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">

    <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>

      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <!-- one separator ('' or ' ') plus the w content per seg; at least one item is required -->
        <xsl:variable name="content" as="xs:string+">
          <xsl:for-each select="$ann_segmentation.xml/tei:teiCorpus/tei:TEI/tei:text/tei:body/tei:p/tei:s//tei:seg[count(@nkjp:rejected) eq 0]">
            <xsl:sequence select="
              if (f:is_preceded_by_ws(., false())) then
                ' '
              else
                '', ./tei:w"/>
          </xsl:for-each>
        </xsl:variable>
        <xsl:value-of select="string-join($content)"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
412
bansp5f841732022-03-16 06:27:31 +0100413 <!-- ************************** struct ******************* -->
414
<!-- Creates the structure layer of one text at $target: a layer element carrying @docid and
     @version, with a spanList that is filled by processing every tei:text of the
     segmentation document in the 'struct' mode. -->
<xsl:template name="create_struct">
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element namespace="{$KorAP_namespace}" name="layer">
      <xsl:attribute name="docid">
        <xsl:value-of select="$compoundID"/>
      </xsl:attribute>
      <xsl:attribute name="version">
        <xsl:value-of select="$KorAP-XML_version"/>
      </xsl:attribute>
      <xsl:element namespace="{$KorAP_namespace}" name="spanList">
        <xsl:apply-templates mode="struct" select="$ann_segmentation.xml//tei:text"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
433
<!-- Turns a TEI element into a KorAP span and then processes its children in the same mode,
     so that the spans of nested elements follow the span of their parent as siblings.
     @id   : 's' plus the value found for this element in the 'element-index' accumulator
     @from : first offset of the entry found under the xml:id of this element in the last map
             of the 'elem-offset-seq' accumulator, read after the element has been traversed
     @to   : second offset of that entry
     @l    : nesting level, from f:compute_nesting()
     The template fails if no entry with at least one offset exists for the element. -->
<xsl:template match="tei:*" mode="struct">
  <xsl:variable name="offsets" as="xs:integer+">
    <xsl:sequence select="map:get(fn:accumulator-after('elem-offset-seq')[last()], string(@xml:id))"/>
  </xsl:variable>
  <xsl:variable name="my_index" select="map:get(fn:accumulator-before('element-index'),string(@xml:id))" as="xs:integer"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'s' || $my_index"/>
    <xsl:attribute name="from" select="$offsets[1]"/>
    <xsl:attribute name="to" select="$offsets[2]"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <!-- debugging aid: show the text of a seg as a comment inside its span -->
    <xsl:if test="local-name() eq 'seg' and $SHOW_ORTH_IN_STRUCT">
      <xsl:comment><xsl:value-of select="fn:normalize-space(.)"/></xsl:comment>
    </xsl:if>
    <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
      <!-- 'struct' here, as opposed to 'lex' in the morpho layer -->
      <xsl:attribute name="type" select="'struct'"/>
      <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="name" select="'name'"/>
        <xsl:value-of select="local-name()"/>
      </xsl:element>
      <!-- the attributes of the source element, if any, become a nested feature structure -->
      <xsl:if test="exists(@*)">
        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
          <xsl:attribute name="name" select="'attr'"/>
          <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="type" select="'attr'"/>
            <xsl:for-each select="@*">
              <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                <xsl:attribute name="name" select="local-name(.)"/>
                <xsl:value-of select="."/>
              </xsl:element>
            </xsl:for-each>
          </xsl:element>
        </xsl:element>
      </xsl:if>
    </xsl:element>
  </xsl:element>
  <xsl:apply-templates mode="struct"/>
</xsl:template>
475
476 <!-- ************************** morpho ******************* -->
477
<!-- Creates the morphological layer of one text at $target: a layer element carrying @docid
     and @version, with a spanList that is filled by processing every tei:text of the
     segmentation document in the 'morpho' mode. The morphosyntax document travels along
     as a tunnel parameter, so that the templates further down can look things up in it. -->
<xsl:template name="create_morpho">
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_segmentation.xml" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element namespace="{$KorAP_namespace}" name="layer">
      <xsl:attribute name="docid">
        <xsl:value-of select="$compoundID"/>
      </xsl:attribute>
      <xsl:attribute name="version">
        <xsl:value-of select="$KorAP-XML_version"/>
      </xsl:attribute>
      <xsl:element namespace="{$KorAP_namespace}" name="spanList">
        <xsl:apply-templates mode="morpho" select="$ann_segmentation.xml//tei:text">
          <xsl:with-param name="ann_morphosyntax.xml" tunnel="yes" as="document-node()"
            select="$ann_morphosyntax.xml"/>
        </xsl:apply-templates>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
499
<!-- Index of annotation-level <seg> elements by the ID they point at, i.e. the fragment part
     (after '#') of their @corresp. Used by the tei:seg/morpho template below so that each token
     finds its annotation by key lookup instead of scanning the whole annotation document. -->
<xsl:key name="morph-seg-by-target-id" match="tei:seg" use="substring-after(@corresp, '#')"/>

<!-- Emits one KorAP <span> for the current segmentation-level <seg>.
     Tunnel parameter:
       ann_morphosyntax.xml  the document in which the <seg> whose @corresp fragment equals
                             the current @xml:id is looked up.
     Output: a span in $KorAP_namespace with @id ('m' + element index), @from/@to (first two
     items of the offset sequence recorded for this @xml:id), @l (from f:compute_nesting) and a
     nested TEI fs/f structure carrying lemma, pos, msd (only when non-empty) and join='left'
     (only when the annotation has an 'nps' feature).
     The variables are typed strictly (as="node()", as="xs:string"), so a missing or duplicated
     annotation for a token raises a type error instead of producing a partial span. -->
<xsl:template match="tei:seg" mode="morpho">
    <xsl:param name="ann_morphosyntax.xml" as="document-node()" tunnel="yes"/>
    <!-- The lookup chain is deliberately kept as separate variables, so that each intermediate
         value can be inspected if anything goes wrong; compacting it would gain little
         efficiency and cost readability. -->
    <xsl:variable name="offsets" as="xs:integer+">
        <!-- the last item of the accumulator value is a map from @xml:id to the offset sequence -->
        <xsl:sequence select="map:get(fn:accumulator-after('elem-offset-seq')[last()], string(@xml:id))"/>
    </xsl:variable>
    <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
    <!-- keyed lookup; selects the same node as $ann_morphosyntax.xml//tei:seg[substring-after(@corresp,'#') eq $my_id] -->
    <xsl:variable name="my_morph-seg" as="node()" select="key('morph-seg-by-target-id', $my_id, $ann_morphosyntax.xml)"/>
    <xsl:variable name="my_disamb" select="$my_morph-seg//tei:fs/tei:f[@name eq 'disamb']" as="node()"/>
    <!-- ID of the interpretation selected by the 'choice' feature of the disambiguation -->
    <xsl:variable name="my_choice-id" select="substring-after($my_disamb//tei:f[@name eq 'choice']/@fVal,'#')" as="xs:string"/>
    <!-- the 'lex' feature structure that contains the selected interpretation -->
    <xsl:variable name="my_choice-lex" select="$my_morph-seg//tei:f[@name eq 'interps']/tei:fs[@type eq 'lex'][descendant::tei:symbol[@xml:id eq $my_choice-id]]" as="node()"/>
    <xsl:variable name="chosen-msd" as="xs:string" select="$my_choice-lex/descendant::tei:symbol[@xml:id eq $my_choice-id]/@value"/>
    <xsl:variable name="my_index" select="map:get(fn:accumulator-before('element-index'),string(@xml:id))" as="xs:integer"/>

    <xsl:element name="span" namespace="{$KorAP_namespace}">
        <xsl:attribute name="id" select="'m' || $my_index"/>
        <xsl:attribute name="from" select="$offsets[1]"/>
        <xsl:attribute name="to" select="$offsets[2]"/>
        <xsl:attribute name="l" select="f:compute_nesting(.)"/>
        <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="type" select="'lex'"/>
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                <xsl:attribute name="name" select="'lex'"/>
                <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
                    <!-- the orthographic form is written out as an XML comment, for human readers only -->
                    <xsl:comment select="$my_morph-seg//tei:fs/tei:f[@name eq 'orth']/tei:string"/>

                    <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                        <xsl:attribute name="name" select="'lemma'"/>
                        <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'base']/tei:string"/>
                    </xsl:element>
                    <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                        <xsl:attribute name="name" select="'pos'"/>
                        <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'ctag']/tei:symbol/@value"/>
                    </xsl:element>
                    <!-- msd is emitted only when the selected interpretation has a non-empty value -->
                    <xsl:if test="string-length($chosen-msd)">
                        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                            <xsl:attribute name="name" select="'msd'"/>
                            <xsl:value-of select="$chosen-msd"/>
                        </xsl:element>
                    </xsl:if>
                    <!-- the mere presence of an 'nps' feature triggers join='left' -->
                    <xsl:if test="$my_morph-seg//tei:fs/tei:f[@name eq 'nps']">
                        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                            <xsl:attribute name="name" select="'join'"/>
                            <xsl:value-of select="'left'"/>
                        </xsl:element>
                    </xsl:if>
                </xsl:element>
            </xsl:element>
        </xsl:element>
    </xsl:element>
</xsl:template>
banspe726b4a2022-03-28 05:47:45 +0200555
Piotr Banskic5950ce2022-05-27 15:07:08 +0200556 <!-- ************************** named entities ******************* -->
557
<!-- Writes the named-entity layer document to $target.
     Parameters:
       compoundID             value of the layer's @docid
       ann_segmentation.xml,
       ann_morphosyntax.xml,
       ann_named.xml          accepted but not used yet (see the disabled call below)
       target                 href of the result document
     Currently a stub: the spanList element is emitted empty. -->
<xsl:template name="create_named">
    <xsl:param name="compoundID" as="xs:string"/>
    <xsl:param name="ann_segmentation.xml" as="document-node()"/>
    <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
    <xsl:param name="ann_named.xml" as="document-node()"/>
    <xsl:param name="target" as="xs:string"/>

    <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
        xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
        <!-- associates the output with the span.rng RELAX NG schema -->
        <xsl:processing-instruction name="xml-model"
            select="'href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;'"/>
        <xsl:element name="layer" namespace="{$KorAP_namespace}">
            <xsl:attribute name="docid">
                <xsl:value-of select="$compoundID"/>
            </xsl:attribute>
            <xsl:attribute name="version">
                <xsl:value-of select="$KorAP-XML_version"/>
            </xsl:attribute>

            <xsl:element name="spanList" namespace="{$KorAP_namespace}">
                <!-- disabled for now; this is the call that would populate the span list:
                <xsl:apply-templates select="$ann_segmentation.xml//tei:text" mode="named">
                    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()" tunnel="yes"/>
                    <xsl:with-param name="ann_named.xml" select="$ann_named.xml" as="document-node()" tunnel="yes"/>
                </xsl:apply-templates>-->
            </xsl:element>
        </xsl:element>
    </xsl:result-document>
</xsl:template>
581
582 <!-- ************************** syntactic chunks ******************* -->
583
<!-- Writes the syntactic-groups layer document to $target.
     Parameters:
       compoundID             value of the layer's @docid
       ann_segmentation.xml,
       ann_morphosyntax.xml,
       ann_words.xml,
       ann_groups.xml         accepted but not used yet (see the disabled call below)
       target                 href of the result document
     Currently a stub: the spanList element is emitted empty. -->
<xsl:template name="create_groups">
    <xsl:param name="compoundID" as="xs:string"/>
    <xsl:param name="ann_segmentation.xml" as="document-node()"/>
    <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
    <xsl:param name="ann_words.xml" as="document-node()"/>
    <xsl:param name="ann_groups.xml" as="document-node()"/>
    <xsl:param name="target" as="xs:string"/>

    <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
        xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
        <!-- associates the output with the span.rng RELAX NG schema -->
        <xsl:processing-instruction name="xml-model"
            select="'href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;'"/>
        <xsl:element name="layer" namespace="{$KorAP_namespace}">
            <xsl:attribute name="docid">
                <xsl:value-of select="$compoundID"/>
            </xsl:attribute>
            <xsl:attribute name="version">
                <xsl:value-of select="$KorAP-XML_version"/>
            </xsl:attribute>

            <xsl:element name="spanList" namespace="{$KorAP_namespace}">
                <!-- disabled for now; this is the call that would populate the span list:
                <xsl:apply-templates select="$ann_segmentation.xml//tei:text" mode="groups">
                    <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()" tunnel="yes"/>
                    <xsl:with-param name="ann_words.xml" select="$ann_words.xml" as="document-node()" tunnel="yes"/>
                    <xsl:with-param name="ann_groups.xml" select="$ann_groups.xml" as="document-node()" tunnel="yes"/>
                </xsl:apply-templates>-->
            </xsl:element>
        </xsl:element>
    </xsl:result-document>
</xsl:template>
609
bansp5f841732022-03-16 06:27:31 +0100610 <!-- ************************** TEXT header ******************* -->
611
<!-- Writes the text-level header.xml to $target.
     Parameters:
       text.xml     document whose TEI/teiHeader children are transformed in mode "text"
       compoundID   passed on as a tunnel parameter to the "text" mode templates
       target       href of the result document -->
<xsl:template name="create_text_header">
    <xsl:param name="text.xml" as="document-node()"/>
    <xsl:param name="compoundID" as="xs:string"/>
    <xsl:param name="target" as="xs:string"/>

    <!-- every TEI-namespace child of the text-level teiHeader -->
    <xsl:variable name="header-parts" as="element()*"
        select="$text.xml//tei:TEI/tei:teiHeader/tei:*"/>

    <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
        xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">

        <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
            <xsl:apply-templates select="$header-parts" mode="text">
                <xsl:with-param name="compoundID" select="$compoundID" as="xs:string" tunnel="yes"/>
            </xsl:apply-templates>
        </idsHeader>
    </xsl:result-document>
</xsl:template>
628
<!-- Text header: re-creates fileDesc (no namespace) and processes its children in the same mode. -->
<xsl:template match="tei:fileDesc" mode="text">
    <fileDesc>
        <xsl:apply-templates select="node()" mode="#current"/>
    </fileDesc>
</xsl:template>
634
<!-- Text header: maps tei:title to t.title.
     The content is processed in mode "header-text", as the tei:p template and the corpus-level
     tei:title template do; a bare xsl:apply-templates would fall back to the default mode and
     leave the header modes. -->
<xsl:template match="tei:title" mode="text">
    <t.title>
        <xsl:apply-templates mode="header-text"/>
    </t.title>
</xsl:template>
640
<!-- Text header: re-creates titleStmt, inserting a textSigle element that holds the tunnelled
     $compoundID before the transformed children. -->
<xsl:template match="tei:titleStmt" mode="text">
    <xsl:param name="compoundID" as="xs:string" tunnel="yes"/>
    <xsl:element name="titleStmt">
        <xsl:element name="textSigle">
            <xsl:value-of select="$compoundID"/>
        </xsl:element>
        <xsl:apply-templates select="node()" mode="#current"/>
    </xsl:element>
</xsl:template>
650
<!-- Text header: re-creates publicationStmt (no namespace) and processes its children in the same mode. -->
<xsl:template match="tei:publicationStmt" mode="text">
    <publicationStmt>
        <xsl:apply-templates select="node()" mode="#current"/>
    </publicationStmt>
</xsl:template>
656
<!-- Text header: re-creates availability; only attributes and child elements are processed,
     so text-node children are not passed on. -->
<xsl:template match="tei:availability" mode="text">
    <availability>
        <xsl:apply-templates select="(@*, *)" mode="#current"/>
    </availability>
</xsl:template>
662
<!-- Text header: re-creates profileDesc (no namespace) and processes its children in the same mode. -->
<xsl:template match="tei:profileDesc" mode="text">
    <profileDesc>
        <xsl:apply-templates select="node()" mode="#current"/>
    </profileDesc>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100668
<!-- Text header: re-creates textClass; only attributes and child elements are processed,
     so text-node children are not passed on. -->
<xsl:template match="tei:textClass" mode="text">
    <textClass>
        <xsl:apply-templates select="(@*, *)" mode="#current"/>
    </textClass>
</xsl:template>
674
<!-- Shared by the text and corpus headers: re-creates catRef with its attributes and child elements.
     The content is processed in the mode the template was invoked in (#current) rather than
     always in "text", so that a catRef reached in "corpus" mode stays in "corpus" mode. -->
<xsl:template match="tei:catRef" mode="text corpus">
    <xsl:element name="{local-name()}">
        <xsl:apply-templates mode="#current" select="@* | *"/>
    </xsl:element>
</xsl:template>
680
<!-- Shared by the text and corpus headers: the listed attributes are copied to the output unchanged;
     xml:id is copied only when it sits somewhere inside a tei:classDecl. -->
<xsl:template mode="text corpus"
    match="@xml:id[ancestor::tei:classDecl] | @xml:lang | @status | @scheme | @target | @type">
    <xsl:copy/>
</xsl:template>
684
<!-- Shared by the text and corpus headers: re-creates p (no namespace) and hands its children over
     to mode "header-text". -->
<xsl:template match="tei:p" mode="text corpus">
    <p>
        <xsl:apply-templates select="node()" mode="header-text"/>
    </p>
</xsl:template>
690
691
692 <!-- OPTIMIZATION has to take modes into account -->
bansp5e2d1c02022-03-10 04:51:40 +0100693 <!-- ************************** CORPUS header ******************* -->
<!-- Writes the corpus-level header.xml to $target.
     Parameters:
       text.xml   document whose teiCorpus/teiHeader children are transformed in mode "corpus"
       target     href of the result document -->
<xsl:template name="create_corpus_header">
    <xsl:param name="text.xml" as="document-node()"/>
    <xsl:param name="target" as="xs:string"/>

    <!-- every TEI-namespace child of the corpus-level teiHeader -->
    <xsl:variable name="header-parts" as="element()*"
        select="$text.xml/tei:teiCorpus/tei:teiHeader/tei:*"/>

    <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes">

        <!--doctype-public="{$publicDoctypeI5}"
            doctype-system="{$systemDoctypeI5}">
            these are, sadly, useless
        -->

        <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
            <xsl:apply-templates select="$header-parts" mode="corpus"/>
        </idsHeader>
    </xsl:result-document>
</xsl:template>
711
<!-- Corpus header: re-creates fileDesc (no namespace) and processes its children in the same mode. -->
<xsl:template match="tei:fileDesc" mode="corpus">
    <fileDesc>
        <xsl:apply-templates select="node()" mode="#current"/>
    </fileDesc>
</xsl:template>
bansp9103aab2022-03-19 05:10:21 +0100717
bansp5e2d1c02022-03-10 04:51:40 +0100718
<!-- Corpus header: maps tei:title to c.title; attributes go through mode "corpus",
     the content through mode "header-text". -->
<xsl:template match="tei:title" mode="corpus">
    <xsl:element name="c.title">
        <xsl:apply-templates select="@*" mode="#current"/>
        <xsl:apply-templates select="node()" mode="header-text"/>
    </xsl:element>
</xsl:template>
725
<!-- Corpus header: re-creates titleStmt, inserting a korpusSigle element that holds the global
     $corpusID before the transformed children. -->
<xsl:template match="tei:titleStmt" mode="corpus">
    <xsl:element name="titleStmt">
        <xsl:element name="korpusSigle">
            <xsl:value-of select="$corpusID"/>
        </xsl:element>
        <xsl:apply-templates select="node()" mode="#current"/>
    </xsl:element>
</xsl:template>
734
<!-- Corpus header: re-creates publicationStmt (no namespace) and processes its children in the same mode. -->
<xsl:template match="tei:publicationStmt" mode="corpus">
    <publicationStmt>
        <xsl:apply-templates select="node()" mode="#current"/>
    </publicationStmt>
</xsl:template>
740
<!-- Corpus header: re-creates availability; only attributes and child elements are processed,
     so text-node children are not passed on. -->
<xsl:template match="tei:availability" mode="corpus">
    <availability>
        <xsl:apply-templates select="(@*, *)" mode="#current"/>
    </availability>
</xsl:template>
746
<!-- Corpus header: re-creates encodingDesc (no namespace) and processes its children in the same mode. -->
<xsl:template match="tei:encodingDesc" mode="corpus">
    <encodingDesc>
        <xsl:apply-templates select="node()" mode="#current"/>
    </encodingDesc>
</xsl:template>
752
<!-- Corpus header: structural elements of the class declaration are re-created under their own
     local name (no namespace); only attributes and child elements are processed. -->
<xsl:template mode="corpus"
    match="tei:classDecl | tei:taxonomy | tei:category | tei:taxonomy/tei:bibl">
    <xsl:element name="{local-name(.)}">
        <xsl:apply-templates select="(@*, *)" mode="#current"/>
    </xsl:element>
</xsl:template>
758
<!-- Corpus header: text-bearing elements are re-created under their own local name (no namespace);
     attributes go through mode "corpus", the content through mode "header-text". -->
<xsl:template mode="corpus" match="tei:bibl/tei:title | tei:edition | tei:desc">
    <xsl:element name="{local-name(.)}">
        <xsl:apply-templates select="@*" mode="#current"/>
        <xsl:apply-templates select="node()" mode="header-text"/>
    </xsl:element>
</xsl:template>
765<!--
766 <xsl:template match="tei:textClass" mode="corpus">
767 <xsl:element name="{local-name()}">
768 <xsl:apply-templates mode="corpus" select="@* | *"/>
769 </xsl:element>
770 </xsl:template>
771
772 <xsl:template match="tei:catRef" mode="corpus">
773 <xsl:element name="{local-name()}">
774 <xsl:apply-templates mode="corpus" select="@* | *"/>
775 </xsl:element>
776 </xsl:template>
777-->
bansp5e2d1c02022-03-10 04:51:40 +0100778
779
780
<!-- This template can be called by the XSpec test; TODO: find a way to call the main() template directly. -->
<!-- I have not fully handled the parameter transmission, which would have to be kludged in just for the sake of XSpec,
     because I'm disabling this for now, due to XSpec design issues; relevant links, among others:

     https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
     https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html

     In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in one's stylesheets. But I don't
     want to be strangled by kludges at the beginning of the work, and I've already lost quite a bit of time on this investigation;
     I will therefore "just code" and can then think about externalizing bits of templates if we want to play with tests. For now,
     I don't want to have to handle context items in a special way inside variables, etc., because I'm not sure it's worth it.

-->
794 <!--<xsl:template name="test_full">
795 <xsl:param name="corpusID"/>
796 <xsl:param name="docID"/>
797 <xsl:param name="textID"/>
798 <xsl:call-template name="xsl:initial-template"/>
799 </xsl:template>-->
800
Akron9a8ee3e2022-01-31 13:51:49 +0100801</xsl:stylesheet>
Piotr Banski6a4a2522022-05-24 01:16:47 +0200802
Piotr Banskifdc858a2022-05-25 02:40:32 +0200803<!-- template for serializing maps in messages <xsl:message select="('map:',serialize($map, map{'method':'adaptive'}))"/> -->