blob: 9362be4ba02ce358b578722eb8945ca90a252144 [file] [log] [blame]
Akron9a8ee3e2022-01-31 13:51:49 +01001<?xml version="1.0" encoding="UTF-8"?>
2<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
bansp5e2d1c02022-03-10 04:51:40 +01003 xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
4 xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
Piotr Banskif8af3a92022-05-23 03:20:10 +02005 xmlns:fn="http://www.w3.org/2005/xpath-functions"
Piotr Banski763b41f2022-06-02 01:13:23 +02006 xmlns:xi="http://www.w3.org/2001/XInclude"
7 xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f fn map nkjp tei xi"
bansp5e2d1c02022-03-10 04:51:40 +01008 version="3.0" expand-text="yes">
Akron9a8ee3e2022-01-31 13:51:49 +01009
banspe726b4a2022-03-28 05:47:45 +020010
11<!-- PARAMETERS -->
bansp5e2d1c02022-03-10 04:51:40 +010012
<!-- Directory containing the NKJP files, in the form of a collection of text-level
     directories (that is how we know both the $corpusID and the $docID). -->
<xsl:param name="sourceDir" as="xs:string" select="'test/resources/nkjp2korap_sample2'"/>

<!-- Where the corpus/document/text/annotations hierarchy is going to be created. -->
<xsl:param name="targetDir" as="xs:string" select="'test/output'"/>

<!-- Comma-separated list of document IDs to be skipped from processing.
     Example: HellerPodgladanie,KOT
     No functionality beyond string identity is supported (this is just for testing).
     Alternative default that skips nothing: select="''" -->
<xsl:param name="skip_docID" as="xs:string"
  select="'HellerPodgladanie,IsakowiczZaleskiMoje,KolakowskiOco,MysliwskiKamien,WilkWilczy,ZycieWarszawy_Zycie'"/>

<!-- For debugging structure.xml production; otherwise we do not see the referenced span. -->
<xsl:param name="SHOW_ORTH_IN_STRUCT" as="xs:boolean" select="true()"/>

<!-- In layers such as morpho or named, one can recover the orth, so this is a bit of
     space-wasting luxury, useful at the early stages. -->
<xsl:param name="SHOW_REDUNDANT_ORTH" as="xs:boolean" select="true()"/>
Piotr Banski09096ee2022-05-25 13:41:03 +020035
bansp8f6700b2022-03-27 05:27:09 +020036
bansp9dc10002022-05-17 22:33:34 +020037<!-- VARIABLES (= constants...) -->
banspe726b4a2022-03-28 05:47:45 +020038
<!-- Fixed corpus-level and document-level identifiers; both are used to build the
     output directory path and the compound ID of each text. -->
<xsl:variable name="corpusID" as="xs:string" static="yes" select="'NKJP'"/>
<xsl:variable name="docID" as="xs:string" static="yes" select="'NKJP'"/>

<!-- Root of the generated corpus directory, with a trailing slash. -->
<xsl:variable name="targetCorpusDir_slashed" as="xs:string"
  select="$targetDir || '/' || $corpusID || '/'"/>

<!-- System and public identifiers of the I5 DTD. -->
<xsl:variable name="systemDoctypeI5" as="xs:string" static="yes"
  select="'http://corpora.ids-mannheim.de/I5/DTD/i5.dtd'"/>
<xsl:variable name="publicDoctypeI5" as="xs:string" static="yes"
  select="'-//IDS//DTD I5 1.0//EN'"/>

<!-- Namespace of the KorAP-XML elements created by this stylesheet. -->
<xsl:variable name="KorAP_namespace" as="xs:string" static="yes"
  select="'http://ids-mannheim.de/ns/KorAP'"/>

<!-- Value written to the @version attribute of the generated layer files. -->
<xsl:variable name="KorAP-XML_version" as="xs:string" static="yes" select="'KorAP-0.4'"/>

<!-- Query parameters for the directory collection; see
     https://www.saxonica.com/documentation11/index.html#!sourcedocs/collections/collection-directories -->
<xsl:variable name="collection_params" as="xs:string" static="yes"
  select="'recurse=yes;validation=strip;select=text.xml;content-type=application/xml;on-error=warning;xinclude=yes'"/>

<!-- All text.xml documents found under $sourceDir; at least one is required. -->
<xsl:variable name="collection_of_text" as="document-node()+"
  select="collection($sourceDir || '?' || $collection_params)"/>
banspd1bf1db2022-04-04 02:16:24 +020062
banspe726b4a2022-03-28 05:47:45 +020063<!-- MODES -->
bansp5e2d1c02022-03-10 04:51:40 +010064
<!-- Unnamed mode: all accumulators are declared as applicable. -->
<xsl:mode use-accumulators="#all"/>

<!-- Layer and header modes: nodes without a matching template are dropped
     together with their subtrees. -->
<xsl:mode name="struct" on-no-match="deep-skip"/>
<xsl:mode name="morpho" on-no-match="deep-skip"/>
<xsl:mode name="corpus-header" on-no-match="deep-skip"/>
<xsl:mode name="text-header" on-no-match="deep-skip"/>

<!-- Unmatched nodes contribute only their text content. -->
<xsl:mode name="text_inside_header" on-no-match="text-only-copy"/>

<!-- Identity-style copying for unmatched nodes. -->
<xsl:mode name="copy" on-no-match="shallow-copy"/>
Piotr Banski09096ee2022-05-25 13:41:03 +020072
<!-- Running counter over the structural elements of a text (text, body, p, s, seg),
     incremented when each such element is entered. The struct layer reads it with
     accumulator-before() to number its spans. -->
<xsl:accumulator name="element-index" as="xs:integer" initial-value="0">
  <!--<xsl:accumulator-rule match="tei:*[ancestor-or-self::tei:text]" select="$value + 1" phase="start"/>-->
  <xsl:accumulator-rule phase="start" select="$value + 1"
    match="
      tei:teiCorpus/tei:TEI/tei:text
      | tei:teiCorpus/tei:TEI/tei:text/tei:body
      | tei:teiCorpus/tei:TEI/tei:text/tei:body/tei:p
      | tei:teiCorpus/tei:TEI/tei:text/tei:body/tei:p/tei:s
      | tei:teiCorpus/tei:TEI/tei:text/tei:body/tei:p/tei:s/tei:seg"/>
</xsl:accumulator>
Piotr Banskid2b78b82022-06-03 17:05:59 +020079
<!-- Character-offset bookkeeping for the generated layers.
     The value is a growing sequence of single-entry maps, each of the form
     xml:id => (from, to). Every rule appends one map; the second integer of the most
     recently appended map serves as the "current position" for the next rule.
     Paragraphs and sentences get a provisional entry on entry (phase start) and a
     final (from, to) entry on exit (phase end); the provisional entry is looked up
     again on exit in order to recover the element's starting offset. -->
<xsl:accumulator name="morpho-offsets" as="map(xs:string, item()+)+" initial-value="(map{'null':(0,0)})">

  <!-- Entering a paragraph: a non-initial paragraph starts one position further on. -->
  <xsl:accumulator-rule match="tei:body/tei:p" phase="start">
    <xsl:variable name="latest" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="preceding_index" as="xs:integer"
      select="map:get($latest, map:keys($latest)[1])[2]"/>
    <!-- for paragraphs, it is in either being initial or not -->
    <xsl:variable name="our_base" as="xs:integer"
      select="if ($preceding_index eq 0) then 0 else $preceding_index + 1"/>

    <xsl:sequence select="$value, map { string(@xml:id): ($preceding_index, $our_base) }"/>
  </xsl:accumulator-rule>

  <!-- Entering a sentence: advance by one only if f:is_preceded_by_ws() says so. -->
  <xsl:accumulator-rule match="tei:s" phase="start">
    <xsl:variable name="latest" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="preceding_index" as="xs:integer"
      select="map:get($latest, map:keys($latest)[1])[2]"/>
    <xsl:variable name="our_base" as="xs:integer"
      select="
        if ($preceding_index eq 0) then 0
        else $preceding_index + xs:integer(f:is_preceded_by_ws(., true()))"/>

    <xsl:sequence select="$value, map { string(@xml:id): ($preceding_index, $our_base) }"/>
  </xsl:accumulator-rule>

  <!-- I want something that will not be matched in other layers, for efficiency: that
       may allow me to merge the accumulators, eventually;
       but I also want to filter out the rejected tokenization alternatives already here. -->
  <xsl:accumulator-rule match="tei:seg[tei:fs[@type eq 'morph' and tei:f[@name eq 'disamb']]]" phase="end">
    <xsl:variable name="latest" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="preceding_index" as="xs:integer"
      select="map:get($latest, map:keys($latest)[1])[2]"/>
    <xsl:variable name="our_base" as="xs:integer"
      select="$preceding_index + xs:integer(f:is_preceded_by_ws(., true()))"/>

    <!-- the segment spans the length of its orth string -->
    <xsl:sequence select="
        $value,
        map {
          string(@xml:id): ($our_base, $our_base + string-length(tei:fs/tei:f[@name eq 'orth']/tei:string))
        }"/>
  </xsl:accumulator-rule>

  <!-- Leaving a sentence or a paragraph: the start offset comes from the provisional
       entry recorded on entry (first hit of map:find), the end offset is the current
       position. -->
  <xsl:accumulator-rule match="tei:s | tei:body/tei:p" phase="end">
    <xsl:variable name="latest" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="preceding_index" as="xs:integer"
      select="map:get($latest, map:keys($latest)[1])[2]"/>
    <xsl:variable name="incomplete" as="xs:integer+"
      select="map:find($value, string(@xml:id))(1)"/>
    <xsl:variable name="our_base" as="xs:integer" select="$incomplete[2]"/>

    <xsl:sequence select="$value, map { string(@xml:id): ($our_base, $preceding_index) }"/>
  </xsl:accumulator-rule>

  <!-- Leaving body or text: both span from 0 to the current position. -->
  <xsl:accumulator-rule match="tei:body | tei:text" phase="end">
    <xsl:variable name="latest" as="map(*)" select="$value[last()]"/>
    <xsl:variable name="preceding_index" as="xs:integer"
      select="map:get($latest, map:keys($latest)[1])[2]"/>

    <xsl:sequence select="$value, map { string(@xml:id): (0, $preceding_index) }"/>
  </xsl:accumulator-rule>

</xsl:accumulator>
bansp5e2d1c02022-03-10 04:51:40 +0100194
banspe726b4a2022-03-28 05:47:45 +0200195 <!-- FUNCTIONS -->
196
<!-- Returns the nesting level of $node: the number of elements on its
     ancestor-or-self axis, not counting any element whose local name is
     TEI or teiCorpus. -->
<xsl:function name="f:compute_nesting" as="xs:integer">
  <xsl:param name="node" as="element()"/>
  <xsl:sequence
    select="count($node/ancestor-or-self::*[not(local-name(.) = ('TEI', 'teiCorpus'))])"/>
</xsl:function>
204
<!-- Decides whether a whitespace character is to be assumed in front of $node.
     $node             a p, s or seg element (tested by local name); anything else
                       terminates the transformation with a message
     $suppress_initial when true, the first disambiguated seg of a sentence and a
                       paragraph-initial sentence never count as preceded by whitespace
     Returns true if whitespace precedes the element, false otherwise. -->
<xsl:function name="f:is_preceded_by_ws" as="xs:boolean">
  <xsl:param name="node" as="element()"/>
  <xsl:param name="suppress_initial" as="xs:boolean"/>

  <xsl:variable name="kind" as="xs:string" select="local-name($node)"/>

  <xsl:choose>
    <xsl:when test="$kind eq 'p'">
      <!-- every paragraph but the first -->
      <xsl:sequence select="exists($node/preceding-sibling::tei:p)"/>
    </xsl:when>

    <xsl:when test="$kind eq 's'">
      <!-- a non-initial sentence always; a paragraph-initial one only when initials are
           not suppressed and an earlier paragraph exists -->
      <xsl:sequence
        select="
          exists($node/preceding-sibling::tei:s)
          or (not($suppress_initial) and exists($node/ancestor::tei:p[1]/preceding-sibling::tei:p))"/>
    </xsl:when>

    <xsl:when test="$kind eq 'seg'">
      <!-- the order of the tests below matters and is kept as a choose -->
      <xsl:choose>
        <!-- explicitly marked as having no preceding space -->
        <xsl:when test="$node/tei:fs/tei:f[@name eq 'nps']">
          <xsl:sequence select="false()"/>
        </xsl:when>
        <!-- very first non-rejected seg of the text; the otherwise very costly check for
             preceding segs fires only if the first two conditions are true, so it will
             have minimal search space -->
        <xsl:when
          test="
            $node/ancestor::tei:s[empty(preceding-sibling::tei:s)]
            and $node/ancestor::tei:p[empty(preceding-sibling::tei:p)]
            and empty($node/preceding::tei:seg[empty(@nkjp:rejected)])">
          <xsl:sequence select="false()"/>
        </xsl:when>
        <!-- first disambiguated seg of its sentence, when initials are suppressed -->
        <xsl:when
          test="$suppress_initial and $node/ancestor::tei:s/descendant::tei:seg[tei:fs/tei:f[@name eq 'disamb']][1]/@xml:id eq $node/@xml:id">
          <xsl:sequence select="false()"/>
        </xsl:when>
        <xsl:otherwise>
          <xsl:sequence select="true()"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>

    <xsl:otherwise>
      <xsl:message terminate="yes"
        select="'Wrong argument passed to f:is_preceded_by_ws(): ' || local-name($node) || ' Only p, s, seg are allowed.'"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
banspd1bf1db2022-04-04 02:16:24 +0200252
banspb5992532022-03-29 15:55:44 +0200253<!-- UTILITY TEMPLATES -->
254
<!-- Suppresses the auto-inserted @default attribute wherever templates are applied
     to it, in every mode. -->
<xsl:template match="@default" mode="#all">
  <!-- intentionally empty -->
</xsl:template>
bansp9103aab2022-03-19 05:10:21 +0100257
banspb5992532022-03-29 15:55:44 +0200258 <!-- MAIN PROCESSING -->
259
<!-- Entry point. Writes the corpus header once, then processes every text.xml
     of the source collection together with its sibling annotation files, skipping
     texts whose directory name is listed in $skip_docID. -->
<xsl:template name="xsl:initial-template">
  <!-- whitespace around the commas and empty items are tolerated -->
  <xsl:variable name="IDs_to_skip" as="xs:string*"
    select="(tokenize($skip_docID, ',') ! normalize-space(.))[. ne '']"/>

  <!-- we only want to call the template below once, and we process a random NKJP corpus file for that purpose,
       because all we need is the main corpus header, and we can (should) get to that from any NKJP corpus document -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param name="text.doc" select="$collection_of_text[1]" as="document-node()"/>
    <xsl:with-param name="target" select="$targetCorpusDir_slashed || 'header.xml'" as="xs:string"/>
  </xsl:call-template>

  <xsl:for-each select="$collection_of_text">
    <!-- directory of the current text: its URI minus the trailing file name;
         the pattern is anchored so that only the final path step is removed -->
    <xsl:variable name="my_dir" as="xs:string" select="replace(base-uri(), '/text\.xml$', '')"/>
    <!-- the name of that directory doubles as the text ID -->
    <xsl:variable name="my_textID" as="xs:string" select="tokenize($my_dir, '/')[last()]"/>
    <xsl:variable name="ann_morphosyntax.uri" select="$my_dir || '/ann_morphosyntax.xml'" as="xs:string"/>
    <!--<xsl:variable name="ann_segmentation.uri" select="$my_dir || '/ann_segmentation.xml'" as="xs:string"/>-->
    <xsl:variable name="ann_named.uri" select="$my_dir || '/ann_named.xml'" as="xs:string"/>
    <xsl:variable name="ann_groups.uri" select="$my_dir || '/ann_groups.xml'" as="xs:string"/>
    <xsl:variable name="ann_words.uri" select="$my_dir || '/ann_words.xml'" as="xs:string"/>

    <xsl:choose>
      <xsl:when test="$my_textID = $IDs_to_skip"/>
      <!-- this is a utility step, for when we want to ignore some texts for any reason (debugging, selective update) -->
      <xsl:otherwise>
        <xsl:call-template name="process_single_sample">
          <xsl:with-param name="text.doc" as="document-node()" select="."/>
          <!-- the morphosyntactic layer is mandatory: doc() fails if it is missing -->
          <xsl:with-param name="ann_morphosyntax.doc" as="document-node()"
            select="doc($ann_morphosyntax.uri)"/>
          <!-- <xsl:with-param name="ann_segmentation.xml" as="document-node()"
               select="doc($ann_segmentation.uri)"/>-->
          <xsl:with-param name="my_textID" select="$my_textID" as="xs:string"/>
          <!-- the following parameters may happen to be null -->
          <xsl:with-param name="ann_named.doc" as="document-node()*"
            select="if (fn:doc-available($ann_named.uri)) then doc($ann_named.uri) else ()"/>
          <xsl:with-param name="ann_groups.doc" as="document-node()*"
            select="if (fn:doc-available($ann_groups.uri)) then doc($ann_groups.uri) else ()"/>
          <xsl:with-param name="ann_words.doc" as="document-node()*"
            select="if (fn:doc-available($ann_words.uri)) then doc($ann_words.uri) else ()"/>
        </xsl:call-template>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:for-each>
</xsl:template>
304
<!-- Produces all output files for a single text: data.xml, the struct and morpho
     layers, the text header and, when the corresponding annotation documents are
     available, the named and groups layers.
     text.doc              the text.xml document of the sample
     ann_morphosyntax.doc  its morphosyntactic annotation (mandatory)
     my_textID             ID of the text; the default value flags a missing ID
     ann_named.doc         named-entity annotation, may be empty
     ann_groups.doc        syntactic-groups annotation, may be empty
     ann_words.doc         syntactic-words annotation, may be empty -->
<xsl:template name="process_single_sample">
  <xsl:param name="text.doc" as="document-node()"/>
  <xsl:param name="ann_morphosyntax.doc" as="document-node()"/>
  <xsl:param name="my_textID" as="xs:string" select="'0-BAD_textID'"/>
  <!-- empty textID should never happen, but if it does, it will be signalled at the top of the output -->
  <xsl:param name="ann_named.doc" as="document-node()*"/>
  <xsl:param name="ann_groups.doc" as="document-node()*"/>
  <xsl:param name="ann_words.doc" as="document-node()*"/>

  <xsl:variable name="targetBaseDir" as="xs:string"
    select="$targetCorpusDir_slashed || $docID || '/' || $my_textID"/>

  <!-- this is what occurs in the text and data layers as @docid -->
  <xsl:variable name="compoundID" as="xs:string"
    select="$corpusID || '_' || $docID || '.' || $my_textID"/>

  <xsl:call-template name="create_data">
    <xsl:with-param name="ann_morphosyntax.doc" select="$ann_morphosyntax.doc" as="document-node()"/>
    <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
    <xsl:with-param name="target" select="$targetBaseDir || '/data.xml'" as="xs:string"/>
  </xsl:call-template>

  <xsl:call-template name="create_struct">
    <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
    <xsl:with-param name="ann_morphosyntax.doc" select="$ann_morphosyntax.doc" as="document-node()"/>
    <xsl:with-param name="target" select="$targetBaseDir || '/struct/structure.xml'" as="xs:string"/>
  </xsl:call-template>

  <xsl:call-template name="create_morpho">
    <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
    <xsl:with-param name="ann_morphosyntax.doc" select="$ann_morphosyntax.doc" as="document-node()"/>
    <xsl:with-param name="target" select="$targetBaseDir || '/nkjp/morpho.xml'" as="xs:string"/>
  </xsl:call-template>

  <xsl:call-template name="create_text_header">
    <xsl:with-param name="text.doc" select="$text.doc" as="document-node()"/>
    <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
    <xsl:with-param name="target" select="$targetBaseDir || '/header.xml'" as="xs:string"/>
  </xsl:call-template>

  <xsl:if test="exists($ann_named.doc)">
    <!-- Reverse lookup: ID of a morphosyntactic segment => the ptr element(s) of the
         named layer that reference it.
         Caution: as of 01-June-2022, some of the pointers are malformed (missing '#' when
         referencing locally), so we need to act around it but also sustainably, in case
         that error gets corrected. Pointers whose part before '#' is not
         'ann_morphosyntax.xml' are left out; the sequence may therefore be empty, which
         is why its type is map(*)* and not map(*)+. -->
    <xsl:variable name="rev_lookup-seq" as="map(*)*">
      <xsl:for-each
        select="$ann_named.doc//tei:seg/tei:ptr[fn:substring-before(@target, '#') eq 'ann_morphosyntax.xml']">
        <xsl:map-entry key="fn:substring-after(fn:string(@target), '#')" select="."/>
      </xsl:for-each>
    </xsl:variable>
    <!-- several pointers to the same segment are kept side by side under one key -->
    <xsl:variable name="rev_lookup" as="map(*)"
      select="map:merge($rev_lookup-seq, map{'duplicates':'combine'})"/>

    <xsl:call-template name="create_named">
      <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
      <xsl:with-param name="ann_morphosyntax.doc" select="$ann_morphosyntax.doc" as="document-node()"/>
      <xsl:with-param name="ann_named.doc" select="$ann_named.doc" as="document-node()"/>
      <xsl:with-param name="target" select="$targetBaseDir || '/nkjp/named.xml'" as="xs:string"/>
      <xsl:with-param name="rev_lookup" select="$rev_lookup" as="map(*)"/>
    </xsl:call-template>
  </xsl:if>

  <!-- the groups layer needs both the words and the groups annotation -->
  <xsl:if test="exists($ann_words.doc) and exists($ann_groups.doc)">
    <xsl:call-template name="create_groups">
      <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
      <xsl:with-param name="ann_morphosyntax.doc" select="$ann_morphosyntax.doc" as="document-node()"/>
      <xsl:with-param name="ann_words.doc" select="$ann_words.doc" as="document-node()"/>
      <xsl:with-param name="ann_groups.doc" select="$ann_groups.doc" as="document-node()"/>
      <xsl:with-param name="target" select="$targetBaseDir || '/nkjp/groups.xml'" as="xs:string"/>
    </xsl:call-template>
  </xsl:if>

</xsl:template>
390
391 <!-- ************************** data.xml ******************* -->
392
<!-- Creates the data.xml file: the raw text of one sample, reconstructed from the
     orth values of the disambiguated segments of the morphosyntactic layer.
     ann_morphosyntax.doc  the morphosyntactic annotation document
     compoundID            value of the @docid attribute
     target                URI of the file to be written -->
<xsl:template name="create_data">
  <xsl:param name="ann_morphosyntax.doc" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- xpath-default-namespace is not an attribute value template, so the URI is
       spelled out literally, as in the other layer templates -->
  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">

    <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$compoundID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>

      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <!-- one optional separating space plus the orth string per segment;
             the sequence is allowed to be empty, so that a text without any
             disambiguated segment yields an empty text element instead of a type error -->
        <xsl:variable name="content" as="xs:string*">
          <xsl:for-each
            select="$ann_morphosyntax.doc/tei:teiCorpus/tei:TEI/tei:text/tei:body/tei:p/tei:s/tei:seg[tei:fs/tei:f[@name eq 'disamb']]">
            <xsl:sequence select="
                if (f:is_preceded_by_ws(., false())) then
                  ' '
                else
                  '', ./tei:fs/tei:f[@name eq 'orth']/tei:string"/>
          </xsl:for-each>
        </xsl:variable>
        <xsl:value-of select="string-join($content)"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
423
bansp5f841732022-03-16 06:27:31 +0100424 <!-- ************************** struct ******************* -->
425
<!-- Creates the structure layer file: a KorAP layer element with a spanList
     that is filled by applying the struct mode to the text element of the
     morphosyntactic annotation.
     compoundID            value of the @docid attribute
     ann_morphosyntax.doc  the morphosyntactic annotation document
     target                URI of the file to be written -->
<xsl:template name="create_struct">
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_morphosyntax.doc" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <xsl:processing-instruction name="xml-model">href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <!-- text value templates are enabled stylesheet-wide via expand-text -->
      <xsl:attribute name="docid">{$compoundID}</xsl:attribute>
      <xsl:attribute name="version">{$KorAP-XML_version}</xsl:attribute>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:apply-templates mode="struct"
          select="$ann_morphosyntax.doc/tei:teiCorpus/tei:TEI/tei:text"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
444
<!-- Emits one KorAP span for a structural element (text, body, p, s, or a
     disambiguated seg) and then continues with its children, so that the spans
     form a flat list. Offsets come from the morpho-offsets accumulator, the span
     number from the element-index accumulator. -->
<xsl:template match="tei:text | tei:body | tei:p | tei:s | tei:seg[tei:fs/tei:f[@name eq 'disamb']]" mode="struct">
  <xsl:variable name="tei_ns" as="xs:string" select="'http://www.tei-c.org/ns/1.0'"/>
  <xsl:variable name="my_name" as="xs:string" select="local-name()"/>
  <xsl:variable name="my_index" as="xs:integer" select="fn:accumulator-before('element-index')"/>
  <!-- (from, to) of this element: the entry under its xml:id in the map that the
       accumulator appended last, as seen after the element has been processed -->
  <xsl:variable name="offsets" as="xs:integer+"
    select="map:get(fn:accumulator-after('morpho-offsets')[last()], string(@xml:id))"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id">s{$my_index}</xsl:attribute>
    <xsl:attribute name="from" select="$offsets[1]"/>
    <xsl:attribute name="to" select="$offsets[2]"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>

    <!-- debugging aid: show the orth of a segment, with an underscore marking
         segments that do not carry the nps feature.
         NOTE(review): comment content may not contain two consecutive hyphens or end
         in a hyphen; confirm how the processor in use treats hyphen-only orth values. -->
    <xsl:if test="$SHOW_ORTH_IN_STRUCT and $my_name eq 'seg'">
      <xsl:comment select="
          (if (exists(tei:fs/tei:f[@name eq 'nps'])) then
            ' '
          else
            ' _') || tei:fs/tei:f[@name eq 'orth']/tei:string"/>
    </xsl:if>

    <xsl:element name="fs" namespace="{$tei_ns}">
      <!-- STRUCT vs. LEX for morpho -->
      <xsl:attribute name="type" select="'struct'"/>
      <xsl:element name="f" namespace="{$tei_ns}">
        <xsl:attribute name="name" select="'name'"/>
        <xsl:value-of select="$my_name"/>
      </xsl:element>
      <!-- the attributes of the source element, if any, become a nested feature structure -->
      <xsl:if test="exists(@*)">
        <xsl:element name="f" namespace="{$tei_ns}">
          <xsl:attribute name="name" select="'attr'"/>
          <xsl:element name="fs" namespace="{$tei_ns}">
            <xsl:attribute name="type" select="'attr'"/>
            <xsl:for-each select="@*">
              <xsl:element name="f" namespace="{$tei_ns}">
                <xsl:attribute name="name" select="local-name(.)"/>
                <xsl:value-of select="."/>
              </xsl:element>
            </xsl:for-each>
          </xsl:element>
        </xsl:element>
      </xsl:if>
    </xsl:element>
  </xsl:element>

  <xsl:apply-templates mode="struct"/>
</xsl:template>
489
490 <!-- ************************** morpho ******************* -->
491
<!-- create_morpho: writes the morphosyntactic layer to $target as a KorAP-XML
     layer/spanList document.
     Params:
       compoundID            value of the layer's @docid
       ann_morphosyntax.doc  parsed morphosyntax annotation document
       target                href of the result document
     Only tei:seg elements that are direct children of tei:s are dispatched
     (mode "morpho"). -->
<xsl:template name="create_morpho">
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_morphosyntax.doc" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <!-- associate the output with the RelaxNG schema for span layers -->
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid"><xsl:value-of select="$compoundID"/></xsl:attribute>
      <xsl:attribute name="version"><xsl:value-of select="$KorAP-XML_version"/></xsl:attribute>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:variable name="segments"
          select="$ann_morphosyntax.doc/tei:teiCorpus/tei:TEI/tei:text/tei:body/tei:p/tei:s/tei:seg"/>
        <xsl:apply-templates mode="morpho" select="$segments"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
510
<!-- mode "morpho": turns one disambiguated morphosyntactic segment into a KorAP span.
     The span carries:
       @id       'm' followed by the value of the 'element-index' accumulator
       @from/@to first and second integer stored under this segment's @xml:id in the
                 map held by the 'morpho-offsets' accumulator
       @l        result of f:compute_nesting(.)
     and a TEI feature structure with the chosen reading (orig, lemma, pos, optional
     msd, optional join=left when an 'nps' feature is present) followed by all
     interpretations, the chosen one flagged with n="choice".
     The lookups are kept in separate variables on purpose, so that individual
     values can be inspected when something goes wrong. -->
<xsl:template match="tei:seg[tei:fs/tei:f[@name eq 'disamb']]" mode="morpho">

  <xsl:variable name="tei-ns" as="xs:string" select="'http://www.tei-c.org/ns/1.0'"/>

  <xsl:variable name="span-offsets" as="xs:integer+"
    select="map:get(fn:accumulator-after('morpho-offsets')[last()], string(@xml:id))"/>

  <xsl:variable name="disamb" as="node()" select="tei:fs/tei:f[@name eq 'disamb']"/>
  <!-- id of the chosen tei:symbol: the part of @fVal after '#' -->
  <xsl:variable name="chosen-id" as="xs:string"
    select="substring-after($disamb//tei:f[@name eq 'choice']/@fVal, '#')"/>
  <!-- the 'lex' interpretation that contains the chosen symbol -->
  <xsl:variable name="chosen-lex" as="node()"
    select="tei:fs/tei:f[@name eq 'interps']/tei:fs[@type eq 'lex'][descendant::tei:symbol[@xml:id eq $chosen-id]]"/>
  <xsl:variable name="chosen-msd" as="xs:string"
    select="$chosen-lex/descendant::tei:symbol[@xml:id eq $chosen-id]/@value"/>
  <xsl:variable name="span-number" as="xs:integer" select="fn:accumulator-before('element-index')"/>

  <!-- orthographic form and the "no preceding space" marker, looked up once -->
  <xsl:variable name="orth" select="tei:fs/tei:f[@name eq 'orth']/tei:string"/>
  <xsl:variable name="no-space-before" as="xs:boolean" select="exists(tei:fs/tei:f[@name eq 'nps'])"/>

  <xsl:element name="span" namespace="{$KorAP_namespace}">
    <xsl:attribute name="id" select="'m' || $span-number"/>
    <xsl:attribute name="from" select="head($span-offsets)"/>
    <xsl:attribute name="to" select="$span-offsets[2]"/>
    <xsl:attribute name="l" select="f:compute_nesting(.)"/>
    <xsl:element name="fs" namespace="{$tei-ns}">
      <xsl:attribute name="type">lex</xsl:attribute>
      <xsl:element name="f" namespace="{$tei-ns}">
        <xsl:attribute name="name">lex</xsl:attribute>
        <xsl:if test="$SHOW_REDUNDANT_ORTH">
          <!-- debugging aid: '_' marks a segment that is preceded by a space -->
          <xsl:comment select="(if ($no-space-before) then ' ' else ' _') || $orth"/>
        </xsl:if>
        <xsl:element name="fs" namespace="{$tei-ns}">
          <xsl:element name="f" namespace="{$tei-ns}">
            <xsl:attribute name="name">orig</xsl:attribute>
            <xsl:value-of select="$orth"/>
          </xsl:element>
          <xsl:element name="f" namespace="{$tei-ns}">
            <xsl:attribute name="name">lemma</xsl:attribute>
            <xsl:value-of select="$chosen-lex/tei:f[@name eq 'base']/tei:string"/>
          </xsl:element>
          <xsl:element name="f" namespace="{$tei-ns}">
            <xsl:attribute name="name">pos</xsl:attribute>
            <xsl:value-of select="$chosen-lex/tei:f[@name eq 'ctag']/tei:symbol/@value"/>
          </xsl:element>
          <xsl:if test="$chosen-msd ne ''">
            <xsl:element name="f" namespace="{$tei-ns}">
              <xsl:attribute name="name">msd</xsl:attribute>
              <xsl:value-of select="$chosen-msd"/>
            </xsl:element>
          </xsl:if>
          <xsl:if test="$no-space-before">
            <xsl:element name="f" namespace="{$tei-ns}">
              <xsl:attribute name="name">join</xsl:attribute>
              <xsl:text>left</xsl:text>
            </xsl:element>
          </xsl:if>
        </xsl:element>
      </xsl:element>
      <!-- all interpretations, with the chosen one marked -->
      <xsl:element name="f" namespace="{$tei-ns}">
        <xsl:attribute name="name">interps</xsl:attribute>
        <xsl:for-each select="tei:fs/tei:f[@name eq 'interps']/tei:fs">
          <xsl:element name="fs" namespace="{$tei-ns}">
            <xsl:attribute name="type">alt</xsl:attribute>
            <xsl:if test="tei:f[@name eq 'msd']//tei:symbol/@xml:id = $chosen-id">
              <xsl:attribute name="n">choice</xsl:attribute>
            </xsl:if>
            <xsl:element name="f" namespace="{$tei-ns}">
              <xsl:attribute name="name">lemma</xsl:attribute>
              <xsl:value-of select="tei:f[@name eq 'base']/tei:string"/>
            </xsl:element>
            <xsl:element name="f" namespace="{$tei-ns}">
              <xsl:attribute name="name">pos</xsl:attribute>
              <xsl:value-of select="tei:f[@name eq 'ctag']/tei:symbol/@value"/>
            </xsl:element>
            <xsl:element name="f" namespace="{$tei-ns}">
              <xsl:attribute name="name">msd</xsl:attribute>
              <xsl:apply-templates mode="inside-interps" select="tei:f[@name eq 'msd']/*">
                <xsl:with-param name="choice" select="$chosen-id" as="xs:string" tunnel="yes"/>
              </xsl:apply-templates>
            </xsl:element>
          </xsl:element>
        </xsl:for-each>
      </xsl:element>
    </xsl:element>
  </xsl:element>
</xsl:template>
banspe726b4a2022-03-28 05:47:45 +0200597
<!-- mode "inside-interps": shallow-copies a tei:vAlt (no attributes, no inherited
     namespaces) and continues with its element children in the same mode -->
<xsl:template match="tei:vAlt" mode="inside-interps">
  <xsl:copy copy-namespaces="no">
    <xsl:apply-templates select="child::*" mode="#current"/>
  </xsl:copy>
</xsl:template>
603
<!-- mode "inside-interps": copies a tei:symbol, keeping only a non-empty @value and
     flagging the symbol whose @xml:id equals the tunnelled $choice with n="choice".
     Param:
       choice  (tunnel) id of the symbol selected by disambiguation -->
<xsl:template match="tei:symbol" mode="inside-interps">
  <xsl:param name="choice" as="xs:string" tunnel="yes"/>
  <xsl:copy copy-namespaces="no">
    <!-- The validator does not allow an empty @value, so '0' is substituted.
         string(@value) also covers a symbol with no @value at all: the plain
         comparison "@value eq ''" is false for a missing attribute and would
         still produce value="". -->
    <xsl:attribute name="value" select="
        if (string(@value) eq '') then
          '0'
        else
          @value"/>
    <xsl:if test="@xml:id eq $choice">
      <xsl:attribute name="n" select="'choice'"/>
    </xsl:if>
  </xsl:copy>
</xsl:template>
618
Piotr Banskic5950ce2022-05-27 15:07:08 +0200619 <!-- ************************** named entities ******************* -->
620
<!-- create_named: writes the named-entity layer to $target as a KorAP-XML
     layer/spanList document.
     Params:
       compoundID            value of the layer's @docid
       ann_morphosyntax.doc  parsed morphosyntax annotation document (drives the traversal)
       ann_named.doc         parsed named-entity annotation document (tunnelled on)
       target                href of the result document
       rev_lookup            map consulted per segment in mode "named" (tunnelled on) -->
<xsl:template name="create_named">
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_morphosyntax.doc" as="document-node()"/>
  <xsl:param name="ann_named.doc" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>
  <xsl:param name="rev_lookup" as="map(*)"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <!-- associate the output with the RelaxNG schema for span layers -->
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid"><xsl:value-of select="$compoundID"/></xsl:attribute>
      <xsl:attribute name="version"><xsl:value-of select="$KorAP-XML_version"/></xsl:attribute>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <xsl:apply-templates mode="named" select="$ann_morphosyntax.doc/descendant::tei:text">
          <xsl:with-param name="rev_lookup" select="$rev_lookup" as="map(*)" tunnel="yes"/>
          <xsl:with-param name="ann_named.doc" select="$ann_named.doc" as="document-node()" tunnel="yes"/>
        </xsl:apply-templates>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
Piotr Banski65a6d0b2022-05-31 17:23:08 +0200644
<!-- mode "named": a segment that is not matched by the more specific 'disamb'
     template in this mode produces no output -->
<xsl:template mode="named" match="tei:seg"/>
646
<!-- mode "named": emits a KorAP span for a disambiguated segment, but only when
     $rev_lookup holds at least one tei:ptr under the segment's @xml:id.
     Params (both tunnelled):
       ann_named.doc  named-entity annotation document (declared, not read here)
       rev_lookup     map from segment @xml:id to tei:ptr elements; presumably the
                      pointers inside named-entity segments that reference this
                      segment (verify against the code that builds the map)
     The span carries @id ('n' + 'element-index' accumulator value), @from/@to taken
     from the 'morpho-offsets' accumulator map and @l from f:compute_nesting(.).
     For every tei:ptr, the first tei:fs of its parent tei:seg contributes the
     'type' and 'subtype' features plus a full copy under 'nkjp-named'. -->
<xsl:template match="tei:seg[tei:fs[tei:f[@name eq 'disamb']]]" mode="named">
  <xsl:param name="ann_named.doc" as="document-node()" tunnel="yes"/>
  <xsl:param name="rev_lookup" as="map(*)" tunnel="yes"/>

  <xsl:variable name="offsets" as="xs:integer+">
    <xsl:sequence select="map:get(fn:accumulator-after('morpho-offsets')[last()], string(@xml:id))"/>
  </xsl:variable>

  <!-- kept as elements (not ids), because their position in the source is needed -->
  <xsl:variable name="ptr" select="map:get($rev_lookup, fn:string(@xml:id))" as="element(tei:ptr)*"/>

  <xsl:if test="$ptr">
    <xsl:variable name="my_index" select="fn:accumulator-before('element-index')" as="xs:integer"/>

    <xsl:element name="span" namespace="{$KorAP_namespace}">
      <xsl:attribute name="id" select="'n' || $my_index"/>
      <xsl:attribute name="from" select="$offsets[1]"/>
      <xsl:attribute name="to" select="$offsets[2]"/>
      <xsl:attribute name="l" select="f:compute_nesting(.)"/>
      <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
        <xsl:attribute name="type" select="'ne'"/>
        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
          <xsl:attribute name="name" select="'ne'"/>
          <xsl:if test="$SHOW_REDUNDANT_ORTH">
            <!-- debugging aid: '_' marks a segment that is preceded by a space -->
            <xsl:comment select="
                (if (tei:fs/tei:f[@name eq 'nps']) then
                  ' '
                else
                  ' _') || tei:fs/tei:f[@name eq 'orth']/tei:string"/>
          </xsl:if>
          <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
              <xsl:attribute name="name" select="'complex-ent'"/>
              <xsl:for-each select="$ptr">
                <!-- context item: one tei:ptr; its parent tei:seg describes the entity -->
                <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
                  <xsl:attribute name="type" select="'complex-ent'"/>
                  <xsl:copy-of copy-namespaces="no"
                    select="parent::tei:seg/tei:fs[1]/tei:f[@name eq 'type' or @name eq 'subtype']"/>
                  <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
                    <xsl:attribute name="name" select="'nkjp-named'"/>
                    <xsl:copy-of select="parent::tei:seg/tei:fs[1]" copy-namespaces="no"/>
                  </xsl:element>
                </xsl:element>
              </xsl:for-each>
            </xsl:element>
          </xsl:element>
        </xsl:element>
      </xsl:element>
    </xsl:element>
  </xsl:if>
</xsl:template>
704
Piotr Banskic5950ce2022-05-27 15:07:08 +0200705
706 <!-- ************************** syntactic chunks ******************* -->
707
<!-- create_groups: writes the syntactic-groups layer to $target.
     The traversal that would fill the spanList is currently commented out, so
     the result is a layer element with an empty spanList.
     Params:
       compoundID            value of the layer's @docid
       ann_morphosyntax.doc, ann_words.doc, ann_groups.doc
                             parsed annotation documents (only referenced by the
                             disabled traversal)
       target                href of the result document -->
<xsl:template name="create_groups">
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="ann_morphosyntax.doc" as="document-node()"/>
  <xsl:param name="ann_words.doc" as="document-node()"/>
  <xsl:param name="ann_groups.doc" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
    <!-- associate the output with the RelaxNG schema for span layers -->
    <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="layer" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid"><xsl:value-of select="$compoundID"/></xsl:attribute>
      <xsl:attribute name="version"><xsl:value-of select="$KorAP-XML_version"/></xsl:attribute>

      <xsl:element name="spanList" namespace="{$KorAP_namespace}">
        <!--<xsl:apply-templates select="$ann_morphosyntax.doc//tei:text" mode="groups">
          <xsl:with-param name="ann_words.doc" select="$ann_words.doc" as="document-node()" tunnel="yes"/>
          <xsl:with-param name="ann_groups.doc" select="$ann_groups.doc" as="document-node()" tunnel="yes"/>
        </xsl:apply-templates>-->
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
731
bansp5f841732022-03-16 06:27:31 +0100732 <!-- ************************** TEXT header ******************* -->
733
<!-- create_text_header: writes the text-level header.xml to $target.
     Params:
       text.doc    parsed source document holding the tei:TEI header
       compoundID  tunnelled to the "text-header" templates (used for textSigle)
       target      href of the result document -->
<xsl:template name="create_text_header">
  <xsl:param name="text.doc" as="document-node()"/>
  <xsl:param name="compoundID" as="xs:string"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- element children of the TEI-level teiHeader -->
  <xsl:variable name="header-parts" select="$text.doc/descendant::tei:TEI/tei:teiHeader/tei:*"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
    xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">

    <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="text-header" select="$header-parts">
        <xsl:with-param name="compoundID" select="$compoundID" as="xs:string" tunnel="yes"/>
      </xsl:apply-templates>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
750
<!-- text header: re-create fileDesc without a namespace and process its children in the same mode -->
<xsl:template match="tei:fileDesc" mode="text-header">
  <fileDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </fileDesc>
</xsl:template>
756
<!-- text header: a title becomes t.title.
     The content is processed in mode "text_inside_header", the same mode the
     corpus-header title and tei:p templates use for header text. Without an
     explicit mode the children would be handed to the default (unnamed) mode.
     NOTE(review): "text_inside_header" is defined outside this part of the file;
     confirm it handles title content as intended. -->
<xsl:template match="tei:title" mode="text-header">
  <t.title>
    <xsl:apply-templates mode="text_inside_header"/>
  </t.title>
</xsl:template>
762
<!-- text header: titleStmt gets a leading textSigle holding the tunnelled
     $compoundID, followed by the processed original content -->
<xsl:template match="tei:titleStmt" mode="text-header">
  <xsl:param name="compoundID" as="xs:string" tunnel="yes"/>
  <xsl:element name="titleStmt">
    <xsl:element name="textSigle">
      <xsl:sequence select="$compoundID"/>
    </xsl:element>
    <xsl:apply-templates select="node()" mode="#current"/>
  </xsl:element>
</xsl:template>
772
<!-- text header: re-create publicationStmt without a namespace and process its children in the same mode -->
<xsl:template match="tei:publicationStmt" mode="text-header">
  <publicationStmt>
    <xsl:apply-templates select="node()" mode="#current"/>
  </publicationStmt>
</xsl:template>
778
<!-- text header: re-create availability without a namespace; only its attributes
     and element children are processed (text nodes are not selected) -->
<xsl:template match="tei:availability" mode="text-header">
  <availability>
    <xsl:apply-templates select="attribute::* | child::*" mode="#current"/>
  </availability>
</xsl:template>
784
<!-- text header: re-create profileDesc without a namespace and process its children in the same mode -->
<xsl:template match="tei:profileDesc" mode="text-header">
  <profileDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </profileDesc>
</xsl:template>
bansp5f841732022-03-16 06:27:31 +0100790
<!-- text header: re-create textClass without a namespace; only its attributes
     and element children are processed (text nodes are not selected) -->
<xsl:template match="tei:textClass" mode="text-header">
  <textClass>
    <xsl:apply-templates select="attribute::* | child::*" mode="#current"/>
  </textClass>
</xsl:template>
796
<!-- text and corpus headers: re-create catRef without a namespace and process its
     attributes and element children.
     The template serves two modes, so processing continues in the mode it was
     invoked in (#current) rather than always switching to "text-header". -->
<xsl:template match="tei:catRef" mode="text-header corpus-header">
  <xsl:element name="{local-name()}">
    <xsl:apply-templates mode="#current" select="@* | *"/>
  </xsl:element>
</xsl:template>
802
<!-- text and corpus headers: the attributes listed here are carried over unchanged;
     @xml:id only when it sits somewhere below a tei:classDecl -->
<xsl:template mode="text-header corpus-header"
  match="@status | @scheme | @target | @type | @xml:id[ancestor::tei:classDecl] | @xml:lang">
  <xsl:copy/>
</xsl:template>
806
<!-- text and corpus headers: a paragraph becomes a namespace-less p; its content
     is handed to mode "text_inside_header" -->
<xsl:template match="tei:p" mode="text-header corpus-header">
  <p>
    <xsl:apply-templates select="node()" mode="text_inside_header"/>
  </p>
</xsl:template>
812
813
814 <!-- OPTIMIZATION has to take modes into account -->
bansp5e2d1c02022-03-10 04:51:40 +0100815 <!-- ************************** CORPUS header ******************* -->
<!-- create_corpus_header: writes the corpus-level header.xml to $target.
     Params:
       text.doc  parsed source document; the header is read from
                 tei:teiCorpus/tei:teiHeader
       target    href of the result document -->
<xsl:template name="create_corpus_header">
  <xsl:param name="text.doc" as="document-node()"/>
  <xsl:param name="target" as="xs:string"/>

  <!-- element children of the corpus-level teiHeader -->
  <xsl:variable name="header-parts" select="$text.doc/tei:teiCorpus/tei:teiHeader/tei:*"/>

  <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes">

    <!--doctype-public="{$publicDoctypeI5}"
        doctype-system="{$systemDoctypeI5}">
        these are, sadly, useless
    -->

    <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="corpus-header" select="$header-parts"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
833
<!-- corpus header: re-create fileDesc without a namespace and process its children in the same mode -->
<xsl:template match="tei:fileDesc" mode="corpus-header">
  <fileDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </fileDesc>
</xsl:template>
bansp9103aab2022-03-19 05:10:21 +0100839
bansp5e2d1c02022-03-10 04:51:40 +0100840
<!-- corpus header: a title becomes c.title; attributes are processed in the current
     mode, the content in mode "text_inside_header" -->
<xsl:template match="tei:title" mode="corpus-header">
  <xsl:element name="c.title">
    <xsl:apply-templates select="attribute::*" mode="#current"/>
    <xsl:apply-templates select="node()" mode="text_inside_header"/>
  </xsl:element>
</xsl:template>
847
<!-- corpus header: titleStmt gets a leading korpusSigle holding $corpusID,
     followed by the processed original content -->
<xsl:template match="tei:titleStmt" mode="corpus-header">
  <xsl:element name="titleStmt">
    <xsl:element name="korpusSigle">
      <xsl:value-of select="$corpusID"/>
    </xsl:element>
    <xsl:apply-templates select="node()" mode="#current"/>
  </xsl:element>
</xsl:template>
856
<!-- corpus header: re-create publicationStmt without a namespace and process its children in the same mode -->
<xsl:template match="tei:publicationStmt" mode="corpus-header">
  <publicationStmt>
    <xsl:apply-templates select="node()" mode="#current"/>
  </publicationStmt>
</xsl:template>
862
<!-- corpus header: re-create availability without a namespace; only its attributes
     and element children are processed (text nodes are not selected) -->
<xsl:template match="tei:availability" mode="corpus-header">
  <availability>
    <xsl:apply-templates select="attribute::* | child::*" mode="#current"/>
  </availability>
</xsl:template>
868
<!-- corpus header: re-create encodingDesc without a namespace and process its children in the same mode -->
<xsl:template match="tei:encodingDesc" mode="corpus-header">
  <encodingDesc>
    <xsl:apply-templates select="node()" mode="#current"/>
  </encodingDesc>
</xsl:template>
874
<!-- corpus header: structural elements of the class declaration are re-created
     under their own local name, without a namespace; only attributes and element
     children are processed (text nodes are not selected) -->
<xsl:template mode="corpus-header"
  match="tei:classDecl | tei:taxonomy | tei:category | tei:taxonomy/tei:bibl">
  <xsl:element name="{local-name(.)}">
    <xsl:apply-templates select="attribute::* | child::*" mode="#current"/>
  </xsl:element>
</xsl:template>
880
<!-- corpus header: text-bearing elements (a title inside bibl, edition, desc) are
     re-created under their own local name, without a namespace; attributes are
     processed in the current mode, the content in mode "text_inside_header" -->
<xsl:template mode="corpus-header" match="tei:bibl/tei:title | tei:edition | tei:desc">
  <xsl:element name="{local-name(.)}">
    <xsl:apply-templates select="attribute::*" mode="#current"/>
    <xsl:apply-templates select="node()" mode="text_inside_header"/>
  </xsl:element>
</xsl:template>
887<!--
Piotr Banskifaa910f2022-06-03 00:46:29 +0200888 <xsl:template match="tei:textClass" mode="corpus-header">
bansp9103aab2022-03-19 05:10:21 +0100889 <xsl:element name="{local-name()}">
890 <xsl:apply-templates mode="corpus" select="@* | *"/>
891 </xsl:element>
892 </xsl:template>
893
Piotr Banskifaa910f2022-06-03 00:46:29 +0200894 <xsl:template match="tei:catRef" mode="corpus-header">
bansp9103aab2022-03-19 05:10:21 +0100895 <xsl:element name="{local-name()}">
896 <xsl:apply-templates mode="corpus" select="@* | *"/>
897 </xsl:element>
898 </xsl:template>
899-->
bansp5e2d1c02022-03-10 04:51:40 +0100900
901
902
<!-- this template can be called by the XSpec test; TODO: find a way to call the main() template directly -->
<!-- I have not fully handled the param transmission, which would have to be kludged in just for the sake of XSpec,
     because I'm disabling this for now, due to XSpec design issues; relevant links, among others:

     https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
     https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html

     In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in their stylesheets. But I don't
     want to be strangled by kludges at the beginning of work, I've already lost quite a bit of time on this investigation,
     I will therefore "just code" and then can think of externalizing bits of templates if we want to play with tests. For now,
     I don't want to have to handle context items in a special way inside variables, etc., because I'm not sure it's worth it.

-->
916 <!--<xsl:template name="test_full">
917 <xsl:param name="corpusID"/>
918 <xsl:param name="docID"/>
919 <xsl:param name="textID"/>
920 <xsl:call-template name="xsl:initial-template"/>
921 </xsl:template>-->
922
Akron9a8ee3e2022-01-31 13:51:49 +0100923</xsl:stylesheet>
Piotr Banski6a4a2522022-05-24 01:16:47 +0200924
Piotr Banskifdc858a2022-05-25 02:40:32 +0200925<!-- template for serializing maps in messages <xsl:message select="('map:',serialize($map, map{'method':'adaptive'}))"/> -->