blob: e186ef1ff23f5df5bfe51fd018778fae45766fd4 [file] [log] [blame]
Akron9a8ee3e2022-01-31 13:51:49 +01001<?xml version="1.0" encoding="UTF-8"?>
2<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
bansp5e2d1c02022-03-10 04:51:40 +01003 xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
4 xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
5 xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f map nkjp tei"
6 version="3.0" expand-text="yes">
Akron9a8ee3e2022-01-31 13:51:49 +01007
<!-- Identifiers of the text being converted. They are combined below into the
     output directory paths and into the sigle/docid values written to the
     generated files. No default is declared for them in this stylesheet. -->
<xsl:param as="xs:string" name="corpusID"/>
<xsl:param as="xs:string" name="docID"/>
<xsl:param as="xs:string" name="textID"/>

<!-- Directory that text.xml is read from. The select value is only a fallback,
     kept so that a reviewer can see what is normally passed in this param. -->
<xsl:param as="xs:string" name="sourceDir"
  select="'test/resources/nkjp2korap_sample1/KOT/'"/>
Akron9a8ee3e2022-01-31 13:51:49 +010014
<!-- Output directory for the per-text files: test/output/CORPUS/DOC/TEXT/ -->
<xsl:variable as="xs:string" name="targetTextDir"
  select="concat('test/output/', $corpusID, '/', $docID, '/', $textID, '/')"/>

<!-- Output directory for the corpus-level files: test/output/CORPUS/ -->
<xsl:variable as="xs:string" name="targetCorpusDir"
  select="concat('test/output/', $corpusID, '/')"/>
18
<!-- System and public identifiers of the IDS-XCES (I5) DTD. In this file they
     are referenced only from a commented-out doctype snippet in
     create_corpus_header. -->
<xsl:variable name="systemDoctypeI5" static="true" as="xs:string"
  select="'http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd'"/>

<xsl:variable name="publicDoctypeI5" static="true" as="xs:string"
  select="'-//IDS//DTD IDS-XCES 1.0//EN'"/>

<!-- Namespace URI given to the elements that create_data writes to data.xml. -->
<xsl:variable name="KorAP_namespace" as="xs:string" static="true"
  select="'http://ids-mannheim.de/ns/KorAP'"/>
28
29
<!-- Two named modes, one per generated header. With on-no-match="deep-skip" a
     node that has no matching rule in the mode produces no output, and its
     descendants are not visited either. -->
<xsl:mode on-no-match="deep-skip" name="corpus"/>
<xsl:mode on-no-match="deep-skip" name="text"/>
32
33
<!-- Entry point: loads text.xml from $sourceDir once and passes the same
     document node to the three named templates that write data.xml, the
     text-level header.xml and the corpus-level header.xml. -->
<xsl:template name="xsl:initial-template">
  <xsl:variable name="source" select="doc(concat($sourceDir, 'text.xml'))" as="document-node()"/>

  <!-- data.xml -->
  <xsl:call-template name="create_data">
    <xsl:with-param as="document-node()" name="text.xml" select="$source"/>
  </xsl:call-template>

  <!-- header.xml in the text directory -->
  <xsl:call-template name="create_text_header">
    <xsl:with-param as="document-node()" name="text.xml" select="$source"/>
  </xsl:call-template>

  <!-- header.xml in the corpus directory -->
  <xsl:call-template name="create_corpus_header">
    <xsl:with-param as="document-node()" name="text.xml" select="$source"/>
  </xsl:call-template>
</xsl:template>
49
50 <!-- ************************** data.xml ******************* -->
51
<!-- create_data: writes data.xml for one text into $targetTextDir.

     Parameter
       text.xml : the parsed source document; its 'ab' elements supply the text.

     Output
       A raw_text element in the KorAP namespace with a docid attribute of the
       form CORPUS_DOC.TEXT, a metadata child pointing at 'metadata.xml', and a
       text child holding the string value of the source 'ab' elements. -->
<xsl:template name="create_data">
  <xsl:param name="text.xml" as="document-node()"/>

  <!-- No xpath-default-namespace is set here. That attribute is not an
       attribute value template, so a value written as {$KorAP_namespace} would
       be taken literally; it also only affects unprefixed names inside XPath
       expressions, and none are used below. The namespace of the output
       elements is set explicitly on each xsl:element instead. -->
  <xsl:result-document href="{$targetTextDir || 'data.xml'}" method="xml" encoding="UTF-8"
    indent="yes">

    <!-- Associates the generated file with the text.rng RELAX NG schema. -->
    <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
    <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
      <xsl:attribute name="docid" select="$corpusID || '_' || $docID || '.' || $textID"/>
      <xsl:element name="metadata" namespace="{$KorAP_namespace}">
        <xsl:attribute name="file" select="'metadata.xml'"/>
      </xsl:element>

      <xsl:element name="text" namespace="{$KorAP_namespace}">
        <!-- 'ab' is matched by local name, i.e. in any namespace. When several
             elements are selected, xsl:value-of separates their string values
             with a single space. -->
        <xsl:value-of select="$text.xml//*[local-name() = 'ab']"/>
      </xsl:element>
    </xsl:element>
  </xsl:result-document>
</xsl:template>
71
bansp5e2d1c02022-03-10 04:51:40 +010072 <!-- ************************** CORPUS header ******************* -->
<!-- create_corpus_header: writes the corpus-level header.xml into
     $targetCorpusDir.

     Parameter
       text.xml : the parsed source document; its TEI fileDesc is transformed
                  in mode "corpus" to fill the idsHeader. -->
<xsl:template name="create_corpus_header">
  <xsl:param name="text.xml" as="document-node()"/>

  <!-- doctype-public="{$publicDoctypeI5}" and doctype-system="{$systemDoctypeI5}"
       are deliberately not set on the result document: the original author
       found them to be, sadly, useless. -->
  <xsl:result-document href="{concat($targetCorpusDir, 'header.xml')}" indent="yes"
    method="xml" encoding="UTF-8">
    <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates mode="corpus"
        select="$text.xml//tei:TEI/tei:teiHeader/tei:fileDesc"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
90
<!-- Corpus header: wraps whatever the child nodes of the TEI fileDesc produce
     in mode "corpus" inside a fileDesc element. Children without a rule in that
     mode are skipped (see the mode declaration). -->
<xsl:template mode="corpus" match="tei:fileDesc">
  <fileDesc>
    <xsl:apply-templates select="node()" mode="corpus"/>
  </fileDesc>
</xsl:template>
96
<!-- Corpus header: renders a TEI title as c.title. The children are processed
     in the default mode rather than in "corpus"; "corpus" is declared
     deep-skip, so text nodes would produce nothing there. -->
<xsl:template mode="corpus" match="tei:title">
  <c.title>
    <xsl:apply-templates select="node()" mode="#default"/>
  </c.title>
</xsl:template>
102
<!-- Corpus header: emits titleStmt, starting with a korpusSigle that holds
     $corpusID, followed by the output of the child nodes in mode "corpus". -->
<xsl:template mode="corpus" match="tei:titleStmt">
  <titleStmt>
    <korpusSigle>
      <xsl:sequence select="$corpusID"/>
    </korpusSigle>
    <xsl:apply-templates select="node()" mode="corpus"/>
  </titleStmt>
</xsl:template>
111
112
113 <!-- ************************** TEXT header ******************* -->
114
<!-- create_text_header: writes the text-level header.xml into $targetTextDir.

     Parameter
       text.xml : the parsed source document; its TEI fileDesc is transformed
                  in mode "text" to fill the idsHeader. -->
<xsl:template name="create_text_header">
  <xsl:param name="text.xml" as="document-node()"/>

  <!-- No xpath-default-namespace is set: the only XPath below uses prefixed
       names, and the attribute has no influence on the namespace of literal
       result elements. This also keeps the call parallel to
       create_corpus_header. -->
  <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
    href="{$targetTextDir || 'header.xml'}">

    <!-- type="text": this header is written to the text directory and carries
         textSigle and t.title, i.e. it describes a single text. -->
    <idsHeader type="text" pattern="text" status="new" version="1.1" TEIform="teiHeader">
      <xsl:apply-templates select="$text.xml//tei:TEI/tei:teiHeader/tei:fileDesc" mode="text"/>
    </idsHeader>
  </xsl:result-document>
</xsl:template>
128
<!-- Text header: wraps whatever the child nodes of the TEI fileDesc produce in
     mode "text" inside a fileDesc element. Children without a rule in that mode
     are skipped (see the mode declaration). -->
<xsl:template mode="text" match="tei:fileDesc">
  <fileDesc>
    <xsl:apply-templates select="node()" mode="text"/>
  </fileDesc>
</xsl:template>
134
<!-- Text header: renders a TEI title as t.title. The children are processed in
     the default mode rather than in "text"; "text" is declared deep-skip, so
     text nodes would produce nothing there. -->
<xsl:template mode="text" match="tei:title">
  <t.title>
    <xsl:apply-templates select="node()" mode="#default"/>
  </t.title>
</xsl:template>
140
<!-- Text header: emits titleStmt, starting with the textSigle, followed by the
     output of the child nodes in mode "text". -->
<xsl:template match="tei:titleStmt" mode="text">
  <titleStmt>
    <textSigle>
      <!-- CORPUS/DOC.TEXT: includes $docID so that the sigle identifies the
           same text as the docid attribute written to data.xml
           (CORPUS_DOC.TEXT) and as the corpus/doc/text output directory. -->
      <xsl:value-of select="$corpusID || '/' || $docID || '.' || $textID"/>
    </textSigle>
    <xsl:apply-templates mode="text"/>
  </titleStmt>
</xsl:template>
149
150
151
152
<!-- This template can be called by the XSpec test; TODO: find a way to call the main() template directly. -->
<!-- I have not fully handled the param transmission, which would have to be kludged in just for the sake of XSpec,
     and I am disabling this template for now because of XSpec design issues; relevant links include:

     https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
     https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html

     In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in a stylesheet. I don't
     want to be strangled by kludges at the beginning of the work, and I have already lost quite a bit of time on this investigation.
     I will therefore "just code" for now; later we can think of externalizing bits of templates if we want to play with tests. For now,
     I don't want to have to handle context items in a special way inside variables, etc., because I'm not sure it's worth it.

-->
166 <!--<xsl:template name="test_full">
167 <xsl:param name="corpusID"/>
168 <xsl:param name="docID"/>
169 <xsl:param name="textID"/>
170 <xsl:call-template name="xsl:initial-template"/>
171 </xsl:template>-->
172
Akron9a8ee3e2022-01-31 13:51:49 +0100173</xsl:stylesheet>