first touch: make sure that I can grab at the data and send it where I want it to go
Change-Id: Ide29bf5f041df7b2838aecd3ea445abf7095c9f9
diff --git a/nkjp2korap.xsl b/nkjp2korap.xsl
index 5b19e00..b8c156e 100644
--- a/nkjp2korap.xsl
+++ b/nkjp2korap.xsl
@@ -1,47 +1,173 @@
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- xmlns:xs="http://www.w3.org/2001/XMLSchema"
- xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
- xmlns:tei="http://www.tei-c.org/ns/1.0"
- xmlns:f="func" xmlns:map="http://www.w3.org/2005/xpath-functions/map"
- exclude-result-prefixes="xs f map nkjp tei" version="3.0" expand-text="yes">
+ xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
+ xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
+ xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f map nkjp tei"
+ version="3.0" expand-text="yes">
- <xsl:param name="corpusID"/>
- <xsl:param name="docID"/>
- <xsl:param name="textID"/>
- <xsl:param name="targetDir" select="'test/resources/nkjp2korap_sample1/KOT/'"/>
+ <xsl:param name="corpusID" as="xs:string"/>
+ <xsl:param name="docID" as="xs:string"/>
+ <xsl:param name="textID" as="xs:string"/>
+ <!-- corpusID, docID and textID are combined into the output paths and into the docid attribute; sourceDir is the directory (with trailing slash) from which text.xml is read -->
+ <xsl:param name="sourceDir" select="'test/resources/nkjp2korap_sample1/KOT/'" as="xs:string"/>
<!-- the select is a fallback, so that it's clear to the reviewer what I'm passing in the param -->
-
- <xsl:template name="xsl:initial-template">
-
- <xsl:variable name="text.xml" as="document-node()" select="doc($targetDir || 'text.xml')"/>
- <!-- Template to create the data.xml file -->
- <xsl:result-document encoding="UTF-8" method="xml" indent="yes" xpath-default-namespace="http://ids-mannheim.de/ns/KorAP"
- href="{'test/output/' || $corpusID || '/' || $docID || '/' || $textID || '/data.xml'}">
+ <xsl:variable name="targetTextDir"
+ select="'test/output/' || $corpusID || '/' || $docID || '/' || $textID || '/'" as="xs:string"/>
+ <xsl:variable name="targetCorpusDir" select="'test/output/' || $corpusID || '/'" as="xs:string"/>
+ <!-- The two variables above are the output directories for the text-level files (data.xml, header.xml) and for the corpus-level header.xml. The DOCTYPE identifiers below are referenced only from a commented-out fragment in create_corpus_header. -->
+ <xsl:variable name="systemDoctypeI5"
+ select="'http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd'" as="xs:string"
+ static="true"/>
+ <!-- Public identifier that goes together with the system identifier above. -->
+ <xsl:variable name="publicDoctypeI5" select="'-//IDS//DTD IDS-XCES 1.0//EN'" as="xs:string"
+ static="true"/>
+ <!-- Namespace URI given to the elements written to data.xml (raw_text, metadata, text). -->
+ <xsl:variable name="KorAP_namespace" select="'http://ids-mannheim.de/ns/KorAP'" static="true"
+ as="xs:string"/>
+
+ <!-- Modes used to build the corpus-level and text-level headers: a node that no template rule of the mode matches is skipped together with its descendants. -->
+ <xsl:mode name="corpus" on-no-match="deep-skip"/>
+ <xsl:mode name="text" on-no-match="deep-skip"/>
+
+
+ <xsl:template name="xsl:initial-template">
+ <!-- Entry point: loads text.xml from $sourceDir once and hands the document to the three writers below. -->
+ <xsl:variable name="sourceDoc" as="document-node()" select="doc(concat($sourceDir, 'text.xml'))"/>
+ <xsl:call-template name="create_data">
+ <xsl:with-param name="text.xml" select="$sourceDoc"/>
+ </xsl:call-template>
+ <!-- header.xml next to data.xml -->
+ <xsl:call-template name="create_text_header">
+ <xsl:with-param name="text.xml" select="$sourceDoc"/>
+ </xsl:call-template>
+ <!-- header.xml in the corpus directory -->
+ <xsl:call-template name="create_corpus_header">
+ <xsl:with-param name="text.xml" select="$sourceDoc"/>
+ </xsl:call-template>
+ </xsl:template>
+
+ <!-- ************************** data.xml ******************* -->
+
+ <xsl:template name="create_data">
+ <xsl:param name="text.xml" as="document-node()"/>
+ <!-- Writes data.xml under $targetTextDir: a raw_text element in the KorAP namespace carrying the docid, a pointer to metadata.xml and the text taken from the source document passed in as text.xml. -->
+ <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
+ _xpath-default-namespace="{$KorAP_namespace}" href="{$targetTextDir || 'data.xml'}">
+ <!-- xpath-default-namespace is not an attribute value template, so the static variable is supplied through the shadow attribute (leading underscore) -->
<xsl:processing-instruction name="xml-model">href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"</xsl:processing-instruction>
- <xsl:element name="raw_text">
+ <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
<xsl:attribute name="docid" select="$corpusID || '_' || $docID || '.' || $textID"/>
- <metadata file="metadata.xml"/>
- <text>
+ <xsl:element name="metadata" namespace="{$KorAP_namespace}">
+ <xsl:attribute name="file" select="'metadata.xml'"/>
+ </xsl:element>
+ <!-- the text is the string value of every element with local name ab, whatever its namespace; NOTE(review): xsl:value-of joins several items with a single space, confirm this is wanted when the source has more than one ab -->
+ <xsl:element name="text" namespace="{$KorAP_namespace}">
<xsl:value-of select="$text.xml//*[local-name() = 'ab']"/>
- </text>
+ </xsl:element>
</xsl:element>
</xsl:result-document>
-
- <xsl:result-document encoding="UTF-8" method="xml" indent="yes" xpath-default-namespace="http://ids-mannheim.de/ns/KorAP"
- href="{'test/output/' || $corpusID || '/' || $docID || '/' || $textID || '/header.xml'}"
- doctype-public="-//IDS//DTD IDS-XCES 1.0//EN" doctype-system="http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
-
- <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
- <xsl:copy-of select="$text.xml//tei:TEI/tei:teiHeader/tei:fileDesc"/>
- </idsHeader>
-
- <xsl:copy>
-
- </xsl:copy>
- </xsl:result-document>
-
</xsl:template>
+ <!-- ************************** CORPUS header ******************* -->
+ <xsl:template name="create_corpus_header">
+ <xsl:param name="text.xml" as="document-node()"/>
+ <!-- Writes header.xml under $targetCorpusDir; its fileDesc is derived in mode "corpus" from the fileDesc found in the teiHeader of the supplied document. -->
+ <!-- NOTE(review): the header content (including c.title) comes from the single text.xml passed in; confirm this is the intended source for corpus-level metadata. -->
+ <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
+ href="{concat($targetCorpusDir, 'header.xml')}">
+ <!-- Serialization attributes that were tried and switched off:
+ doctype-public="{$publicDoctypeI5}"
+ doctype-system="{$systemDoctypeI5}"
+ (these are, sadly, useless)
+ -->
+
+ <idsHeader type="corpus" pattern="text" status="new" version="1.1" TEIform="teiHeader">
+ <xsl:apply-templates select="$text.xml/descendant::tei:TEI/tei:teiHeader/tei:fileDesc" mode="corpus"/>
+ </idsHeader>
+ </xsl:result-document>
+ </xsl:template>
+ <!-- mode "corpus": rebuilds fileDesc as a no-namespace element; its children are processed in the same mode -->
+ <xsl:template mode="corpus" match="tei:fileDesc">
+ <xsl:element name="fileDesc">
+ <xsl:apply-templates select="node()" mode="#current"/>
+ </xsl:element>
+ </xsl:template>
+ <!-- mode "corpus": title becomes c.title; the content is processed in the unnamed mode so that the built-in rules copy the text (mode "corpus" itself deep-skips unmatched nodes, text included) -->
+ <xsl:template mode="corpus" match="tei:title">
+ <xsl:element name="c.title">
+ <xsl:apply-templates select="node()"/>
+ </xsl:element>
+ </xsl:template>
+ <!-- mode "corpus": titleStmt starts with a korpusSigle holding $corpusID, followed by whatever its children produce in this mode -->
+ <xsl:template mode="corpus" match="tei:titleStmt">
+ <xsl:element name="titleStmt">
+ <!-- text value template; expand-text is switched on at the stylesheet element -->
+ <xsl:element name="korpusSigle">{$corpusID}</xsl:element>
+
+ <xsl:apply-templates select="node()" mode="#current"/>
+ </xsl:element>
+ </xsl:template>
+
+
+ <!-- ************************** TEXT header ******************* -->
+
+ <xsl:template name="create_text_header">
+ <xsl:param name="text.xml" as="document-node()"/>
+ <!-- Writes header.xml under $targetTextDir; its fileDesc is derived in mode "text" from the fileDesc found in the teiHeader of the supplied document. -->
+ <!-- The namespace comes from the static variable via the shadow attribute (leading underscore) instead of a repeated literal URI. -->
+ <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
+ _xpath-default-namespace="{$KorAP_namespace}"
+ href="{$targetTextDir || 'header.xml'}">
+
+ <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
+ <xsl:apply-templates select="$text.xml//tei:TEI/tei:teiHeader/tei:fileDesc" mode="text"/>
+ </idsHeader>
+ </xsl:result-document>
+ </xsl:template>
+ <!-- mode "text": rebuilds fileDesc as a no-namespace element; its children are processed in the same mode -->
+ <xsl:template mode="text" match="tei:fileDesc">
+ <xsl:element name="fileDesc">
+ <xsl:apply-templates select="node()" mode="#current"/>
+ </xsl:element>
+ </xsl:template>
+ <!-- mode "text": title becomes t.title; the content is processed in the unnamed mode so that the built-in rules copy the text (mode "text" itself deep-skips unmatched nodes, text included) -->
+ <xsl:template mode="text" match="tei:title">
+ <xsl:element name="t.title">
+ <xsl:apply-templates select="node()"/>
+ </xsl:element>
+ </xsl:template>
+ <!-- mode "text": titleStmt starts with a korpusSigle holding corpusID/textID, followed by whatever its children produce in this mode. NOTE(review): $docID is not part of this value, confirm that is intended. -->
+ <xsl:template mode="text" match="tei:titleStmt">
+ <xsl:element name="titleStmt">
+ <!-- text value template; expand-text is switched on at the stylesheet element -->
+ <xsl:element name="korpusSigle">{$corpusID}/{$textID}</xsl:element>
+
+ <xsl:apply-templates select="node()" mode="#current"/>
+ </xsl:element>
+ </xsl:template>
+
+
+
+
+ <!-- this template can be called by the XSpec test; TODO: find a way to call the main() template directly -->
+ <!-- I have not fully handled the param transmission, which would have to be kludged in just for the sake of XSpec,
+ and I'm disabling this for now, due to XSpec design issues; relevant links, among others:
+
+ https://stackoverflow.com/questions/64933277/what-is-the-cause-of-error-cannot-execute-xslresult-document-while-evaluating
+ https://www.balisage.net/Proceedings/vol25/html/Galtman01/BalisageVol25-Galtman01.html
+
+ In short: the internal design of XSpec forces kludges when one wants to use xsl:result-document in their stylesheets. But I don't
+ want to be strangled by kludges at the beginning of work, I've already lost quite a bit of time on this investigation,
+ I will therefore "just code" and then can think of externalizing bits of templates if we want to play with tests. For now,
+ I don't want to have to handle context items in a special way inside variables, etc., because I'm not sure it's worth it.
+
+ -->
+ <!--<xsl:template name="test_full">
+ <xsl:param name="corpusID"/>
+ <xsl:param name="docID"/>
+ <xsl:param name="textID"/>
+ <xsl:call-template name="xsl:initial-template"/>
+ </xsl:template>-->
+
</xsl:stylesheet>
diff --git a/test/output/NKJP/NKJP/KOT/data.xml b/test/output/NKJP/NKJP/KOT/data.xml
index a0a41cd..5727c58 100644
--- a/test/output/NKJP/NKJP/KOT/data.xml
+++ b/test/output/NKJP/NKJP/KOT/data.xml
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-model href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
-<raw_text docid="NKJP_NKJP.KOT">
+<raw_text xmlns="http://ids-mannheim.de/ns/KorAP" docid="NKJP_NKJP.KOT">
<metadata file="metadata.xml"/>
<text>Nie zdążyłam jeszcze doprowadzić mieszkania do ładu po rządach Mini, która zrobiła w nim maksi bałagan, kiedy na horyzoncie za sprawą pani Joasi pojawił się czarny jak smoła Dratewka. Dratewkę pani Joasia wyrwała z bestialskich łap pewnego szewca, którego niech piekło pochłonie.</text>
</raw_text>
diff --git a/test/output/NKJP/NKJP/KOT/header.xml b/test/output/NKJP/NKJP/KOT/header.xml
index f40fa68..e80c8ed 100644
--- a/test/output/NKJP/NKJP/KOT/header.xml
+++ b/test/output/NKJP/NKJP/KOT/header.xml
@@ -1,37 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE idsHeader
- PUBLIC "-//IDS//DTD IDS-XCES 1.0//EN" "http://corpora.ids-mannheim.de/idsxces1/DTD/ids.xcesdoc.dtd">
<idsHeader type="document"
pattern="text"
status="new"
version="1.1"
TEIform="teiHeader">
- <fileDesc xmlns="http://www.tei-c.org/ns/1.0"
- xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
- xmlns:xi="http://www.w3.org/2001/XInclude">
+ <fileDesc>
<titleStmt>
- <title>TEI P5 encoded version of sample(s) of "Kot"</title>
+ <korpusSigle>NKJP/KOT</korpusSigle>
+ <t.title>TEI P5 encoded version of sample(s) of "Kot"</t.title>
</titleStmt>
- <extent nkjp:file="text.xml">
- <num type="word" value="4892"/>
- </extent>
- <publicationStmt nkjp:subcorpus="one_million">
- <availability status="free" default="false">
- <p>See the licence agreement.</p>
- </availability>
- </publicationStmt>
- <sourceDesc default="false">
- <bibl xml:id="h_src-bibl" default="false">
- <title xml:lang="pl">Kot (agregat)</title>
- <idno type="nkjp">IJPPAN_4scal-KOT</idno>
- <note type="text_origin" anchored="true">IJP PAN</note>
- </bibl>
- <listBibl default="false">
- <bibl xml:id="h_76-bibl" default="false">
- <ptr target="#txt_76-div"/>
- <idno type="para">p55in262</idno>
- </bibl>
- </listBibl>
- </sourceDesc>
</fileDesc>
</idsHeader>
diff --git a/test/output/NKJP/header.xml b/test/output/NKJP/header.xml
index e69de29..45a94cf 100644
--- a/test/output/NKJP/header.xml
+++ b/test/output/NKJP/header.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<idsHeader type="corpus"
+ pattern="text"
+ status="new"
+ version="1.1"
+ TEIform="teiHeader">
+ <fileDesc>
+ <titleStmt>
+ <korpusSigle>NKJP</korpusSigle>
+ <c.title>TEI P5 encoded version of sample(s) of "Kot"</c.title>
+ </titleStmt>
+ </fileDesc>
+</idsHeader>