Initial step towards processing a collection of NKJP text dirs: add targetDir param, create the corpus header once
Change-Id: I0844f0f4561f21c82c08f9e2f20db10945f4b177
diff --git a/nkjp2korap.xsl b/nkjp2korap.xsl
index 5a486c0..c70a01d 100644
--- a/nkjp2korap.xsl
+++ b/nkjp2korap.xsl
@@ -9,12 +9,15 @@
<xsl:param name="docID" as="xs:string"/>
<xsl:param name="textID" as="xs:string"/>
- <xsl:param name="sourceDir" select="'test/resources/nkjp2korap_sample1/KOT/'" as="xs:string"/>
- <!-- the select is a fallback, so that it's clear to the reviewer what I'm passing in the param -->
+ <xsl:param name="sourceDir" select="'test/resources/nkjp2korap_sample2'" as="xs:string"/>
+ <!-- the root directory containing the NKJP files (given without a trailing slash), in the form of a collection of text-level dirs -->
- <xsl:variable name="targetTextDir"
- select="'test/output/' || $corpusID || '/' || $docID || '/' || $textID || '/'" as="xs:string"/>
- <xsl:variable name="targetCorpusDir" select="'test/output/' || $corpusID || '/'" as="xs:string"/>
+ <xsl:param name="targetDir" select="'test/output'" as="xs:string"/>
+
+ <xsl:variable name="targetTextDir_slashed"
+ select="$targetDir || '/' || $corpusID || '/' || $docID || '/' || $textID || '/'" as="xs:string"/>
+
+ <xsl:variable name="targetCorpusDir_slashed" select="$targetDir || '/' || $corpusID || '/'" as="xs:string"/>
<xsl:variable name="systemDoctypeI5"
select="'http://corpora.ids-mannheim.de/I5/DTD/i5.dtd'" as="xs:string"
@@ -83,23 +86,42 @@
<xsl:template match="@default" mode="#all"/>
<!-- this is to delete some auto-inserted attribute throughout -->
+ <xsl:variable name="collection_params" as="xs:string" static="yes"
+ select="'recurse=yes;validation=strip;select=text.xml;content-type=application/xml;on-error=warning;xinclude=yes'"
+ />
+
+<xsl:variable name="collection_of_text" select="collection($sourceDir || '?' || $collection_params)" as="document-node()+"/>
+
<xsl:template name="xsl:initial-template">
- <xsl:variable name="text.xml" as="document-node()" select="doc($sourceDir || 'text.xml')"/>
+ <xsl:variable name="text.xml" select="$collection_of_text[1]"/>
+
+ <!-- we only want to call this once, so we use an arbitrary NKJP text file for that purpose (the first one in the collection),
+ because all we need is the main corpus header, and we can (should be able to) get to that from any NKJP corpus document -->
+ <xsl:call-template name="create_corpus_header">
+ <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
+ <xsl:with-param name="target" select="$targetCorpusDir_slashed || 'header.xml'" as="xs:string"/>
+ </xsl:call-template>
+
+
+ </xsl:template>
+
+ <xsl:template name="process_single_sample">
+ <xsl:variable name="text.xml" as="document-node()" select="doc($sourceDir || '/text.xml')"/>
<xsl:variable name="ann_morphosyntax.xml" as="document-node()"
- select="doc($sourceDir || 'ann_morphosyntax.xml')"/>
+ select="doc($sourceDir || '/ann_morphosyntax.xml')"/>
<xsl:variable name="ann_segmentation.xml" as="document-node()"
- select="doc($sourceDir || 'ann_segmentation.xml')"/>
+ select="doc($sourceDir || '/ann_segmentation.xml')"/>
<xsl:call-template name="create_data">
<xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
- <xsl:with-param name="target" select="$targetTextDir || 'data.xml'" as="xs:string"/>
+ <xsl:with-param name="target" select="$targetTextDir_slashed || 'data.xml'" as="xs:string"/>
</xsl:call-template>
<xsl:call-template name="create_struct">
<xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
<xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml"
as="document-node()"/>
- <xsl:with-param name="target" select="$targetTextDir || 'struct/structure.xml'" as="xs:string"
+ <xsl:with-param name="target" select="$targetTextDir_slashed || 'struct/structure.xml'" as="xs:string"
/>
</xsl:call-template>
@@ -109,18 +131,14 @@
as="document-node()"/>
<xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml"
as="document-node()"/>
- <xsl:with-param name="target" select="$targetTextDir || 'nkjp/morpho.xml'" as="xs:string"/>
+ <xsl:with-param name="target" select="$targetTextDir_slashed || 'nkjp/morpho.xml'" as="xs:string"/>
</xsl:call-template>
<xsl:call-template name="create_text_header">
<xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
- <xsl:with-param name="target" select="$targetTextDir || 'header.xml'" as="xs:string"/>
+ <xsl:with-param name="target" select="$targetTextDir_slashed || 'header.xml'" as="xs:string"/>
</xsl:call-template>
- <xsl:call-template name="create_corpus_header">
- <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
- <xsl:with-param name="target" select="$targetCorpusDir || 'header.xml'" as="xs:string"/>
- </xsl:call-template>
</xsl:template>
<!-- ************************** data.xml ******************* -->
diff --git a/test/output/NKJP/header.xml b/test/output/NKJP/header.xml
index da2df68..ca11562 100644
--- a/test/output/NKJP/header.xml
+++ b/test/output/NKJP/header.xml
@@ -11,7 +11,7 @@
<c.title xml:lang="en">National Corpus of Polish -- the 1 million word subcorpus</c.title>
</titleStmt>
<publicationStmt>
- <availability status="unknown">
+ <availability>
<p>This 1 million word subcorpus of the National Corpus of Polish is available publicly for free.</p>
</availability>
</publicationStmt>