Add ability to skip documents whose IDs are listed in a comma-separated parameter (skip_docID)
Change-Id: Id186b63191adcc21a489b5aec0733d84cc4d9137
diff --git a/nkjp2korap.xsl b/nkjp2korap.xsl
index 4bfdb7a..4a377df 100644
--- a/nkjp2korap.xsl
+++ b/nkjp2korap.xsl
@@ -12,6 +12,13 @@
<!-- the directory containing NKJP files, in the form of a collection of text-level dirs -->
<xsl:param name="targetDir" select="'test/output'" as="xs:string"/>
+
+ <!-- comma-separated list of document IDs to be skipped from processing
+ example: HellerPodgladanie,KOT
+ IDs are matched by exact string identity only: the list is split on ','
+ with no whitespace trimming, no wildcards and no case folding;
+ the default (zero-length string) yields no IDs, so nothing is skipped -->
+ <xsl:param name="skip_docID" as="xs:string" select="''"/>
<!-- VARIABLES -->
@@ -106,6 +113,8 @@
-->
<xsl:template name="xsl:initial-template">
+ <xsl:variable name="IDs_to_skip" select="tokenize($skip_docID,',')" as="xs:string*"/>
+
<!-- we only want to call the template below once, and we process a random NKJP corpus file for that purpose,
because all we need is the main corpus header, and we can (should) get to that from any NKJP corpus document -->
<xsl:call-template name="create_corpus_header">
@@ -119,12 +128,20 @@
<xsl:variable name="ann_morphosyntax.uri" select="$my_dir || '/ann_morphosyntax.xml'" as="xs:string"/>
<xsl:variable name="ann_segmentation.uri" select="$my_dir || '/ann_segmentation.xml'" as="xs:string"/>
- <xsl:call-template name="process_single_sample">
- <xsl:with-param name="text.xml" as="document-node()" select="."/>
- <xsl:with-param name="ann_morphosyntax.xml" as="document-node()" select="doc($ann_morphosyntax.uri)"/>
- <xsl:with-param name="ann_segmentation.xml" as="document-node()" select="doc($ann_segmentation.uri)"/>
- <xsl:with-param name="my_textID" select="$my_textID" as="xs:string"/>
- </xsl:call-template>
+ <!-- Skip documents whose ID is listed in $skip_docID: the general comparison
+ is true when $my_textID equals any item of $IDs_to_skip. Every other
+ document is announced via xsl:message and then converted. -->
+ <xsl:if test="not($my_textID = $IDs_to_skip)">
+ <xsl:message select="$my_textID"/>
+ <xsl:call-template name="process_single_sample">
+ <xsl:with-param name="text.xml" as="document-node()" select="."/>
+ <xsl:with-param name="ann_morphosyntax.xml" as="document-node()"
+ select="doc($ann_morphosyntax.uri)"/>
+ <xsl:with-param name="ann_segmentation.xml" as="document-node()"
+ select="doc($ann_segmentation.uri)"/>
+ <xsl:with-param name="my_textID" select="$my_textID" as="xs:string"/>
+ </xsl:call-template>
+ </xsl:if>
</xsl:for-each>
</xsl:template>