Fix catalog; prepare script for processing. Morpho files now contain some new data (from the new NKJP version).
Change-Id: Ieb010abfb3295d99f024cf971e48cd4b08d47657
diff --git a/nkjp2korap.xsl b/nkjp2korap.xsl
index e01bbd5..d8a4603 100644
--- a/nkjp2korap.xsl
+++ b/nkjp2korap.xsl
@@ -10,7 +10,7 @@
<!-- PARAMETERS -->
- <xsl:param name="sourceDir" select="'test/resources/nkjp2korap_sample2'" as="xs:string"/>
+ <xsl:param name="sourceDir" select="'test/resources/nkjp2korap_sample3'" as="xs:string"/>
<!-- the directory containing NKJP files, in the form of a collection of text-level dirs
(that is how we know both the $corpusID and the $docID) -->
@@ -19,7 +19,7 @@
<xsl:param name="skip_docID" as="xs:string">
<xsl:value-of select="''"/>
- <!--<xsl:value-of select="'HellerPodgladanie,IsakowiczZaleskiMoje,KolakowskiOco,MysliwskiKamien,WilkWilczy,ZycieWarszawy_Zycie'"/>-->
+ <!--<xsl:value-of select="'HellerPodgladanie,IsakowiczZaleskiMoje,KOT,MysliwskiKamien,WilkWilczy,ZycieWarszawy_Zycie'"/>-->
</xsl:param>
<!-- comma-separated list of document IDs to be skipped from processing
example: HellerPodgladanie,KOT
@@ -346,34 +346,33 @@
<xsl:if test="$ann_named.doc">
<xsl:variable name="rev_lookup-seq" as="map(*)+">
- <xsl:for-each
- select="$ann_named.doc//tei:seg/tei:ptr">
- <xsl:variable name="trg" as="xs:string" select="fn:substring-before(@target, '#')"/>
- <!-- caution: as of 01-June-2022, some of the pointers are malformed (missing '#' when referencing locally).
+ <xsl:for-each select="$ann_named.doc//tei:seg/tei:ptr">
+ <xsl:variable name="trg" as="xs:string" select="fn:substring-before(@target, '#')"/>
+ <!-- caution: as of 01-June-2022, some of the pointers are malformed (missing '#' when referencing locally).
so we need to act around it but also sustainably - in case that error gets corrected -->
- <xsl:if test="fn:string-length($trg) and $trg eq 'ann_morphosyntax.xml'">
+ <xsl:if test="fn:string-length($trg) and $trg eq 'ann_morphosyntax.xml'">
<xsl:sequence>
<xsl:map-entry key="fn:substring-after(fn:string(@target), '#')" select="fn:current()"
/>
</xsl:sequence>
- </xsl:if>
- </xsl:for-each>
+ </xsl:if>
+ </xsl:for-each>
</xsl:variable>
- <xsl:variable name="rev_lookup" as="map(*)" select="map:merge($rev_lookup-seq,map{'duplicates':'combine'})"/>
-
+ <xsl:variable name="rev_lookup" as="map(*)"
+ select="map:merge($rev_lookup-seq, map {'duplicates': 'combine'})"/>
+
<xsl:call-template name="create_named">
<xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
<xsl:with-param name="ann_morphosyntax.doc" select="$ann_morphosyntax.doc"
as="document-node()"/>
- <xsl:with-param name="ann_named.doc" select="$ann_named.doc"
- as="document-node()"/>
+ <xsl:with-param name="ann_named.doc" select="$ann_named.doc" as="document-node()"/>
<xsl:with-param name="target" select="$targetBaseDir || '/nkjp/named.xml'" as="xs:string"/>
<xsl:with-param name="rev_lookup" select="$rev_lookup" as="map(*)"/>
</xsl:call-template>
</xsl:if>
- <xsl:if test="$ann_words.doc and $ann_groups.doc">
+<!-- <xsl:if test="$ann_words.doc and $ann_groups.doc">
<xsl:call-template name="create_groups">
<xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
<xsl:with-param name="ann_morphosyntax.doc" select="$ann_morphosyntax.doc"
@@ -384,7 +383,7 @@
as="document-node()"/>
<xsl:with-param name="target" select="$targetBaseDir || '/nkjp/groups.xml'" as="xs:string"/>
</xsl:call-template>
- </xsl:if>
+ </xsl:if>-->
</xsl:template>