Added placeholders for handling more annotation layers (named entities, syntactic groups); tei:seg in 'morpho' mode needs some streamlining (as the profiler suggests)
Change-Id: I5ab451b784f14407a07d10589893b2fd46ee5296
diff --git a/nkjp2korap.xsl b/nkjp2korap.xsl
index 1449770..8aef8b9 100644
--- a/nkjp2korap.xsl
+++ b/nkjp2korap.xsl
@@ -17,8 +17,8 @@
<!-- where the corpus/document/text/annotations hierarchy is going to be created -->
<xsl:param name="skip_docID" as="xs:string">
- <xsl:value-of select="''"/>
- <!--<xsl:value-of select="'HellerPodgladanie,IsakowiczZaleskiMoje,KolakowskiOco,MysliwskiKamien,WilkWilczy,ZycieWarszawy_Zycie'"/>-->
+ <!-- NOTE(review): the default below now lists six document IDs to skip; the previous empty default was: <xsl:value-of select="''"/> (confirm the change of default is intended) -->
+ <xsl:value-of select="'HellerPodgladanie,IsakowiczZaleskiMoje,KolakowskiOco,MysliwskiKamien,WilkWilczy,ZycieWarszawy_Zycie'"/>
</xsl:param>
<!-- comma-separated list of document IDs to be skipped from processing
example: HellerPodgladanie,KOT
@@ -268,6 +268,9 @@
<xsl:variable name="my_textID" as="xs:string" select="tokenize($my_dir,'/')[last()]"/>
<xsl:variable name="ann_morphosyntax.uri" select="$my_dir || '/ann_morphosyntax.xml'" as="xs:string"/>
<xsl:variable name="ann_segmentation.uri" select="$my_dir || '/ann_segmentation.xml'" as="xs:string"/>
+ <xsl:variable name="ann_named.uri" select="$my_dir || '/ann_named.xml'" as="xs:string"/>
+ <xsl:variable name="ann_groups.uri" select="$my_dir || '/ann_groups.xml'" as="xs:string"/>
+ <xsl:variable name="ann_words.uri" select="$my_dir || '/ann_words.xml'" as="xs:string"/>
<xsl:choose>
<xsl:when test="$my_textID = $IDs_to_skip"/>
@@ -281,6 +284,14 @@
<xsl:with-param name="ann_segmentation.xml" as="document-node()"
select="doc($ann_segmentation.uri)"/>
<xsl:with-param name="my_textID" select="$my_textID" as="xs:string"/>
+ <!-- the following parameters may be empty sequences (when the corresponding annotation file is not available) -->
+ <xsl:with-param name="ann_named.xml" as="document-node()*"
+ select="if(fn:doc-available($ann_named.uri)) then doc($ann_named.uri) else ()"/>
+ <xsl:with-param name="ann_groups.xml" as="document-node()*"
+ select="if(fn:doc-available($ann_groups.uri)) then doc($ann_groups.uri) else ()"/>
+ <xsl:with-param name="ann_words.xml" as="document-node()*"
+ select="if(fn:doc-available($ann_words.uri)) then doc($ann_words.uri) else ()"/>
+
</xsl:call-template>
</xsl:otherwise>
</xsl:choose>
@@ -293,6 +304,9 @@
<xsl:param name="ann_segmentation.xml" as="document-node()"/>
<xsl:param name="my_textID" as="xs:string" select="'0-BAD_textID'"/>
<!-- empty textID should never happen, but if it does, it will be signalled at the top of the output -->
+ <xsl:param name="ann_named.xml" as="document-node()*"/>
+ <xsl:param name="ann_groups.xml" as="document-node()*"/>
+ <xsl:param name="ann_words.xml" as="document-node()*"/>
<xsl:variable name="targetBaseDir" as="xs:string" select="$targetCorpusDir_slashed || $docID || '/' || $my_textID"/>
@@ -329,6 +343,34 @@
<xsl:with-param name="target" select="$targetBaseDir || '/header.xml'" as="xs:string"/>
</xsl:call-template>
+ <xsl:if test="$ann_named.xml">
+ <xsl:call-template name="create_named">
+ <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
+ <xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml"
+ as="document-node()"/>
+ <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml"
+ as="document-node()"/>
+ <xsl:with-param name="ann_named.xml" select="$ann_named.xml"
+ as="document-node()"/>
+ <xsl:with-param name="target" select="$targetBaseDir || '/nkjp/named.xml'" as="xs:string"/>
+ </xsl:call-template>
+ </xsl:if>
+
+ <xsl:if test="$ann_words.xml and $ann_groups.xml">
+ <xsl:call-template name="create_groups">
+ <xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
+ <xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml"
+ as="document-node()"/>
+ <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml"
+ as="document-node()"/>
+ <xsl:with-param name="ann_words.xml" select="$ann_words.xml"
+ as="document-node()"/>
+ <xsl:with-param name="ann_groups.xml" select="$ann_groups.xml"
+ as="document-node()"/>
+ <xsl:with-param name="target" select="$targetBaseDir || '/nkjp/groups.xml'" as="xs:string"/>
+ </xsl:call-template>
+ </xsl:if>
+
</xsl:template>
<!-- ************************** data.xml ******************* -->
@@ -511,6 +553,60 @@
</xsl:element>
</xsl:template>
+ <!-- ************************** named entities ******************* -->
+ <!-- create_named: writes a KorAP layer document (layer element containing a spanList) to $target; the spanList is still empty because the apply-templates call inside it is commented out -->
+ <xsl:template name="create_named">
+ <xsl:param name="compoundID" as="xs:string"/>
+ <xsl:param name="ann_segmentation.xml" as="document-node()"/>
+ <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
+ <xsl:param name="ann_named.xml" as="document-node()"/>
+ <xsl:param name="target" as="xs:string"/>
+ <!-- params: compoundID is written to layer/@docid; target is the href of the result document; the three ann_* documents are accepted but not read yet (only the commented-out call below refers to them) -->
+ <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
+ xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">
+ <xsl:processing-instruction name="xml-model">href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"</xsl:processing-instruction>
+ <xsl:element name="layer" namespace="{$KorAP_namespace}">
+ <xsl:attribute name="docid" select="$compoundID"/>
+ <xsl:attribute name="version" select="$KorAP-XML_version"/>
+ <!-- placeholder: spanList is emitted without children; the disabled call below is presumably the intended mode="named" pass over tei:text of the segmentation document (TODO confirm) -->
+ <xsl:element name="spanList" namespace="{$KorAP_namespace}">
+ <!--<xsl:apply-templates select="$ann_segmentation.xml//tei:text" mode="named">
+ <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()" tunnel="yes"/>
+ <xsl:with-param name="ann_named.xml" select="$ann_named.xml" as="document-node()" tunnel="yes"/>
+ </xsl:apply-templates>-->
+ </xsl:element>
+ </xsl:element>
+ </xsl:result-document>
+ </xsl:template>
+
+ <!-- ************************** syntactic chunks ******************* -->
+ <!-- create_groups: writes a KorAP layer document (layer element containing a spanList) to $target; the spanList is still empty because the apply-templates call inside it is commented out -->
+ <xsl:template name="create_groups">
+ <xsl:param name="compoundID" as="xs:string"/>
+ <xsl:param name="ann_segmentation.xml" as="document-node()"/>
+ <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
+ <xsl:param name="ann_words.xml" as="document-node()"/>
+ <xsl:param name="ann_groups.xml" as="document-node()"/>
+ <xsl:param name="target" as="xs:string"/>
+ <!-- params: compoundID is written to layer/@docid; target is the href of the result document; the four ann_* documents are accepted but not read yet (only the commented-out call below refers to them) -->
+ <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
+ xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">
+ <xsl:processing-instruction name="xml-model">href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"</xsl:processing-instruction>
+ <xsl:element name="layer" namespace="{$KorAP_namespace}">
+ <xsl:attribute name="docid" select="$compoundID"/>
+ <xsl:attribute name="version" select="$KorAP-XML_version"/>
+ <!-- placeholder: spanList is emitted without children; the disabled call below is presumably the intended mode="groups" pass over tei:text of the segmentation document (TODO confirm) -->
+ <xsl:element name="spanList" namespace="{$KorAP_namespace}">
+ <!--<xsl:apply-templates select="$ann_segmentation.xml//tei:text" mode="groups">
+ <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()" tunnel="yes"/>
+ <xsl:with-param name="ann_words.xml" select="$ann_words.xml" as="document-node()" tunnel="yes"/>
+ <xsl:with-param name="ann_groups.xml" select="$ann_groups.xml" as="document-node()" tunnel="yes"/>
+ </xsl:apply-templates>-->
+ </xsl:element>
+ </xsl:element>
+ </xsl:result-document>
+ </xsl:template>
+
<!-- ************************** TEXT header ******************* -->
<xsl:template name="create_text_header">