fix structure.xml, create morpho.xml
Change-Id: Idd0c44671b6a975626b0a9d40a1999a02e7a1148
diff --git a/nkjp2korap.xsl b/nkjp2korap.xsl
index 46b4bb6..ff1ee50 100644
--- a/nkjp2korap.xsl
+++ b/nkjp2korap.xsl
@@ -101,6 +101,8 @@
<xsl:call-template name="create_morpho">
<xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
+ <xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml"
+ as="document-node()"/>
<xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml"
as="document-node()"/>
<xsl:with-param name="target" select="$targetTextDir || 'nkjp/morpho.xml'" as="xs:string"/>
@@ -167,7 +169,7 @@
<xsl:param name="index" as="xs:integer" required="no" select="1"/>
<!-- I have made a major mess here, but it works... it's so spread out
because I wanted to make sure to be able to look up the individual
- constituent values, should anything go wrong -->
+ constituent values, should anything go wrong; optimization will follow once this has been run against a larger dataset -->
<xsl:variable name="my_name" select="local-name()" as="xs:string"/>
<xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
<xsl:variable name="preceding-count" select="count($preceding)"/>
@@ -221,14 +223,17 @@
</xsl:variable>
<xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)">
</xsl:variable>
+
<xsl:element name="span" namespace="{$KorAP_namespace}">
<xsl:attribute name="id" select="'s' || $my_index"/>
<xsl:attribute name="from" select="$start"/>
<xsl:attribute name="to" select="$end"/>
<xsl:attribute name="l" select="f:compute_nesting(.)"/>
<xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
+ <xsl:attribute name="type" select="'struct'"></xsl:attribute> <!-- STRUCT vs. LEX -->
<xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
- <xsl:attribute name="name" select="local-name()"/>
+ <xsl:attribute name="name" select="'name'"/>
+ <xsl:value-of select="local-name()"/>
</xsl:element>
<xsl:if test="count(@*)">
<xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
@@ -257,17 +262,183 @@
<xsl:template name="create_morpho">
<xsl:param name="text.xml" as="document-node()"/>
+ <xsl:param name="ann_segmentation.xml" as="document-node()"/> <!-- segmentation document; its tei:text elements drive the span generation below -->
<xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
<xsl:param name="target" as="xs:string"/>
<xsl:result-document encoding="UTF-8" method="xml" indent="yes"
xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">
<xsl:processing-instruction name="xml-model">href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"</xsl:processing-instruction>
-
+ <xsl:element name="layer" namespace="{$KorAP_namespace}"> <!-- root element of the document written to $target -->
+ <xsl:attribute name="docid" select="$compoundID"/>
+ <xsl:attribute name="version" select="$KorAP-XML_version"/>
+ <!-- $KorAP_namespace, $compoundID and $KorAP-XML_version are not declared in this template; they have to be in scope from elsewhere in the stylesheet -->
+ <xsl:element name="spanList" namespace="{$KorAP_namespace}">
+ <xsl:apply-templates select="$ann_segmentation.xml//tei:text" mode="morpho"> <!-- walk every tei:text of the segmentation document in mode "morpho" -->
+ <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/> <!-- handed down so the templates of that mode can look up the annotations -->
+ </xsl:apply-templates>
+ </xsl:element>
+ </xsl:element>
</xsl:result-document>
</xsl:template>
+ <xsl:template match="tei:*" mode="morpho">
+   <!-- Generic walker over the segmentation document in mode "morpho". It writes no output of
+        its own: it derives an index and start/end values for the current element and hands
+        them to its children, where the tei:seg template of the same mode emits the spans.
+        Parameters:
+          ini   - start value inherited from the parent (default 0)
+          fin   - end value inherited from the parent (default 999999999); not read here
+          index - running index inherited from the parent (default 1)
+          ann_morphosyntax.xml - morphosyntax document, passed through unchanged -->
+   <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
+   <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
+   <xsl:param name="index" as="xs:integer" required="no" select="1"/>
+   <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
+   <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
+   <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
+   <xsl:variable name="preceding-count" select="count($preceding)"/>
+   <!-- For tei:s and tei:p only: the number of elements nested inside the preceding same-named
+        siblings. sum(()) is 0, so the case without preceding siblings needs no branch. -->
+   <xsl:variable name="outside-preceding-count" as="xs:integer" select="
+       if (self::tei:s or self::tei:p)
+       then sum(for $p in $preceding return count($p/descendant::*))
+       else 0"/>
+   <xsl:variable name="my_index" select="$index + 1 + $preceding-count + $outside-preceding-count"
+     as="xs:integer"/>
+
+   <!-- Start value of the current element. For tei:s it is parsed out of the @corresp of the
+        first tei:seg inside (for tei:p: the first tei:seg of the first tei:s): the text between
+        the first and the second comma of the part before ')'.
+        NOTE(review): @corresp presumably looks like string-range(id,start,length); confirm. -->
+   <xsl:variable name="start" as="xs:integer">
+     <xsl:choose>
+       <xsl:when test="self::tei:text or self::tei:body">
+         <xsl:sequence select="0"/>
+       </xsl:when>
+       <xsl:when test="self::tei:p">
+         <xsl:variable name="first_corresp"
+           select="descendant::tei:s[1]/descendant::tei:seg[1]/attribute::corresp"
+           as="attribute(corresp)"/>
+         <xsl:variable name="numbers" select="substring-after(substring-before($first_corresp,')'),',')"/>
+         <xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
+       </xsl:when>
+       <xsl:when test="self::tei:s">
+         <xsl:variable name="first_corresp"
+           select="descendant::tei:seg[1]/attribute::corresp"
+           as="attribute(corresp)"/>
+         <xsl:variable name="numbers" select="substring-after(substring-before($first_corresp,')'),',')"/>
+         <xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
+       </xsl:when>
+       <!-- Every other TEI element reaching this template would otherwise leave $start empty,
+            which violates as="xs:integer"; such elements inherit their parent's start value. -->
+       <xsl:otherwise>
+         <xsl:sequence select="$ini"/>
+       </xsl:otherwise>
+     </xsl:choose>
+   </xsl:variable>
+   <!-- end value as computed by f:calc_content_length(), which is defined outside this template -->
+   <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>
+
+   <!-- recurse; the children receive this element's values as ini/fin/index -->
+   <xsl:apply-templates mode="morpho">
+     <xsl:with-param name="ini" select="$start" as="xs:integer"/>
+     <xsl:with-param name="fin" select="$end" as="xs:integer"/>
+     <xsl:with-param name="index" select="$my_index"/>
+     <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
+   </xsl:apply-templates>
+ </xsl:template>
+ <xsl:template match="tei:seg" mode="morpho">
+   <!-- Emits one span element (in $KorAP_namespace) for the current segment of the segmentation
+        document. The span carries the lemma, pos and (when non-empty) msd of the interpretation
+        selected by the 'disamb' feature of the corresponding segment in the morphosyntax document.
+        Parameters:
+          ini   - start value inherited from the parent (default 0); not read here
+          fin   - end value inherited from the parent (default 999999999); not read here
+          index - running index inherited from the parent (default 1); basis of the span id
+          ann_morphosyntax.xml - document in which the matching segment is looked up -->
+   <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
+   <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
+   <xsl:param name="index" as="xs:integer" required="no" select="1"/>
+   <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
+   <!-- The lookup chain is deliberately kept as separate variables, so that each intermediate
+        value can be inspected should anything go wrong. -->
+   <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
+   <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
+   <!-- The morphosyntactic segment whose @corresp (after '#') equals this segment's xml:id;
+        as="node()" requires exactly one match.
+        NOTE(review): this searches the whole morphosyntax document once per segment; an
+        xsl:key would presumably help on larger inputs. -->
+   <xsl:variable name="my_morph-seg" as="node()" select="$ann_morphosyntax.xml//tei:seg[substring-after(@corresp,'#') eq $my_id]"/>
+   <xsl:variable name="my_disamb" select="$my_morph-seg//tei:fs/tei:f[@name eq 'disamb']" as="node()"/>
+   <!-- id of the tei:symbol chosen by disambiguation: the part of @fVal after '#' -->
+   <xsl:variable name="my_choice-id" select="substring-after($my_disamb//tei:f[@name eq 'choice']/@fVal,'#')" as="xs:string"/>
+   <!-- the 'lex' interpretation that contains the chosen tei:symbol -->
+   <xsl:variable name="my_choice-lex" select="$my_morph-seg//tei:f[@name eq 'interps']/tei:fs[@type eq 'lex'][descendant::tei:symbol[@xml:id eq $my_choice-id]]" as="node()"/>
+   <xsl:variable name="chosen-msd" as="xs:string" select="$my_choice-lex/descendant::tei:symbol[@xml:id eq $my_choice-id]/@value"/>
+   <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
+   <xsl:variable name="preceding-count" select="count($preceding)"/>
+   <!-- This template matches tei:seg only, so the test "self::tei:s or self::tei:p" that the
+        generic mode-morpho template uses to add a descendant count can never hold here;
+        the index is just the inherited index plus the position among same-named siblings. -->
+   <xsl:variable name="my_index" select="$index + 1 + $preceding-count" as="xs:integer"/>
+
+   <!-- start value: the text between the first and the second comma of @corresp -->
+   <xsl:variable name="start" as="xs:integer">
+     <xsl:variable name="numbers" select="substring-after(substring-before(@corresp,')'),',')"/>
+     <xsl:sequence select="xs:integer(substring-before($numbers,','))"/>
+   </xsl:variable>
+   <!-- end value as computed by f:calc_content_length(), which is defined outside this template -->
+   <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>
+
+   <xsl:element name="span" namespace="{$KorAP_namespace}">
+     <xsl:attribute name="id" select="'s' || $my_index"/>
+     <xsl:attribute name="from" select="$start"/>
+     <xsl:attribute name="to" select="$end"/>
+     <xsl:attribute name="l" select="f:compute_nesting(.)"/>
+     <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
+       <xsl:attribute name="type" select="'lex'"/>
+       <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
+         <xsl:attribute name="name" select="'lex'"/>
+         <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
+           <!-- the string of the 'orth' feature is only recorded as an XML comment in the output -->
+           <xsl:comment select="$my_morph-seg//tei:fs/tei:f[@name eq 'orth']/tei:string"/>
+           <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
+             <xsl:attribute name="name" select="'lemma'"/>
+             <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'base']/tei:string"/>
+           </xsl:element>
+           <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
+             <xsl:attribute name="name" select="'pos'"/>
+             <xsl:value-of select="$my_choice-lex/tei:f[@name eq 'ctag']/tei:symbol/@value"/>
+           </xsl:element>
+           <!-- msd is written only when the chosen symbol's @value is non-empty -->
+           <xsl:if test="string-length($chosen-msd)">
+             <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
+               <xsl:attribute name="name" select="'msd'"/>
+               <xsl:value-of select="$chosen-msd"/>
+             </xsl:element>
+           </xsl:if>
+           <!-- an 'nps' feature on the morphosyntactic segment yields a 'join' feature with value 'left' -->
+           <xsl:if test="$my_morph-seg//tei:fs/tei:f[@name eq 'nps']">
+             <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
+               <xsl:attribute name="name" select="'join'"/>
+               <xsl:value-of select="'left'"/>
+             </xsl:element>
+           </xsl:if>
+         </xsl:element>
+       </xsl:element>
+     </xsl:element>
+   </xsl:element>
+
+   <!-- children of the segment, if any, are processed with this segment's values -->
+   <xsl:apply-templates mode="morpho">
+     <xsl:with-param name="ini" select="$start" as="xs:integer"/>
+     <xsl:with-param name="fin" select="$end" as="xs:integer"/>
+     <xsl:with-param name="index" select="$my_index"/>
+     <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()"/>
+   </xsl:apply-templates>
+ </xsl:template>
<!-- ************************** TEXT header ******************* -->
<xsl:template name="create_text_header">