Record the state before switching to ann_morphosyntax as the basis (because of the manual corrections present there)
Change-Id: I601291ba349853d12dc0d283c6480a29652b1798
diff --git a/nkjp2korap.xsl b/nkjp2korap.xsl
index e220a05..c3d7e7c 100644
--- a/nkjp2korap.xsl
+++ b/nkjp2korap.xsl
@@ -3,7 +3,8 @@
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"
xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:f="func"
xmlns:fn="http://www.w3.org/2005/xpath-functions"
- xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f fn map nkjp tei"
+ xmlns:xi="http://www.w3.org/2001/XInclude"
+ xmlns:map="http://www.w3.org/2005/xpath-functions/map" exclude-result-prefixes="xs f fn map nkjp tei xi"
version="3.0" expand-text="yes">
@@ -60,6 +61,7 @@
<xsl:mode name="corpus" on-no-match="deep-skip"/>
<xsl:mode name="text" on-no-match="deep-skip"/>
<xsl:mode name="header-text" on-no-match="text-only-copy"/>
+ <xsl:mode name="copy" on-no-match="shallow-copy"/>
<xsl:mode use-accumulators="#all"/>
<xsl:accumulator name="element-index" as="xs:integer" initial-value="0">
@@ -447,15 +449,31 @@
</xsl:call-template>
<xsl:if test="$ann_named.xml">
+ <xsl:variable name="rev_lookup-seq" as="map(*)*">
+ <xsl:for-each
+ select="$ann_named.xml//tei:seg/tei:ptr">
+ <xsl:variable name="trg" as="xs:string" select="fn:substring-before(@target, '#')"/>
+ <!-- Builds one single-entry map per tei:ptr that points into ann_morphosyntax.xml: key = the fragment after '#', value = the tei:ptr element itself. Caution: as of 01-June-2022, some pointers are malformed (the '#' is missing in local references); for those, $trg is the empty string, so they are skipped by the test below, and this keeps working if the data gets corrected.
+ The declared type is map(*)* rather than map(*)+, so that an ann_named.xml without any pointer into ann_morphosyntax.xml yields an empty sequence instead of raising a type error. -->
+
+ <xsl:if test="$trg eq 'ann_morphosyntax.xml'">
+ <xsl:sequence>
+ <xsl:map-entry key="fn:substring-after(fn:string(@target), '#')" select="fn:current()"
+ />
+ </xsl:sequence>
+ </xsl:if>
+ </xsl:for-each>
+ </xsl:variable>
+ <xsl:variable name="rev_lookup" as="map(*)" select="map:merge($rev_lookup-seq,map{'duplicates':'combine'})"/>
+
<xsl:call-template name="create_named">
<xsl:with-param name="compoundID" select="$compoundID" as="xs:string"/>
- <xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml"
- as="document-node()"/>
<xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml"
as="document-node()"/>
<xsl:with-param name="ann_named.xml" select="$ann_named.xml"
as="document-node()"/>
<xsl:with-param name="target" select="$targetBaseDir || '/nkjp/named.xml'" as="xs:string"/>
+ <xsl:with-param name="rev_lookup" select="$rev_lookup" as="map(*)"/>
</xsl:call-template>
</xsl:if>
@@ -621,9 +639,8 @@
<xsl:attribute name="type" select="'lex'"/>
<xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
<xsl:attribute name="name" select="'lex'"/>
+ <xsl:comment select="(if(@nkjp:nps) then ' ' else '_') || $my_morph-seg//tei:fs/tei:f[@name eq 'orth']/tei:string"/>
<xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
- <xsl:comment select="$my_morph-seg//tei:fs/tei:f[@name eq 'orth']/tei:string"/>
-
<xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
<xsl:attribute name="name" select="'lemma'"/>
<xsl:value-of select="$my_choice-lex/tei:f[@name eq 'base']/tei:string"/>
@@ -654,11 +671,11 @@
<xsl:template name="create_named">
<xsl:param name="compoundID" as="xs:string"/>
- <xsl:param name="ann_segmentation.xml" as="document-node()"/> <!-- probably out -->
<xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
<xsl:param name="ann_named.xml" as="document-node()"/>
<xsl:param name="target" as="xs:string"/>
-
+ <xsl:param name="rev_lookup" as="map(*)"/>
+
<xsl:result-document encoding="UTF-8" method="xml" indent="yes"
xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">
<xsl:processing-instruction name="xml-model">href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"</xsl:processing-instruction>
@@ -668,8 +685,8 @@
<xsl:element name="spanList" namespace="{$KorAP_namespace}">
<xsl:apply-templates select="$ann_morphosyntax.xml//tei:text" mode="named">
- <!--<xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml" as="document-node()" tunnel="yes"/>-->
<xsl:with-param name="ann_named.xml" select="$ann_named.xml" as="document-node()" tunnel="yes"/>
+ <xsl:with-param name="rev_lookup" select="$rev_lookup" as="map(*)" tunnel="yes"/>
</xsl:apply-templates>
</xsl:element>
</xsl:element>
@@ -680,32 +697,58 @@
<xsl:template match="tei:seg[tei:fs[tei:f[@name eq 'disamb']]]" mode="named">
<xsl:param name="ann_named.xml" as="document-node()" tunnel="yes"/>
+ <xsl:param name="rev_lookup" as="map(*)" tunnel="yes"/>
<xsl:variable name="offsets" as="xs:integer+">
<xsl:sequence select="map:get(fn:accumulator-after('morpho-offsets')[last()], string(@xml:id))"/>
</xsl:variable>
- <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
- <xsl:variable name="my_index" select="fn:accumulator-before('element-index')" as="xs:integer"/>
+ <xsl:variable name="ptr" select="map:get($rev_lookup,fn:string(@xml:id))" as="element(tei:ptr)*"/>
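+ <!-- $ptr holds the tei:ptr elements themselves (not just their target strings), because we need to see where each one stands in a sequence; their parent tei:seg is also accessed below -->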
- <xsl:element name="span" namespace="{$KorAP_namespace}">
- <xsl:attribute name="id" select="'n' || $my_index"/>
- <xsl:attribute name="from" select="$offsets[1]"/>
- <xsl:attribute name="to" select="$offsets[2]"/>
- <xsl:attribute name="l" select="f:compute_nesting(.)"/>
- <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
- <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
- <xsl:attribute name="name" select="'ne'"/>
- <xsl:comment select="(if(tei:fs/tei:f[@name eq 'nps']) then ' ' else '_') || tei:fs/tei:f[@name eq 'orth']/tei:string"/>
- <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
- <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
- <xsl:attribute name="name" select="'ent'"/>
- <xsl:value-of select="'placeholder'"/>
+ <xsl:if test="$ptr">
+ <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
+ <xsl:variable name="my_index" select="fn:accumulator-before('element-index')" as="xs:integer"/>
+
+ <xsl:element name="span" namespace="{$KorAP_namespace}">
+ <xsl:attribute name="id" select="'n' || $my_index"/>
+ <xsl:attribute name="from" select="$offsets[1]"/>
+ <xsl:attribute name="to" select="$offsets[2]"/>
+ <xsl:attribute name="l" select="f:compute_nesting(.)"/>
+ <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
+ <xsl:attribute name="type" select="'ne'"/>
+ <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
+ <xsl:attribute name="name" select="'ne'"/>
+ <xsl:comment select="
+ (if (tei:fs/tei:f[@name eq 'nps']) then
+ ' '
+ else
+ '_') || tei:fs/tei:f[@name eq 'orth']/tei:string"/>
+ <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
+ <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
+ <xsl:attribute name="name" select="'complex-ent'"/>
+ <xsl:for-each select="$ptr">
+ <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
+ <xsl:attribute name="type" select="'complex-ent'"/>
+ <xsl:for-each select="parent::tei:seg/tei:fs[1]/tei:f">
+ <xsl:if test="@name eq 'type' or @name eq 'subtype'">
+ <xsl:copy-of select="." copy-namespaces="no"/>
+ </xsl:if>
+ </xsl:for-each>
+ <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
+ <xsl:attribute name="name" select="'nkjp-named'"/>
+ <xsl:copy-of select="parent::tei:seg/tei:fs[1]" copy-namespaces="no"/>
+ </xsl:element>
+ </xsl:element>
+ </xsl:for-each>
+
+ </xsl:element>
+
</xsl:element>
</xsl:element>
</xsl:element>
</xsl:element>
- </xsl:element>
+ </xsl:if>
</xsl:template>