Produce an initial version of named.xml, containing only placeholders but with properly computed offsets (obtained by walking ann_morphosyntax.xml)
Change-Id: I127274e51bf6abdcf0bcda8e8cbfe45c5b919dce
diff --git a/nkjp2korap.xsl b/nkjp2korap.xsl
index 4aae734..e220a05 100644
--- a/nkjp2korap.xsl
+++ b/nkjp2korap.xsl
@@ -66,7 +66,10 @@
<xsl:accumulator-rule match="tei:*[ancestor-or-self::tei:text]" select="$value + 1" phase="start"/>
</xsl:accumulator>
- <xsl:accumulator name="elem-offset-seq" as="map(xs:string, item()+)+" initial-value="(map{'null':(0,0)})">
+
+
+ <!-- TODO: the two offset accumulators (morpho-offsets and segmentation-offsets) can probably be merged into one; to be verified -->
+ <xsl:accumulator name="morpho-offsets" as="map(xs:string, item()+)+" initial-value="(map{'null':(0,0)})">
<xsl:accumulator-rule match="tei:body/tei:p" phase="start">
<xsl:variable name="preceding_index" as="xs:integer">
@@ -83,7 +86,7 @@
string(@xml:id): ($preceding_index,$our_base)
}"/>
</xsl:accumulator-rule>
-
+ <!-- this is morpho-offsets -->
<xsl:accumulator-rule match="tei:s" phase="start">
<xsl:variable name="preceding_index" as="xs:integer">
<xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
@@ -97,21 +100,27 @@
string(@xml:id): ($preceding_index,$our_base)
}"/>
</xsl:accumulator-rule>
-
- <xsl:accumulator-rule match="tei:w[parent::tei:seg[count(@nkjp:rejected) eq 0]]" phase="end">
+ <!-- this is morpho-offsets -->
+
+ <!-- I want a pattern that will not match anything in the other annotation layers, for efficiency;
+ that may eventually allow me to merge the accumulators.
+ At the same time, I want the rejected tokenization alternatives to be filtered out already at this point. -->
+ <xsl:accumulator-rule match="tei:seg[tei:fs[@type eq 'morph' and tei:f[@name eq 'disamb']]]" phase="end">
+ <!-- fires at the end of every seg that holds a 'morph' feature structure with a 'disamb' feature -->
<xsl:variable name="preceding_index" as="xs:integer">
<xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
<xsl:sequence select="map:get($the_tail,map:keys($the_tail)[1])[2]"/>
</xsl:variable>
- <xsl:variable name="our_base" as="xs:integer" select="$preceding_index + xs:integer(f:is_preceded_by_ws(parent::tei:seg,true()))"/>
+ <xsl:variable name="our_base" as="xs:integer" select="$preceding_index + xs:integer(f:is_preceded_by_ws(.,true()))"/>
<xsl:sequence select="
- $value,
- map {
- string(parent::tei:seg/@xml:id): ($our_base,$our_base + string-length())
- }"/>
+ $value,
+ map {
+ string(@xml:id): ($our_base,$our_base + string-length(tei:fs/tei:f[@name eq 'orth']/tei:string))
+ }"/>
+ <!-- the appended entry maps the seg's @xml:id to (start, start + length of the 'orth' string) -->
</xsl:accumulator-rule>
-
+ <!-- this is morpho-offsets -->
<xsl:accumulator-rule match="tei:s" phase="end">
<xsl:variable name="preceding_index" as="xs:integer">
<xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
@@ -129,7 +138,7 @@
string(@xml:id): ($our_base,$preceding_index)
}"/>
</xsl:accumulator-rule>
-
+ <!-- this is morpho-offsets -->
<xsl:accumulator-rule match="tei:body/tei:p" phase="end">
<xsl:variable name="preceding_index" as="xs:integer">
<xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
@@ -147,18 +156,103 @@
string(@xml:id): ($our_base,$preceding_index)
}"/>
</xsl:accumulator-rule>
+ </xsl:accumulator>
+
+
+
+ <xsl:accumulator name="segmentation-offsets" as="map(xs:string, item()+)+" initial-value="(map{'null':(0,0)})">
+
+ <xsl:accumulator-rule match="tei:body/tei:p" phase="start">
+ <!-- Paragraph start: append a provisional entry (running offset, paragraph start) keyed by the
+ paragraph's @xml:id; the second value of the latest entry is what every rule reads as the running offset. -->
+ <xsl:variable name="latest_entry" as="map(*)" select="$value[last()]"/>
+ <xsl:variable name="preceding_index" as="xs:integer" select="$latest_entry(map:keys($latest_entry)[1])[2]"/>
+
+ <!-- while the running offset is still 0 the paragraph starts at 0; otherwise it starts one position further -->
+ <xsl:variable name="our_base" as="xs:integer" select="$preceding_index + (if ($preceding_index eq 0) then 0 else 1)"/>
+
+ <xsl:sequence select="
+ $value,
+ map:entry(string(@xml:id), ($preceding_index, $our_base))"/>
+
+ <!-- the phase='end' rule for tei:body/tei:p reads the start back from this provisional entry -->
+ </xsl:accumulator-rule>
+
+ <xsl:accumulator-rule match="tei:s" phase="start">
+ <!-- Sentence start: append a provisional entry (running offset, sentence start) under the sentence's @xml:id. -->
+ <xsl:variable name="latest_entry" as="map(*)" select="$value[last()]"/>
+ <xsl:variable name="preceding_index" as="xs:integer" select="$latest_entry(map:keys($latest_entry)[1])[2]"/>
+
+ <!-- at running offset 0 the sentence starts at 0; otherwise the result of f:is_preceded_by_ws, cast to an integer, is added -->
+ <xsl:variable name="our_base" as="xs:integer"
+ select="$preceding_index + (if ($preceding_index eq 0) then 0 else xs:integer(f:is_preceded_by_ws(., true())))"/>
+
+ <xsl:sequence select="
+ $value,
+ map:entry(string(@xml:id), ($preceding_index, $our_base))"/>
+ </xsl:accumulator-rule>
+
+ <xsl:accumulator-rule match="tei:w[parent::tei:seg[count(@nkjp:rejected) eq 0]]" phase="end">
+ <!-- End of a w whose parent seg has no @nkjp:rejected: append the (start, end) entry for that seg. -->
+ <xsl:variable name="latest_entry" as="map(*)" select="$value[last()]"/>
+ <xsl:variable name="preceding_index" as="xs:integer" select="$latest_entry(map:keys($latest_entry)[1])[2]"/>
+
+ <!-- start = running offset plus the result of f:is_preceded_by_ws for the parent seg, cast to an integer -->
+ <xsl:variable name="our_base" as="xs:integer"
+ select="$preceding_index + xs:integer(f:is_preceded_by_ws(parent::tei:seg, true()))"/>
+
+ <xsl:sequence select="
+ $value,
+ map:entry(string(parent::tei:seg/@xml:id), ($our_base, $our_base + string-length()))"/>
+ </xsl:accumulator-rule>
+
+ <xsl:accumulator-rule match="tei:s" phase="end">
+ <!-- Sentence end: append the final (start, end) entry for this sentence, where end is the running
+ offset found in the latest entry of the accumulator value. -->
+ <xsl:variable name="latest_entry" as="map(*)" select="$value[last()]"/>
+ <xsl:variable name="preceding_index" as="xs:integer" select="$latest_entry(map:keys($latest_entry)[1])[2]"/>
+
+ <!-- map:find collects the values stored under this @xml:id; the first one found is presumably the
+ entry written by the phase='start' rule, and its second value is taken as the sentence start -->
+ <xsl:variable name="incomplete" as="xs:integer+" select="map:find($value, string(@xml:id))(1)"/>
+ <xsl:variable name="our_base" as="xs:integer" select="$incomplete[2]"/>
+
+ <!-- the new entry is appended; the earlier entry for the same id stays in the sequence -->
+ <xsl:sequence select="
+ $value,
+ map:entry(string(@xml:id), ($our_base, $preceding_index))"/>
+
+ </xsl:accumulator-rule>
+
+ <xsl:accumulator-rule match="tei:body/tei:p" phase="end">
+ <!-- Paragraph end: append the final (start, end) entry for this paragraph, where end is the running
+ offset found in the latest entry of the accumulator value. -->
+ <xsl:variable name="latest_entry" as="map(*)" select="$value[last()]"/>
+ <xsl:variable name="preceding_index" as="xs:integer" select="$latest_entry(map:keys($latest_entry)[1])[2]"/>
+
+ <!-- map:find collects the values stored under this @xml:id; the first one found is presumably the
+ entry written by the phase='start' rule, and its second value is taken as the paragraph start -->
+ <xsl:variable name="incomplete" as="xs:integer+" select="map:find($value, string(@xml:id))(1)"/>
+ <xsl:variable name="our_base" as="xs:integer" select="$incomplete[2]"/>
+
+ <!-- the new entry is appended; the earlier entry for the same id stays in the sequence -->
+ <xsl:sequence select="
+ $value,
+ map:entry(string(@xml:id), ($our_base, $preceding_index))"/>
+
+ </xsl:accumulator-rule>
<xsl:accumulator-rule match="tei:body" phase="end">
<xsl:variable name="preceding_index" as="xs:integer">
<xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
<xsl:sequence select="map:get($the_tail, map:keys($the_tail)[1])[2]"/>
</xsl:variable>
-
+ <!-- body end: the appended entry, keyed by string(@xml:id) of the body, pairs 0 with the second value of the latest entry -->
<xsl:sequence select="
- $value,
- map {
- string(@xml:id): (0, $preceding_index)
- }"/>
+ $value,
+ map {
+ string(@xml:id): (0, $preceding_index)
+ }"/>
</xsl:accumulator-rule>
<xsl:accumulator-rule match="tei:text" phase="end">
@@ -195,6 +289,10 @@
<xsl:when test="$node/@nkjp:nps">
<xsl:sequence select="fn:false()"/>
</xsl:when>
+ <xsl:when test="$node/tei:fs/tei:f[@name eq 'nps']">
+ <!--added for traversing ann_morphosyntax-->
+ <xsl:sequence select="fn:false()"/>
+ </xsl:when>
<xsl:when
test="$node/ancestor::tei:s[count(preceding-sibling::tei:s) eq 0] and $node/ancestor::tei:p[count(preceding-sibling::tei:p) eq 0] and not($node/preceding::tei:seg[count(@nkjp:rejected) eq 0])">
<xsl:sequence select="fn:false()"/>
@@ -205,6 +303,11 @@
<!-- I forget how node identity works now, so let me just compare the IDs -->
<xsl:sequence select="fn:false()"/>
</xsl:when>
+ <xsl:when
+ test="$suppress_initial and $node/ancestor::tei:s/descendant::tei:seg[tei:fs/tei:f[@name eq 'disamb']][1]/@xml:id eq $node/@xml:id">
+ <!--added for traversing ann_morphosyntax-->
+ <xsl:sequence select="fn:false()"/>
+ </xsl:when>
<xsl:otherwise>
<xsl:sequence select="fn:true()"/>
</xsl:otherwise>
@@ -429,7 +532,7 @@
<xsl:template match="tei:*" mode="struct">
<xsl:variable name="offsets" as="xs:integer+">
- <xsl:sequence select="map:get(fn:accumulator-after('elem-offset-seq')[last()], string(@xml:id))"/>
+ <xsl:sequence select="map:get(fn:accumulator-after('segmentation-offsets')[last()], string(@xml:id))"/>
</xsl:variable>
<xsl:variable name="my_name" select="local-name()" as="xs:string"/>
@@ -498,7 +601,7 @@
constituent values, should anything go wrong; it might get compacted at some point, but
the increase in efficiency will probably be minimal, compared to the decrease of readability -->
<xsl:variable name="offsets" as="xs:integer+">
- <xsl:sequence select="map:get(fn:accumulator-after('elem-offset-seq')[last()], string(@xml:id))"/>
+ <xsl:sequence select="map:get(fn:accumulator-after('segmentation-offsets')[last()], string(@xml:id))"/>
</xsl:variable>
<xsl:variable name="my_name" select="local-name()" as="xs:string"/>
<xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
@@ -551,7 +654,7 @@
<xsl:template name="create_named">
<xsl:param name="compoundID" as="xs:string"/>
- <xsl:param name="ann_segmentation.xml" as="document-node()"/>
+ <xsl:param name="ann_segmentation.xml" as="document-node()"/> <!-- probably to be removed: no longer passed on to the templates of mode 'named' -->
<xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
<xsl:param name="ann_named.xml" as="document-node()"/>
<xsl:param name="target" as="xs:string"/>
@@ -564,14 +667,47 @@
<xsl:attribute name="version" select="$KorAP-XML_version"/>
<xsl:element name="spanList" namespace="{$KorAP_namespace}">
- <!--<xsl:apply-templates select="$ann_segmentation.xml//tei:text" mode="named">
- <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()" tunnel="yes"/>
+ <xsl:apply-templates select="$ann_morphosyntax.xml//tei:text" mode="named">
+ <!--<xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml" as="document-node()" tunnel="yes"/>-->
<xsl:with-param name="ann_named.xml" select="$ann_named.xml" as="document-node()" tunnel="yes"/>
- </xsl:apply-templates>-->
+ </xsl:apply-templates>
</xsl:element>
</xsl:element>
</xsl:result-document>
</xsl:template>
+
+ <xsl:template match="tei:seg" mode="named"/> <!-- a seg not matched by a more specific template of this mode produces no output -->
+
+ <xsl:template match="tei:seg[tei:fs[@type eq 'morph' and tei:f[@name eq 'disamb']]]" mode="named">
+ <!-- Emits one placeholder span per seg of the morphosyntactic layer. The match pattern repeats the one used by
+ the seg rule of the 'morpho-offsets' accumulator, so every seg handled here has an offsets entry to look up. -->
+ <xsl:param name="ann_named.xml" as="document-node()" tunnel="yes"/> <!-- not referenced in this placeholder version -->
+ <!-- the pair stored under this seg's @xml:id in the last map of the accumulator value; used as from/to below -->
+ <xsl:variable name="offsets" as="xs:integer+" select="map:get(fn:accumulator-after('morpho-offsets')[last()], string(@xml:id))"/>
+ <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
+ <xsl:variable name="my_index" select="fn:accumulator-before('element-index')" as="xs:integer"/>
+
+ <!-- span attributes: id from the element index, from/to from the offsets, l from f:compute_nesting -->
+ <xsl:element name="span" namespace="{$KorAP_namespace}">
+ <xsl:attribute name="id" select="'n' || $my_index"/>
+ <xsl:attribute name="from" select="$offsets[1]"/>
+ <xsl:attribute name="to" select="$offsets[2]"/>
+ <xsl:attribute name="l" select="f:compute_nesting(.)"/>
+ <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
+ <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
+ <xsl:attribute name="name" select="'ne'"/>
+ <xsl:comment select="(if(tei:fs/tei:f[@name eq 'nps']) then ' ' else '_') || tei:fs/tei:f[@name eq 'orth']/tei:string"/>
+ <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
+ <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
+ <xsl:attribute name="name" select="'ent'"/>
+ <xsl:value-of select="'placeholder'"/>
+ </xsl:element>
+ </xsl:element>
+ </xsl:element>
+ </xsl:element>
+ </xsl:element>
+ </xsl:template>
+
<!-- ************************** syntactic chunks ******************* -->