derive structure.xml; the script isn't optimized yet but I would like to submit the output for a check
Change-Id: Ib7ae6aed1e661490dbd2b37d7818205a7ec50441
diff --git a/nkjp2korap.xsl b/nkjp2korap.xsl
index e186ef1..46b4bb6 100644
--- a/nkjp2korap.xsl
+++ b/nkjp2korap.xsl
@@ -26,24 +26,94 @@
   <xsl:variable name="KorAP_namespace" select="'http://ids-mannheim.de/ns/KorAP'" static="true"
     as="xs:string"/>
 
+  <xsl:variable name="KorAP-XML_version" select="'KorAP-0.4'" as="xs:string" static="true"/>
+  <!--  format version label; written to layer/@version in structure.xml -->
+
+  <xsl:variable name="compoundID" as="xs:string"
+    select="$corpusID || '_' || $docID || '.' || $textID"/>
+  <!--  text identifier; written as @docid on the root element of data.xml and structure.xml -->
 
   <xsl:mode name="corpus" on-no-match="deep-skip"/>
   <xsl:mode name="text" on-no-match="deep-skip"/>
 
+  <!--  <xsl:variable name="text_depth" as="xs:integer" select="xs:integer('2')" static="true"/>
+  <!-\-  this magic number indicates the depth of the <TEI> element inside teiCorpus/TEI -\->
+-->
+  <!-- Nesting level of $node: the number of its ancestor-or-self elements,
+       leaving out those whose local name is TEI or teiCorpus. -->
+  <xsl:function name="f:compute_nesting" as="xs:integer">
+    <xsl:param name="node" as="node()"/>
+    <xsl:variable name="wrappers" as="xs:string+" select="('TEI', 'teiCorpus')"/>
+    <xsl:sequence
+      select="count($node/ancestor-or-self::*[not(local-name(.) = $wrappers)])"/>
+  </xsl:function>
+
+  <!-- Returns the end offset of $node: the sum of the two comma-separated integers
+       that follow the first comma and precede the first closing parenthesis in a
+       @corresp value, e.g. 15 for 'x(id,10,5)'. Despite the name, the result is
+       the end position that the struct template writes to span/@to, not a length.
+       For tei:text, tei:body, tei:p and tei:s the @corresp of the last tei:seg
+       below $node is used; for any other node, its own @corresp.
+       Raises f:bad-corresp when no such pair of integers is available.
+       NOTE(review): the numbers are used as they stand. If the offsets in the
+       source are relative to the referenced paragraph rather than to the whole
+       text, a per-paragraph base has to be added here. TODO confirm. -->
+  <xsl:function name="f:calc_content_length" as="xs:integer">
+    <xsl:param name="node" as="node()"/>
+    <!-- single lookup in place of one copied branch per element type -->
+    <xsl:variable name="last_corresp" as="attribute(corresp)?"
+      select="
+        if ($node[self::tei:text or self::tei:body or self::tei:p or self::tei:s])
+        then ($node/descendant::tei:seg)[last()]/@corresp
+        else $node/@corresp"/>
+    <!-- 'x(id,10,5)' yields the sequence ('10', '5'); a missing value yields () -->
+    <xsl:variable name="numbers" as="xs:string*"
+      select="tokenize(substring-after(substring-before($last_corresp, ')'), ','), ',')"/>
+    <xsl:variable name="usable" as="xs:boolean"
+      select="count($numbers) eq 2 and (every $n in $numbers satisfies $n castable as xs:integer)"/>
+    <xsl:sequence
+      select="
+        if ($usable)
+        then xs:integer($numbers[1]) + xs:integer($numbers[2])
+        else error(xs:QName('f:bad-corresp'),
+                   'no offset,length pair in @corresp for ' || name($node))"/>
+  </xsl:function>
 
   <xsl:template name="xsl:initial-template">
     <xsl:variable name="text.xml" as="document-node()" select="doc($sourceDir || 'text.xml')"/>
+    <xsl:variable name="ann_morphosyntax.xml" as="document-node()"
+      select="doc($sourceDir || 'ann_morphosyntax.xml')"/>
+    <xsl:variable name="ann_segmentation.xml" as="document-node()"
+      select="doc($sourceDir || 'ann_segmentation.xml')"/>
 
     <xsl:call-template name="create_data">
       <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
+      <xsl:with-param name="target" select="$targetTextDir || 'data.xml'" as="xs:string"/>
+    </xsl:call-template>
+    <!-- structure layer: spans derived from ann_segmentation.xml -->
+    <xsl:call-template name="create_struct">
+      <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
+      <xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml"
+        as="document-node()"/>
+      <xsl:with-param name="target" select="$targetTextDir || 'struct/structure.xml'" as="xs:string"
+      />
+    </xsl:call-template>
+    <!-- NOTE(review): create_morpho so far writes only the xml-model processing instruction -->
+    <xsl:call-template name="create_morpho">
+      <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
+      <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml"
+        as="document-node()"/>
+      <xsl:with-param name="target" select="$targetTextDir || 'nkjp/morpho.xml'" as="xs:string"/>
     </xsl:call-template>
 
     <xsl:call-template name="create_text_header">
       <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
+      <xsl:with-param name="target" select="$targetTextDir || 'header.xml'" as="xs:string"/>
     </xsl:call-template>
 
     <xsl:call-template name="create_corpus_header">
       <xsl:with-param name="text.xml" select="$text.xml" as="document-node()"/>
+      <xsl:with-param name="target" select="$targetCorpusDir || 'header.xml'" as="xs:string"/>
     </xsl:call-template>
   </xsl:template>
 
@@ -51,13 +121,14 @@
 
   <xsl:template name="create_data">
     <xsl:param name="text.xml" as="document-node()"/>
+    <xsl:param name="target" as="xs:string"/>
     <!-- create the data.xml file -->
     <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
-      xpath-default-namespace="{$KorAP_namespace}" href="{$targetTextDir || 'data.xml'}">
+      xpath-default-namespace="{$KorAP_namespace}" href="{$target}">
 
       <xsl:processing-instruction name="xml-model">href=&quot;text.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
       <xsl:element name="raw_text" namespace="{$KorAP_namespace}">
-        <xsl:attribute name="docid" select="$corpusID || '_' || $docID || '.' || $textID"/>
+        <xsl:attribute name="docid" select="$compoundID"/>
         <xsl:element name="metadata" namespace="{$KorAP_namespace}">
           <xsl:attribute name="file" select="'metadata.xml'"/>
         </xsl:element>
@@ -69,13 +140,179 @@
     </xsl:result-document>
   </xsl:template>
 
+  <!--   **************************        struct      *******************                -->
+
+  <!-- Writes the structure layer for one text to $target: one spanList, filled by the
+       "struct" mode run on every tei:text of $ann_segmentation.xml; $text.xml is not read. -->
+  <xsl:template name="create_struct">
+    <xsl:param name="text.xml" as="document-node()"/>
+    <xsl:param name="ann_segmentation.xml" as="document-node()"/>
+    <xsl:param name="target" as="xs:string"/>
+    <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
+      xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
+      <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
+      <xsl:element name="layer" namespace="{$KorAP_namespace}">
+        <xsl:attribute name="docid" select="$compoundID"/>
+        <xsl:attribute name="version" select="$KorAP-XML_version"/>
+        <xsl:element name="spanList" namespace="{$KorAP_namespace}">
+          <xsl:apply-templates select="$ann_segmentation.xml/descendant::tei:text" mode="struct"/>
+        </xsl:element>
+      </xsl:element>
+    </xsl:result-document>
+  </xsl:template>
+  
+  <!-- Emits one span for the current TEI element, then applies the same mode to its
+       children, so that spans are written in document order.
+       Parameters:
+         ini, fin - from/to of the parent span (defaults 0 and 999999999); handed on
+                    to the children but not read in this template
+         index    - number used in the parent's span id (default 1)
+       Span attributes:
+         id   - 's' plus index + 1 + the number of preceding same-named siblings; for
+                tei:s and tei:p also plus the number of elements inside those siblings
+         from - 0 for tei:text and tei:body; otherwise the number between the first
+                and second comma in @corresp of the first tei:seg at or below here
+         to   - f:calc_content_length(.)
+         l    - f:compute_nesting(.)
+       The span holds a TEI feature structure naming the element and, if it has any,
+       listing its attributes. An element for which no start offset can be read
+       stops the run with the error f:bad-corresp. -->
+  <xsl:template match="tei:*" mode="struct">
+    <xsl:param name="ini" as="xs:integer" required="no" select="0"/>
+    <xsl:param name="fin" as="xs:integer" required="no" select="999999999"/>
+    <xsl:param name="index" as="xs:integer" required="no" select="1"/>
+    <xsl:variable name="tei_ns" as="xs:string" select="'http://www.tei-c.org/ns/1.0'"/>
+    <!-- siblings of the same name that come before the current element -->
+    <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
+    <xsl:variable name="preceding" select="preceding-sibling::*[local-name(.) eq $my_name]"/>
+    <xsl:variable name="preceding-count" select="count($preceding)" as="xs:integer"/>
+    <!-- elements nested in earlier s/p siblings have taken ids as well -->
+    <xsl:variable name="outside-preceding-count" as="xs:integer"
+      select="
+        if (self::tei:s or self::tei:p)
+        then sum(for $p in $preceding return count($p/descendant::*))
+        else 0"/>
+    <!-- running number of this span; also handed to the children as their index -->
+    <xsl:variable name="my_index" select="$index + 1 + $preceding-count + $outside-preceding-count"
+      as="xs:integer"/>
+
+    <!-- @corresp that carries the start offset; text and body start at 0 and need none -->
+    <xsl:variable name="first_corresp" as="attribute(corresp)?"
+      select="if (self::tei:seg) then @corresp else (descendant::tei:seg)[1]/@corresp"/>
+    <xsl:variable name="offset" as="xs:string"
+      select="substring-before(substring-after(substring-before($first_corresp, ')'), ','), ',')"/>
+    <xsl:variable name="start" as="xs:integer">
+      <xsl:choose>
+        <xsl:when test="self::tei:text or self::tei:body">
+          <xsl:sequence select="0"/>
+        </xsl:when>
+        <xsl:when test="$offset castable as xs:integer">
+          <xsl:sequence select="xs:integer($offset)"/>
+        </xsl:when>
+        <xsl:otherwise>
+          <xsl:sequence
+            select="error(xs:QName('f:bad-corresp'), 'no start offset in @corresp for ' || name())"/>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:variable>
+    <!-- end offset, from the function shared by all element types -->
+    <xsl:variable name="end" as="xs:integer" select="f:calc_content_length(.)"/>
+
+    <xsl:element name="span" namespace="{$KorAP_namespace}">
+      <xsl:attribute name="id" select="'s' || $my_index"/>
+      <xsl:attribute name="from" select="$start"/>
+      <xsl:attribute name="to" select="$end"/>
+      <xsl:attribute name="l" select="f:compute_nesting(.)"/>
+      <!-- feature structure: the element name first, then one f per attribute, if any -->
+      <xsl:element name="fs" namespace="{$tei_ns}">
+        <xsl:element name="f" namespace="{$tei_ns}">
+          <xsl:attribute name="name" select="local-name()"/>
+        </xsl:element>
+        <xsl:if test="@*">
+          <xsl:element name="f" namespace="{$tei_ns}">
+            <xsl:attribute name="name" select="'attr'"/>
+            <xsl:element name="fs" namespace="{$tei_ns}">
+              <xsl:attribute name="type" select="'attr'"/>
+              <xsl:for-each select="@*">
+                <xsl:element name="f" namespace="{$tei_ns}">
+                  <xsl:attribute name="name" select="local-name(.)"/>
+                  <xsl:value-of select="."/>
+                </xsl:element>
+              </xsl:for-each>
+            </xsl:element>
+          </xsl:element>
+        </xsl:if>
+      </xsl:element>
+    </xsl:element>
+    <!-- NOTE(review): no select here, so all child nodes are applied, text nodes included;
+         what happens to nodes that tei:* does not match depends on the struct mode. -->
+    <xsl:apply-templates mode="struct">
+      <xsl:with-param name="ini" select="$start" as="xs:integer"/>
+      <xsl:with-param name="fin" select="$end" as="xs:integer"/>
+      <xsl:with-param name="index" select="$my_index"/>
+    </xsl:apply-templates>
+  </xsl:template>
+
+  <!--   **************************        morpho      *******************                -->
+
+  <!-- Writes the file at $target; neither of the two document parameters is read yet. -->
+  <xsl:template name="create_morpho">
+    <xsl:param name="text.xml" as="document-node()"/>
+    <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
+    <xsl:param name="target" as="xs:string"/>
+    <xsl:result-document href="{$target}" method="xml" encoding="UTF-8" indent="yes"
+      xpath-default-namespace="http://ids-mannheim.de/ns/KorAP">
+      <xsl:processing-instruction name="xml-model">href=&quot;span.rng&quot; type=&quot;application/xml&quot; schematypens=&quot;http://relaxng.org/ns/structure/1.0&quot;</xsl:processing-instruction>
+      <!-- NOTE(review): only the xml-model PI is written so far; the result has no root element -->
+    </xsl:result-document>
+  </xsl:template>
+
+
+  <!--   **************************        TEXT header      *******************                -->
+  <!-- Writes the text header: an idsHeader around the "text" mode output for tei:fileDesc. -->
+  <xsl:template name="create_text_header">
+    <xsl:param name="text.xml" as="document-node()"/>
+    <xsl:param name="target" as="xs:string"/>
+
+    <!-- create the local header.xml file at $target -->
+    <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
+      xpath-default-namespace="http://ids-mannheim.de/ns/KorAP" href="{$target}">
+
+      <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
+        <xsl:apply-templates select="$text.xml//tei:TEI/tei:teiHeader/tei:fileDesc" mode="text"/>
+      </idsHeader>
+    </xsl:result-document>
+  </xsl:template>
+  <!-- tei:fileDesc becomes fileDesc; its children are processed in the same mode -->
+  <xsl:template match="tei:fileDesc" mode="text">
+    <fileDesc>
+      <xsl:apply-templates mode="text"/>
+    </fileDesc>
+  </xsl:template>
+  <!-- tei:title becomes t.title; its children are applied with no mode attribute, unlike the sibling templates -->
+  <xsl:template match="tei:title" mode="text">
+    <t.title>
+      <xsl:apply-templates/>
+    </t.title>
+  </xsl:template>
+  <!-- tei:titleStmt becomes titleStmt, led by a textSigle; NOTE(review): no $docID in it, unlike $compoundID - confirm -->
+  <xsl:template match="tei:titleStmt" mode="text">
+    <titleStmt>
+      <textSigle>
+        <xsl:value-of select="$corpusID || '/' || $textID"/>
+      </textSigle>
+      <xsl:apply-templates mode="text"/>
+    </titleStmt>
+  </xsl:template>
+
+
   <!--   **************************        CORPUS header       *******************                -->
   <xsl:template name="create_corpus_header">
     <xsl:param name="text.xml" as="document-node()"/>
+    <xsl:param name="target" as="xs:string"/>
 
     <!-- create the corpus-level header.xml file -->
-    <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
-      href="{$targetCorpusDir || 'header.xml'}">
+    <xsl:result-document encoding="UTF-8" method="xml" indent="yes" href="{$target}">
 
       <!--doctype-public="{$publicDoctypeI5}"
           doctype-system="{$systemDoctypeI5}">
@@ -110,43 +347,6 @@
   </xsl:template>
 
 
-  <!--   **************************        TEXT header      *******************                -->
-
-  <xsl:template name="create_text_header">
-    <xsl:param name="text.xml" as="document-node()"/>
-
-    <!-- create the local header.xml file -->
-    <xsl:result-document encoding="UTF-8" method="xml" indent="yes"
-      xpath-default-namespace="http://ids-mannheim.de/ns/KorAP"
-      href="{$targetTextDir || 'header.xml'}">
-
-      <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
-        <xsl:apply-templates select="$text.xml//tei:TEI/tei:teiHeader/tei:fileDesc" mode="text"/>
-      </idsHeader>
-    </xsl:result-document>
-  </xsl:template>
-
-  <xsl:template match="tei:fileDesc" mode="text">
-    <fileDesc>
-      <xsl:apply-templates mode="text"/>
-    </fileDesc>
-  </xsl:template>
-
-  <xsl:template match="tei:title" mode="text">
-    <t.title>
-      <xsl:apply-templates/>
-    </t.title>
-  </xsl:template>
-
-  <xsl:template match="tei:titleStmt" mode="text">
-    <titleStmt>
-      <textSigle>
-        <xsl:value-of select="$corpusID || '/' || $textID"/>
-      </textSigle>
-      <xsl:apply-templates mode="text"/>
-    </titleStmt>
-  </xsl:template>
-