Produce an initial version of named.xml, containing only placeholders but with properly computed offsets (obtained by walking ann_morphosyntax.xml)

Change-Id: I127274e51bf6abdcf0bcda8e8cbfe45c5b919dce
diff --git a/nkjp2korap.xsl b/nkjp2korap.xsl
index 4aae734..e220a05 100644
--- a/nkjp2korap.xsl
+++ b/nkjp2korap.xsl
@@ -66,7 +66,10 @@
     <xsl:accumulator-rule match="tei:*[ancestor-or-self::tei:text]" select="$value + 1" phase="start"/>
   </xsl:accumulator>
   
-  <xsl:accumulator name="elem-offset-seq" as="map(xs:string, item()+)+" initial-value="(map{'null':(0,0)})">
+  
+  
+  <!-- TODO: the 'morpho-offsets' and 'segmentation-offsets' accumulators share most of their rules and can probably be merged into one; to be verified -->
+  <xsl:accumulator name="morpho-offsets" as="map(xs:string, item()+)+" initial-value="(map{'null':(0,0)})">
  
     <xsl:accumulator-rule match="tei:body/tei:p" phase="start">
       <xsl:variable name="preceding_index" as="xs:integer">
@@ -83,7 +86,7 @@
         string(@xml:id): ($preceding_index,$our_base)
         }"/>
     </xsl:accumulator-rule>
- 
+                                                                            <!-- this is morpho-offsets -->
     <xsl:accumulator-rule match="tei:s" phase="start">
       <xsl:variable name="preceding_index" as="xs:integer">
         <xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
@@ -97,21 +100,27 @@
         string(@xml:id): ($preceding_index,$our_base)
         }"/>
     </xsl:accumulator-rule>
- 
-    <xsl:accumulator-rule match="tei:w[parent::tei:seg[count(@nkjp:rejected) eq 0]]" phase="end">
+                                                                            <!-- this is morpho-offsets -->
+    
+<!--   The rule below matches tei:seg elements that carry a 'morph' feature structure with a 'disamb' feature. 
+      The pattern is chosen so that it should not fire in other layers (for efficiency, and so that the accumulators 
+      may eventually be merged); it is also meant to filter out the rejected tokenization alternatives already here -->
+    <xsl:accumulator-rule match="tei:seg[tei:fs[@type eq 'morph' and tei:f[@name eq 'disamb']]]" phase="end">
+      <!-- $preceding_index: the second offset stored in the most recently appended map -->
       <xsl:variable name="preceding_index" as="xs:integer">
         <xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
         <xsl:sequence select="map:get($the_tail,map:keys($the_tail)[1])[2]"/>
       </xsl:variable>
-      <xsl:variable name="our_base" as="xs:integer" select="$preceding_index + xs:integer(f:is_preceded_by_ws(parent::tei:seg,true()))"/>
+      <xsl:variable name="our_base" as="xs:integer" select="$preceding_index + xs:integer(f:is_preceded_by_ws(.,true()))"/>
 
       <xsl:sequence select="
-          $value,
-          map {
-            string(parent::tei:seg/@xml:id): ($our_base,$our_base + string-length())
-          }"/>
+        $value,
+        map {
+        string(@xml:id): ($our_base,$our_base + string-length(tei:fs/tei:f[@name eq 'orth']/tei:string))
+        }"/>
+      <!-- the appended entry is keyed by the segment's xml:id and holds (start offset, start offset + length of the 'orth' string) -->
     </xsl:accumulator-rule>
-    
+                                                                            <!-- this is morpho-offsets -->
     <xsl:accumulator-rule match="tei:s" phase="end">
       <xsl:variable name="preceding_index" as="xs:integer">
         <xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
@@ -129,7 +138,7 @@
         string(@xml:id): ($our_base,$preceding_index)
         }"/>
     </xsl:accumulator-rule>
-    
+                                                                            <!-- this is morpho-offsets -->
     <xsl:accumulator-rule match="tei:body/tei:p" phase="end">
       <xsl:variable name="preceding_index" as="xs:integer">
         <xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
@@ -147,18 +156,103 @@
         string(@xml:id): ($our_base,$preceding_index)
         }"/>
     </xsl:accumulator-rule>
+  </xsl:accumulator>
+  
+  
+
+  <xsl:accumulator name="segmentation-offsets" as="map(xs:string, item()+)+" initial-value="(map{'null':(0,0)})">
+    <!-- On entering a paragraph: appends a provisional single-entry map, keyed by the paragraph's xml:id, holding (offset taken from the previous entry, start offset of this paragraph) -->
+    <xsl:accumulator-rule match="tei:body/tei:p" phase="start">
+      <xsl:variable name="preceding_index" as="xs:integer">
+        <xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
+        <xsl:sequence select="map:get($the_tail,map:keys($the_tail)[1])[2]"/>
+      </xsl:variable>
+      <!-- $preceding_index: the second offset stored in the most recently appended map -->
+      <xsl:variable name="our_base" as="xs:integer" select="if($preceding_index eq 0) then $preceding_index else $preceding_index + 1"/>
+      <!-- for paragraphs, the start only depends on being initial or not: it stays 0 while the offset is still 0, otherwise it is one position past the preceding offset -->
+      
+      <xsl:sequence select="
+        $value,
+        map {
+        string(@xml:id): ($preceding_index,$our_base)
+        }"/>
+    </xsl:accumulator-rule>
+    <!-- On entering a sentence: appends a provisional single-entry map, keyed by the sentence's xml:id, holding (offset taken from the previous entry, start offset of this sentence) -->
+    <xsl:accumulator-rule match="tei:s" phase="start">
+      <xsl:variable name="preceding_index" as="xs:integer">
+        <xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
+        <xsl:sequence select="map:get($the_tail,map:keys($the_tail)[1])[2]"/>
+      </xsl:variable>
+      <xsl:variable name="our_base" as="xs:integer" select="if($preceding_index eq 0) then $preceding_index else $preceding_index + xs:integer(f:is_preceded_by_ws(.,true()))"/>
+      <!-- while the offset is still 0 nothing is added; otherwise the result of f:is_preceded_by_ws, cast to an integer, is added to the preceding offset -->
+      <xsl:sequence select="
+        $value,
+        map {
+        string(@xml:id): ($preceding_index,$our_base)
+        }"/>
+    </xsl:accumulator-rule>
+    <!-- On leaving a tei:w whose parent tei:seg has no @nkjp:rejected attribute: appends a single-entry map, keyed by that seg's xml:id, holding the (start, end) offsets of the token -->
+    <xsl:accumulator-rule match="tei:w[parent::tei:seg[count(@nkjp:rejected) eq 0]]" phase="end">
+      <xsl:variable name="preceding_index" as="xs:integer">
+        <xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
+        <xsl:sequence select="map:get($the_tail,map:keys($the_tail)[1])[2]"/>
+      </xsl:variable>
+      <xsl:variable name="our_base" as="xs:integer" select="$preceding_index + xs:integer(f:is_preceded_by_ws(parent::tei:seg,true()))"/>
+      <!-- the end offset is the start offset plus string-length() of the context item, i.e. of the tei:w itself -->
+      <xsl:sequence select="
+        $value,
+        map {
+        string(parent::tei:seg/@xml:id): ($our_base,$our_base + string-length())
+        }"/>
+    </xsl:accumulator-rule>
+    <!-- On leaving a sentence: appends the closing entry for its xml:id, holding (start offset recorded on entry, second offset of the most recently appended map) -->
+    <xsl:accumulator-rule match="tei:s" phase="end">
+      <xsl:variable name="preceding_index" as="xs:integer">
+        <xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
+        <xsl:sequence select="map:get($the_tail,map:keys($the_tail)[1])[2]"/>
+        
+      </xsl:variable>
+      <xsl:variable name="our_base" as="xs:integer">
+        <xsl:variable name="incomplete" select="map:find($value,string(@xml:id))(1)" as="xs:integer+"/>
+        <xsl:sequence select="$incomplete[2]"/>
+      </xsl:variable>
+      <!-- $our_base: map:find collects the values recorded under this xml:id; member 1 is expected to be the provisional entry appended by the start rule, whose second item is the start offset -->
+      <xsl:sequence select="
+        $value,
+        map {
+        string(@xml:id): ($our_base,$preceding_index)
+        }"/>
+    </xsl:accumulator-rule>
+    <!-- On leaving a paragraph: appends the closing entry for its xml:id, holding (start offset recorded on entry, second offset of the most recently appended map) -->
+    <xsl:accumulator-rule match="tei:body/tei:p" phase="end">
+      <xsl:variable name="preceding_index" as="xs:integer">
+        <xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
+        <xsl:sequence select="map:get($the_tail,map:keys($the_tail)[1])[2]"/>
+        
+      </xsl:variable>
+      <xsl:variable name="our_base" as="xs:integer">
+        <xsl:variable name="incomplete" select="map:find($value,string(@xml:id))(1)" as="xs:integer+"/>
+        <xsl:sequence select="$incomplete[2]"/>
+      </xsl:variable>
+      <!-- $our_base: map:find collects the values recorded under this xml:id; member 1 is expected to be the provisional entry appended by the start rule, whose second item is the start offset -->
+      <xsl:sequence select="
+        $value,
+        map {
+        string(@xml:id): ($our_base,$preceding_index)
+        }"/>
+    </xsl:accumulator-rule>
     
     <xsl:accumulator-rule match="tei:body" phase="end">
       <xsl:variable name="preceding_index" as="xs:integer">
         <xsl:variable name="the_tail" as="map(*)" select="head(reverse($value))"/>
         <xsl:sequence select="map:get($the_tail, map:keys($the_tail)[1])[2]"/>
       </xsl:variable>
-
+      <!-- On leaving tei:body: appends an entry keyed by string(@xml:id) of the body, spanning from 0 to the second offset of the most recently appended map -->
       <xsl:sequence select="
-          $value,
-          map {
-            string(@xml:id): (0, $preceding_index)
-          }"/>
+        $value,
+        map {
+        string(@xml:id): (0, $preceding_index)
+        }"/>
     </xsl:accumulator-rule>
     
     <xsl:accumulator-rule match="tei:text" phase="end">
@@ -195,6 +289,10 @@
           <xsl:when test="$node/@nkjp:nps">
             <xsl:sequence select="fn:false()"/>
           </xsl:when>
+          <xsl:when test="$node/tei:fs/tei:f[@name eq 'nps']">
+            <!--added for traversing ann_morphosyntax-->
+            <xsl:sequence select="fn:false()"/>
+          </xsl:when>
           <xsl:when
             test="$node/ancestor::tei:s[count(preceding-sibling::tei:s) eq 0] and $node/ancestor::tei:p[count(preceding-sibling::tei:p) eq 0] and not($node/preceding::tei:seg[count(@nkjp:rejected) eq 0])">
             <xsl:sequence select="fn:false()"/>
@@ -205,6 +303,11 @@
             <!--  I forget how node identity works now, so let me just compare the IDs        -->
             <xsl:sequence select="fn:false()"/>
           </xsl:when>
+          <xsl:when
+            test="$suppress_initial and $node/ancestor::tei:s/descendant::tei:seg[tei:fs/tei:f[@name eq 'disamb']][1]/@xml:id eq $node/@xml:id">
+            <!--added for traversing ann_morphosyntax-->
+            <xsl:sequence select="fn:false()"/>
+          </xsl:when>
           <xsl:otherwise>
             <xsl:sequence select="fn:true()"/>
           </xsl:otherwise>
@@ -429,7 +532,7 @@
   
   <xsl:template match="tei:*" mode="struct">
     <xsl:variable name="offsets" as="xs:integer+">
-      <xsl:sequence select="map:get(fn:accumulator-after('elem-offset-seq')[last()], string(@xml:id))"/>
+      <xsl:sequence select="map:get(fn:accumulator-after('segmentation-offsets')[last()], string(@xml:id))"/>
     </xsl:variable>
     
     <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
@@ -498,7 +601,7 @@
       constituent values, should anything go wrong; it might get compacted at some point, but 
       the increase in efficiency will probably be minimal, compared to the decrease of readability   -->
     <xsl:variable name="offsets" as="xs:integer+">
-      <xsl:sequence select="map:get(fn:accumulator-after('elem-offset-seq')[last()], string(@xml:id))"/>
+      <xsl:sequence select="map:get(fn:accumulator-after('segmentation-offsets')[last()], string(@xml:id))"/>
     </xsl:variable>
     <xsl:variable name="my_name" select="local-name()" as="xs:string"/>
     <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
@@ -551,7 +654,7 @@
   
   <xsl:template name="create_named">
     <xsl:param name="compoundID" as="xs:string"/>
-    <xsl:param name="ann_segmentation.xml" as="document-node()"/>
+    <xsl:param name="ann_segmentation.xml" as="document-node()"/>   <!-- probably out -->
     <xsl:param name="ann_morphosyntax.xml" as="document-node()"/>
     <xsl:param name="ann_named.xml" as="document-node()"/>
     <xsl:param name="target" as="xs:string"/>
@@ -564,14 +667,47 @@
         <xsl:attribute name="version" select="$KorAP-XML_version"/>
         
         <xsl:element name="spanList" namespace="{$KorAP_namespace}">
-          <!--<xsl:apply-templates select="$ann_segmentation.xml//tei:text" mode="named">
-            <xsl:with-param name="ann_morphosyntax.xml" select="$ann_morphosyntax.xml" as="document-node()" tunnel="yes"/>
+          <xsl:apply-templates select="$ann_morphosyntax.xml//tei:text" mode="named">
+            <!--<xsl:with-param name="ann_segmentation.xml" select="$ann_segmentation.xml" as="document-node()" tunnel="yes"/>-->
             <xsl:with-param name="ann_named.xml" select="$ann_named.xml" as="document-node()" tunnel="yes"/>
-          </xsl:apply-templates>-->          
+          </xsl:apply-templates>          
         </xsl:element>
       </xsl:element>
     </xsl:result-document>
   </xsl:template>
+  <!-- Fallback for mode 'named': a tei:seg that is not claimed by a more specific template in this mode produces no output -->
+  <xsl:template match="tei:seg" mode="named"/>
+  <!-- Emits one placeholder span per disambiguated segment; the match pattern repeats the predicate of the tei:seg rule in the 'morpho-offsets' accumulator, so that only segments with a recorded offsets entry are handled here -->
+  <xsl:template match="tei:seg[tei:fs[@type eq 'morph' and tei:f[@name eq 'disamb']]]" mode="named">
+    <xsl:param name="ann_named.xml" as="document-node()" tunnel="yes"/>
+    <!-- $offsets: the (from, to) pair that 'morpho-offsets' appended on leaving this segment, looked up by the segment's xml:id -->
+    <xsl:variable name="offsets" as="xs:integer+">
+      <xsl:sequence select="map:get(fn:accumulator-after('morpho-offsets')[last()], string(@xml:id))"/>
+    </xsl:variable>
+    <!-- NOTE(review): $my_id and the tunnelled $ann_named.xml are not read below; presumably kept for the non-placeholder version - confirm -->
+    <xsl:variable name="my_id" select="@xml:id" as="xs:string"/>
+    <xsl:variable name="my_index" select="fn:accumulator-before('element-index')" as="xs:integer"/>
+    <!-- output: a span with id 'n' + element index, the from/to offsets and the nesting level, wrapping a fixed 'ne'/'ent' feature structure; the comment node shows the 'orth' string prefixed with ' ' when an 'nps' feature is present and with '_' otherwise -->
+    <xsl:element name="span" namespace="{$KorAP_namespace}">
+      <xsl:attribute name="id" select="'n' || $my_index"/>
+      <xsl:attribute name="from" select="$offsets[1]"/>
+      <xsl:attribute name="to" select="$offsets[2]"/>
+      <xsl:attribute name="l" select="f:compute_nesting(.)"/>
+      <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
+        <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
+          <xsl:attribute name="name" select="'ne'"/>
+          <xsl:comment select="(if(tei:fs/tei:f[@name eq 'nps']) then ' ' else '_') || tei:fs/tei:f[@name eq 'orth']/tei:string"/>
+          <xsl:element name="fs" namespace="http://www.tei-c.org/ns/1.0">
+            <xsl:element name="f" namespace="http://www.tei-c.org/ns/1.0">
+              <xsl:attribute name="name" select="'ent'"/>
+              <xsl:value-of select="'placeholder'"/>
+            </xsl:element>
+          </xsl:element>
+        </xsl:element>
+      </xsl:element>
+    </xsl:element>
+  </xsl:template>
+  
 
   <!--   **************************        syntactic chunks      *******************      -->