Added add_span() method to MultiTermToken

Change-Id: Iabe067079c47ac49995cfc66d18f7d78768bc25e
diff --git a/Changes b/Changes
index a28b5e6..2e14f56 100644
--- a/Changes
+++ b/Changes
@@ -9,6 +9,7 @@
         - Remove MultiTerm->add() in favor of
           MultiTerm->add_by_term().
         - Optimization by reducing calls to _offset().
+        - Introduced add_span() method to MultiTermToken.
 
 0.40 2020-03-03
         - Fixed XIP parser.
diff --git a/lib/KorAP/XML/Annotation/Base/Paragraphs.pm b/lib/KorAP/XML/Annotation/Base/Paragraphs.pm
index 922df0c..3f18360 100644
--- a/lib/KorAP/XML/Annotation/Base/Paragraphs.pm
+++ b/lib/KorAP/XML/Annotation/Base/Paragraphs.pm
@@ -10,13 +10,9 @@
     layer => 'paragraph',
     cb => sub {
       my ($stream, $span) = @_;
-      my $mtt = $stream->pos($span->get_p_start);
-      my $mt = $mtt->add_by_term('<>:base/s:p');
-      $mt->set_o_start($span->get_o_start);
-      $mt->set_o_end($span->get_o_end);
-      $mt->set_p_end($span->get_p_end);
-      $mt->set_payload('<b>1');
-      $mt->set_pti(64);
+      $stream->pos($span->get_p_start)
+        ->add_span('<>:base/s:p', $span)
+        ->set_payload('<b>1');
       $i++;
     }
   ) or return;
diff --git a/lib/KorAP/XML/Annotation/Base/Sentences.pm b/lib/KorAP/XML/Annotation/Base/Sentences.pm
index 9ccdca6..e5b586f 100644
--- a/lib/KorAP/XML/Annotation/Base/Sentences.pm
+++ b/lib/KorAP/XML/Annotation/Base/Sentences.pm
@@ -5,26 +5,14 @@
   my $self = shift;
   my $i = 0;
 
-  my ($first, $last_p, $last_o);
-
   $$self->add_spandata(
     foundry => 'base',
     layer => 'sentences',
     cb => sub {
       my ($stream, $span) = @_;
-      my $mtt = $stream->pos($span->get_p_start);
-
-      $first = [$span->get_p_start, $span->get_o_start] unless defined $first;
-
-      my $mt = $mtt->add_by_term('<>:base/s:s');
-      $mt->set_o_start($span->get_o_start);
-      $mt->set_o_end($span->get_o_end);
-      $mt->set_p_end($span->get_p_end);
-      $mt->set_payload('<b>2');
-      $mt->set_pti(64);
-
-      $last_p = $span->get_p_end;
-      $last_o = $span->get_o_end;
+      $stream->pos($span->get_p_start)
+        ->add_span('<>:base/s:s', $span)
+        ->set_payload('<b>2');
       $i++;
     }
   ) or return;
diff --git a/lib/KorAP/XML/Annotation/Connexor/Phrase.pm b/lib/KorAP/XML/Annotation/Connexor/Phrase.pm
index e5f8012..f1c5f76 100644
--- a/lib/KorAP/XML/Annotation/Connexor/Phrase.pm
+++ b/lib/KorAP/XML/Annotation/Connexor/Phrase.pm
@@ -17,13 +17,9 @@
       my $type = $content->{'#text'};
 
       if ($type) {
-        my $mt = $stream->pos($span->get_p_start)
-          ->add_by_term('<>:cnx/c:' . $type);
-        $mt->set_o_start($span->get_o_start);
-        $mt->set_o_end($span->get_o_end);
-        $mt->set_p_end($span->get_p_end);
-        $mt->set_pti(64);
-        $mt->set_payload('<b>0'); # Pseudo-depth
+        $stream->pos($span->get_p_start)
+          ->add_span('<>:cnx/c:' . $type, $span)
+          ->set_payload('<b>0'); # Pseudo-depth
       };
     }
   ) or return;
diff --git a/lib/KorAP/XML/Annotation/Connexor/Sentences.pm b/lib/KorAP/XML/Annotation/Connexor/Sentences.pm
index 71b1e08..b467e46 100644
--- a/lib/KorAP/XML/Annotation/Connexor/Sentences.pm
+++ b/lib/KorAP/XML/Annotation/Connexor/Sentences.pm
@@ -10,13 +10,9 @@
     layer => 'sentences',
     cb => sub {
       my ($stream, $span) = @_;
-      my $mt = $stream->pos($span->get_p_start)
-        ->add_by_term('<>:cnx/s:s');
-      $mt->set_o_start($span->get_o_start);
-      $mt->set_o_end($span->get_o_end);
-      $mt->set_p_end($span->get_p_end);
-      $mt->set_pti(64);
-      $mt->set_payload('<b>0');
+      $stream->pos($span->get_p_start)
+        ->add_span('<>:cnx/s:s', $span)
+        ->set_payload('<b>0');
       $i++;
     }
   ) or return;
diff --git a/lib/KorAP/XML/Annotation/CoreNLP/Constituency.pm b/lib/KorAP/XML/Annotation/CoreNLP/Constituency.pm
index 3f33802..3467ae4 100644
--- a/lib/KorAP/XML/Annotation/CoreNLP/Constituency.pm
+++ b/lib/KorAP/XML/Annotation/CoreNLP/Constituency.pm
@@ -1,6 +1,7 @@
 package KorAP::XML::Annotation::CoreNLP::Constituency;
 use KorAP::XML::Annotation::Base;
 use Set::Scalar;
+use feature 'current_sub';
 
 sub parse {
   my $self = shift;
@@ -59,14 +60,8 @@
     my $type = $f->{'#text'} or return;
 
     # $type is now NPA, NP, NUM ...
-    my $term = $mtt->add_by_term('<>:corenlp/c:' . $type);
-    $term->set_o_start($span->get_o_start);
-    $term->set_o_end($span->get_o_end);
-    $term->set_p_end($span->get_p_end);
-    $term->set_pti(64);
-    $term->set_payload('<b>' . ($level // 0));
-
-    my $this = $add_const;
+    $mtt->add_span('<>:corenlp/c:' . $type, $span)
+      ->set_payload('<b>' . ($level // 0));
 
     my $rel = $content->{rel} or return;
     $rel = [$rel] unless ref $rel eq 'ARRAY';
@@ -76,7 +71,7 @@
       my $subspan = delete $corenlp_const{$_->{-target}} or return;
 
       # This will be called recursively
-      $this->($subspan, $level + 1);
+      __SUB__->($subspan, $level + 1);
     };
   };
 
diff --git a/lib/KorAP/XML/Annotation/CoreNLP/Sentences.pm b/lib/KorAP/XML/Annotation/CoreNLP/Sentences.pm
index 37fb19a..31ef18c 100644
--- a/lib/KorAP/XML/Annotation/CoreNLP/Sentences.pm
+++ b/lib/KorAP/XML/Annotation/CoreNLP/Sentences.pm
@@ -10,13 +10,9 @@
     layer => 'sentences',
     cb => sub {
       my ($stream, $span) = @_;
-      my $mtt = $stream->pos($span->get_p_start);
-      my $mt = $mtt->add_by_term('<>:corenlp/s:s');
-      $mt->set_o_start($span->get_o_start);
-      $mt->set_o_end($span->get_o_end);
-      $mt->set_p_end($span->get_p_end);
-      $mt->set_pti(64);
-      $mt->set_payload('<b>0'); # Could also be 2 for t/p/s
+      $stream->pos($span->get_p_start)
+        ->add_span('<>:corenlp/s:s', $span)
+        ->set_payload('<b>0'); # Could also be 2 for t/p/s
       $i++;
     }
   ) or return;
diff --git a/lib/KorAP/XML/Annotation/OpenNLP/Sentences.pm b/lib/KorAP/XML/Annotation/OpenNLP/Sentences.pm
index cf496c2..47aca86 100644
--- a/lib/KorAP/XML/Annotation/OpenNLP/Sentences.pm
+++ b/lib/KorAP/XML/Annotation/OpenNLP/Sentences.pm
@@ -10,13 +10,9 @@
     layer => 'sentences',
     cb => sub {
       my ($stream, $span) = @_;
-      my $mt = $stream->pos($span->get_p_start)
-        ->add_by_term('<>:opennlp/s:s');
-      $mt->set_o_start($span->get_o_start);
-      $mt->set_o_end($span->get_o_end);
-      $mt->set_p_end($span->get_p_end);
-      $mt->set_pti(64);
-      $mt->set_payload('<b>0');
+      $stream->pos($span->get_p_start)
+        ->add_span('<>:opennlp/s:s', $span)
+        ->set_payload('<b>0');
       $i++;
     }
   ) or return;
diff --git a/lib/KorAP/XML/Annotation/TreeTagger/Sentences.pm b/lib/KorAP/XML/Annotation/TreeTagger/Sentences.pm
index bdfcbc5..b7ca230 100644
--- a/lib/KorAP/XML/Annotation/TreeTagger/Sentences.pm
+++ b/lib/KorAP/XML/Annotation/TreeTagger/Sentences.pm
@@ -10,13 +10,9 @@
     layer => 'sentences',
     cb => sub {
       my ($stream, $span) = @_;
-      my $mtt = $stream->pos($span->get_p_start);
-      my $mt = $mtt->add_by_term('<>:tt/s:s');
-      $mt->set_o_start($span->get_o_start);
-      $mt->set_o_end($span->get_o_end);
-      $mt->set_p_end($span->get_p_end);
-      $mt->set_pti(64);
-      $mt->set_payload('<b>0'); # Could be 2 as well t/p/s
+      $stream->pos($span->get_p_start)
+        ->add_span('<>:tt/s:s',$span)
+        ->set_payload('<b>0'); # Could be 2 as well t/p/s
       $i++;
     }
   ) or return;
diff --git a/lib/KorAP/XML/Annotation/XIP/Sentences.pm b/lib/KorAP/XML/Annotation/XIP/Sentences.pm
index cc1474c..afb99e6 100644
--- a/lib/KorAP/XML/Annotation/XIP/Sentences.pm
+++ b/lib/KorAP/XML/Annotation/XIP/Sentences.pm
@@ -13,13 +13,9 @@
     cb => sub {
       my ($stream, $span) = @_;
 
-      my $mt = $stream->pos($span->get_p_start)
-        ->add_by_term('<>:xip/s:s');
-      $mt->set_o_start($span->get_o_start);
-      $mt->set_o_end($span->get_o_end);
-      $mt->set_p_end($span->get_p_end);
-      $mt->set_pti(64);
-      $mt->set_payload('<b>0'); # Could be 2 as well for t/p/s
+      $stream->pos($span->get_p_start)
+        ->add_span('<>:xip/s:s', $span)
+        ->set_payload('<b>0'); # Could be 2 as well for t/p/s
       $i++;
     }
   ) or return;
diff --git a/lib/KorAP/XML/Index/MultiTerm.pm b/lib/KorAP/XML/Index/MultiTerm.pm
index db75a38..b72705e 100644
--- a/lib/KorAP/XML/Index/MultiTerm.pm
+++ b/lib/KorAP/XML/Index/MultiTerm.pm
@@ -3,25 +3,23 @@
 use warnings;
 use MIME::Base64;
 
-# Todo: This should store only the pti and the payload - with clever access using the pti!
-# Everything should be stored as bytes already (if this is feasible)
-
 use constant {
   TERM           => 0,
   O_START        => 1,
   O_END          => 2,
   P_START        => 3,
   P_END          => 4,
-  STORED_OFFSETS => 5,
-  PTI            => 6,
-  TUI            => 7,
-  PAYLOAD        => 8,
+  PTI            => 5,
+  TUI            => 6,
+  PAYLOAD        => 7,
+  STORED_OFFSETS => 8,
 };
 
 
 # Construct a multiterm object by passing a term
 sub new {
-  bless [$_[1]], $_[0];
+  my $class = shift;
+  bless [@_], $class;
 };
 
 sub set_payload {
diff --git a/lib/KorAP/XML/Index/MultiTermToken.pm b/lib/KorAP/XML/Index/MultiTermToken.pm
index 35f7d7a..55ee251 100644
--- a/lib/KorAP/XML/Index/MultiTermToken.pm
+++ b/lib/KorAP/XML/Index/MultiTermToken.pm
@@ -21,6 +21,25 @@
   bless [[]], shift;
 };
 
+
+# Add a span annotation for the given term to this token.
+# The offsets (o_start, o_end) and the end position (p_end) are read
+# from $span, p_start is left undefined and the pti is set to 64.
+# Returns the new MultiTerm object, so setters can be chained.
+sub add_span {
+  my ($self, $term, $span) = @_;
+  my $mt = KorAP::XML::Index::MultiTerm->new(
+    $term,              # Term
+    $span->get_o_start, # o_start
+    $span->get_o_end,   # o_end
+    undef,              # p_start
+    $span->get_p_end,   # p_end
+    64,                 # pti
+  );
+  push(@{$self->[MT]}, $mt);
+  return $mt;
+};
+
 sub add_by_term {
   my $mt = KorAP::XML::Index::MultiTerm->new($_[1]);
   push(@{$_[0]->[MT]}, $mt);
diff --git a/lib/KorAP/XML/Tokenizer/Units.pm b/lib/KorAP/XML/Tokenizer/Units.pm
index 322df2c..48c83c1 100644
--- a/lib/KorAP/XML/Tokenizer/Units.pm
+++ b/lib/KorAP/XML/Tokenizer/Units.pm
@@ -271,7 +271,7 @@
 
   return if !$to;
   return unless $to > $from;
-  $from ||= 0;
+  $from //= 0;
 
   my $pos = $self->match->lookup($from, $to);