Improve checks regarding annotation boundaries

Resolves #6

Change-Id: I3a41d239ea44923720bb7d557de26f30f285ae67
diff --git a/Changes b/Changes
index 2c4883f..d04ebd3 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+2.4.2 2023-02-10
+        - Improve checks for numerical annotation bounds.
+
 2.4.1 2023-02-07
         - Fix test.
 
diff --git a/lib/KorAP/XML/TEI/Annotations/Annotation.pm b/lib/KorAP/XML/TEI/Annotations/Annotation.pm
index 305de49..892c3fd 100644
--- a/lib/KorAP/XML/TEI/Annotations/Annotation.pm
+++ b/lib/KorAP/XML/TEI/Annotations/Annotation.pm
@@ -3,6 +3,7 @@
 use warnings;
 use Log::Any '$log';
 use KorAP::XML::TEI 'escape_xml';
+use Scalar::Util qw'looks_like_number';
 
 # TODO:
 #   Make these parameters passable from the script
@@ -37,6 +38,23 @@
 # Create a new annotation object
 sub new {
   my $class = shift;
+
+  # Bounds and level are validated independently of each
+  # other: a non-numeric 'to' or 'level' has to be rejected
+  # even if an earlier positional parameter is undefined.
+  # Slot 0 is skipped; slots 1..3 have to be numeric when
+  # given (presumably 'from', 'to' and 'level').
+  #
+  # Returns nothing (undef/empty list) on invalid input,
+  # so callers can test the constructor result for truth.
+  for my $i (1 .. 3) {
+    next unless defined $_[$i];
+
+    unless (looks_like_number($_[$i])) {
+      return;
+    };
+  };
+
   my $self = bless [@_], $class;
 
   # Ensure minimum length for pushing attributes
@@ -47,7 +65,12 @@
 
 # Set 'from'
 sub set_from {
-  $_[0]->[FROM] = $_[1];
+  unless (looks_like_number($_[1])) {  # Reject non-numeric input
+    $log->fatal('Passed non-numeric value as annotation start');
+    return;                            # Falsy result signals failure
+  };
+  $_[0]->[FROM] = $_[1];               # Store the validated value
+  return 1;
 };
 
 
@@ -59,7 +82,12 @@
 
 # Set 'to'
 sub set_to {
-  $_[0]->[TO] = $_[1];
+  unless (looks_like_number($_[1])) {  # Reject non-numeric input
+    $log->fatal('Passed non-numeric value as annotation end');
+    return;                            # Falsy result signals failure
+  };
+  $_[0]->[TO] = $_[1];                 # Store the validated value
+  return 1;
 };
 
 
@@ -73,7 +101,12 @@
 sub set_level {
   # Insert information about depth of element in XML-tree
   # (top element = level 1)
-  $_[0]->[LEVEL] = $_[1];
+  unless (looks_like_number($_[1])) {  # Reject non-numeric input
+    $log->fatal('Passed non-numeric value as annotation level');
+    return;                            # Falsy result signals failure
+  };
+  $_[0]->[LEVEL] = $_[1];              # Store the validated value
+  return 1;
 };
 
 
diff --git a/lib/KorAP/XML/TEI/Annotations/Collector.pm b/lib/KorAP/XML/TEI/Annotations/Collector.pm
index 887bbb0..6fd69da 100644
--- a/lib/KorAP/XML/TEI/Annotations/Collector.pm
+++ b/lib/KorAP/XML/TEI/Annotations/Collector.pm
@@ -14,7 +14,7 @@
 # Add new annotation to annotation list
 sub add_new_annotation {
   my $self = shift;
-  my $token = KorAP::XML::TEI::Annotations::Annotation->new(@_);
+  my $token = KorAP::XML::TEI::Annotations::Annotation->new(@_) or return;
   push @$self, $token;
   return $token;
 };
diff --git a/lib/KorAP/XML/TEI/Tokenizer/External.pm b/lib/KorAP/XML/TEI/Tokenizer/External.pm
index 92f0c31..e0f4f29 100644
--- a/lib/KorAP/XML/TEI/Tokenizer/External.pm
+++ b/lib/KorAP/XML/TEI/Tokenizer/External.pm
@@ -6,6 +6,7 @@
 use IO::Select;
 use IPC::Open2 qw(open2);
 use Encode qw(encode);
+use Scalar::Util qw'looks_like_number';
 
 # This tokenizer starts an external process for
 # tokenization. It writes the data to tokenize
@@ -126,6 +127,9 @@
     # Serialize all bounds
     my $c = 0;
     for (my $i = 0; $i < @bounds; $i +=  2 ){
+      if (!looks_like_number($bounds[$i]) || !looks_like_number($bounds[$i+1])) {
+        die $log->fatal("Token bounds not numerical from external tokenizer ('$text_id')");
+      };
       $output .= qq!    <span id="t_$c" from="! . $bounds[$i] . '" to="' .
         $bounds[$i+1] . qq!" />\n!;
       $c++;
@@ -184,9 +188,9 @@
   my ($self, $structures) = @_;
 
   for (my $i=0; $i < @{$self->{sentence_starts}}; $i++) {
-    my $anno = $structures->add_new_annotation("s");
-    $anno->set_from($self->{sentence_starts}[$i]);
-    $anno->set_to($self->{sentence_endss}[$i]);
+    my $anno = $structures->add_new_annotation('s');
+    $anno->set_from($self->{sentence_starts}[$i]) or die $log->fatal('Sentence boundaries not numerical');
+    $anno->set_to($self->{sentence_endss}[$i]) or die $log->fatal('Sentence boundaries not numerical');
     $anno->set_level(-1);
   }
   $self->{sentence_starts} = [];
diff --git a/t/annotation.t b/t/annotation.t
index 1176c98..998e7ea 100644
--- a/t/annotation.t
+++ b/t/annotation.t
@@ -78,6 +78,11 @@
     ->text_is('span > fs > f > fs f:nth-of-type(3)', 'C & A')
 };
 
+subtest 'Wrong annotations' => sub {
+  ok(!KorAP::XML::TEI::Annotations::Annotation->new('p','Error',0), 'Non-numeric start is rejected');
+  ok(!KorAP::XML::TEI::Annotations::Annotation->new('p',0,'Occurred'), 'Non-numeric end is rejected');
+  ok(!KorAP::XML::TEI::Annotations::Annotation->new('p',0,5,'Fehler'), 'Non-numeric parameter after the bounds is rejected');
+};
 
 done_testing;
 
diff --git a/t/annotations-collect.t b/t/annotations-collect.t
index 543e693..aa67937 100644
--- a/t/annotations-collect.t
+++ b/t/annotations-collect.t
@@ -44,7 +44,21 @@
 
 my $anno = KorAP::XML::TEI::Annotations::Annotation->new('x4', 20 => 21);
 
-$t->add_annotation($anno);
+ok($anno);
+
+ok($t->add_annotation($anno));
+
+$loy = Test::XML::Loy->new($t->to_string('text',0))
+  ->attr_is('layer', 'docid', 'text')
+  ->attr_is('span#s0', 'to', '8')
+  ->attr_is('span#s1', 'to', '14')
+  ->attr_is('span#s1', 'l', '2')
+  ->attr_is('span#s2', 'to', '20')
+  ->attr_is('span#s3', 'from', '20')
+  ->attr_is('span#s3', 'to', '21')
+;
+
+ok(!$t->add_new_annotation('x1','error','occurred'));
 
 $loy = Test::XML::Loy->new($t->to_string('text',0))
   ->attr_is('layer', 'docid', 'text')