Improve skipping of sentences

Change-Id: I9fba0a92befd7b82bdcde46c5c4a0429c040e6a2

commit: ace1277d71f643cea7a022d7cc57e36788ea781d [log] [tgz]
author: Akron <nils@diewald-online.de> Fri Feb 19 13:16:26 2021 +0100
committer: Akron <nils@diewald-online.de> Tue Feb 23 16:19:31 2021 +0100
tree: 0293b47d2a7a93522d2863ac7db028fa696de196
parent: 0529e51347476b428dfce5fb98fd3a887e2b021c [diff]
diff --git a/lib/KorAP/XML/TEI/Annotations/Collector.pm b/lib/KorAP/XML/TEI/Annotations/Collector.pm
index a15a98f..ed11d23 100644
--- a/lib/KorAP/XML/TEI/Annotations/Collector.pm
+++ b/lib/KorAP/XML/TEI/Annotations/Collector.pm

@@ -12,12 +12,6 @@
 };
 
 
-# Dummy annotation that will not be added to output
-sub new_dummy_annotation {
-  my $token = KorAP::XML::TEI::Annotations::Annotation->new(@_);
-  return $token;
-};
-
 # Add new annotation to annotation list
 sub add_new_annotation {
   my $self = shift;

diff --git a/script/tei2korapxml b/script/tei2korapxml
index 046b98b..b307ceb 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml

@@ -431,11 +431,6 @@
   # (1 = topmost level inside retr_info() = should always be level of tag $_TEXT_BODY)
   my $rl = shift;
 
-  my $dummy_anno;
-  if ($use_tokenizer_sentence_splits) {
-    $dummy_anno = $structures->new_dummy_annotation;
-  }
-
   # Iteration through all array elements
   # ($_[0] is a reference to an array reference)
   # See notes on how 'XML::CompactTree::XS' works and
@@ -449,15 +444,16 @@
       # from here: tag-node (opening)
       #~~~~
 
-      my $anno;
-
       # $e->[1] represents the tag name
+      # Skip sentences
       if ($use_tokenizer_sentence_splits && $e->[1] eq "s") {
-        $anno = $dummy_anno;
-      } else {
-        $anno = $structures->add_new_annotation($e->[1]);
+        if (defined $e->[$_IDX]) {
+          retr_info($rl+1, \$e->[$_IDX]);
+        }
+        next;
       }
 
+      my $anno = $structures->add_new_annotation($e->[1]);
 
       # Add element also to token list
       if ($_TOKENS_PROC && $e->[1] eq $_TOKENS_TAG) {
commit	ace1277d71f643cea7a022d7cc57e36788ea781d	[log] [tgz]
author	Akron <nils@diewald-online.de>	Fri Feb 19 13:16:26 2021 +0100
committer	Akron <nils@diewald-online.de>	Tue Feb 23 16:19:31 2021 +0100
tree	0293b47d2a7a93522d2863ac7db028fa696de196
parent	0529e51347476b428dfce5fb98fd3a887e2b021c [diff]