Improve skipping of sentences
Change-Id: I9fba0a92befd7b82bdcde46c5c4a0429c040e6a2
diff --git a/lib/KorAP/XML/TEI/Annotations/Collector.pm b/lib/KorAP/XML/TEI/Annotations/Collector.pm
index a15a98f..ed11d23 100644
--- a/lib/KorAP/XML/TEI/Annotations/Collector.pm
+++ b/lib/KorAP/XML/TEI/Annotations/Collector.pm
@@ -12,12 +12,6 @@
};
-# Dummy annotation that will not be added to output
-sub new_dummy_annotation {
- my $token = KorAP::XML::TEI::Annotations::Annotation->new(@_);
- return $token;
-};
-
# Add new annotation to annotation list
sub add_new_annotation {
my $self = shift;
diff --git a/script/tei2korapxml b/script/tei2korapxml
index 046b98b..b307ceb 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -431,11 +431,6 @@
# (1 = topmost level inside retr_info() = should always be level of tag $_TEXT_BODY)
my $rl = shift;
- my $dummy_anno;
- if ($use_tokenizer_sentence_splits) {
- $dummy_anno = $structures->new_dummy_annotation;
- }
-
# Iteration through all array elements
# ($_[0] is a reference to an array reference)
# See notes on how 'XML::CompactTree::XS' works and
@@ -449,15 +444,16 @@
# from here: tag-node (opening)
#~~~~
- my $anno;
-
# $e->[1] represents the tag name
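+ # Skip <s> tags if the tokenizer provides the sentence splits (no annotation is added), but still recurse into their content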
if ($use_tokenizer_sentence_splits && $e->[1] eq "s") {
- $anno = $dummy_anno;
- } else {
- $anno = $structures->add_new_annotation($e->[1]);
+ if (defined $e->[$_IDX]) {
+ retr_info($rl+1, \$e->[$_IDX]);
+ }
+ next;
}
+ my $anno = $structures->add_new_annotation($e->[1]);
# Add element also to token list
if ($_TOKENS_PROC && $e->[1] eq $_TOKENS_TAG) {