Improve skipping of sentences
Change-Id: I9fba0a92befd7b82bdcde46c5c4a0429c040e6a2
diff --git a/script/tei2korapxml b/script/tei2korapxml
index 046b98b..b307ceb 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -431,11 +431,6 @@
# (1 = topmost level inside retr_info() = should always be level of tag $_TEXT_BODY)
my $rl = shift;
- my $dummy_anno;
- if ($use_tokenizer_sentence_splits) {
- $dummy_anno = $structures->new_dummy_annotation;
- }
-
# Iteration through all array elements
# ($_[0] is a reference to an array reference)
# See notes on how 'XML::CompactTree::XS' works and
@@ -449,15 +444,16 @@
# from here: tag-node (opening)
#~~~~
- my $anno;
-
# $e->[1] represents the tag name
+ # Skip "s" tags: add no annotation for them, but still recurse into $e->[$_IDX]
if ($use_tokenizer_sentence_splits && $e->[1] eq "s") {
- $anno = $dummy_anno;
- } else {
- $anno = $structures->add_new_annotation($e->[1]);
+ if (defined $e->[$_IDX]) {
+ retr_info($rl+1, \$e->[$_IDX]);
+ }
+ next;
}
+ my $anno = $structures->add_new_annotation($e->[1]);
# Add element also to token list
if ($_TOKENS_PROC && $e->[1] eq $_TOKENS_TAG) {