Allow closing tags inside the text body to start with "text" (e.g. </textSigle>)
Change-Id: Ifa06a3e7ddf8f32ae80763501e30d4a9c7f05365
diff --git a/Changes b/Changes
index d04ebd3..7242737 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+2.4.3 2023-03-02
+ - Allow closing tags inside the text body to start with "text" (e.g. </textSigle>).
+
2.4.2 2023-02-10
- Improve checks for numerical annotation bounds.
diff --git a/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm b/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
index f38ba6c..0894a3c 100644
--- a/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
+++ b/lib/KorAP/XML/TEI/Tokenizer/KorAP.pm
@@ -4,7 +4,7 @@
use warnings;
use File::Share ':all';
-our $VERSION = '2.4.1';
+our $VERSION = '2.4.3';
use constant {
WAIT_SECS => 30
diff --git a/script/tei2korapxml b/script/tei2korapxml
index e043e9b..4e4c46e 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -24,7 +24,7 @@
use KorAP::XML::TEI::Header;
use KorAP::XML::TEI::Inline;
-our $VERSION = '2.4.1';
+our $VERSION = '2.4.3';
our $VERSION_MSG = "\ntei2korapxml - v$VERSION\n";
@@ -256,7 +256,7 @@
$_ = replace_entities($_);
# End of text body
- if ((my $pos = index($_, '</' . $_TEXT_BODY)) >= 0) {
+ if ((my $pos = index($_, "</$_TEXT_BODY>")) >= 0) {
# write data.xml, structure.xml and evtl. morpho.xml and/or tokenization files
diff --git a/t/data/text_with_textsigle_in_text.i5.xml b/t/data/text_with_textsigle_in_text.i5.xml
new file mode 100644
index 0000000..7a8af32
--- /dev/null
+++ b/t/data/text_with_textsigle_in_text.i5.xml
@@ -0,0 +1,9 @@
+<idsCorpus>
+ <idsHeader type="text">
+ <textSigle>CORP/DOC.00001</textSigle>
+ </idsHeader>
+ <text>
+ <textSigle>CORP/DOC.00001b</textSigle>
+ <p> This is a text, that starts with *2* (not "twenty-two" ;) blanks!</p>
+ </text>
+</idsCorpus>
diff --git a/t/script.t b/t/script.t
index 2847550..dfec340 100644
--- a/t/script.t
+++ b/t/script.t
@@ -798,4 +798,15 @@
)->stderr_like(qr!No tokenizer chosen!);
};
+subtest 'Test handling of textSigle in text' => sub {
+  # The body's <textSigle>CORP/DOC.00001b</textSigle> must neither end the text early nor pass as the text id (hence \. and \b).
+  my $t = test_tei2korapxml(
+    file => catfile($f, 'data', 'text_with_textsigle_in_text.i5.xml'),
+    tmp => 'script_out',
+    param => '-ti'
+  )
+  ->stderr_like(qr!tei2korapxml:.*? text_id=CORP_DOC\.00001\b!)
+  ->stderr_unlike(qr!line with closing text-body tag 'text' contains additional information!);
+};
+
done_testing;