Fix parser error when closing body and text tags are on the same line
Previously, tei2korapxml aborted on such input with the following output:

######.............. 33.4% ETA: 1:07
input line number 869761: line with closing text-body tag 'text'
contains additional information ... => Aborting (line= </body> </text>
)
input line number 869761: line with closing text-body tag 'text'
contains additional information ... => Aborting (line=
</body> </text>
) at /opt/perl/perlbrew/perls/perl-5.38.2/bin/tei2korapxml line 314,
<$input_fh> line 869761.
Change-Id: I37facd51e6906760c2ab0c35bd6971145b76c513
diff --git a/t/script.t b/t/script.t
index 2989e72..b892236 100644
--- a/t/script.t
+++ b/t/script.t
@@ -809,6 +809,54 @@
->stderr_unlike(qr!line with closing text-body tag 'text' contains additional information!);
};
+subtest 'Handling of closing body and text tags on same line' => sub {
+ # Regression test: such input used to abort with "line with closing text-body tag 'text' contains additional information".
+ # Create a custom test file where </body> and </text> are on the same line
+ my ($fh, $testfile) = korap_tempfile('script_closing_tags');
+ print $fh <<'XML';
+<?xml version="1.0" encoding="UTF-8"?>
+<idsCorpus>
+ <idsHeader type="corpus">
+ <fileDesc>
+ <titleStmt>
+ <korpusSigle>AAA</korpusSigle>
+ </titleStmt>
+ </fileDesc>
+ </idsHeader>
+ <idsDoc version="1.0">
+ <idsHeader type="document">
+ <fileDesc>
+ <titleStmt>
+ <dokumentSigle>AAA/BBB</dokumentSigle>
+ </titleStmt>
+ </fileDesc>
+ </idsHeader>
+ <idsText version="1.0">
+ <idsHeader type="text">
+ <fileDesc>
+ <titleStmt>
+ <textSigle>AAA/BBB.00000</textSigle>
+ </titleStmt>
+ </fileDesc>
+ </idsHeader>
+ <text>
+ <body><p>some text</p>
+ </body> </text>
+ </idsText>
+ </idsDoc>
+</idsCorpus>
+XML
+ close($fh);
+ # Run the fixture through test_tei2korapxml; stderr must report the text ID and must not contain the abort message.
+ test_tei2korapxml(
+ file => $testfile,
+ tmp => 'script_closing_tags_out',
+ param => '-ti'
+ )
+ ->stderr_like(qr!tei2korapxml:.*? text_id=AAA_BBB\.00000!)
+ ->stderr_unlike(qr!line with closing text-body tag 'text' contains additional information!);
+};
+
subtest 'Handling of whitespace at linebreaks' => sub {
my $t = test_tei2korapxml(
file => catfile($f, 'data', 'stadigmer.p5.xml'),