Introduce --skip-inline-token-annotations parameter as replacement for KORAPXMLTEI_INLINE
Change-Id: I6eec3873f872b07ab60de720e5a198c6e453d1d6
diff --git a/Changes b/Changes
index 9a8ad95..d122f20 100644
--- a/Changes
+++ b/Changes
@@ -6,6 +6,9 @@
- Minor cleanups and improvements
- Introduce --skip-inline-tags parameter
- Introduce KorAP::XML::TEI::Inline class
+ - Introduce --skip-inline-token-annotations parameter
+ - Deprecate KORAPXMLTEI_INLINE environment variable
+ in favor of --skip-inline-token-annotations
1.00 2021-02-18 Release
- -s option added that uses sentence boundaries
diff --git a/Readme.pod b/Readme.pod
index 5bf127f..f9814a2 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -117,6 +117,12 @@
Boolean flag indicating that inline tokens should not
be processed. Defaults to false (meaning inline tokens will be processed).
+=item B<--skip-inline-token-annotations>
+
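+Boolean flag indicating that inline token annotations should not
+be processed. Defaults to true (meaning inline token annotations won't be
+processed), unless the deprecated C<KORAPXMLTEI_INLINE> environment variable is set to a true value, in which case it defaults to false.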
+
=item B<--skip-inline-tags> <tags>
Expects a comma-separated list of tags to be ignored when the structure
@@ -182,11 +188,6 @@
Activate minimal debugging.
Defaults to C<false>.
-=item B<KORAPXMLTEI_INLINE>
-
-Process inline annotations, if present.
-Defaults to C<false>.
-
=back
=head1 COPYRIGHT AND LICENSE
@@ -206,4 +207,4 @@
This program is free software published under the
L<BSD-2 License|https://opensource.org/licenses/BSD-2-Clause>.
-=cut
\ No newline at end of file
+=cut
diff --git a/script/tei2korapxml b/script/tei2korapxml
index 5740407..e0b07ea 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -38,6 +38,10 @@
DEBUG => $ENV{KORAPXMLTEI_DEBUG} // 0
};
+if ($ENV{KORAPXMLTEI_INLINE}) {
+ warn 'KORAPXMLTEI_INLINE is deprecated in favor of --skip-inline-token-annotations';
+};
+
# Parse options from the command line
GetOptions(
'root|r=s' => \(my $root_dir = '.'),
@@ -49,6 +53,8 @@
'inline-tokens=s' => \(my $inline_tokens = 'tokens#morpho'),
'inline-structures=s' => \(my $inline_structures = 'struct#structure'),
'skip-inline-tokens' => \(my $skip_inline_tokens = 0),
+ 'skip-inline-token-annotations:1' => \( # optional value: bare flag means 1, "=0" turns skipping off
+ my $skip_inline_token_annotations = ($ENV{KORAPXMLTEI_INLINE} ? 0 : 1)), # deprecated env var only changes the default
'skip-inline-tags=s' => \(my $skip_inline_tags_str = ''),
'base-foundry=s' => \(my $base_dir = 'base'),
'data-file=s' => \(my $data_file = 'data'),
@@ -130,9 +136,6 @@
# i.e. tokens of the $_TOKENS_TAG, if $_TOKENS_PROC is set
my ($_tokens_dir, $_tokens_file) = split '#', $inline_tokens . '#morpho';
-# Handling inline annotations (inside $_TOKENS_TAG)
-my $_INLINE_ANNOT = $ENV{KORAPXMLTEI_INLINE} ? 1 : 0;
-
# Initialize zipper
my $zipper = KorAP::XML::TEI::Zipper->new($root_dir);
@@ -282,7 +285,8 @@
$inline->tokens->to_zip(
$zipper->new_stream("$dir/$_tokens_dir/${_tokens_file}.xml"),
$text_id_esc,
- $_INLINE_ANNOT # Either 0 = tokens without inline or 1 = tokens with inline
+ # True = write tokens with inline annotations, false = tokens without
+ !$skip_inline_token_annotations
);
};
@@ -504,6 +508,12 @@
Boolean flag indicating that inline tokens should not
be processed. Defaults to false (meaning inline tokens will be processed).
+=item B<--skip-inline-token-annotations>
+
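+Boolean flag indicating that inline token annotations should not
+be processed. Defaults to true (meaning inline token annotations won't be
+processed), unless the deprecated C<KORAPXMLTEI_INLINE> environment variable is set to a true value, in which case it defaults to false.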
+
=item B<--skip-inline-tags> <tags>
Expects a comma-separated list of tags to be ignored when the structure
@@ -569,11 +579,6 @@
Activate minimal debugging.
Defaults to C<false>.
-=item B<KORAPXMLTEI_INLINE>
-
-Process inline annotations, if present.
-Defaults to C<false>.
-
=back
=head1 COPYRIGHT AND LICENSE
diff --git a/t/script.t b/t/script.t
index 6f1d2d1..614ba1d 100644
--- a/t/script.t
+++ b/t/script.t
@@ -506,6 +506,7 @@
tmp => 'script_tagged'
)
->stderr_like(qr!tei2korapxml:.*? text_id=GOE_AGA\.00000!)
+ ->stderr_like(qr!KORAPXMLTEI_INLINE is deprecated!)
# Check zip using xml loy
->unzip_xml('GOE/AGA/00000/tokens/morpho.xml')
@@ -584,11 +585,11 @@
my $t = test_tei2korapxml(
file => $file,
- env => 'KORAPXMLTEI_INLINE=1',
tmp => 'script_tagged',
- param => '--inline-tokens=myfoundry#myfile'
+ param => '--inline-tokens=myfoundry#myfile --skip-inline-token-annotations=0'
)
->stderr_like(qr!tei2korapxml:.*? text_id=GOE_AGA\.00000!)
+ ->stderr_unlike(qr!KORAPXMLTEI_INLINE is deprecated!)
->file_exists_not('GOE/AGA/00000/tokens/morpho.xml', 'Morpho not generated')
@@ -604,9 +605,8 @@
$t = test_tei2korapxml(
file => $file,
- env => 'KORAPXMLTEI_INLINE=1',
tmp => 'script_tagged',
- param => '--inline-tokens=myfoundry'
+ param => '--inline-tokens=myfoundry --skip-inline-token-annotations=0'
)
->stderr_like(qr!tei2korapxml:.*? text_id=GOE_AGA\.00000!)
@@ -632,14 +632,14 @@
# Generate zip file (unportable!)
stderr_like(
- sub { `cat '$file' | KORAPXMLTEI_INLINE=1 perl '$script' > '$outzip'` },
+ sub { `cat '$file' | perl '$script' --skip-inline-token-annotations=0 > '$outzip'` },
qr!tei2korapxml:.*? text_id=GOE_AGA\.00000!,
'Processing 1'
);
# TODO: there should be a better way to test this
stderr_unlike(
- sub { `cat '$file' | KORAPXMLTEI_INLINE=1 perl '$script' > '$outzip'` },
+ sub { `cat '$file' | perl '$script' --skip-inline-token-annotations=0 > '$outzip'` },
qr!.*undefined value.*!,
'Processing 2'
);
@@ -666,8 +666,8 @@
# Load example file
test_tei2korapxml(
file => catfile($f, 'data', 'goe_sample.i5.xml'),
- env => 'KORAPXMLTEI_INLINE=1',
- tmp => 'script_utf8_enc'
+ tmp => 'script_utf8_enc',
+ param => '--skip-inline-token-annotations=0',
)
->stderr_like(qr!tei2korapxml:.*? text_id=GOE_AGA\.00000!)
->unzip_xml('GOE/AGA/00000/data.xml')
@@ -677,7 +677,7 @@
test_tei2korapxml(
file => catfile($f, 'data', 'goe_sample.i5.iso.xml'),
- env => 'KORAPXMLTEI_INLINE=1',
+ param => '--skip-inline-token-annotations=0',
tmp => 'script_iso_enc'
)
->stderr_like(qr!tei2korapxml:.*? text_id=GOE_AGA\.00000!)