Support --inline-structures parameter
Change-Id: I4e0e951f2f688e42b52818b86a22f7cb722e67dc
diff --git a/script/tei2korapxml b/script/tei2korapxml
index 785621e..9a0cbf9 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -49,6 +49,7 @@
'tokenizer-internal|ti' => \(my $tokenizer_intern), # use intern tokenization (default = no)
'use-tokenizer-sentence-splits|s' => (\my $use_tokenizer_sentence_splits), # use KorAP tokenizer to split s (default=no)
'inline-tokens=s' => \(my $inline_tokens = 'tokens#morpho'),
+ 'inline-structures=s' => \(my $inline_structures = 'struct#structure'),
'log|l=s' => \(my $log_level = 'notice'),
'help|h' => sub {
pod2usage(
@@ -121,13 +122,17 @@
my $_header_file = "header.xml"; # name of files containing the text, document and corpus header
my $_data_file = "data.xml"; # name of file containing the primary text data (tokens)
-my $_structure_dir = "struct"; # name of directory containing the $_structure_file
-my $_structure_file = "structure.xml"; # name of file containing all tags (except ${_TOKEN_TAG}'s) related information
- # (= their names and byte offsets in $_data)
+
## TODO: optional (different annotation tools can produce more zip-files for feeding into KorAP-XML-Krill)
my $_TOKENS_PROC = 1; # on/off: processing of ${_TOKEN_TAG}'s (default: 1)
+# Name of the directory and the file containing all inline structure information
+# except for $_TOKEN_TAG information
+my ($_structure_dir, $_structure_file) = split '#', $inline_structures . '#structure';
+$_structure_file .= '.xml';
+
+
# Name of the directory and the file containing all inline token informations
# i.e. tokens of the $_TOKENS_TAG, if $_TOKENS_PROC is set
my ($_tokens_dir, $_tokens_file) = split '#', $inline_tokens . '#morpho';
@@ -714,6 +719,12 @@
annotations as well.
Defaults to C<tokens> and C<morpho>.
+=item B<--inline-structures> <foundry>#[<file>]
+
+Define the foundry and file (without extension)
+to store inline structure information in.
+Defaults to C<struct> and C<structure>.
+
=item B<--use-tokenizer-sentence-splits|-s>
Replace existing with, or add new, sentence boundary information