Support --inline-structures parameter Change-Id: I4e0e951f2f688e42b52818b86a22f7cb722e67dc

commit: dd0be8fc2e5774b207c2a92037ed0e5a28ecd59b [log] [tgz]
author: Akron <nils@diewald-online.de> Thu Feb 18 19:29:41 2021 +0100
committer: Akron <nils@diewald-online.de> Tue Feb 23 15:17:53 2021 +0100
tree: e5749495065cf196de4ba051690cebc7ee5d8255
parent: d658df73a6bd03ac1099a40733a1d7739035e3e7 [diff] [blame]
diff --git a/script/tei2korapxml b/script/tei2korapxml
index 785621e..9a0cbf9 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml

@@ -49,6 +49,7 @@
   'tokenizer-internal|ti' => \(my $tokenizer_intern), # use intern tokenization (default = no)
   'use-tokenizer-sentence-splits|s' => (\my $use_tokenizer_sentence_splits), # use KorAP tokenizer to split s (default=no)
   'inline-tokens=s' => \(my $inline_tokens = 'tokens#morpho'),
+  'inline-structures=s' => \(my $inline_structures = 'struct#structure'),
   'log|l=s' => \(my $log_level = 'notice'),
   'help|h'    => sub {
     pod2usage(
@@ -121,13 +122,17 @@
 
 my $_header_file     = "header.xml";                 # name of files      containing the  text, document and corpus header
 my $_data_file       = "data.xml";                   # name of file       containing the  primary text data (tokens)
-my $_structure_dir   = "struct";                     # name of directory  containing the  $_structure_file
-my $_structure_file  = "structure.xml";              # name of file       containing all  tags (except ${_TOKEN_TAG}'s) related information
-                                                     #                                     (= their names and byte offsets in $_data)
+
 ## TODO: optional (different annotation tools can produce more zip-files for feeding into KorAP-XML-Krill)
 my $_TOKENS_PROC     = 1;                            # on/off: processing of ${_TOKEN_TAG}'s (default: 1)
 
 
+# Name of the directory and the file containing all inline structure information,
+# except for $_TOKEN_TAG information
+my ($_structure_dir, $_structure_file) = split '#', $inline_structures . '#structure';
+$_structure_file .= '.xml';
+
+
 # Name of the directory and the file containing all inline token informations
 # i.e. tokens of the $_TOKENS_TAG, if $_TOKENS_PROC is set
 my ($_tokens_dir, $_tokens_file) = split '#', $inline_tokens . '#morpho';
@@ -714,6 +719,12 @@
 annotations as well.
 Defaults to C<tokens> and C<morpho>.
 
+=item B<--inline-structures> <foundry>#[<file>]
+
+Define the foundry and file (without extension)
+to store inline structure information in.
+Defaults to C<struct> and C<structure>.
+
 =item B<--use-tokenizer-sentence-splits|-s>
 
 Replace existing with, or add new, sentence boundary information
commit	dd0be8fc2e5774b207c2a92037ed0e5a28ecd59b	[log] [tgz]
author	Akron <nils@diewald-online.de>	Thu Feb 18 19:29:41 2021 +0100
committer	Akron <nils@diewald-online.de>	Tue Feb 23 15:17:53 2021 +0100
tree	e5749495065cf196de4ba051690cebc7ee5d8255
parent	d658df73a6bd03ac1099a40733a1d7739035e3e7 [diff] [blame]