Introduce --skip-inline-tags parameter

Change-Id: Icc6317a7076b5164c16b8cfd63b2758445aff71c
diff --git a/script/tei2korapxml b/script/tei2korapxml
index fb58d8e..af1bc05 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -62,6 +62,7 @@
   'inline-tokens=s'       => \(my $inline_tokens = 'tokens#morpho'),
   'inline-structures=s'   => \(my $inline_structures = 'struct#structure'),
   'skip-inline-tokens'    => \(my $skip_inline_tokens = 0),
+  'skip-inline-tags=s'    => \(my $skip_inline_tags_str = ''),
   'base-foundry=s'        => \(my $base_dir    = 'base'),
   'data-file=s'           => \(my $data_file   = 'data'),
   'header-file=s'         => \(my $header_file = 'header'),
@@ -110,6 +111,14 @@
     );
 };
 
+# Collect inline tags whose markup is skipped (their content is still processed)
+my %skip_inline_tags = ();
+if ($skip_inline_tags_str) {
+  foreach (split /\s*,\s*/, $skip_inline_tags_str) {
+    $skip_inline_tags{$_} = 1;
+  };
+};
+
 # External tokenization
 my $ext_tok;
 if ($tokenizer_call) {
@@ -118,6 +127,9 @@
 
 elsif ($tokenizer_korap) {
   $ext_tok = KorAP::XML::TEI::Tokenizer::KorAP->new($use_tokenizer_sentence_splits);
+  if ($use_tokenizer_sentence_splits) {
+    $skip_inline_tags{s} = 1;
+  };
 };
 
 
@@ -444,8 +456,8 @@
       # This is likely to be optimized away by the compiler.
       my $children = $e->[DEBUG ? 5 : 4];
 
-      # Skip sentences
-      if ($use_tokenizer_sentence_splits && $node_info eq 's') {
+      # Skip certain tags
+      if ($skip_inline_tags{$node_info}) {
         descend($depth + 1, $children) if defined $children;
         next;
       };
@@ -675,6 +687,11 @@
 Boolean flag indicating that inline tokens should not
 be processed. Defaults to false (meaning inline tokens will be processed).
 
+=item B<--skip-inline-tags> <tags>
+
+Expects a comma-separated list of tags to be ignored when the structure
+is parsed. The content of these tags, however, will still be processed.
+
 =item B<--inline-tokens> <foundry>#[<file>]
 
 Define the foundry and file (without extension)