Cleanup: Simplify header types

Change-Id: I95ce4f8bf56c2dcf0cd8db504a0874313abb84e7
diff --git a/script/tei2korapxml b/script/tei2korapxml
index aad3006..046b98b 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -80,14 +80,12 @@
 #
 # ~~~ parameter (mandatory) ~~~
 #
-my $_TEXT_BODY        = "text";                        # tag (without attributes), which contains the primary text
+# Name of the tag (without attributes) that contains the primary text
+my $_TEXT_BODY = 'text';
 # optional
-my $_CORP_HEADER_BEG  = "idsHeader type=\"corpus\"";   # just keep the correct order of the attributes and evtl. add an '.*' between them
-# optional
-my $_DOC_HEADER_BEG   = "idsHeader type=\"document\""; # analog
-# mandatory
-my $_TEXT_HEADER_BEG  = "idsHeader type=\"text\"";     # analog
 
+# TODO: IDS-specific (and redundant) tag name shared by all header elements
+my $_HEADER_TAG = 'idsHeader';
 
 if ($use_tokenizer_sentence_splits && !$tokenizer_korap) {
   die $log->fatal("Sentence splitting is currently only supported by KorAP tokenizer (use -tk to activate it");
@@ -182,14 +180,6 @@
 
 $fval = 0;
 
-# Normalize regex for header parsing
-for ($_CORP_HEADER_BEG,
-     $_DOC_HEADER_BEG,
-     $_TEXT_HEADER_BEG) {
-  s!^([^\s]+)(.*)$!$1\[\^>\]*$2!;
-};
-
-
 # ~ read input and write output (text by text) ~
 
 my $tl = 0; # text line (needed for whitespace handling)
@@ -387,7 +377,7 @@
       $buf_in .= $_;
     };
 
-  } elsif (m#^(.*)(<(?:${_TEXT_HEADER_BEG}|${_DOC_HEADER_BEG}|${_CORP_HEADER_BEG}).*)$#) {
+  } elsif (m#^(.*)(\<${_HEADER_TAG}[^>]*?type=["'].*)$#) {
 
     # ~ start of header ~
     my $content = "$2\n";