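Improve handling of unknown header types

Accept type="doc" as an alias of "document" when mapping header types
to sigle types (both map to dokumentSigle), and log an error whenever a
header carries a type that is not in the mapping. The goe_sample test
data now uses type="doc" to exercise the alias.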

Change-Id: I5c1f0d12c9dcb11421745c0ca55865e4efc388db
diff --git a/Changes b/Changes
index f30ae16..c2efa8d 100644
--- a/Changes
+++ b/Changes
@@ -10,6 +10,7 @@
         - Deprecate KORAPXMLTEI_INLINE environment variable
           in favor of --skip-inline-token-annotations
         - Improve script handling of broken data
+        - Improve handling of unknown header types
 
 1.00 2021-02-18 Release
         - -s option added that uses sentence boundaries
diff --git a/lib/KorAP/XML/TEI/Header.pm b/lib/KorAP/XML/TEI/Header.pm
index 870a2f9..a5adac1 100644
--- a/lib/KorAP/XML/TEI/Header.pm
+++ b/lib/KorAP/XML/TEI/Header.pm
@@ -25,6 +25,7 @@
 # convert header type to sigle type
 our %sig = (
   corpus   => 'korpusSigle',
+  doc      => 'dokumentSigle',
   document => 'dokumentSigle',
   text     => 'textSigle'
 );
@@ -39,6 +40,10 @@
   # Check header types to distinguish between siglen types
   if ($text =~ m!^<${_HEADER_TAG}\s+[^<]*type="([^"]+)"!) {
     $self->[HEADTYPE] = $1;
+
+    unless (exists $sig{$1}) {
+      $log->error("Unknown header type '$1' - treated as textSigle");
+    };
   }
 
   # Unexpected header init
diff --git a/t/data/goe_sample.i5.xml b/t/data/goe_sample.i5.xml
index e29daa5..6b52bcd 100644
--- a/t/data/goe_sample.i5.xml
+++ b/t/data/goe_sample.i5.xml
@@ -296,7 +296,7 @@
   </profileDesc>
  </idsHeader>
  <idsDoc type="text" version="1.0" TEIform="TEI.2">
-  <idsHeader type="document" pattern="text" status="new" version="1.1" TEIform="teiHeader">
+  <idsHeader type="doc" pattern="text" status="new" version="1.1" TEIform="teiHeader">
    <fileDesc>
     <titleStmt>
      <dokumentSigle>GOE/AGA</dokumentSigle>