Improve support for text sigles containing underscores

Change-Id: I86204f2aae32834e1592335dca83f40cbf46f559
diff --git a/Changes b/Changes
index 63f3079..c6c3553 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,7 @@
+0.48 2022-11-10
+        - Improve support for text sigles that contain
+          underscores in the corpus part.
+
 0.47 2022-08-08
         - Support for preferred language transformation.
         - Support for NKJP taxonomies.
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index 564cc14..ebe55c1 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -16,7 +16,7 @@
 
 our @EXPORT_OK = qw(get_file_name get_file_name_from_glob);
 
-our $VERSION = '0.47';
+our $VERSION = '0.48';
 
 has 'path';
 has [qw/text_sigle doc_sigle corpus_sigle/];
@@ -86,7 +86,7 @@
 
   # Get document id and corpus id
   if ($rt && $rt->{'-docid'}) {
-    if ($rt->{'-docid'} =~ /^([^_]+)_([^\._]+?)\.(.+?)$/) {
+    if ($rt->{'-docid'} =~ /^(.+?)_([^\._]+)\.(.+)$/) {
       $self->text_sigle(join('/', $1, $2, $3));
       $self->doc_sigle(join('/', $1, $2));
       $self->corpus_sigle($1);
diff --git a/t/corpus/artificial/base/paragraph.xml b/t/corpus/artificial/base/paragraph.xml
index be19d62..6aee3cc 100644
--- a/t/corpus/artificial/base/paragraph.xml
+++ b/t/corpus/artificial/base/paragraph.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<layer docid="ART_00001" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+<layer docid="A_RT_ABC.00001" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
   <spanList>
     <span from="0" to="129" />
   </spanList>
diff --git a/t/corpus/artificial/base/sentences.xml b/t/corpus/artificial/base/sentences.xml
index be19d62..6aee3cc 100644
--- a/t/corpus/artificial/base/sentences.xml
+++ b/t/corpus/artificial/base/sentences.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<layer docid="ART_00001" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
+<layer docid="A_RT_ABC.00001" xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4">
   <spanList>
     <span from="0" to="129" />
   </spanList>
diff --git a/t/corpus/artificial/connexor/morpho.xml b/t/corpus/artificial/connexor/morpho.xml
index 637ec63..5b8d30d 100644
--- a/t/corpus/artificial/connexor/morpho.xml
+++ b/t/corpus/artificial/connexor/morpho.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="ART_00001">
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="A_RT_ABC.00001">
   <spanList>
     <span id="s8" from="0" to="2">
       <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
diff --git a/t/corpus/artificial/connexor/phrase.xml b/t/corpus/artificial/connexor/phrase.xml
index 2b7636b..61943d4 100644
--- a/t/corpus/artificial/connexor/phrase.xml
+++ b/t/corpus/artificial/connexor/phrase.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="ART_00001">
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="A_RT_ABC.00001">
   <spanList>
     <span from="4" to="30">
       <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
diff --git a/t/corpus/artificial/connexor/sentences.xml b/t/corpus/artificial/connexor/sentences.xml
index 9dfe84b..bf3b270 100644
--- a/t/corpus/artificial/connexor/sentences.xml
+++ b/t/corpus/artificial/connexor/sentences.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="ART_00001">
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="A_RT_ABC.00001">
   <spanList>
     <span from="0" to="129" />
   </spanList>
diff --git a/t/corpus/artificial/connexor/syntax.xml b/t/corpus/artificial/connexor/syntax.xml
index 5b69c0d..6a1529a 100644
--- a/t/corpus/artificial/connexor/syntax.xml
+++ b/t/corpus/artificial/connexor/syntax.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="A01_APR.13047">
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" version="KorAP-0.4" docid="A_RT_ABC.00001">
   <spanList>
     <span id="s8" from="0" to="2">
       <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
diff --git a/t/corpus/artificial/corenlp/constituency.xml b/t/corpus/artificial/corenlp/constituency.xml
index 37a40db..356bb4b 100644
--- a/t/corpus/artificial/corenlp/constituency.xml
+++ b/t/corpus/artificial/corenlp/constituency.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A01_APR.13047" version="KorAP-0.4">
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A_RT_ABC.00001" version="KorAP-0.4">
    <spanList>
       <span id="s1_n1" from="0" to="42">
          <fs xmlns="http://www.tei-c.org/ns/1.0" type="node">
diff --git a/t/corpus/artificial/corenlp/morpho.xml b/t/corpus/artificial/corenlp/morpho.xml
index 6712c0a..fb6e31e 100644
--- a/t/corpus/artificial/corenlp/morpho.xml
+++ b/t/corpus/artificial/corenlp/morpho.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="ART_00001" version="KorAP-0.4">
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A_RT_ABC.00001" version="KorAP-0.4">
    <spanList>
       <span id="s2_n4" from="0" to="3">
          <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
diff --git a/t/corpus/artificial/corenlp/ne_dewac_175m_600.xml b/t/corpus/artificial/corenlp/ne_dewac_175m_600.xml
index 6359ac8..fef3efc 100644
--- a/t/corpus/artificial/corenlp/ne_dewac_175m_600.xml
+++ b/t/corpus/artificial/corenlp/ne_dewac_175m_600.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="ART_00001" version="KorAP-0.4">
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A_RT_ABC.00001" version="KorAP-0.4">
    <spanList>
       <span id="s_18" from="64" to="73">
          <fs xmlns="http://www.tei-c.org/ns/1.0" type="ne">
diff --git a/t/corpus/artificial/corenlp/ne_hgc_175m_600.xml b/t/corpus/artificial/corenlp/ne_hgc_175m_600.xml
index 6359ac8..fef3efc 100644
--- a/t/corpus/artificial/corenlp/ne_hgc_175m_600.xml
+++ b/t/corpus/artificial/corenlp/ne_hgc_175m_600.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="ART_00001" version="KorAP-0.4">
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A_RT_ABC.00001" version="KorAP-0.4">
    <spanList>
       <span id="s_18" from="64" to="73">
          <fs xmlns="http://www.tei-c.org/ns/1.0" type="ne">
diff --git a/t/corpus/artificial/corenlp/sentences.xml b/t/corpus/artificial/corenlp/sentences.xml
index 4975944..52c4ace 100644
--- a/t/corpus/artificial/corenlp/sentences.xml
+++ b/t/corpus/artificial/corenlp/sentences.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="ART_00001" version="KorAP-0.4">
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A_RT_ABC.00001" version="KorAP-0.4">
    <spanList>
       <span from="0" to="129"/>
    </spanList>
diff --git a/t/corpus/artificial/corenlp/tokens.xml b/t/corpus/artificial/corenlp/tokens.xml
index 78f562b..3a19e5f 100644
--- a/t/corpus/artificial/corenlp/tokens.xml
+++ b/t/corpus/artificial/corenlp/tokens.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A01_APR.13047" version="KorAP-0.4">
+<layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A_RT_ABC.00001" version="KorAP-0.4">
    <spanList>
       <span id="s1_n4" from="0" to="7"/>
       <span id="s1_n6" from="8" to="11"/>
diff --git a/t/corpus/artificial/data.xml b/t/corpus/artificial/data.xml
index 6b5af0f..1e5b88c 100644
--- a/t/corpus/artificial/data.xml
+++ b/t/corpus/artificial/data.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="text.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<raw_text docid="ART_ABC.00001" xmlns="http://ids-mannheim.de/ns/KorAP">
+<raw_text docid="A_RT_ABC.00001" xmlns="http://ids-mannheim.de/ns/KorAP">
   <metadata file="metadata.xml" />
   <text>Zum letzten kulturellen Anlass lädt die Leitung des Schulheimes Hofbergli ein, bevor der Betrieb Ende Schuljahr eingestellt wird.</text>
 </raw_text>
diff --git a/t/corpus/artificial/header.xml b/t/corpus/artificial/header.xml
index 10e14f8..a01fb11 100644
--- a/t/corpus/artificial/header.xml
+++ b/t/corpus/artificial/header.xml
@@ -4,7 +4,7 @@
 <idsHeader type="text" pattern="text" status="new" version="1.1" TEIform="teiHeader">
   <fileDesc>
     <titleStmt>
-      <textSigle>ART/ABC.00001</textSigle>
+      <textSigle>A_RT/ABC.00001</textSigle>
       <t.title assemblage="regular"/>
     </titleStmt>
     <publicationStmt>
diff --git a/t/corpus/artificial/metadata.xml b/t/corpus/artificial/metadata.xml
index 0ac19ba..16e8ef4 100644
--- a/t/corpus/artificial/metadata.xml
+++ b/t/corpus/artificial/metadata.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="metadata.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
 
-<metadata docid="ART_ABC.0001" type="document" xmlns="http://ids-mannheim.de/ns/KorAP">
+<metadata docid="A_RT_ABC.0001" type="document" xmlns="http://ids-mannheim.de/ns/KorAP">
   <doc file="data.xml" />
   <!--
   <foundry name="corenlp" path="corenlp" />
diff --git a/t/corpus/artificial/opennlp/morpho.xml b/t/corpus/artificial/opennlp/morpho.xml
index 1145c4e..92c96ba 100644
--- a/t/corpus/artificial/opennlp/morpho.xml
+++ b/t/corpus/artificial/opennlp/morpho.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?><?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?><layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A01_APR.13047" VERSION="KorAP-0.4">
+<?xml version="1.0" encoding="UTF-8"?><?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?><layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A_RT_ABC.00001" VERSION="KorAP-0.4">
    <spanList>
       <span id="s_7" from="0" to="3">
          <fs xmlns="http://www.tei-c.org/ns/1.0" type="lex">
diff --git a/t/corpus/artificial/opennlp/sentences.xml b/t/corpus/artificial/opennlp/sentences.xml
index fcb2188..4ac4474 100644
--- a/t/corpus/artificial/opennlp/sentences.xml
+++ b/t/corpus/artificial/opennlp/sentences.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?><?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?><layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A01_APR.13047" VERSION="KorAP-0.4">
+<?xml version="1.0" encoding="UTF-8"?><?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?><layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A_RT_ABC.00001" VERSION="KorAP-0.4">
    <spanList>
       <span from="0" to="129"/>
    </spanList>
diff --git a/t/corpus/artificial/opennlp/tokens.xml b/t/corpus/artificial/opennlp/tokens.xml
index b181a49..b147135 100644
--- a/t/corpus/artificial/opennlp/tokens.xml
+++ b/t/corpus/artificial/opennlp/tokens.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?><?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?><layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="ART_00001" VERSION="KorAP-0.4">
+<?xml version="1.0" encoding="UTF-8"?><?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?><layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A_RT/ABC.00001" VERSION="KorAP-0.4">
 <spanList>
       <span id="s_7" from="0" to="3"/>
       <span id="s_8" from="4" to="11"/>
diff --git a/t/corpus/artificial/opennlp/tokens_wrong.xml b/t/corpus/artificial/opennlp/tokens_wrong.xml
index 781dc6e..b490119 100644
--- a/t/corpus/artificial/opennlp/tokens_wrong.xml
+++ b/t/corpus/artificial/opennlp/tokens_wrong.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?><?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?><layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="ART_00001" VERSION="KorAP-0.4">
+<?xml version="1.0" encoding="UTF-8"?><?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?><layer xmlns="http://ids-mannheim.de/ns/KorAP" docid="A_RT/ABC.00001" VERSION="KorAP-0.4">
 <spanList>
       <span id="s_7" from="0" to="3"/>
       <span id="s_8" from="4" to="11"/>
diff --git a/t/corpus/artificial/xip/constituency.xml b/t/corpus/artificial/xip/constituency.xml
index 7db3ada..1d4de2a 100644
--- a/t/corpus/artificial/xip/constituency.xml
+++ b/t/corpus/artificial/xip/constituency.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
-<layer docid="A01_APR.13047" version="KorAP-0.4" xmlns="http://ids-mannheim.de/ns/KorAP">
+<layer docid="A_RT/ABC.00001" version="KorAP-0.4" xmlns="http://ids-mannheim.de/ns/KorAP">
   <spanList> 
     <span from="0" id="s2_n40" to="130">
       <fs type="node" xmlns="http://www.tei-c.org/ns/1.0">
diff --git a/t/corpus/artificial/xip/dependency.xml b/t/corpus/artificial/xip/dependency.xml
index af50dc8..4885d1e 100644
--- a/t/corpus/artificial/xip/dependency.xml
+++ b/t/corpus/artificial/xip/dependency.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
-<layer docid="A01_APR.13047" version="KorAP-0.4" xmlns="http://ids-mannheim.de/ns/KorAP">
+<layer docid="A_RT/ABC.00001" version="KorAP-0.4" xmlns="http://ids-mannheim.de/ns/KorAP">
   <spanList> 
     <span from="0" id="s2_n57" to="30">
       <rel label="VMOD">
diff --git a/t/corpus/artificial/xip/morpho.xml b/t/corpus/artificial/xip/morpho.xml
index 72eeec0..b884cdb 100644
--- a/t/corpus/artificial/xip/morpho.xml
+++ b/t/corpus/artificial/xip/morpho.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
-<layer docid="A01_APR.13047" version="KorAP-0.4" xmlns="http://ids-mannheim.de/ns/KorAP">
+<layer docid="A_RT/ABC.00001" version="KorAP-0.4" xmlns="http://ids-mannheim.de/ns/KorAP">
   <spanList>
     <span from="0" id="s2_n0" to="3">
       <fs type="lex" xmlns="http://www.tei-c.org/ns/1.0">
diff --git a/t/corpus/artificial/xip/sentences.xml b/t/corpus/artificial/xip/sentences.xml
index 18e7dbc..390a3c4 100644
--- a/t/corpus/artificial/xip/sentences.xml
+++ b/t/corpus/artificial/xip/sentences.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-model href="span.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
-<layer docid="A01_APR.13047" version="KorAP-0.4" xmlns="http://ids-mannheim.de/ns/KorAP">
+<layer docid="A_RT/ABC.00001" version="KorAP-0.4" xmlns="http://ids-mannheim.de/ns/KorAP">
   <spanList> 
     <span from="0" to="130" />
   </spanList>
diff --git a/t/meta_artificial.t b/t/meta_artificial.t
index e6c3d10..6cd33cc 100644
--- a/t/meta_artificial.t
+++ b/t/meta_artificial.t
@@ -28,8 +28,8 @@
 # Metdata
 is($meta->{T_title}, 'Artificial Title', 'title');
 is($meta->{T_sub_title}, 'Artificial Subtitle', 'subTitle');
-is($doc->text_sigle, 'ART/ABC/00001', 'ID');
-is($doc->corpus_sigle, 'ART', 'corpusID');
+is($doc->text_sigle, 'A_RT/ABC/00001', 'ID');
+is($doc->corpus_sigle, 'A_RT', 'corpusID');
 is($meta->{D_pub_date}, '20010402', 'pubDate');
 is($meta->{S_pub_place}, 'Mannheim', 'pubPlace');
 is($meta->{S_pub_place_key}, 'DE', 'pubPlace key');
diff --git a/t/tar_builder.t b/t/tar_builder.t
index 9413440..9cff269 100644
--- a/t/tar_builder.t
+++ b/t/tar_builder.t
@@ -33,10 +33,10 @@
 ok($tar_read->contains_file('example2.xml'), 'File exists');
 
 my $content = $tar_read->get_content('example1.xml');
-like($content, qr!ART_ABC\.00001!, 'Content is correct');
+like($content, qr!A_RT_ABC\.00001!, 'Content is correct');
 
 $content = $tar_read->get_content('example2.xml');
-like($content, qr!ART\/ABC\.00001!, 'Content is correct');
+like($content, qr!A_RT\/ABC\.00001!, 'Content is correct');
 
 
 
@@ -74,10 +74,10 @@
   ok($tar_read->contains_file('example2.xml'), 'File exists');
 
   $content = $tar_read->get_content('example1.xml');
-  like($content, qr!ART_ABC\.00001!, 'Content is correct');
+  like($content, qr!A_RT_ABC\.00001!, 'Content is correct');
 
   $content = $tar_read->get_content('example2.xml');
-  like($content, qr!ART\/ABC\.00001!, 'Content is correct');
+  like($content, qr!A_RT\/ABC\.00001!, 'Content is correct');
 }
 else {
   diag 'Archive::Tar::Builder not installed.';