Add --auto-textsigle <start-sigle> option

Also allows for processing plain TEI P5 files without any IDs.

Change-Id: Ic16b089c916d2e50458aa1aa6cb80ce4d37d97ba
diff --git a/Changes b/Changes
index 191d067..a487f61 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,7 @@
 2.6.0 2024-09-19
         - Add -o parameter.
         - Add support for inline dependency relations.
+        - Add support for --auto-textsigle.
 
 2.5.0 2024-01-24
         - Upgrade minimal Perl version to 5.36 to improve
diff --git a/Readme.pod b/Readme.pod
index 1c95540..e890733 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -165,6 +165,17 @@
 Expects a comma-separated list of tags to be ignored when the structure
 is parsed. Content of these tags however will be processed.
 
+=item B<--auto-textsigle> <textsigle>
+
+Expects a text sigle that serves as fallback if no text sigles
+are given in the input data.
+The auto text sigle will be incremented for each text processed.
+
+Example:
+
+  tei2korapxml --auto-textsigle 'ICC/GER.00001' -s -tk - \
+  < data.i5.xml > korapxml.zip
+
 =item B<--xmlid-to-textsigle> <from-regex>@<to-c/to-d/to-t>
 
 Expects a regular replacement expression (separated by B<@> between the
diff --git a/lib/KorAP/XML/TEI.pm b/lib/KorAP/XML/TEI.pm
index 1111c8b..f7768e7 100644
--- a/lib/KorAP/XML/TEI.pm
+++ b/lib/KorAP/XML/TEI.pm
@@ -4,7 +4,7 @@
 use warnings;
 
 use Exporter 'import';
-our @EXPORT_OK = qw(remove_xml_comments escape_xml escape_xml_minimal replace_entities);
+our @EXPORT_OK = qw(remove_xml_comments escape_xml escape_xml_minimal replace_entities increase_auto_textsigle);
 
 # convert '&', '<' and '>' into their corresponding sgml-entities
 my %ent_without_quot = (
@@ -180,4 +180,16 @@
   return($_);
 };
 
+# Increment the trailing number of a text sigle (e.g. 'ICC/GER.00001' ->
+# 'ICC/GER.00002'); sigles without trailing digits are returned unchanged.
+sub increase_auto_textsigle {
+  my $sigle = shift;
+  if ($sigle =~ /(\d+)$/) {
+    # String increment keeps zero padding and cannot overflow like '%d'.
+    my $next = $1;
+    $next++;
+    $sigle =~ s/\d+$/$next/;
+  }
+  return $sigle;
+}
 1;
diff --git a/script/tei2korapxml b/script/tei2korapxml
index 418408e..86f7527 100755
--- a/script/tei2korapxml
+++ b/script/tei2korapxml
@@ -6,6 +6,7 @@
 use Log::Any::Adapter;
 use Pod::Usage;
 use Getopt::Long qw(GetOptions :config no_auto_abbrev);
+use KorAP::XML::TEI qw(increase_auto_textsigle);
 
 use File::Basename qw(dirname);
 
@@ -45,6 +46,7 @@
 
 # Parse options from the command line
 GetOptions(
+  'auto-textsigle|A=s'    => \(my $auto_textsigle = ''),
   'root|r=s'              => \(my $root_dir    = '.'),
   'input|i=s'             => \(my $input_fname = ''),
   'output|o=s'            => \(my $output_fname = ''),
@@ -460,8 +462,11 @@
     };
 
     # Parse header
-    my $header = KorAP::XML::TEI::Header->new($content, $input_enc, $text_id_esc)->parse($input_fh);
-
+    my $header = KorAP::XML::TEI::Header->new($content, $input_enc, $text_id_esc // $auto_textsigle)->parse($input_fh);
+    if ($auto_textsigle) {
+      $auto_textsigle = increase_auto_textsigle($auto_textsigle);
+      $log->debug("Auto-incremented text sigle to $auto_textsigle");
+    };
     # Header was parseable
     if ($header) {
 
@@ -666,6 +671,17 @@
 Expects a comma-separated list of tags to be ignored when the structure
 is parsed. Content of these tags however will be processed.
 
+=item B<--auto-textsigle> <textsigle>
+
+Expects a text sigle that serves as fallback if no text sigles
+are given in the input data.
+The auto text sigle will be incremented for each text processed.
+
+Example:
+
+  tei2korapxml --auto-textsigle 'ICC/GER.00001' -s -tk - \
+  < data.i5.xml > korapxml.zip
+
 =item B<--xmlid-to-textsigle> <from-regex>@<to-c/to-d/to-t>
 
 Expects a regular replacement expression (separated by B<@> between the