Support K2K_PUBLISHER_STRING

Change-Id: I11c7333fd55f80ed9d868fe7041f8e11da18d238
diff --git a/Changes b/Changes
index 7caba31..69be794 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,8 @@
+0.63 2025-10-17
+        - Publisher is now indexed as a string when the
+          K2K_PUBLISHER_STRING environment variable is
+          set to a true value.
+
 0.62 2025-07-15
         - Remove lock from tar builder.
         - Don't create a list of files (that is passed to forks).
diff --git a/Readme.pod b/Readme.pod
index e272014..86d50ac 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -329,6 +329,7 @@
 Writes the output into a tar archive.
 The tar needs to be opened with C<--ignore-zeros> afterwards.
 
+
 =item B<--sigle|-sg>
 
 Extract the given texts.
@@ -373,6 +374,8 @@
 
 There are some ways to improve performance for large tasks:
 
+=over 2
+
 =item First unpack
 
 Using the archive or serial command on one or multiple zip files
@@ -396,6 +399,7 @@
 For full extraction of data, L<ripunzip|https://github.com/google/ripunzip> can be
 used for improved performance.
 
+=back
 
 =head1 ANNOTATION SUPPORT
 
@@ -514,6 +518,21 @@
 
 Meta data for all I5 files
 
+Environment variables:
+
+=over 4
+
+=item C<K2K_TRANSLATOR_TEXT>
+
+Index the translator as a text field (attachment otherwise).
+
+=item C<K2K_PUBLISHER_STRING>
+
+Index the publisher as a string field (attachment otherwise).
+
+
+=back
+
 =item B<Sgbr>
 
 Meta data from the Schreibgebrauch project
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index f43b9df..1b357a8 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -16,7 +16,7 @@
 
 our @EXPORT_OK = qw(get_file_name get_file_name_from_glob);
 
-our $VERSION = '0.62';
+our $VERSION = '0.63';
 
 has 'path';
 has [qw/text_sigle doc_sigle corpus_sigle/];
diff --git a/lib/KorAP/XML/Meta/I5.pm b/lib/KorAP/XML/Meta/I5.pm
index 703b609..3527ad5 100644
--- a/lib/KorAP/XML/Meta/I5.pm
+++ b/lib/KorAP/XML/Meta/I5.pm
@@ -337,7 +337,13 @@
   # Get Publisher
   if ($temp = $dom->at('imprint publisher')) {
     $temp = _squish $temp->all_text;
-    $self->{A_publisher} = $temp if $temp;
+    if ($temp) {
+      if (!!($ENV{K2K_PUBLISHER_STRING})) {
+        $self->{'S_publisher'} = $temp;
+      } else {
+        $self->{'A_publisher'} = $temp;
+      };
+    };
   };
 
   # Get text type
diff --git a/script/korapxml2krill b/script/korapxml2krill
index f2ebe85..d4e5cb6 100755
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -1504,6 +1504,7 @@
 Only valid for the C<archive> command.
 
 Writes the output into a tar archive.
+The tar needs to be opened with C<--ignore-zeros> afterwards.
 
 
 =item B<--sigle|-sg>
@@ -1550,6 +1551,8 @@
 
 There are some ways to improve performance for large tasks:
 
+=over 2
+
 =item First unpack
 
 Using the archive or serial command on one or multiple zip files
@@ -1573,6 +1576,7 @@
 For full extraction of data, L<ripunzip|https://github.com/google/ripunzip> can be
 used for improved performance.
 
+=back
 
 =head1 ANNOTATION SUPPORT
 
@@ -1691,6 +1695,21 @@
 
 Meta data for all I5 files
 
+Environment variables:
+
+=over 4
+
+=item C<K2K_TRANSLATOR_TEXT>
+
+Index the translator as a text field (attachment otherwise).
+
+=item C<K2K_PUBLISHER_STRING>
+
+Index the publisher as a string field (attachment otherwise).
+
+
+=back
+
 =item B<Sgbr>
 
 Meta data from the Schreibgebrauch project
@@ -1904,7 +1923,7 @@
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2015-2024, L<IDS Mannheim|https://www.ids-mannheim.de/>
+Copyright (C) 2015-2025, L<IDS Mannheim|https://www.ids-mannheim.de/>
 
 Author: L<Nils Diewald|https://www.nils-diewald.de/>
 
diff --git a/t/meta_artificial.t b/t/meta_artificial.t
index 6cd33cc..8069a65 100644
--- a/t/meta_artificial.t
+++ b/t/meta_artificial.t
@@ -59,6 +59,20 @@
 is('data:application/x.korap-link;example=%20Das%20war%20einfach;title=Hallo%21,https%3A%2F%2Fwww.test.de',
    $meta->korap_data_uri('https://www.test.de', title => 'Hallo!', example => ' Das war einfach'));
 
+our %ENV;
+$ENV{K2K_PUBLISHER_STRING} = 1;
+
+# ART
+$path = catdir(dirname(__FILE__), 'corpus','artificial');
+ok($doc = KorAP::XML::Krill->new( path => $path ), 'Load Korap::Document');
+ok($doc->parse, 'Parse document');
+$meta = $doc->meta;
+ok(!$meta->{A_publisher}, 'Publisher');
+is($meta->{S_publisher}, 'Artificial articles Inc.', 'Publisher');
+
+$ENV{K2K_PUBLISHER_STRING} = 0;
+
+
 done_testing;
 __END__
 
diff --git a/t/real/drukola.t b/t/real/drukola.t
index 74dfef3..1d99dee 100644
--- a/t/real/drukola.t
+++ b/t/real/drukola.t
@@ -162,6 +162,7 @@
 $meta = $doc->meta;
 is($meta->{T_translator}, '[TRANSLATOR]', 'Translator');
 ok(!$meta->{A_translator}, 'Translator');
+$ENV{K2K_TRANSLATOR_TEXT} = 0;
 
 
 done_testing;