Support K2K_PUBLISHER_STRING
Change-Id: I11c7333fd55f80ed9d868fe7041f8e11da18d238
diff --git a/Changes b/Changes
index 7caba31..69be794 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,8 @@
+0.63 2025-10-17
+ - Publisher is now indexed as a string field if
+ the environment variable K2K_PUBLISHER_STRING
+ is set to a true value.
+
0.62 2025-07-15
- Remove lock from tar builder.
- Don't create a list of files (that is passed to forks).
diff --git a/Readme.pod b/Readme.pod
index e272014..86d50ac 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -329,6 +329,7 @@
Writes the output into a tar archive.
The tar needs to be opened with C<--ignore-zeros> afterwards.
+
=item B<--sigle|-sg>
Extract the given texts.
@@ -373,6 +374,8 @@
There are some ways to improve performance for large tasks:
+=over 2
+
=item First unpack
Using the archive or serial command on one or multiple zip files
@@ -396,6 +399,7 @@
For full extraction of data, L<ripunzip|https://github.com/google/ripunzip> can be
used for improved performance.
+=back
=head1 ANNOTATION SUPPORT
@@ -514,6 +518,21 @@
Meta data for all I5 files
+Environment variables:
+
+=over 4
+
+=item C<K2K_TRANSLATOR_TEXT>
+
+Index the translator as a text field (attachment otherwise).
+
+=item C<K2K_PUBLISHER_STRING>
+
+Index the publisher as a string field (attachment otherwise).
+
+
+=back
+
=item B<Sgbr>
Meta data from the Schreibgebrauch project
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index f43b9df..1b357a8 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -16,7 +16,7 @@
our @EXPORT_OK = qw(get_file_name get_file_name_from_glob);
-our $VERSION = '0.62';
+our $VERSION = '0.63';
has 'path';
has [qw/text_sigle doc_sigle corpus_sigle/];
diff --git a/lib/KorAP/XML/Meta/I5.pm b/lib/KorAP/XML/Meta/I5.pm
index 703b609..3527ad5 100644
--- a/lib/KorAP/XML/Meta/I5.pm
+++ b/lib/KorAP/XML/Meta/I5.pm
@@ -337,7 +337,13 @@
# Get Publisher
if ($temp = $dom->at('imprint publisher')) {
$temp = _squish $temp->all_text;
- $self->{A_publisher} = $temp if $temp;
+ if ($temp) {
+ if (!!($ENV{K2K_PUBLISHER_STRING})) {
+ $self->{'S_publisher'} = $temp;
+ } else {
+ $self->{'A_publisher'} = $temp;
+ };
+ };
};
# Get text type
diff --git a/script/korapxml2krill b/script/korapxml2krill
index f2ebe85..d4e5cb6 100755
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -1504,6 +1504,7 @@
Only valid for the C<archive> command.
Writes the output into a tar archive.
+The tar needs to be opened with C<--ignore-zeros> afterwards.
=item B<--sigle|-sg>
@@ -1550,6 +1551,8 @@
There are some ways to improve performance for large tasks:
+=over 2
+
=item First unpack
Using the archive or serial command on one or multiple zip files
@@ -1573,6 +1576,7 @@
For full extraction of data, L<ripunzip|https://github.com/google/ripunzip> can be
used for improved performance.
+=back
=head1 ANNOTATION SUPPORT
@@ -1691,6 +1695,21 @@
Meta data for all I5 files
+Environment variables:
+
+=over 4
+
+=item C<K2K_TRANSLATOR_TEXT>
+
+Index the translator as a text field (attachment otherwise).
+
+=item C<K2K_PUBLISHER_STRING>
+
+Index the publisher as a string field (attachment otherwise).
+
+
+=back
+
=item B<Sgbr>
Meta data from the Schreibgebrauch project
@@ -1904,7 +1923,7 @@
=head1 COPYRIGHT AND LICENSE
-Copyright (C) 2015-2024, L<IDS Mannheim|https://www.ids-mannheim.de/>
+Copyright (C) 2015-2025, L<IDS Mannheim|https://www.ids-mannheim.de/>
Author: L<Nils Diewald|https://www.nils-diewald.de/>
diff --git a/t/meta_artificial.t b/t/meta_artificial.t
index 6cd33cc..8069a65 100644
--- a/t/meta_artificial.t
+++ b/t/meta_artificial.t
@@ -59,6 +59,20 @@
is('data:application/x.korap-link;example=%20Das%20war%20einfach;title=Hallo%21,https%3A%2F%2Fwww.test.de',
$meta->korap_data_uri('https://www.test.de', title => 'Hallo!', example => ' Das war einfach'));
+our %ENV;
+$ENV{K2K_PUBLISHER_STRING} = 1;
+
+# ART
+$path = catdir(dirname(__FILE__), 'corpus','artificial');
+ok($doc = KorAP::XML::Krill->new( path => $path ), 'Load Korap::Document');
+ok($doc->parse, 'Parse document');
+$meta = $doc->meta;
+ok(!$meta->{A_publisher}, 'Publisher');
+is($meta->{S_publisher}, 'Artificial articles Inc.', 'Publisher');
+
+$ENV{K2K_PUBLISHER_STRING} = 0;
+
+
done_testing;
__END__
diff --git a/t/real/drukola.t b/t/real/drukola.t
index 74dfef3..1d99dee 100644
--- a/t/real/drukola.t
+++ b/t/real/drukola.t
@@ -162,6 +162,7 @@
$meta = $doc->meta;
is($meta->{T_translator}, '[TRANSLATOR]', 'Translator');
ok(!$meta->{A_translator}, 'Translator');
+$ENV{K2K_TRANSLATOR_TEXT} = 0;
done_testing;