Support for Gingko metadata

Change-Id: I913444b85000da6be8af05d1e376a5b83e888515
diff --git a/Readme.pod b/Readme.pod
index 89abd5f..3009ba8 100644
--- a/Readme.pod
+++ b/Readme.pod
@@ -426,6 +426,26 @@
 See the built-in annotation importers as examples.
 
 
+=head1 METADATA SUPPORT
+
+L<KorAP::XML::Krill> has built-in importers for some meta data variants
+developed in the KorAP project that are part of the KorAP preprocessing pipeline.
+
+=over 2
+
+=item I5 - Meta data for all I5 files
+
+=item Sgbr - Meta data from the Schreibgebrauch project
+
+=item Gingko - Meta data from the Gingko project in addition to I5
+
+=back
+
+More importers are in preparation.
+New meta data importers can be defined in the C<KorAP::XML::Meta> namespace.
+See the built-in meta data importers as examples.
+
+
 =head1 About KorAP-XML
 
 KorAP-XML (Bański et al. 2012) is an implementation of the KorAP
diff --git a/lib/KorAP/XML/Meta/Gingko.pm b/lib/KorAP/XML/Meta/Gingko.pm
new file mode 100644
index 0000000..821b0d3
--- /dev/null
+++ b/lib/KorAP/XML/Meta/Gingko.pm
@@ -0,0 +1,78 @@
+package KorAP::XML::Meta::Gingko;
+use KorAP::XML::Meta::Base;
+use KorAP::XML::Meta::I5;
+
+# Reuse the private _squish helper of the I5 importer for extracted text
+my $squish = \&KorAP::XML::Meta::I5::_squish;
+
+# Import Gingko specific meta data on top of the I5 meta data.
+#
+#   $dom  - header DOM to read from (queried with ->at)
+#   $type - header level; only 'text' and 'corpus' add Gingko fields
+#
+# Returns 0 if the I5 parser returns a false value and 1 otherwise.
+sub parse {
+  my ($self, $dom, $type) = @_;
+
+  # Import the common I5 fields first and stop if that fails
+  unless (KorAP::XML::Meta::I5::parse($self, $dom, $type)) {
+    return 0;
+  };
+
+  my $temp;
+
+  if ($type eq 'text') {
+
+    # Genre classification (top and sub scheme)
+    if ($temp = $dom->at('textClass > classCode[scheme=gingkoGenre.top]')) {
+      $temp = $squish->($temp->all_text);
+      $self->{S_gingko_genre_main} = $temp if $temp;
+    };
+
+    if ($temp = $dom->at('textClass > classCode[scheme=gingkoGenre.sub]')) {
+      $temp = $squish->($temp->all_text);
+      $self->{S_gingko_genre_sub} = $temp if $temp;
+    };
+
+    # Source titles (main and short)
+    if (my $mono = $dom->at('sourceDesc > biblStruct > monogr')) {
+      if ($temp = $mono->at('h\.title[type=main]')) {
+        $temp = $squish->($temp->all_text);
+        $self->{T_gingko_source} = $temp if $temp;
+      };
+
+      if ($temp = $mono->at('h\.title[type=short]')) {
+        $temp = $squish->($temp->all_text);
+        $self->{S_gingko_source_short} = $temp if $temp;
+      };
+    };
+
+    # Text content of the <correction> element
+    if ($temp = $dom->at('correction')) {
+      $temp = $squish->($temp->all_text);
+      $self->{S_gingko_lemma_corr} = $temp if $temp;
+    };
+  }
+
+  elsif ($type eq 'corpus') {
+
+    # Collection names (full and short)
+    if (my $mono = $dom->at('sourceDesc > biblStruct > monogr')) {
+      if ($temp = $mono->at('biblNote[n=collection]')) {
+        $temp = $squish->($temp->all_text);
+        $self->{T_gingko_collection} = $temp if $temp;
+      };
+
+      if ($temp = $mono->at('biblNote[n=collectionShort]')) {
+        $temp = $squish->($temp->all_text);
+        $self->{S_gingko_collection_short} = $temp if $temp;
+      };
+    };
+  };
+
+  # Report success explicitly instead of leaking the value of the
+  # last evaluated condition (false e.g. for other header types)
+  return 1;
+};
+
+1;
diff --git a/script/korapxml2krill b/script/korapxml2krill
index 2376a5e..b2533bc 100644
--- a/script/korapxml2krill
+++ b/script/korapxml2krill
@@ -1431,6 +1431,26 @@
 See the built-in annotation importers as examples.
 
 
+=head1 METADATA SUPPORT
+
+L<KorAP::XML::Krill> has built-in importers for some meta data variants
+developed in the KorAP project that are part of the KorAP preprocessing pipeline.
+
+=over 2
+
+=item I5 - Meta data for all I5 files
+
+=item Sgbr - Meta data from the Schreibgebrauch project
+
+=item Gingko - Meta data from the Gingko project in addition to I5
+
+=back
+
+More importers are in preparation.
+New meta data importers can be defined in the C<KorAP::XML::Meta> namespace.
+See the built-in meta data importers as examples.
+
+
 =head1 About KorAP-XML
 
 KorAP-XML (Bański et al. 2012) is an implementation of the KorAP
diff --git a/t/real/gingko.t b/t/real/gingko.t
index 5edd877..35cb8de 100644
--- a/t/real/gingko.t
+++ b/t/real/gingko.t
@@ -26,7 +26,10 @@
 # ATZ07/JAN/00001
 my $path = catdir(dirname(__FILE__), 'corpus','Gingko', 'ATZ07','JAN','00001');
 
-ok(my $doc = KorAP::XML::Krill->new( path => $path . '/' ), 'Load Korap::Document');
+ok(my $doc = KorAP::XML::Krill->new(
+  path => $path . '/',
+  meta_type => 'Gingko'
+), 'Load Korap::Document');
 ok($doc->parse, 'Parse document');
 
 is($doc->text_sigle, 'ATZ07/JAN/00001', 'Correct text sigle');
@@ -66,6 +69,15 @@
 ok(!$meta->{T_doc_author}, 'Correct Doc author');
 is($meta->{A_doc_editor}, 'Prof. Dr. Christian Fandrych, Leipzig University', 'Correct Doc editor');
 
+# Gingko Metadata
+is($meta->{S_gingko_genre_main}, 'wissenschaftlich', 'Correct Gingko main genre');
+is($meta->{S_gingko_genre_sub}, 'wissenschaftlich', 'Correct Gingko sub genre');
+is($meta->{T_gingko_source}, 'ATZ - Automobiltechnische Zeitschrift', 'Correct Gingko source');
+is($meta->{S_gingko_source_short}, 'ATZ', 'Correct Gingko short source');
+is($meta->{S_gingko_lemma_corr}, 'no', 'Correct Gingko lemma correction');
+is($meta->{T_gingko_collection}, 'Gingko - Geschriebenes Ingenieurwissenschaftliches Korpus', 'Correct Gingko collection');
+is($meta->{S_gingko_collection_short}, 'Gingko', 'Correct Gingko short collection');
+
 # Tokenization
 use_ok('KorAP::XML::Tokenizer');
 
@@ -106,6 +118,25 @@
 like($token, qr!ginkgo/p:ADJA!, 'data');
 like($token, qr!gingko/l:heutig!, 'data');
 
+# Check Gingko meta in Koral
+my $koral = decode_json($tokens->to_json(0.4));
+
+my $test = 0;
+foreach my $field (@{$koral->{fields}}) {
+  if ($field->{key} eq 'gingkoGenreMain') {
+    is($field->{'type'},'type:string', 'Correct type of gingkoGenreMain');
+    is($field->{'value'},'wissenschaftlich', 'Correct value of gingkoGenreMain');
+    $test++;
+  }
+  elsif ($field->{key} eq 'gingkoCollection') {
+    is($field->{'type'},'type:text', 'Correct type of gingkoCollection');
+    is($field->{'value'},'Gingko - Geschriebenes Ingenieurwissenschaftliches Korpus', 'Correct value of gingkoCollection');
+    $test++;
+  };
+};
+
+is($test,2, 'Both Gingko fields found in Koral');
+
 done_testing;
 __END__