Added 'distributor' field to I5 metadata
Change-Id: I464480fd82511c91d4ab252b8657a5db95b35c5e
diff --git a/Changes b/Changes
index c734824..f88bac6 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,6 @@
-0.39 2019-08-30
+0.39 2019-11-13
- Added Talismane support.
+ - Added "distributor" field to I5 metadata.
0.38 2019-05-22
- Stop file processing when base tokenization
diff --git a/lib/KorAP/XML/Meta/I5.pm b/lib/KorAP/XML/Meta/I5.pm
index a34c82e..6bd839a 100644
--- a/lib/KorAP/XML/Meta/I5.pm
+++ b/lib/KorAP/XML/Meta/I5.pm
@@ -44,6 +44,7 @@
# "biblEditionStatement",
# "reference",
# "corpusEditor"
+# "distributor"
#
# DATE:
# "pubDate",
@@ -324,10 +325,17 @@
};
if ($temp = $dom->at('fileDesc')) {
+ my $temp2;
+
if (my $availability = $temp->at('publicationStmt > availability')) {
- $temp = _squish $availability->all_text;
- $self->{S_availability} = $temp if $temp;
+ $temp2 = _squish $availability->all_text;
+ $self->{S_availability} = $temp2 if $temp2;
};
+
+ if (my $distributor = $temp->at('publicationStmt > distributor')) {
+ $temp2 = _squish $distributor->all_text;
+ $self->{A_distributor} = $temp2 if $temp2;
+ }
};
if ($temp = $dom->at('profileDesc > langUsage > language[id]')) {
diff --git a/t/real/hnc.t b/t/real/hnc.t
index df2a162..88578db 100644
--- a/t/real/hnc.t
+++ b/t/real/hnc.t
@@ -57,6 +57,9 @@
ok(!$meta->{A_reference}, 'Reference');
is($meta->{S_language}, 'hu', 'Language');
+is($meta->{S_availability}, 'Kutatási célokra, megállapodás alapján, hozzáférhető', 'Availability');
+is($meta->{A_distributor}, 'MTA Nyelvtudományi Intézet', 'Distributor');
+
ok(!$meta->{T_corpus_title}, 'Correct Corpus title');
ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus Sub title');
ok(!$meta->{T_corpus_author}, 'Correct Corpus author');
diff --git a/t/real/wpe.t b/t/real/wpe.t
index a3413d2..962b361 100644
--- a/t/real/wpe.t
+++ b/t/real/wpe.t
@@ -93,7 +93,8 @@
my $koral = decode_json($tokens->to_json(0.4));
-my $link = $koral->{fields}->[5];
+my $link = $koral->{fields}->[6];
+
is($link->{'@type'}, 'koral:field', 'attachement');
is($link->{type}, 'type:attachement', 'attachement');
is($link->{key}, 'externalLink', 'attachement');
diff --git a/t/script/single.t b/t/script/single.t
index 3f43da5..8f2c4db 100644
--- a/t/script/single.t
+++ b/t/script/single.t
@@ -284,21 +284,26 @@
is($json->{fields}->[0]->{value}, 'Corpus');
is($json->{fields}->[0]->{'@type'}, 'koral:field');
-is($json->{fields}->[8]->{key}, 'textClass');
-is($json->{fields}->[8]->{value}->[0], 'freizeit-unterhaltung');
-is($json->{fields}->[8]->{value}->[1], 'vereine-veranstaltungen');
-is($json->{fields}->[8]->{type}, 'type:keywords');
-is($json->{fields}->[8]->{'@type'}, 'koral:field');
+is($json->{fields}->[4]->{key}, 'distributor');
+is($json->{fields}->[4]->{value}, 'data:,Institut für Deutsche Sprache');
+is($json->{fields}->[4]->{type}, 'type:attachement');
+is($json->{fields}->[4]->{'@type'}, 'koral:field');
-is($json->{fields}->[13]->{key}, 'textType');
-is($json->{fields}->[13]->{value}, 'Zeitung: Tageszeitung');
-is($json->{fields}->[13]->{type}, 'type:string');
-is($json->{fields}->[13]->{'@type'}, 'koral:field');
+is($json->{fields}->[9]->{key}, 'textClass');
+is($json->{fields}->[9]->{value}->[0], 'freizeit-unterhaltung');
+is($json->{fields}->[9]->{value}->[1], 'vereine-veranstaltungen');
+is($json->{fields}->[9]->{type}, 'type:keywords');
+is($json->{fields}->[9]->{'@type'}, 'koral:field');
-is($json->{fields}->[21]->{key}, 'title');
-is($json->{fields}->[21]->{value}, 'Beispiel Text');
-is($json->{fields}->[21]->{type}, 'type:text');
-is($json->{fields}->[21]->{'@type'}, 'koral:field');
+is($json->{fields}->[14]->{key}, 'textType');
+is($json->{fields}->[14]->{value}, 'Zeitung: Tageszeitung');
+is($json->{fields}->[14]->{type}, 'type:string');
+is($json->{fields}->[14]->{'@type'}, 'koral:field');
+
+is($json->{fields}->[22]->{key}, 'title');
+is($json->{fields}->[22]->{value}, 'Beispiel Text');
+is($json->{fields}->[22]->{type}, 'type:text');
+is($json->{fields}->[22]->{'@type'}, 'koral:field');
is($json->{data}->{tokenSource}, 'opennlp#tokens', 'Title');
is($json->{data}->{foundries}, 'base base/paragraphs base/sentences connexor connexor/morpho connexor/phrase connexor/sentences connexor/syntax corenlp corenlp/constituency corenlp/morpho corenlp/sentences dereko dereko/structure glemm glemm/morpho mate mate/dependency mate/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho treetagger/sentences xip xip/constituency xip/morpho xip/sentences', 'Foundries');