Added external link for AGD data in I5 meta
Change-Id: Ice3f5c0aff6d30a6f113b77edc1b6ff503833c31
diff --git a/Changes b/Changes
index f88bac6..dd7d4bd 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,7 @@
-0.39 2019-11-13
+0.39 2019-11-29
- Added Talismane support.
- Added "distributor" field to I5 metadata.
+ - Added DGD external link to I5 metadata for AGD texts.
0.38 2019-05-22
- Stop file processing when base tokenization
diff --git a/lib/KorAP/XML/Meta/I5.pm b/lib/KorAP/XML/Meta/I5.pm
index 6bd839a..c82fef8 100644
--- a/lib/KorAP/XML/Meta/I5.pm
+++ b/lib/KorAP/XML/Meta/I5.pm
@@ -1,7 +1,9 @@
package KorAP::XML::Meta::I5;
use KorAP::XML::Meta::Base;
+use Mojo::Util qw/url_escape/;
our $SIGLE_RE = qr/^([^_\/]+)(?:[_\/]([^\._\/]+?)(?:\.(.+?))?)?$/;
+our $KORAP_LINK_PREF = 'data:application/x.korap-link;';
# STRING:
# "pubPlace",
@@ -355,9 +357,9 @@
$ref_text =~ s!$REF_RE!!;
$self->{A_reference} = $ref_text;
- # In case of Wikipedia texts, take the URL
+ # In case of Wikipedia texts, take the URL
if ($ref_text =~ /URL:(http:.+?):\s+Wikipedia,\s+\d+\s*$/) {
- $self->{A_externalLink} = 'data:application/x.korap-link;title=Wikipedia,' . $1;
+ $self->{A_externalLink} = $KORAP_LINK_PREF . 'title=Wikipedia,' . $1;
};
};
};
@@ -373,6 +375,15 @@
$self->{A_src_pages} = $1 . '-' . $2;
};
};
+
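+ # DGD treatment: for AGD texts without an external link, link to the DGD entry of the title (minus any trailing _DF_<n>)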
+ if ($self->{T_title} && !$self->{A_externalLink} && $self->{_corpus_sigle} eq 'AGD') {
+ my $transcript = $self->{T_title};
+ $transcript =~ s/_DF_\d+$//i;
+ $self->{A_externalLink} = $KORAP_LINK_PREF . 'title=DGD,' .
+ 'https://dgd.ids-mannheim.de/DGD2Web/ExternalAccessServlet?command=displayData&id=' .
+ url_escape($transcript);
+ }
};
return 1;
diff --git a/t/real/agd.t b/t/real/agd.t
index 37029ae..1e87bfe 100644
--- a/t/real/agd.t
+++ b/t/real/agd.t
@@ -37,8 +37,9 @@
is($meta->{T_title}, 'FOLK_E_00321_SE_01_T_01_DF_01', 'Title');
is($meta->{D_creation_date}, '20181112', 'Title');
-# TODO:
-# Add source as asset!
+is($meta->{A_externalLink}, 'data:application/x.korap-link;title=DGD,'.
+ 'https://dgd.ids-mannheim.de/DGD2Web/ExternalAccessServlet?command=displayData'.
+ '&id=FOLK_E_00321_SE_01_T_01', 'External link');
# Tokenization
use_ok('KorAP::XML::Tokenizer');