Always percent-encode data URIs
Change-Id: I1f41f6bc15acb54d5e8d695304ce11120c950e06
diff --git a/t/real/agd.t b/t/real/agd.t
index c1f8cc5..0bf6f12 100644
--- a/t/real/agd.t
+++ b/t/real/agd.t
@@ -32,8 +32,8 @@
is($meta->{D_creation_date}, '20181112', 'Title');
is($meta->{A_externalLink}, 'data:application/x.korap-link;title=DGD,'.
- 'https://dgd.ids-mannheim.de/DGD2Web/ExternalAccessServlet?command=displayData'.
- '&id=FOLK_E_00321_SE_01_T_01', 'External link');
+ 'https%3A%2F%2Fdgd.ids-mannheim.de%2FDGD2Web%2FExternalAccessServlet%3F'.
+ 'command%3DdisplayData%26id%3DFOLK_E_00321_SE_01_T_01', 'External link');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
@@ -136,7 +136,9 @@
$meta = $doc->meta;
is($meta->{T_title}, 'FOLK_E_00068_SE_01_T_05_DF_01', 'Title');
-is($meta->{A_externalLink}, 'data:application/x.korap-link;title=DGD,https://dgd.ids-mannheim.de/DGD2Web/ExternalAccessServlet?command=displayData&id=FOLK_E_00068_SE_01_T_05');
+is($meta->{A_externalLink}, 'data:application/x.korap-link;title=DGD,'.
+ 'https%3A%2F%2Fdgd.ids-mannheim.de%2FDGD2Web%2FExternalAccessServlet'.
+ '%3Fcommand%3DdisplayData%26id%3DFOLK_E_00068_SE_01_T_05');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
diff --git a/t/real/gingko.t b/t/real/gingko.t
index 35cb8de..84ae9d7 100644
--- a/t/real/gingko.t
+++ b/t/real/gingko.t
@@ -69,7 +69,7 @@
ok(!$meta->{T_doc_author}, 'Correct Doc author');
is($meta->{A_doc_editor}, 'Prof. Dr. Christian Fandrych, Leipzig University', 'Correct Doc editor');
-# Ginkgo Metadata
+# Gingko Metadata
is($meta->{S_gingko_genre_main}, 'wissenschaftlich');
is($meta->{S_gingko_genre_sub}, 'wissenschaftlich');
is($meta->{T_gingko_source}, 'ATZ - Automobiltechnische Zeitschrift');
@@ -110,15 +110,15 @@
# Unknown
unlike($token, qr!gingko/l!, 'data');
-like($token, qr!ginkgo/p:NN!, 'data');
+like($token, qr!gingko/p:NN!, 'data');
$token = join('||', @{$output->{data}->{stream}->[9]});
like($token, qr!i:heutige!, 'data');
-like($token, qr!ginkgo/p:ADJA!, 'data');
+like($token, qr!gingko/p:ADJA!, 'data');
like($token, qr!gingko/l:heutig!, 'data');
-# Check Ginkgo meta in Koral
+# Check Gingko meta in Koral
my $koral = decode_json($tokens->to_json(0.4));
my $test = 0;
diff --git a/t/real/wdd.t b/t/real/wdd.t
index 779a44e..89cbba1 100644
--- a/t/real/wdd.t
+++ b/t/real/wdd.t
@@ -52,7 +52,7 @@
REF
is($meta->{S_language}, 'de', 'Language');
-is($meta->{A_externalLink}, 'data:application/x.korap-link;title=Wikipedia,http://de.wikipedia.org/wiki/Diskussion:Gunter_A._Pilz', 'link');
+is($meta->{A_externalLink}, 'data:application/x.korap-link;title=Wikipedia,http%3A%2F%2Fde.wikipedia.org%2Fwiki%2FDiskussion%3AGunter_A._Pilz', 'link');
is($meta->{T_corpus_title}, 'Wikipedia', 'Correct Corpus title');
ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus sub title');
@@ -281,7 +281,7 @@
is($doc->corpus_sigle, 'WDD15', 'Correct corpus sigle');
$meta = $doc->meta;
-is($meta->{A_externalLink}, 'data:application/x.korap-link;title=Wikipedia,http://de.wikipedia.org/wiki/Diskussion:Arteria_interossea_communis', 'link');
+is($meta->{A_externalLink}, 'data:application/x.korap-link;title=Wikipedia,http%3A%2F%2Fde.wikipedia.org%2Fwiki%2FDiskussion%3AArteria_interossea_communis', 'link');
# Get tokenization
$tokens = KorAP::XML::Tokenizer->new(
diff --git a/t/real/wpd.t b/t/real/wpd.t
index ad14631..8ae04a6 100644
--- a/t/real/wpd.t
+++ b/t/real/wpd.t
@@ -48,7 +48,7 @@
ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus Sub title');
# This link is broken, but that's due to the data
-is($meta->{A_externalLink}, 'data:application/x.korap-link;title=Wikipedia,http://de.wikipedia.org', 'No link');
+is($meta->{A_externalLink}, 'data:application/x.korap-link;title=Wikipedia,http%3A%2F%2Fde.wikipedia.org', 'No link');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
@@ -111,7 +111,7 @@
is($doc->corpus_sigle, 'WPD15', 'Correct corpus sigle');
$meta = $doc->meta;
-is($meta->{A_externalLink}, 'data:application/x.korap-link;title=Wikipedia,http://de.wikipedia.org/wiki/Wolfgang_Krebs_(Schauspieler)', 'link');
+is($meta->{A_externalLink}, 'data:application/x.korap-link;title=Wikipedia,http%3A%2F%2Fde.wikipedia.org%2Fwiki%2FWolfgang_Krebs_%28Schauspieler%29', 'link');
# Get tokenization
$tokens = KorAP::XML::Tokenizer->new(
@@ -144,7 +144,7 @@
is($doc->text_sigle, 'WPD15/U43/34816', 'Correct text sigle');
$meta = $doc->meta;
-is($meta->{A_externalLink}, 'data:application/x.korap-link;title=Wikipedia,http://de.wikipedia.org/wiki/Universitätsbibliothek_Augsburg', 'link');
+is($meta->{A_externalLink}, 'data:application/x.korap-link;title=Wikipedia,http%3A%2F%2Fde.wikipedia.org%2Fwiki%2FUniversit%E4tsbibliothek_Augsburg');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
diff --git a/t/real/wpe.t b/t/real/wpe.t
index 33a4236..08be909 100644
--- a/t/real/wpe.t
+++ b/t/real/wpe.t
@@ -38,7 +38,7 @@
is($meta->{A_reference}, 'Generation X, In: Wikipedia - URL:http://en.wikipedia.org/wiki/Generation_X: Wikipedia, 2015', 'Reference');
-is($meta->{A_externalLink}, 'data:application/x.korap-link;title=Wikipedia,http://en.wikipedia.org/wiki/Generation_X', 'link');
+is($meta->{A_externalLink}, 'data:application/x.korap-link;title=Wikipedia,http%3A%2F%2Fen.wikipedia.org%2Fwiki%2FGeneration_X', 'link');
is($meta->{'S_availability'}, 'CC-BY-SA', 'Availability');
is($meta->{'S_language'}, 'en', 'Language');
@@ -102,7 +102,7 @@
is($link->{'@type'}, 'koral:field', 'attachement');
is($link->{type}, 'type:attachement', 'attachement');
is($link->{key}, 'externalLink', 'attachement');
-is($link->{value}, 'data:application/x.korap-link;title=Wikipedia,http://en.wikipedia.org/wiki/Generation_X', 'attachement');
+is($link->{value}, 'data:application/x.korap-link;title=Wikipedia,http%3A%2F%2Fen.wikipedia.org%2Fwiki%2FGeneration_X', 'attachement');
done_testing;
__END__
diff --git a/t/real/wpf.t b/t/real/wpf.t
index e44e0ba..a7b07e5 100644
--- a/t/real/wpf.t
+++ b/t/real/wpf.t
@@ -47,7 +47,7 @@
ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus Sub title');
# This link is broken, but that's due to the data
-is($meta->{A_externalLink}, 'data:application/x.korap-link;title=Wikipedia,http://fr.wikipedia.org/wiki/Psychanalyse', 'No link');
+is($meta->{A_externalLink}, 'data:application/x.korap-link;title=Wikipedia,http%3A%2F%2Ffr.wikipedia.org%2Fwiki%2FPsychanalyse', 'No link');
# Tokenization
use_ok('KorAP::XML::Tokenizer');