Set field types and serialize as koral:fields
Change-Id: I6f256fcbf3996e6ae45db60bc3aef98ff431b0b9
diff --git a/lib/KorAP/XML/Krill.pm b/lib/KorAP/XML/Krill.pm
index 221c5f4..89fadd0 100644
--- a/lib/KorAP/XML/Krill.pm
+++ b/lib/KorAP/XML/Krill.pm
@@ -280,7 +280,7 @@
sub _k {
- my $x = $_[0];
+ my $x = substr($_[0], 2);
$x =~ s/_(\w)/\U$1\E/g;
$x =~ s/id$/ID/gi;
return $x;
diff --git a/lib/KorAP/XML/Meta/Base.pm b/lib/KorAP/XML/Meta/Base.pm
index a5cb3bd..b4997c4 100644
--- a/lib/KorAP/XML/Meta/Base.pm
+++ b/lib/KorAP/XML/Meta/Base.pm
@@ -130,4 +130,112 @@
return 0;
};
+
+# Serialize the collected metadata as a list of koral:field objects.
+#
+# The sigle fields come first and are nested: docSigle is only emitted
+# when a corpus sigle exists, textSigle only when a doc sigle exists.
+# All keys reported by $self->keys follow in string order. The two
+# character prefix of a key selects the field type (D_ date, S_ string,
+# T_ text, A_ attachement, K_ keywords) and is removed from the
+# serialized key by _k(). Keys with any other prefix are reported with
+# a warning and left out.
+#
+# Returns a reference to the array of field hashes.
+sub to_koral_fields {
+  my $self = shift;
+  my @fields;
+
+  # Walk down the sigle hierarchy and stop at the first missing level
+  foreach my $level (qw/corpus doc text/) {
+    my $accessor = $level . '_sigle';
+    last unless $self->$accessor;
+    push @fields, _string_field($level . 'Sigle', $self->$accessor);
+  };
+
+  # Field constructors by key prefix
+  # (an integer type with the prefix I_ is not supported yet)
+  my %builder_for = (
+    'D_' => \&_date_field,
+    'S_' => \&_string_field,
+    'T_' => \&_text_field,
+    'A_' => \&_attachement_field,
+    'K_' => \&_keywords_field
+  );
+
+  # Iterate over all keys
+  foreach my $key (sort { $a cmp $b } $self->keys) {
+    my $builder = $builder_for{substr($key, 0, 2)};
+
+    if ($builder) {
+      push @fields, $builder->(_k($key), $self->{$key});
+    }
+    else {
+      warn 'Unknown field type: ' . $key;
+    };
+  };
+
+  return \@fields;
+};
+
+# Turn a prefixed metadata key into its serialized name.
+#
+# The first two characters (the type prefix, e.g. "T_") are cut off,
+# every underscore followed by a word character is replaced by that
+# character in upper case, and a trailing "id" in any casing
+# becomes "ID".
+sub _k {
+  my ($prefixed) = @_;
+  (my $name = substr($prefixed, 2)) =~ s/_(\w)/\U$1\E/g;
+  $name =~ s/id$/ID/gi;
+  return $name;
+};
+
+
+# Build a koral:field of type "string".
+#
+# Params: the serialized key and the value, which is passed
+# through unchanged. Returns a hash reference.
+sub _string_field {
+  my ($key, $value) = @_;
+  my %field = (
+    '@type' => 'koral:field',
+    type    => 'type:string',
+    key     => $key,
+    value   => $value
+  );
+  return \%field;
+};
+
+# Build a koral:field of type "text".
+#
+# Params: the serialized key and the value, which is passed
+# through unchanged. Returns a hash reference.
+sub _text_field {
+  my ($key, $value) = @_;
+  my %field = (
+    '@type' => 'koral:field',
+    type    => 'type:text',
+    key     => $key,
+    value   => $value
+  );
+  return \%field;
+};
+
+# Build a koral:field of type "date".
+#
+# Params:
+#   $key   - the serialized field name
+#   $value - the date as an eight digit string YYYYMMDD, where "00"
+#            as month or day marks that part as unknown
+#
+# Returns a hash reference whose value is "YYYY", "YYYY-MM" or
+# "YYYY-MM-DD". A day is only kept when the month is known.
+# If $value is undefined or not an eight digit string, a warning is
+# issued and nothing is returned (an empty list in list context), so
+# no date field with an undefined value ends up in the serialization.
+sub _date_field {
+  my ($key, $value) = @_;
+
+  unless (defined $value && $value =~ /^(\d\d\d\d)(\d\d)(\d\d)$/) {
+    warn 'Invalid date value for field ' . $key . ': ' .
+      (defined $value ? $value : 'undef');
+    return;
+  };
+
+  # Copy the captures before anything else can reset them
+  my ($year, $month, $day) = ($1, $2, $3);
+
+  my $new_value = $year;
+  if ($month ne '00') {
+    $new_value .= "-$month";
+    if ($day ne '00') {
+      $new_value .= "-$day";
+    };
+  };
+
+  return {
+    '@type' => 'koral:field',
+    type => 'type:date',
+    key => $key,
+    value => $new_value
+  };
+};
+
+# Build a koral:field of type "keywords".
+#
+# Params: the serialized key and the value, which is passed
+# through unchanged. Returns a hash reference.
+sub _keywords_field {
+  my ($key, $value) = @_;
+  my %field = (
+    '@type' => 'koral:field',
+    type    => 'type:keywords',
+    key     => $key,
+    value   => $value
+  );
+  return \%field;
+};
+
+# Build a koral:field of type "attachement".
+#
+# Params: the serialized key and the value. The value is appended
+# verbatim to the prefix "data:,". Returns a hash reference.
+# NOTE(review): the value is not encoded in any way before it is
+# appended - confirm that the consumer expects it unescaped.
+sub _attachement_field {
+  my ($key, $value) = @_;
+  my %field = (
+    '@type' => 'koral:field',
+    type    => 'type:attachement',
+    key     => $key,
+    value   => 'data:,' . $value
+  );
+  return \%field;
+};
+
1;
diff --git a/lib/KorAP/XML/Meta/I5.pm b/lib/KorAP/XML/Meta/I5.pm
index 0599b30..ed55d72 100644
--- a/lib/KorAP/XML/Meta/I5.pm
+++ b/lib/KorAP/XML/Meta/I5.pm
@@ -3,6 +3,53 @@
our $SIGLE_RE = qr/^([^_\/]+)(?:[_\/]([^\._\/]+?)(?:\.(.+?))?)?$/;
+# STRING:
+# "pubPlace",
+# "textSigle",
+# "docSigle",
+# "corpusSigle",
+# "textType",
+# "textTypeArt",
+# "textTypeRef",
+# "textColumn",
+# "textDomain",
+# "availability",
+# "language",
+# "corpusID", // Deprecated!
+# "ID" // Deprecated!
+#
+# TEXT:
+# "author",
+# "title",
+# "subTitle",
+# "corpusTitle",
+# "corpusSubTitle",
+# "corpusAuthor",
+# "docTitle",
+# "docSubTitle",
+# "docAuthor"
+#
+# KEYWORDS:
+# "textClass",
+# "foundries",
+# "keywords"
+#
+# STORE:
+# "docEditor",
+# "tokenSource",
+# "layerInfos",
+# "publisher",
+# "editor",
+# "fileEditionStatement",
+# "biblEditionStatement",
+# "reference",
+# "corpusEditor"
+#
+# DATE:
+# "pubDate",
+# "creationDate"
+
+
sub _squish ($) {
for ($_[0]) {
s!\s\s+! !g;
@@ -75,7 +122,7 @@
if ($editor && $editor->attr('role') && $editor->attr('role') eq 'translator') {
# Translator is only supported on the text level currently
$translator = _squish $editor->all_text;
- $self->{translator} = $translator if $translator;
+ $self->{A_translator} = $translator if $translator;
$editor = undef;
}
else {
@@ -88,32 +135,32 @@
# Text meta data
if ($type eq 'text') {
- unless ($self->{title} || $self->{sub_title}) {
- $self->{title} = _remove_prefix($title, $self->text_sigle) if $title;
- $self->{sub_title} = $sub_title if $sub_title;
+ unless ($self->{T_title} || $self->{T_sub_title}) {
+ $self->{T_title} = _remove_prefix($title, $self->text_sigle) if $title;
+ $self->{T_sub_title} = $sub_title if $sub_title;
};
- $self->{editor} //= $editor if $editor;
- $self->{author} //= $author if $author;
+ $self->{A_editor} //= $editor if $editor;
+ $self->{T_author} //= $author if $author;
}
# Doc meta data
elsif ($type eq 'doc') {
- unless ($self->{doc_title} || $self->{doc_sub_title}) {
- $self->{doc_title} //= _remove_prefix($title, $self->doc_sigle) if $title;
- $self->{doc_sub_title} //= $sub_title if $sub_title;
+ unless ($self->{T_doc_title} || $self->{T_doc_sub_title}) {
+ $self->{T_doc_title} //= _remove_prefix($title, $self->doc_sigle) if $title;
+ $self->{T_doc_sub_title} //= $sub_title if $sub_title;
};
- $self->{doc_author} //= $author if $author;
- $self->{doc_editor} //= $editor if $editor;
+ $self->{T_doc_author} //= $author if $author;
+ $self->{A_doc_editor} //= $editor if $editor;
}
# Corpus meta data
elsif ($type eq 'corpus') {
- unless ($self->{corpus_title} || $self->{corpus_sub_title}) {
- $self->{corpus_title} //= _remove_prefix($title, $self->corpus_sigle) if $title;
- $self->{corpus_sub_title} //= $sub_title if $sub_title;
+ unless ($self->{T_corpus_title} || $self->{T_corpus_sub_title}) {
+ $self->{T_corpus_title} //= _remove_prefix($title, $self->corpus_sigle) if $title;
+ $self->{T_corpus_sub_title} //= $sub_title if $sub_title;
};
- $self->{corpus_author} //= $author if $author;
- $self->{corpus_editor} //= $editor if $editor;
+ $self->{T_corpus_author} //= $author if $author;
+ $self->{A_corpus_editor} //= $editor if $editor;
};
};
@@ -122,12 +169,12 @@
if ($type eq 'corpus') {
# Corpus title not yet given
- unless ($self->{corpus_title}) {
+ unless ($self->{T_corpus_title}) {
if ($title = $dom->at('fileDesc > titleStmt > c\.title')) {
$title = _squish($title->all_text);
if ($title) {
- $self->{corpus_title} = _remove_prefix($title, $self->corpus_sigle);
+ $self->{T_corpus_title} = _remove_prefix($title, $self->corpus_sigle);
};
};
};
@@ -135,12 +182,12 @@
# doc title
elsif ($type eq 'doc') {
- unless ($self->{doc_title}) {
+ unless ($self->{T_doc_title}) {
if ($title = $dom->at('fileDesc > titleStmt > d\.title')) {
$title = _squish($title->all_text);
if ($title) {
- $self->{doc_title} = _remove_prefix($title, $self->doc_sigle);
+ $self->{T_doc_title} = _remove_prefix($title, $self->doc_sigle);
};
};
};
@@ -148,11 +195,11 @@
# text title
elsif ($type eq 'text') {
- unless ($self->{title}) {
+ unless ($self->{T_title}) {
if ($title = $dom->at('fileDesc > titleStmt > t\.title')) {
$title = _squish($title->all_text);
if ($title) {
- $self->{title} = _remove_prefix($title, $self->text_sigle);
+ $self->{T_title} = _remove_prefix($title, $self->text_sigle);
};
}
};
@@ -163,15 +210,15 @@
# Get PubPlace
if ($temp = $dom->at('pubPlace')) {
my $place_attr = $temp->attr('key');
- $self->{pub_place_key} = $place_attr if $place_attr;
+ $self->{S_pub_place_key} = $place_attr if $place_attr;
$temp = _squish $temp->all_text;
- $self->{pub_place} = $temp if $temp;
+ $self->{S_pub_place} = $temp if $temp;
};
# Get Publisher
if ($temp = $dom->at('imprint publisher')) {
$temp = _squish $temp->all_text;
- $self->{publisher} = $temp if $temp;
+ $self->{A_publisher} = $temp if $temp;
};
# Get text type
@@ -181,25 +228,25 @@
if ($temp) {
if ($temp_2 = $temp->at('textType')) {
$temp_2 = _squish $temp_2->all_text;
- $self->{text_type} = $temp_2 if $temp_2;
+ $self->{S_text_type} = $temp_2 if $temp_2;
};
# Get text domain
if ($temp_2 = $temp->at('textDomain')) {
$temp_2 = _squish $temp_2->all_text;
- $self->{text_domain} = $temp_2 if $temp_2;
+ $self->{S_text_domain} = $temp_2 if $temp_2;
};
# Get text type art
if ($temp_2 = $temp->at('textTypeArt')) {
$temp_2 = _squish $temp_2->all_text;
- $self->{text_type_art} = $temp_2 if $temp_2;
+ $self->{S_text_type_art} = $temp_2 if $temp_2;
};
# Get text type ref
if ($temp_2 = $temp->at('textTypeRef')) {
$temp_2 = _squish $temp_2->all_text;
- $self->{text_type_ref} = $temp_2 if $temp_2;
+ $self->{S_text_type_ref} = $temp_2 if $temp_2;
};
};
@@ -225,7 +272,7 @@
my $date = $year ? ($year < 100 ? '20' . $year : $year) : '0000';
$date .= length($month) == 1 ? '0' . $month : $month;
$date .= length($day) == 1 ? '0' . $day : $day;
- $self->{pub_date} = $date;
+ $self->{D_pub_date} = $date;
});
# creatDate
@@ -243,7 +290,7 @@
};
if ($create_date =~ /^\d{4}(?:\.\d{2}(?:\.\d{2})?)?$/) {
$create_date =~ tr/\.//d;
- $self->{creation_date} = $create_date;
+ $self->{D_creation_date} = $create_date;
};
};
@@ -259,32 +306,32 @@
push(@topic, @ttopic);
}
);
- $self->{text_class} = [@topic] if @topic > 0;
+ $self->{K_text_class} = [@topic] if @topic > 0;
- my $kws = $self->{keywords};
+ my $kws = $self->{K_keywords};
my @keywords = $temp->find("h\.keywords > keyTerm")->map(sub {_squish($_) })->grep(sub { $_ })->each;
push(@$kws, @keywords) if @keywords > 0;
};
if ($temp = $dom->at('biblFull editionStmt')) {
$temp = _squish $temp->all_text;
- $self->{bibl_edition_statement} = $temp if $temp;
+ $self->{A_bibl_edition_statement} = $temp if $temp;
};
if ($temp = $dom->at('fileDescl editionStmt')) {
$temp = _squish $temp->all_text;
- $self->{file_edition_statement} = $temp if $temp;
+ $self->{A_file_edition_statement} = $temp if $temp;
};
if ($temp = $dom->at('fileDesc')) {
if (my $availability = $temp->at('publicationStmt > availability')) {
$temp = _squish $availability->all_text;
- $self->{availability} = $temp if $temp;
+ $self->{S_availability} = $temp if $temp;
};
};
if ($temp = $dom->at('profileDesc > langUsage > language[id]')) {
- $self->{language} = $temp->attr('id') if $temp->attr('id');
+ $self->{S_language} = $temp->attr('id') if $temp->attr('id');
};
@@ -293,25 +340,24 @@
#}
# Some meta data only reevant from the text
- #els
if ($type eq 'text') {
if ($temp = $dom->at('sourceDesc reference[type=complete]')) {
if (my $ref_text = _squish $temp->all_text) {
$ref_text =~ s!$REF_RE!!;
- $self->{reference} = $ref_text;
+ $self->{A_reference} = $ref_text;
};
};
$temp = $dom->at('textDesc > column');
if ($temp && ($temp = _squish $temp->all_text)) {
- $self->{text_column} = $temp;
+ $self->{S_text_column} = $temp;
};
if ($temp = $dom->at('biblStruct biblScope[type=pp]')) {
$temp = _squish $temp->all_text;
if ($temp && $temp =~ m/(\d+)\s*-\s*(\d+)/) {
- $self->{src_pages} = $1 . '-' . $2;
+ $self->{A_src_pages} = $1 . '-' . $2;
};
};
};
diff --git a/lib/KorAP/XML/Meta/Sgbr.pm b/lib/KorAP/XML/Meta/Sgbr.pm
index 2d33975..5e3ffb9 100644
--- a/lib/KorAP/XML/Meta/Sgbr.pm
+++ b/lib/KorAP/XML/Meta/Sgbr.pm
@@ -13,18 +13,18 @@
# Publisher
try {
- $self->{publisher} = $dom->at('publisher')->all_text;
+ $self->{A_publisher} = $dom->at('publisher')->all_text;
};
# Date of publication
try {
my $date = $dom->at('date')->all_text;
- $self->{sgbr_date} = $date;
+ $self->{D_sgbr_date} = $date;
if ($date =~ s!^\s*(\d{4})-(\d{2})-(\d{2}).*$!$1$2$3!) {
- $self->{pub_date} = $date;
+ $self->{D_pub_date} = $date;
}
else {
- $self->log->warn('"' . $date . '" is not a compatible pubDate');
+ $self->log->warn('"' . $date . '" is not a compatible pubDate');
};
};
@@ -32,56 +32,56 @@
try {
my $pp = $dom->at('pubPlace');
if ($pp) {
- $self->{pub_place} = $pp->all_text if $pp->all_text;
+ $self->{S_pub_place} = $pp->all_text if $pp->all_text;
};
if ($pp->attr('ref')) {
- $self->{reference} = $pp->attr('ref');
+ $self->{A_reference} = $pp->attr('ref');
};
};
if ($stmt = $dom->at('titleStmt')) {
# Title
try {
- $stmt->find('title')->each(
- sub {
- my $type = $_->attr('type') || 'main';
- $self->{title} = $_->all_text if $type eq 'main';
+ $stmt->find('title')->each(
+ sub {
+ my $type = $_->attr('type') || 'main';
+ $self->{T_title} = $_->all_text if $type eq 'main';
- # Only support the first subtitle
- $self->{sub_title} = $_->all_text
- if $type eq 'sub' && !$self->sub_title;
- }
- );
+ # Only support the first subtitle
+ $self->{T_sub_title} = $_->all_text
+ if $type eq 'sub' && !$self->sub_title;
+ }
+ );
};
# Author
try {
- my $author = $stmt->at('author')->attr('ref');
+ my $author = $stmt->at('author')->attr('ref');
- $author = $self->{_ref_author}->{$author};
+ $author = $self->{_ref_author}->{$author};
- if ($author) {
- my $array = ($self->{keywords} //= []);
- $self->{author} = $author->{name} // $author->{id};
+ if ($author) {
+ my $array = ($self->{K_keywords} //= []);
+ $self->{T_author} = $author->{name} // $author->{id};
- if ($author->{age}) {
- $self->{'sgbr_author_age_class'} = $author->{age};
- push @$array, 'sgbrAuthorAgeClass:' . $author->{age};
- };
- if ($author->{sex}) {
- $self->{'sgbr_author_sex'} = $author->{sex};
- push @$array, 'sgbrAuthorSex:' . $author->{sex};
- };
- };
+ if ($author->{age}) {
+ $self->{'S_sgbr_author_age_class'} = $author->{age};
+ push @$array, 'sgbrAuthorAgeClass:' . $author->{age};
+ };
+ if ($author->{sex}) {
+ $self->{'S_sgbr_author_sex'} = $author->{sex};
+ push @$array, 'sgbrAuthorSex:' . $author->{sex};
+ };
+ };
};
};
try {
my $kodex = $dom->at('item[rend]')->attr('rend');
if ($kodex) {
- my $array = ($self->{keywords} //= []);
- $self->{'sgbr_kodex'} = $kodex;
- push @$array, 'sgbrKodex:' . $kodex;
+ my $array = ($self->{K_keywords} //= []);
+ $self->{'S_sgbr_kodex'} = $kodex;
+ push @$array, 'sgbrKodex:' . $kodex;
};
};
}
@@ -89,41 +89,41 @@
elsif ($type eq 'doc') {
try {
$dom->find('particDesc person')->each(
- sub {
+ sub {
- my $hash = $self->{_ref_author}->{'#' . $_->attr('xml:id')} = {
- age => $_->attr('age'),
- sex => $_->attr('sex'),
- id => $_->attr('xml:id')
- };
+ my $hash = $self->{_ref_author}->{'#' . $_->attr('xml:id')} = {
+ age => $_->attr('age'),
+ sex => $_->attr('sex'),
+ id => $_->attr('xml:id')
+ };
- # Get name
- if ($_->at('persName')) {
- $hash->{name} = $_->at('persName')->all_text;
- };
- });
+ # Get name
+ if ($_->at('persName')) {
+ $hash->{name} = $_->at('persName')->all_text;
+ };
+ });
};
try {
my $lang = $dom->at('language[ident]')->attr('ident');
- $self->{language} = $lang;
+ $self->{S_language} = $lang;
};
try {
- $self->{'funder'} = $dom->at('funder > orgName')->all_text;
+ $self->{'A_funder'} = $dom->at('funder > orgName')->all_text;
};
try {
$stmt = $dom->find('fileDesc > titleStmt > title')->each(
- sub {
- my $type = $_->attr('type') || 'main';
- $self->{doc_title} = $_->all_text if $type eq 'main';
- if ($type eq 'sub') {
- my $sub_title = $self->{doc_sub_title};
- $self->{doc_sub_title} =
- ($sub_title ? $sub_title . ', ' : '') . $_->all_text;
- };
- }
+ sub {
+ my $type = $_->attr('type') || 'main';
+ $self->{T_doc_title} = $_->all_text if $type eq 'main';
+ if ($type eq 'sub') {
+ my $sub_title = $self->{T_doc_sub_title};
+ $self->{T_doc_sub_title} =
+ ($sub_title ? $sub_title . ', ' : '') . $_->all_text;
+ };
+ }
);
};
};
diff --git a/t/annotation/meta.t b/t/annotation/meta.t
index d4773d7..deea71f 100644
--- a/t/annotation/meta.t
+++ b/t/annotation/meta.t
@@ -25,36 +25,36 @@
my $meta = $doc->meta;
-is($meta->{title}, 'Beispiel Text', 'title');
-is($meta->{sub_title}, 'Beispiel Text Untertitel', 'title');
-is($meta->{pub_date}, '20010402', 'Publication date');
-is($meta->{pub_place}, 'Mannheim', 'Publication place');
-is($meta->{author}, 'Mustermann, Max', 'Author');
+is($meta->{T_title}, 'Beispiel Text', 'title');
+is($meta->{T_sub_title}, 'Beispiel Text Untertitel', 'title');
+is($meta->{D_pub_date}, '20010402', 'Publication date');
+is($meta->{S_pub_place}, 'Mannheim', 'Publication place');
+is($meta->{T_author}, 'Mustermann, Max', 'Author');
-is($meta->{publisher}, 'Artificial articles Inc.', 'Publisher');
-is($meta->{editor}, 'Monika Mustermann', 'Editor');
-is($meta->{text_type}, 'Zeitung: Tageszeitung', 'Text Type');
-is($meta->{text_type_art}, 'Bericht', 'Text Type Art');
-is($meta->{text_type_ref}, 'Aphorismen', 'Text Type Ref');
-ok(!$meta->{text_column}, 'Text Column');
-ok(!$meta->{text_domain}, 'Text Domain');
-is($meta->{creation_date}, '19990601', 'Creation Date');
+is($meta->{A_publisher}, 'Artificial articles Inc.', 'Publisher');
+is($meta->{A_editor}, 'Monika Mustermann', 'Editor');
+is($meta->{S_text_type}, 'Zeitung: Tageszeitung', 'Text Type');
+is($meta->{S_text_type_art}, 'Bericht', 'Text Type Art');
+is($meta->{S_text_type_ref}, 'Aphorismen', 'Text Type Ref');
+ok(!$meta->{S_text_column}, 'Text Column');
+ok(!$meta->{S_text_domain}, 'Text Domain');
+is($meta->{D_creation_date}, '19990601', 'Creation Date');
ok(!$meta->{license}, 'License');
ok(!$meta->{pages}, 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Edition Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Edition Statement');
-ok(!$meta->{reference}, 'Reference');
-is($meta->{language}, 'de', 'Language');
+ok(!$meta->{A_file_edition_statement}, 'File Edition Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Edition Statement');
+ok(!$meta->{A_reference}, 'Reference');
+is($meta->{S_language}, 'de', 'Language');
-is($meta->{doc_title}, 'Beispiel Dokument', 'Doc: title');
-ok(!$meta->{doc_sub_title}, 'Doc: subtitle');
-ok(!$meta->{doc_editor}, 'Doc: editor');
-ok(!$meta->{doc_author}, 'Doc: author');
+is($meta->{T_doc_title}, 'Beispiel Dokument', 'Doc: title');
+ok(!$meta->{T_doc_sub_title}, 'Doc: subtitle');
+ok(!$meta->{A_doc_editor}, 'Doc: editor');
+ok(!$meta->{T_doc_author}, 'Doc: author');
-is($meta->{corpus_title}, 'Werke von Beispiel', 'Corpus: title');
-ok(!$meta->{corpus_sub_title}, 'Corpus: subtitle');
-is($meta->{corpus_editor}, 'Mustermann, Monika', 'Corpus: editor');
-is($meta->{corpus_author}, 'Mustermann, Max', 'Corpus: author');
+is($meta->{T_corpus_title}, 'Werke von Beispiel', 'Corpus: title');
+ok(!$meta->{T_corpus_sub_title}, 'Corpus: subtitle');
+is($meta->{A_corpus_editor}, 'Mustermann, Monika', 'Corpus: editor');
+is($meta->{T_corpus_author}, 'Mustermann, Max', 'Corpus: author');
done_testing;
diff --git a/t/meta.t b/t/meta.t
index c8f580d..cb8c96a 100644
--- a/t/meta.t
+++ b/t/meta.t
@@ -28,18 +28,18 @@
is($doc->text_sigle, 'WPD/AAA/00001', 'ID');
my $meta = $doc->meta;
-is($meta->{title}, 'A', 'title');
+is($meta->{T_title}, 'A', 'title');
-ok(!$meta->{sub_title}, 'subTitle');
+ok(!$meta->{T_sub_title}, 'subTitle');
is($doc->corpus_sigle, 'WPD', 'corpusID');
-is($meta->{pub_date}, '20050328', 'pubDate');
-is($meta->{pub_place}, 'URL:http://de.wikipedia.org', 'pubPlace');
-is($meta->{text_class}->[0], 'freizeit-unterhaltung', 'TextClass');
-is($meta->{text_class}->[1], 'reisen', 'TextClass');
-is($meta->{text_class}->[2], 'wissenschaft', 'TextClass');
-is($meta->{text_class}->[3], 'populaerwissenschaft', 'TextClass');
-ok(!$meta->{text_class}->[4], 'TextClass');
-is($meta->{author}, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author');
+is($meta->{D_pub_date}, '20050328', 'pubDate');
+is($meta->{S_pub_place}, 'URL:http://de.wikipedia.org', 'pubPlace');
+is($meta->{K_text_class}->[0], 'freizeit-unterhaltung', 'TextClass');
+is($meta->{K_text_class}->[1], 'reisen', 'TextClass');
+is($meta->{K_text_class}->[2], 'wissenschaft', 'TextClass');
+is($meta->{K_text_class}->[3], 'populaerwissenschaft', 'TextClass');
+ok(!$meta->{K_text_class}->[4], 'TextClass');
+is($meta->{T_author}, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author');
#is($doc->author->[0], 'Ruru', 'author');
@@ -48,16 +48,16 @@
#ok(!$doc->author->[3], 'author');
# Additional information
-is($meta->{editor}, 'wikipedia.org', 'Editor');
-is($meta->{publisher}, 'Wikipedia', 'Publisher');
-is($meta->{creation_date}, '20050000', 'Creation date');
-ok(!$meta->{text_type}, 'No text_type');
-ok(!$meta->{text_type_art}, 'no text_type art');
-ok(!$meta->{text_type_ref}, 'no text_type ref');
-ok(!$meta->{text_domain}, 'no text_domain');
-ok(!$meta->{text_column}, 'no text_column');
-ok(!$meta->keywords('keywords'), 'no keywords');
-is($meta->keywords('text_class'), 'freizeit-unterhaltung reisen wissenschaft populaerwissenschaft', 'no text classes');
+is($meta->{A_editor}, 'wikipedia.org', 'Editor');
+is($meta->{A_publisher}, 'Wikipedia', 'Publisher');
+is($meta->{D_creation_date}, '20050000', 'Creation date');
+ok(!$meta->{S_text_type}, 'No text_type');
+ok(!$meta->{S_text_type_art}, 'no text_type art');
+ok(!$meta->{S_text_type_ref}, 'no text_type ref');
+ok(!$meta->{S_text_domain}, 'no text_domain');
+ok(!$meta->{S_text_column}, 'no text_column');
+ok(!$meta->keywords('K_keywords'), 'no keywords');
+is($meta->keywords('K_text_class'), 'freizeit-unterhaltung reisen wissenschaft populaerwissenschaft', 'no text classes');
#is($doc->coll_title, 'Wikipedia', 'Collection title');
#is($doc->coll_sub_title, 'Die freie Enzyklopädie', 'Collection subtitle');
@@ -70,28 +70,28 @@
ok($doc->parse, 'Parse document');
$meta = $doc->meta;
-is($meta->{title}, 'Fischer und Kolp im Sonnenhügel', 'title');
+is($meta->{T_title}, 'Fischer und Kolp im Sonnenhügel', 'title');
-ok(!$meta->{sub_title}, 'subTitle');
+ok(!$meta->{T_sub_title}, 'subTitle');
is($doc->text_sigle, 'A01/APR/13047', 'ID');
is($doc->corpus_sigle, 'A01', 'corpusID');
-is($meta->{pub_date}, '20010402', 'pubDate');
-ok(!$meta->{pub_place}, 'pubPlace');
-is($meta->{text_class}->[0], 'freizeit-unterhaltung', 'TextClass');
-is($meta->{text_class}->[1], 'vereine-veranstaltungen', 'TextClass');
-ok(!$meta->{text_class}->[2], 'TextClass');
-ok(!$meta->{author}, 'author');
+is($meta->{D_pub_date}, '20010402', 'pubDate');
+ok(!$meta->{S_pub_place}, 'pubPlace');
+is($meta->{K_text_class}->[0], 'freizeit-unterhaltung', 'TextClass');
+is($meta->{K_text_class}->[1], 'vereine-veranstaltungen', 'TextClass');
+ok(!$meta->{K_text_class}->[2], 'TextClass');
+ok(!$meta->{T_author}, 'author');
# Additional information
-ok(!$meta->{editor}, 'Editor');
-ok(!$meta->{publisher}, 'Publisher');
-is($meta->{creation_date}, '20010402', 'Creation date');
+ok(!$meta->{A_editor}, 'Editor');
+ok(!$meta->{A_publisher}, 'Publisher');
+is($meta->{D_creation_date}, '20010402', 'Creation date');
#ok(!$doc->coll_title, 'Collection title');
#ok(!$doc->coll_sub_title, 'Collection subtitle');
#ok(!$doc->coll_editor, 'Collection editor');
#ok(!$doc->coll_author, 'Collection author');
-ok(!$meta->{text_type}, 'text_type');
-is($meta->{text_type_art}, 'Bericht', 'text_type art');
+ok(!$meta->{S_text_type}, 'text_type');
+is($meta->{S_text_type_art}, 'Bericht', 'text_type art');
# ERL/0001
$path = catdir(dirname(__FILE__), 'corpus/ERL/00001');
@@ -100,23 +100,23 @@
ok($doc->parse, 'Parse document');
$meta = $doc->meta;
-is($meta->{title}, 'Amtsblatt des Landesbezirks Baden [diverse Erlasse]', 'title'); # Amtsblatt des Landesbezirks Baden [diverse Erlasse]
+is($meta->{T_title}, 'Amtsblatt des Landesbezirks Baden [diverse Erlasse]', 'title'); # Amtsblatt des Landesbezirks Baden [diverse Erlasse]
# MK2/ERL.00001
-ok(!$meta->{sub_title}, 'subTitle');
+ok(!$meta->{T_sub_title}, 'subTitle');
is($doc->text_sigle, 'MK2/ERL/00001', 'ID');
is($doc->corpus_sigle, 'MK2', 'corpusID');
-is($meta->{pub_date}, '00000000', 'pubDate');
-is($meta->{pub_place}, 'Karlsruhe', 'pubPlace');
-is($meta->{text_class}->[0], 'politik', 'TextClass');
-is($meta->{text_class}->[1], 'kommunalpolitik', 'TextClass');
-ok(!$meta->{text_class}->[2], 'TextClass');
-ok(!$meta->{author}, 'author');
+is($meta->{D_pub_date}, '00000000', 'pubDate');
+is($meta->{S_pub_place}, 'Karlsruhe', 'pubPlace');
+is($meta->{K_text_class}->[0], 'politik', 'TextClass');
+is($meta->{K_text_class}->[1], 'kommunalpolitik', 'TextClass');
+ok(!$meta->{K_text_class}->[2], 'TextClass');
+ok(!$meta->{T_author}, 'author');
# Additional information
-ok(!$meta->{editor}, 'Editor');
-is($meta->{publisher}, 'Badenia Verlag und Druckerei', 'Publisher');
-is($meta->{creation_date}, '19600000', 'Creation date');
+ok(!$meta->{A_editor}, 'Editor');
+is($meta->{A_publisher}, 'Badenia Verlag und Druckerei', 'Publisher');
+is($meta->{D_creation_date}, '19600000', 'Creation date');
# !!!
# diag 'Non-acceptance of creation date ranges may be temporary';
@@ -126,8 +126,8 @@
#ok(!$doc->coll_sub_title, 'Collection subtitle');
#ok(!$doc->coll_editor, 'Collection editor');
#ok(!$doc->coll_author, 'Collection author');
-is($meta->{text_type}, 'Erlass', 'text_type');
-ok(!$meta->{text_type_art}, 'text_type art');
+is($meta->{S_text_type}, 'Erlass', 'text_type');
+ok(!$meta->{S_text_type_art}, 'text_type art');
# A01/02035-substring
@@ -137,27 +137,27 @@
$meta = $doc->meta;
-is($meta->{title}, 'St. Galler Tagblatt, 11.01.2000, Ressort: TB-RSP (Abk.)', 'title'); # A00/JAN.02035
-ok(!$meta->{sub_title}, 'subTitle');
+is($meta->{T_title}, 'St. Galler Tagblatt, 11.01.2000, Ressort: TB-RSP (Abk.)', 'title'); # A00/JAN.02035
+ok(!$meta->{T_sub_title}, 'subTitle');
is($doc->text_sigle, 'A00/JAN/02035', 'ID');
is($doc->corpus_sigle, 'A00', 'corpusID');
-is($meta->{pub_date}, '20000111', 'pubDate');
-ok(!$meta->{pub_place}, 'pubPlace');
-is($meta->{text_class}->[0], 'sport', 'TextClass');
-is($meta->{text_class}->[1], 'ballsport', 'TextClass');
-ok(!$meta->{text_class}->[2], 'TextClass');
-ok(!$meta->{author}, 'author');
+is($meta->{D_pub_date}, '20000111', 'pubDate');
+ok(!$meta->{S_pub_place}, 'pubPlace');
+is($meta->{K_text_class}->[0], 'sport', 'TextClass');
+is($meta->{K_text_class}->[1], 'ballsport', 'TextClass');
+ok(!$meta->{K_text_class}->[2], 'TextClass');
+ok(!$meta->{T_author}, 'author');
# Additional information
-ok(!$meta->{editor}, 'Editor');
-ok(!$meta->{publisher}, 'Publisher');
-is($meta->{creation_date}, "20000111", 'Creation date');
+ok(!$meta->{A_editor}, 'Editor');
+ok(!$meta->{A_publisher}, 'Publisher');
+is($meta->{D_creation_date}, "20000111", 'Creation date');
#ok(!$doc->coll_title, 'Collection title');
#ok(!$doc->coll_sub_title, 'Collection subtitle');
#ok(!$doc->coll_editor, 'Collection editor');
#ok(!$doc->coll_author, 'Collection author');
-ok(!$meta->{text_type}, 'text_type');
-is($meta->{text_type_art}, 'Bericht', 'text_type art');
+ok(!$meta->{S_text_type}, 'text_type');
+is($meta->{S_text_type_art}, 'Bericht', 'text_type art');
# A01/02873-meta
$path = catdir(dirname(__FILE__), 'corpus/A00/02873-meta');
@@ -165,28 +165,28 @@
ok($doc->parse, 'Parse document');
$meta = $doc->meta;
-is($meta->{title}, 'Tradition und Moderne', 'title');
-ok(!$meta->{sub_title}, 'subTitle');
+is($meta->{T_title}, 'Tradition und Moderne', 'title');
+ok(!$meta->{T_sub_title}, 'subTitle');
is($doc->text_sigle, 'A00/JAN/02873', 'ID');
is($doc->corpus_sigle, 'A00', 'corpusID');
-is($meta->{pub_date}, '20000113', 'pubDate');
-ok(!$meta->{pub_place}, 'pubPlace');
-is($meta->{text_class}->[0], 'kultur', 'TextClass');
-is($meta->{text_class}->[1], 'film', 'TextClass');
-ok(!$meta->{text_class}->[2], 'TextClass');
-ok(!$meta->{author}, 'author');
+is($meta->{D_pub_date}, '20000113', 'pubDate');
+ok(!$meta->{S_pub_place}, 'pubPlace');
+is($meta->{K_text_class}->[0], 'kultur', 'TextClass');
+is($meta->{K_text_class}->[1], 'film', 'TextClass');
+ok(!$meta->{K_text_class}->[2], 'TextClass');
+ok(!$meta->{T_author}, 'author');
# Additional information
-ok(!$meta->{editor}, 'Editor');
-ok(!$meta->{publisher}, 'Publisher');
-is($meta->{creation_date}, "20000113", 'Creation date');
+ok(!$meta->{A_editor}, 'Editor');
+ok(!$meta->{A_publisher}, 'Publisher');
+is($meta->{D_creation_date}, "20000113", 'Creation date');
#ok(!$doc->coll_title, 'Collection title');
#ok(!$doc->coll_sub_title, 'Collection subtitle');
#ok(!$doc->coll_editor, 'Collection editor');
#ok(!$doc->coll_author, 'Collection author');
-ok(!$meta->{text_type}, 'text_type');
-is($meta->{text_type_art}, 'Bericht', 'text_type art');
+ok(!$meta->{S_text_type}, 'text_type');
+is($meta->{S_text_type_art}, 'Bericht', 'text_type art');
# A01/05663-unbalanced
@@ -195,28 +195,28 @@
ok($doc->parse, 'Parse document');
$meta = $doc->meta;
-is($meta->{title}, 'Mehr Arbeitslose im Dezember', 'title');
-ok(!$meta->{sub_title}, 'subTitle');
+is($meta->{T_title}, 'Mehr Arbeitslose im Dezember', 'title');
+ok(!$meta->{T_sub_title}, 'subTitle');
is($doc->text_sigle, 'A00/JAN/05663', 'ID');
is($doc->corpus_sigle, 'A00', 'corpusID');
-is($meta->{pub_date}, '20000124', 'pubDate');
-ok(!$meta->{pub_place}, 'pubPlace');
-is($meta->{text_class}->[0], 'gesundheit-ernaehrung', 'TextClass');
-is($meta->{text_class}->[1], 'gesundheit', 'TextClass');
-ok(!$meta->{text_class}->[2], 'TextClass');
-ok(!$meta->{author}, 'author');
+is($meta->{D_pub_date}, '20000124', 'pubDate');
+ok(!$meta->{S_pub_place}, 'pubPlace');
+is($meta->{K_text_class}->[0], 'gesundheit-ernaehrung', 'TextClass');
+is($meta->{K_text_class}->[1], 'gesundheit', 'TextClass');
+ok(!$meta->{K_text_class}->[2], 'TextClass');
+ok(!$meta->{T_author}, 'author');
# Additional information
-ok(!$meta->{editor}, 'Editor');
-ok(!$meta->{publisher}, 'Publisher');
-is($meta->{creation_date}, "20000124", 'Creation date');
+ok(!$meta->{A_editor}, 'Editor');
+ok(!$meta->{A_publisher}, 'Publisher');
+is($meta->{D_creation_date}, "20000124", 'Creation date');
#ok(!$doc->coll_title, 'Collection title');
#ok(!$doc->coll_sub_title, 'Collection subtitle');
#ok(!$doc->coll_editor, 'Collection editor');
#ok(!$doc->coll_author, 'Collection author');
-ok(!$meta->{text_type}, 'text_type');
-is($meta->{text_type_art}, 'Bericht', 'text_type art');
+ok(!$meta->{S_text_type}, 'text_type');
+is($meta->{S_text_type_art}, 'Bericht', 'text_type art');
# A01/07452-deep
$path = catdir(dirname(__FILE__), 'corpus/A00/07452-deep');
@@ -224,28 +224,28 @@
ok($doc->parse, 'Parse document');
$meta = $doc->meta;
-is($meta->{title}, 'Wil im Dezember 1999', 'title');
-ok(!$meta->{sub_title}, 'subTitle');
+is($meta->{T_title}, 'Wil im Dezember 1999', 'title');
+ok(!$meta->{T_sub_title}, 'subTitle');
is($doc->text_sigle, 'A00/JAN/07452', 'ID');
is($doc->corpus_sigle, 'A00', 'corpusID');
-is($meta->{pub_date}, '20000129', 'pubDate');
-ok(!$meta->{pub_place}, 'pubPlace');
-is($meta->{text_class}->[0], 'politik', 'TextClass');
-is($meta->{text_class}->[1], 'kommunalpolitik', 'TextClass');
-ok(!$meta->{text_class}->[2], 'TextClass');
-ok(!$meta->{author}, 'author');
+is($meta->{D_pub_date}, '20000129', 'pubDate');
+ok(!$meta->{S_pub_place}, 'pubPlace');
+is($meta->{K_text_class}->[0], 'politik', 'TextClass');
+is($meta->{K_text_class}->[1], 'kommunalpolitik', 'TextClass');
+ok(!$meta->{K_text_class}->[2], 'TextClass');
+ok(!$meta->{T_author}, 'author');
# Additional information
-ok(!$meta->{editor}, 'Editor');
-ok(!$meta->{publisher}, 'Publisher');
-is($meta->{creation_date}, "20000129", 'Creation date');
+ok(!$meta->{A_editor}, 'Editor');
+ok(!$meta->{A_publisher}, 'Publisher');
+is($meta->{D_creation_date}, "20000129", 'Creation date');
#ok(!$doc->coll_title, 'Collection title');
#ok(!$doc->coll_sub_title, 'Collection subtitle');
#ok(!$doc->coll_editor, 'Collection editor');
#ok(!$doc->coll_author, 'Collection author');
-ok(!$meta->{text_type}, 'text_type');
-is($meta->{text_type_art}, 'Bericht', 'text_type art');
+ok(!$meta->{S_text_type}, 'text_type');
+is($meta->{S_text_type_art}, 'Bericht', 'text_type art');
# ART
$path = catdir(dirname(__FILE__), 'corpus/artificial');
@@ -259,31 +259,31 @@
$meta = $doc->meta;
# Metdata
-is($meta->{title}, 'Artificial Title', 'title');
-is($meta->{sub_title}, 'Artificial Subtitle', 'subTitle');
+is($meta->{T_title}, 'Artificial Title', 'title');
+is($meta->{T_sub_title}, 'Artificial Subtitle', 'subTitle');
is($doc->text_sigle, 'ART/ABC/00001', 'ID');
is($doc->corpus_sigle, 'ART', 'corpusID');
-is($meta->{pub_date}, '20010402', 'pubDate');
-is($meta->{pub_place}, 'Mannheim', 'pubPlace');
-is($meta->{pub_place_key}, 'DE', 'pubPlace key');
-is($meta->{text_class}->[0], 'freizeit-unterhaltung', 'TextClass');
-is($meta->{text_class}->[1], 'vereine-veranstaltungen', 'TextClass');
-ok(!$meta->{text_class}->[2], 'TextClass');
+is($meta->{D_pub_date}, '20010402', 'pubDate');
+is($meta->{S_pub_place}, 'Mannheim', 'pubPlace');
+is($meta->{S_pub_place_key}, 'DE', 'pubPlace key');
+is($meta->{K_text_class}->[0], 'freizeit-unterhaltung', 'TextClass');
+is($meta->{K_text_class}->[1], 'vereine-veranstaltungen', 'TextClass');
+ok(!$meta->{K_text_class}->[2], 'TextClass');
#is($doc->author->[0], 'Ruru', 'author');
#is($doc->author->[1], 'Jens.Ol', 'author');
#is($doc->author->[2], 'Aglarech', 'author');
-is($meta->{author}, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author');
+is($meta->{T_author}, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author');
# Additional information
-is($meta->{editor}, 'Nils Diewald', 'Editor');
-is($meta->{publisher}, 'Artificial articles Inc.', 'Publisher');
-is($meta->{creation_date}, '19990601', 'Creation date');
+is($meta->{A_editor}, 'Nils Diewald', 'Editor');
+is($meta->{A_publisher}, 'Artificial articles Inc.', 'Publisher');
+is($meta->{D_creation_date}, '19990601', 'Creation date');
#is($doc->coll_title, 'Artificial articles', 'Collection title');
#is($doc->coll_sub_title, 'Best of!', 'Collection subtitle');
#is($doc->coll_editor, 'Nils Diewald', 'Collection editor');
#is($doc->coll_author, 'Nils Diewald', 'Collection author');
-is($meta->{text_type}, 'Zeitung: Tageszeitung', 'No text_type');
-is($meta->{text_type_art}, 'Bericht', 'text_type art');
+is($meta->{S_text_type}, 'Zeitung: Tageszeitung', 'No text_type');
+is($meta->{S_text_type_art}, 'Bericht', 'text_type art');
# Multipath headers
@@ -301,43 +301,43 @@
is($doc->doc_sigle, 'VDI14/JAN', 'doc sigle');
is($meta->corpus_sigle, 'VDI14', 'corpus sigle');
-is($meta->{title}, '10- Zz mit Zahl', 'title');
+is($meta->{T_title}, '10- Zz mit Zahl', 'title');
-ok(!$meta->{sub_title}, 'subtitle');
-is($meta->{pub_date}, '20140117', 'pubdate');
-is($meta->{pub_place}, 'Düsseldorf', 'pubplace');
-is($meta->{author}, 'Windhövel, Kerstin', 'author');
-is($meta->{publisher}, 'VDI Verlag GmbH', 'publisher');
-ok(!$meta->{editor}, 'editor');
+ok(!$meta->{T_sub_title}, 'subtitle');
+is($meta->{D_pub_date}, '20140117', 'pubdate');
+is($meta->{S_pub_place}, 'Düsseldorf', 'pubplace');
+is($meta->{T_author}, 'Windhövel, Kerstin', 'author');
+is($meta->{A_publisher}, 'VDI Verlag GmbH', 'publisher');
+ok(!$meta->{A_editor}, 'editor');
-ok(!$meta->{text_type}, 'text type');
-ok(!$meta->{text_type_art}, 'text type art');
-ok(!$meta->{text_type_ref}, 'text type ref');
-ok(!$meta->{text_column}, 'text column');
-ok(!$meta->{text_domain}, 'text domain');
-ok(!$meta->{creation_date}, 'creation date');
-ok(!$meta->{availability}, 'License');
+ok(!$meta->{S_text_type}, 'text type');
+ok(!$meta->{S_text_type_art}, 'text type art');
+ok(!$meta->{S_text_type_ref}, 'text type ref');
+ok(!$meta->{S_text_column}, 'text column');
+ok(!$meta->{S_text_domain}, 'text domain');
+ok(!$meta->{D_creation_date}, 'creation date');
+ok(!$meta->{S_availability}, 'License');
ok(!$meta->{pages}, 'Pages');
-ok(!$meta->{file_edition_statement}, 'file edition statement');
-ok(!$meta->{bibl_edition_statement}, 'bibl edition statement');
-is($meta->{reference}, 'VDI nachrichten, 17.01.2014, S. 10; 10- Zz mit Zahl [Ausführliche Zitierung nicht verfügbar]', 'Reference');
+ok(!$meta->{A_file_edition_statement}, 'file edition statement');
+ok(!$meta->{A_bibl_edition_statement}, 'bibl edition statement');
+is($meta->{A_reference}, 'VDI nachrichten, 17.01.2014, S. 10; 10- Zz mit Zahl [Ausführliche Zitierung nicht verfügbar]', 'Reference');
-ok(!$doc->{language}, 'Language');
+ok(!$doc->{S_language}, 'Language');
# !!!
# diag 'This may be "de" in the future';
-is($meta->{doc_title}, 'VDI nachrichten, Januar 2014', 'Doc title');
-ok(!$meta->{doc_sub_title}, 'Doc Sub title');
-ok(!$meta->{doc_editor}, 'Doc editor');
-ok(!$meta->{doc_author}, 'Doc author');
+is($meta->{T_doc_title}, 'VDI nachrichten, Januar 2014', 'Doc title');
+ok(!$meta->{T_doc_sub_title}, 'Doc Sub title');
+ok(!$meta->{A_doc_editor}, 'Doc editor');
+ok(!$meta->{T_doc_author}, 'Doc author');
-is($meta->{corpus_title}, 'VDI nachrichten', 'Corpus title');
-ok(!$meta->{corpus_sub_title}, 'Corpus Sub title');
-is($meta->{corpus_editor}, 'Verein Deutscher Ingenieure', 'Corpus editor');
-ok(!$meta->{corpus_author}, 'Corpus author');
+is($meta->{T_corpus_title}, 'VDI nachrichten', 'Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Corpus Sub title');
+is($meta->{A_corpus_editor}, 'Verein Deutscher Ingenieure', 'Corpus editor');
+ok(!$meta->{T_corpus_author}, 'Corpus author');
-is($meta->keywords('keywords'), '', 'Keywords');
-is($meta->keywords('text_class'), 'Freizeit-Unterhaltung Reisen Politik Ausland', 'Text class');
+is($meta->keywords('K_keywords'), '', 'Keywords');
+is($meta->keywords('K_text_class'), 'Freizeit-Unterhaltung Reisen Politik Ausland', 'Text class');
# WDD
$path = catdir(dirname(__FILE__), 'corpus/WDD/G27/38989');
@@ -350,44 +350,44 @@
is($doc->doc_sigle, 'WDD11/G27', 'doc sigle');
is($doc->corpus_sigle, 'WDD11', 'corpus sigle');
-is($meta->{title}, 'Diskussion:Gunter A. Pilz', 'title');
-ok(!$meta->{sub_title}, 'subtitle');
-is($meta->{pub_date}, '20111029', 'pubdate');
-is($meta->{pub_place}, 'URL:http://de.wikipedia.org', 'pubplace');
+is($meta->{T_title}, 'Diskussion:Gunter A. Pilz', 'title');
+ok(!$meta->{T_sub_title}, 'subtitle');
+is($meta->{D_pub_date}, '20111029', 'pubdate');
+is($meta->{S_pub_place}, 'URL:http://de.wikipedia.org', 'pubplace');
-is($meta->{author}, '€pa, u.a.', 'author');
-is($meta->{publisher}, 'Wikipedia', 'publisher');
-is($meta->{editor}, 'wikipedia.org', 'Editor');
+is($meta->{T_author}, '€pa, u.a.', 'author');
+is($meta->{A_publisher}, 'Wikipedia', 'publisher');
+is($meta->{A_editor}, 'wikipedia.org', 'Editor');
-is($meta->{text_type}, 'Diskussionen zu Enzyklopädie-Artikeln', 'text type');
-ok(!$meta->{text_type_art}, 'text type art');
-ok(!$meta->{text_type_ref}, 'text type ref');
-ok(!$meta->{text_column}, 'text column');
-ok(!$meta->{text_domain}, 'text domain');
+is($meta->{S_text_type}, 'Diskussionen zu Enzyklopädie-Artikeln', 'text type');
+ok(!$meta->{S_text_type_art}, 'text type art');
+ok(!$meta->{S_text_type_ref}, 'text type ref');
+ok(!$meta->{S_text_column}, 'text column');
+ok(!$meta->{S_text_domain}, 'text domain');
-is($meta->{creation_date}, '20070707', 'creation date');
-is($meta->{availability}, 'CC-BY-SA', 'License');
+is($meta->{D_creation_date}, '20070707', 'creation date');
+is($meta->{S_availability}, 'CC-BY-SA', 'License');
ok(!$meta->{pages}, 'Pages');
-ok(!$meta->{file_edition_statement}, 'file edition statement');
-ok(!$meta->{bibl_edition_statement}, 'bibl edition statement');
-is($meta->{reference}, 'Diskussion:Gunter A. Pilz, In: Wikipedia - URL:http://de.wikipedia.org/wiki/Diskussion:Gunter_A._Pilz: Wikipedia, 2007', 'Reference');
+ok(!$meta->{A_file_edition_statement}, 'file edition statement');
+ok(!$meta->{A_bibl_edition_statement}, 'bibl edition statement');
+is($meta->{A_reference}, 'Diskussion:Gunter A. Pilz, In: Wikipedia - URL:http://de.wikipedia.org/wiki/Diskussion:Gunter_A._Pilz: Wikipedia, 2007', 'Reference');
-is($meta->{language}, 'de', 'Language');
+is($meta->{S_language}, 'de', 'Language');
-is($meta->{doc_title}, 'Wikipedia, Diskussionen zu Artikeln mit Anfangsbuchstabe G, Teil 27', 'Doc title');
-ok(!$meta->{doc_sub_title}, 'Doc Sub title');
-ok(!$meta->{doc_editor}, 'Doc editor');
-ok(!$meta->{doc_author}, 'Doc author');
+is($meta->{T_doc_title}, 'Wikipedia, Diskussionen zu Artikeln mit Anfangsbuchstabe G, Teil 27', 'Doc title');
+ok(!$meta->{T_doc_sub_title}, 'Doc Sub title');
+ok(!$meta->{A_doc_editor}, 'Doc editor');
+ok(!$meta->{T_doc_author}, 'Doc author');
-is($meta->{corpus_title}, 'Wikipedia', 'Corpus title');
-ok(!$meta->{corpus_sub_title}, 'Corpus Sub title');
-is($meta->{corpus_editor}, 'wikipedia.org', 'Corpus editor');
-ok(!$meta->{corpus_author}, 'Corpus author');
+is($meta->{T_corpus_title}, 'Wikipedia', 'Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Corpus Sub title');
+is($meta->{A_corpus_editor}, 'wikipedia.org', 'Corpus editor');
+ok(!$meta->{T_corpus_author}, 'Corpus author');
-is($meta->keywords('keywords'), '', 'Keywords');
-is($meta->keywords('text_class'), '', 'Text class');
+is($meta->keywords('K_keywords'), '', 'Keywords');
+is($meta->keywords('K_text_class'), '', 'Text class');
-is($meta->{availability}, 'CC-BY-SA', 'Availability');
+is($meta->{S_availability}, 'CC-BY-SA', 'Availability');
use_ok('KorAP::XML::Meta::I5');
@@ -397,42 +397,42 @@
ok($meta->parse($dom->at('idsHeader'), 'corpus'), 'Parse corpus header');
my $hash = $meta->to_hash;
-is($hash->{availability}, 'CC-BY-SA', 'Availability');
-is($hash->{language}, 'de', 'Language');
-is($hash->{corpus_title}, 'Reden und Interviews', 'Corpus title');
+is($hash->{S_availability}, 'CC-BY-SA', 'Availability');
+is($hash->{S_language}, 'de', 'Language');
+is($hash->{T_corpus_title}, 'Reden und Interviews', 'Corpus title');
is($hash->{corpus_sigle}, 'REI', 'Corpus Sigle');
ok($meta->parse($dom->find('idsHeader')->[1], 'doc'), 'Parse corpus header');
$hash = $meta->to_hash;
-is($hash->{availability}, 'CC-BY-SA', 'Availability');
-is($hash->{language}, 'de', 'Language');
-is($hash->{corpus_title}, 'Reden und Interviews', 'Corpus title');
+is($hash->{S_availability}, 'CC-BY-SA', 'Availability');
+is($hash->{S_language}, 'de', 'Language');
+is($hash->{T_corpus_title}, 'Reden und Interviews', 'Corpus title');
is($hash->{corpus_sigle}, 'REI', 'Corpus Sigle');
is($hash->{doc_sigle}, 'REI/BNG', 'Document Sigle');
-is($hash->{doc_title}, 'Reden der Bundestagsfraktion Bündnis 90/DIE GRÜNEN, (2002-2006)', 'Document Sigle');
+is($hash->{T_doc_title}, 'Reden der Bundestagsfraktion Bündnis 90/DIE GRÜNEN, (2002-2006)', 'Document Sigle');
ok($meta->parse($dom->find('idsHeader')->[2], 'text'), 'Parse corpus header');
$hash = $meta->to_hash;
-is($hash->{availability}, 'CC-BY-SA', 'Availability');
-is($hash->{language}, 'de', 'Language');
-is($hash->{corpus_title}, 'Reden und Interviews', 'Corpus title');
+is($hash->{S_availability}, 'CC-BY-SA', 'Availability');
+is($hash->{S_language}, 'de', 'Language');
+is($hash->{T_corpus_title}, 'Reden und Interviews', 'Corpus title');
is($hash->{corpus_sigle}, 'REI', 'Corpus Sigle');
is($hash->{doc_sigle}, 'REI/BNG', 'Document Sigle');
-is($hash->{doc_title}, 'Reden der Bundestagsfraktion Bündnis 90/DIE GRÜNEN, (2002-2006)', 'Document Sigle');
+is($hash->{T_doc_title}, 'Reden der Bundestagsfraktion Bündnis 90/DIE GRÜNEN, (2002-2006)', 'Document Sigle');
is($hash->{text_sigle}, 'REI/BNG/00001');
-is($hash->{title}, 'Energiewirtschaft');
-is($hash->{sub_title}, 'Rede im Deutschen Bundestag am 19.01.2002');
-is($hash->{creation_date}, '20020119');
-is($hash->{pub_date}, '20020119');
-is($hash->{pub_place_key}, 'DE');
-is($hash->{reference}, 'Hustedt, Michaele: Energiewirtschaft. Rede im Deutschen Bundestag am 19.01.2002, Hrsg: Bundestagsfraktion Bündnis 90/DIE GRÜNEN [Ausführliche Zitierung nicht verfügbar]');
-is($hash->{text_class}->[0], 'politik');
-is($hash->{text_class}->[1], 'inland');
-is($hash->{author}, 'Hustedt, Michaele');
-is($hash->{pub_place}, 'Berlin');
+is($hash->{T_title}, 'Energiewirtschaft');
+is($hash->{T_sub_title}, 'Rede im Deutschen Bundestag am 19.01.2002');
+is($hash->{D_creation_date}, '20020119');
+is($hash->{D_pub_date}, '20020119');
+is($hash->{S_pub_place_key}, 'DE');
+is($hash->{A_reference}, 'Hustedt, Michaele: Energiewirtschaft. Rede im Deutschen Bundestag am 19.01.2002, Hrsg: Bundestagsfraktion Bündnis 90/DIE GRÜNEN [Ausführliche Zitierung nicht verfügbar]');
+is($hash->{K_text_class}->[0], 'politik');
+is($hash->{K_text_class}->[1], 'inland');
+is($hash->{T_author}, 'Hustedt, Michaele');
+is($hash->{S_pub_place}, 'Berlin');
# UMB45/D38/00001
@@ -447,7 +447,7 @@
is($doc->doc_sigle, 'UMB45/D38', 'doc sigle');
is($doc->corpus_sigle, 'UMB45', 'corpus sigle');
-is($meta->{title}, 'In: Über Schuld und Aufgabe der geistigen Führungsschicht im deutschen politischen Leben der Gegenwart. - Göttingen, 1955', 'title');
+is($meta->{T_title}, 'In: Über Schuld und Aufgabe der geistigen Führungsschicht im deutschen politischen Leben der Gegenwart. - Göttingen, 1955', 'title');
done_testing;
diff --git a/t/meta_caching.t b/t/meta_caching.t
index a0787fa..14e3826 100644
--- a/t/meta_caching.t
+++ b/t/meta_caching.t
@@ -37,17 +37,17 @@
# REI
my $rei = $cache->get('REI');
-is($rei->{availability}, 'CC-BY-SA');
-is($rei->{language}, 'de');
-is($rei->{corpus_title}, 'Reden und Interviews');
+is($rei->{S_availability}, 'CC-BY-SA');
+is($rei->{S_language}, 'de');
+is($rei->{T_corpus_title}, 'Reden und Interviews');
# REI/BNG
my $rei_bng = $cache->get('REI/BNG');
-is($rei_bng->{availability}, 'CC-BY-SA');
-is($rei_bng->{language}, 'de');
-is($rei_bng->{corpus_title}, 'Reden und Interviews');
-is($rei_bng->{doc_title}, 'Reden der Bundestagsfraktion Bündnis 90/DIE GRÜNEN, (2002-2006)');
+is($rei_bng->{S_availability}, 'CC-BY-SA');
+is($rei_bng->{S_language}, 'de');
+is($rei_bng->{T_corpus_title}, 'Reden und Interviews');
+is($rei_bng->{T_doc_title}, 'Reden der Bundestagsfraktion Bündnis 90/DIE GRÜNEN, (2002-2006)');
done_testing;
__END__
diff --git a/t/meta_koral.t b/t/meta_koral.t
new file mode 100644
index 0000000..931f106
--- /dev/null
+++ b/t/meta_koral.t
@@ -0,0 +1,106 @@
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use Benchmark ':hireswallclock';
+use Mojo::DOM;
+use Mojo::File;
+use Mojo::ByteStream 'b';
+use Data::Dumper;
+
+use File::Basename 'dirname';
+use File::Spec::Functions 'catdir';
+
+use_ok('KorAP::XML::Krill');
+
+# WPD/00001 - load once with and once without a trailing slash in the path
+my $path = catdir(dirname(__FILE__), 'corpus/WPD/00001');
+my $doc = KorAP::XML::Krill->new(path => "$path/");
+ok($doc, 'Load Korap::Document');
+like($doc->path, qr!\Q$path\E/!, 'Path');
+
+$doc = KorAP::XML::Krill->new(path => $path);
+ok($doc, 'Load Korap::Document');
+like($doc->path, qr!\Q$path\E/$!, 'Path');
+
+ok($doc->parse, 'Parse document');
+
+# Every check below looks up one entry of this field list by its key
+my $fields = $doc->meta->to_koral_fields;
+
+# TODO: Check for foundries, tokenSource, layerInfos!
+
+_contains($fields, title => 'A', 'text');
+_contains($fields, textSigle => 'WPD/AAA/00001', 'string');
+_contains($fields, docSigle => 'WPD/AAA', 'string');
+_contains($fields, corpusSigle => 'WPD', 'string');
+_contains($fields, pubDate => '2005-03-28', 'date');
+_contains($fields, pubPlace => 'URL:http://de.wikipedia.org', 'string');
+_contains($fields, textClass => 'freizeit-unterhaltung reisen wissenschaft populaerwissenschaft', 'keywords');
+_contains($fields, author => 'Ruru; Jens.Ol; Aglarech; u.a.', 'text');
+
+_contains($fields, editor => 'data:,wikipedia.org', 'attachement');
+_contains($fields, publisher => 'data:,Wikipedia', 'attachement');
+_contains($fields, creationDate => '2005', 'date');
+_contains_not($fields, 'textType');
+_contains_not($fields, 'textTypeArt');
+_contains_not($fields, 'textTypeRef');
+_contains_not($fields, 'textDomain');
+_contains_not($fields, 'keywords');
+
+# diag Dumper $fields;
+_contains_not($fields, 'subTitle');
+
+
+# Pass if the first field in $fields with key $key has the given value and type.
+# Array values of keywords fields are joined by single spaces before comparing.
+sub _contains {
+  my ($fields, $key, $value, $type) = @_;
+
+  # Report failures at the line of the caller
+  local $Test::Builder::Level = $Test::Builder::Level + 1;
+  my $tb = Test::More->builder;
+
+  for my $field (@$fields) {
+    next unless $field->{key} eq $key;
+
+    my $got = $field->{value};
+    $got = join(' ', @{$got})
+      if $field->{type} eq 'type:keywords' && ref($got) eq 'ARRAY';
+
+    if ($got ne $value) {
+      $tb->ok(0, 'Contains ' . $key . ' but value ' . $got . ' != ' . $value);
+    }
+    elsif ($field->{type} ne 'type:' . $type) {
+      $tb->ok(0, 'Contains ' . $key . ' but type ' . $field->{type} . ' != ' . $type);
+    }
+    else {
+      $tb->ok(1, 'Contains ' . $key);
+    };
+
+    # Only the first field with this key is checked
+    return;
+  };
+
+  $tb->ok(0, 'Contains ' . $key);
+};
+
+# Pass if no field in $fields has the key $key.
+sub _contains_not {
+  my ($fields, $key) = @_;
+
+  # Report failures at the line of the caller
+  local $Test::Builder::Level = $Test::Builder::Level + 1;
+  my $tb = Test::More->builder;
+
+  for my $field (@$fields) {
+    next if $field->{key} ne $key;
+    $tb->ok(0, 'Contains not ' . $key);
+    return;
+  };
+
+  $tb->ok(1, 'Contains not ' . $key);
+};
+
+done_testing;
+__END__
diff --git a/t/real/bzk.t b/t/real/bzk.t
index 90a0c58..f45200f 100644
--- a/t/real/bzk.t
+++ b/t/real/bzk.t
@@ -34,45 +34,45 @@
is($doc->corpus_sigle, 'BZK', 'Correct corpus sigle');
my $meta = $doc->meta;
-is($meta->{title}, 'Unser gemeinsames Werk wird siegreich sein', 'Title');
-ok(!$meta->{sub_title}, 'No SubTitle');
-ok(!$meta->{author}, 'Author');
-ok(!$meta->{editor}, 'Editor');
-is($meta->{pub_place}, 'Berlin', 'PubPlace');
-ok(!$meta->{publisher}, 'Publisher');
+is($meta->{T_title}, 'Unser gemeinsames Werk wird siegreich sein', 'Title');
+ok(!$meta->{T_sub_title}, 'No SubTitle');
+ok(!$meta->{T_author}, 'Author');
+ok(!$meta->{A_editor}, 'Editor');
+is($meta->{S_pub_place}, 'Berlin', 'PubPlace');
+ok(!$meta->{A_publisher}, 'Publisher');
-is($meta->{text_type}, 'Zeitung: Tageszeitung', 'Correct Text Type');
+is($meta->{S_text_type}, 'Zeitung: Tageszeitung', 'Correct Text Type');
-ok(!$meta->{text_type_art}, 'Correct Text Type Art');
-is($meta->{text_type_ref}, 'Tageszeitung', 'Correct Text Type Ref');
-is($meta->{text_domain}, 'Politik', 'Correct Text Domain');
-is($meta->{text_column}, 'POLITIK', 'Correct Text Column');
-is($meta->{text_class}->[0], 'politik', 'Correct Text Class');
-is($meta->{text_class}->[1], 'ausland', 'Correct Text Class');
-ok(!$meta->{text_class}->[2], 'Correct Text Class');
+ok(!$meta->{S_text_type_art}, 'Correct Text Type Art');
+is($meta->{S_text_type_ref}, 'Tageszeitung', 'Correct Text Type Ref');
+is($meta->{S_text_domain}, 'Politik', 'Correct Text Domain');
+is($meta->{S_text_column}, 'POLITIK', 'Correct Text Column');
+is($meta->{K_text_class}->[0], 'politik', 'Correct Text Class');
+is($meta->{K_text_class}->[1], 'ausland', 'Correct Text Class');
+ok(!$meta->{K_text_class}->[2], 'Correct Text Class');
-is($meta->{pub_date}, '19590101', 'Creation date');
-is($meta->{creation_date}, '19590101', 'Creation date');
-is($meta->{availability}, 'ACA-NC-LC', 'License');
+is($meta->{D_pub_date}, '19590101', 'Creation date');
+is($meta->{D_creation_date}, '19590101', 'Creation date');
+is($meta->{S_availability}, 'ACA-NC-LC', 'License');
ok(!$meta->{pages}, 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Statement');
+ok(!$meta->{A_file_edition_statement}, 'File Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Statement');
-is($meta->{reference} . "\n", <<'REF', 'Reference');
+is($meta->{A_reference} . "\n", <<'REF', 'Reference');
Neues Deutschland, [Tageszeitung], 01.01.1959, Jg. 14, Berliner Ausgabe, S. 1. - Sachgebiet: Politik, Originalressort: POLITIK; Unser gemeinsames Werk wird siegreich sein
REF
-is($meta->{language}, 'de', 'Language');
+is($meta->{S_language}, 'de', 'Language');
-is($meta->{corpus_title}, 'Bonner Zeitungskorpus', 'Correct Corpus title');
-ok(!$meta->{corpus_sub_title}, 'Correct Corpus sub title');
-ok(!$meta->{corpus_author}, 'Correct Corpus author');
-ok(!$meta->{corpus_editor}, 'Correct Corpus editor');
+is($meta->{T_corpus_title}, 'Bonner Zeitungskorpus', 'Correct Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus sub title');
+ok(!$meta->{T_corpus_author}, 'Correct Corpus author');
+ok(!$meta->{A_corpus_editor}, 'Correct Corpus editor');
-is($meta->{doc_title}, 'Neues Deutschland', 'Correct Doc title');
-is($meta->{doc_sub_title}, 'Organ des Zentralkomitees der Sozialistischen Einheitspartei Deutschlands', 'Correct Doc sub title');
-ok(!$meta->{doc_author}, 'Correct Doc author');
-ok(!$meta->{doc_editor}, 'Correct doc editor');
+is($meta->{T_doc_title}, 'Neues Deutschland', 'Correct Doc title');
+is($meta->{T_doc_sub_title}, 'Organ des Zentralkomitees der Sozialistischen Einheitspartei Deutschlands', 'Correct Doc sub title');
+ok(!$meta->{T_doc_author}, 'Correct Doc author');
+ok(!$meta->{A_doc_editor}, 'Correct doc editor');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
@@ -110,7 +110,7 @@
ok(!exists $output->{editor}, 'Publisher');
is($output->{pubPlace}, 'Berlin', 'PubPlace');
-ok(!exists $output->{publisher}, 'Publisher');
+ok(!exists $output->{publisher}, 'Publisher');
is($output->{textType}, 'Zeitung: Tageszeitung', 'Correct Text Type');
ok(!exists $output->{textTypeArt}, 'Correct Text Type Art');
diff --git a/t/real/bzk_2.t b/t/real/bzk_2.t
index 4597541..3e39924 100644
--- a/t/real/bzk_2.t
+++ b/t/real/bzk_2.t
@@ -26,46 +26,46 @@
is($doc->corpus_sigle, 'BZK', 'Correct corpus sigle');
my $meta = $doc->meta;
-is($meta->{title}, 'Saragat-Partei zerfällt', 'Title');
-ok(!$meta->{sub_title}, 'No SubTitle');
-ok(!$meta->{author}, 'Author');
-ok(!$meta->{editor}, 'Editor');
-is($meta->{pub_place}, 'Berlin', 'PubPlace');
-is($meta->{pub_date}, '19590219', 'PubDate');
-ok(!$meta->{publisher}, 'Publisher');
+is($meta->{T_title}, 'Saragat-Partei zerfällt', 'Title');
+ok(!$meta->{T_sub_title}, 'No SubTitle');
+ok(!$meta->{T_author}, 'Author');
+ok(!$meta->{A_editor}, 'Editor');
+is($meta->{S_pub_place}, 'Berlin', 'PubPlace');
+is($meta->{D_pub_date}, '19590219', 'PubDate');
+ok(!$meta->{A_publisher}, 'Publisher');
-is($meta->{text_type}, 'Zeitung: Tageszeitung', 'Correct Text Type');
+is($meta->{S_text_type}, 'Zeitung: Tageszeitung', 'Correct Text Type');
-ok(!$meta->{text_type_art}, 'Correct Text Type Art');
-is($meta->{text_type_ref}, 'Tageszeitung', 'Correct Text Type Ref');
-is($meta->{text_domain}, 'Politik', 'Correct Text Domain');
-is($meta->{text_column}, 'POLITIK', 'Correct Text Column');
-is($meta->{text_class}->[0], 'politik', 'Correct Text Class');
-is($meta->{text_class}->[1], 'ausland', 'Correct Text Class');
-ok(!$meta->{text_class}->[2], 'Correct Text Class');
+ok(!$meta->{S_text_type_art}, 'Correct Text Type Art');
+is($meta->{S_text_type_ref}, 'Tageszeitung', 'Correct Text Type Ref');
+is($meta->{S_text_domain}, 'Politik', 'Correct Text Domain');
+is($meta->{S_text_column}, 'POLITIK', 'Correct Text Column');
+is($meta->{K_text_class}->[0], 'politik', 'Correct Text Class');
+is($meta->{K_text_class}->[1], 'ausland', 'Correct Text Class');
+ok(!$meta->{K_text_class}->[2], 'Correct Text Class');
-is($meta->{creation_date}, '19590219', 'Creation date');
-is($meta->{availability}, 'ACA-NC-LC', 'License');
+is($meta->{D_creation_date}, '19590219', 'Creation date');
+is($meta->{S_availability}, 'ACA-NC-LC', 'License');
ok(!$meta->{pages}, 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Statement');
+ok(!$meta->{A_file_edition_statement}, 'File Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Statement');
-is($meta->{reference} . "\n", <<'REF', 'Reference');
+is($meta->{A_reference} . "\n", <<'REF', 'Reference');
Neues Deutschland, [Tageszeitung], 19.02.1959, Jg. 14, Berliner Ausgabe, S. 7. - Sachgebiet: Politik, Originalressort: POLITIK; Saragat-Partei zerfällt
REF
-is($meta->{language}, 'de', 'Language');
+is($meta->{S_language}, 'de', 'Language');
-is($meta->{corpus_title}, 'Bonner Zeitungskorpus', 'Correct Corpus title');
-ok(!$meta->{corpus_sub_title}, 'Correct Corpus sub title');
-ok(!$meta->{corpus_author}, 'Correct Corpus author');
-ok(!$meta->{corpus_editor}, 'Correct Corpus editor');
+is($meta->{T_corpus_title}, 'Bonner Zeitungskorpus', 'Correct Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus sub title');
+ok(!$meta->{T_corpus_author}, 'Correct Corpus author');
+ok(!$meta->{A_corpus_editor}, 'Correct Corpus editor');
-is($meta->{doc_title}, 'Neues Deutschland', 'Correct Doc title');
-is($meta->{doc_sub_title}, 'Organ des Zentralkomitees der Sozialistischen Einheitspartei Deutschlands', 'Correct Doc sub title');
-ok(!$meta->{doc_author}, 'Correct Doc author');
-ok(!$meta->{doc_editor}, 'Correct doc editor');
+is($meta->{T_doc_title}, 'Neues Deutschland', 'Correct Doc title');
+is($meta->{T_doc_sub_title}, 'Organ des Zentralkomitees der Sozialistischen Einheitspartei Deutschlands', 'Correct Doc sub title');
+ok(!$meta->{T_doc_author}, 'Correct Doc author');
+ok(!$meta->{A_doc_editor}, 'Correct doc editor');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
diff --git a/t/real/drukola.t b/t/real/drukola.t
index 084ccef..9a92444 100644
--- a/t/real/drukola.t
+++ b/t/real/drukola.t
@@ -30,38 +30,38 @@
is($doc->corpus_sigle, 'BBU', 'Correct corpus sigle');
my $meta = $doc->meta;
-is($meta->{title}, 'Schimbă vorba', 'Title');
-is($meta->{pub_place}, 'URL:http://www.bucurenci.ro', 'PubPlace');
-is($meta->{pub_date}, '20131005', 'Creation Date');
-ok(!$meta->{sub_title}, 'SubTitle');
-is($meta->{author}, 'DragoÈ™ Bucurenci', 'Author');
+is($meta->{T_title}, 'Schimbă vorba', 'Title');
+is($meta->{S_pub_place}, 'URL:http://www.bucurenci.ro', 'PubPlace');
+is($meta->{D_pub_date}, '20131005', 'Creation Date');
+ok(!$meta->{T_sub_title}, 'SubTitle');
+is($meta->{T_author}, 'DragoÈ™ Bucurenci', 'Author');
-ok(!$meta->{publisher}, 'Publisher');
-ok(!$meta->{editor}, 'Editor');
-is($meta->{translator}, '[TRANSLATOR]', 'Translator');
-#is($meta->{text_type}, 'Autobiographie', 'Correct Text Type');
-ok(!$meta->{text_type_art}, 'Correct Text Type Art');
-# is($meta->{text_type_ref}, '', 'Correct Text Type Ref');
-ok(!$meta->{text_column}, 'Correct Text Column');
-ok(!$meta->{text_domain}, 'Correct Text Domain');
-ok(!$meta->{creation_date}, 'Creation Date');
+ok(!$meta->{A_publisher}, 'Publisher');
+ok(!$meta->{A_editor}, 'Editor');
+is($meta->{A_translator}, '[TRANSLATOR]', 'Translator');
+#is($meta->{S_text_type}, 'Autobiographie', 'Correct Text Type');
+ok(!$meta->{S_text_type_art}, 'Correct Text Type Art');
+# is($meta->{S_text_type_ref}, '', 'Correct Text Type Ref');
+ok(!$meta->{S_text_column}, 'Correct Text Column');
+ok(!$meta->{S_text_domain}, 'Correct Text Domain');
+ok(!$meta->{D_creation_date}, 'Creation Date');
ok(!$meta->{pages}, 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Ed Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Ed Statement');
-ok(!$meta->{reference}, 'Reference');
-is($meta->{language}, 'ro', 'Language');
+ok(!$meta->{A_file_edition_statement}, 'File Ed Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Ed Statement');
+ok(!$meta->{A_reference}, 'Reference');
+is($meta->{S_language}, 'ro', 'Language');
-#is($meta->{corpus_title}, 'Goethes Werke', 'Correct Corpus title');
-ok(!$meta->{corpus_sub_title}, 'Correct Corpus Sub title');
-#is($meta->{corpus_author}, 'Goethe, Johann Wolfgang von', 'Correct Corpus author');
-#is($meta->{corpus_editor}, 'Trunz, Erich', 'Correct Corpus editor');
+#is($meta->{T_corpus_title}, 'Goethes Werke', 'Correct Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus Sub title');
+#is($meta->{T_corpus_author}, 'Goethe, Johann Wolfgang von', 'Correct Corpus author');
+#is($meta->{A_corpus_editor}, 'Trunz, Erich', 'Correct Corpus editor');
-#is($meta->{doc_title}, 'Goethe: Autobiographische Schriften II, (1817-1825, 1832)',
+#is($meta->{T_doc_title}, 'Goethe: Autobiographische Schriften II, (1817-1825, 1832)',
# 'Correct Doc title');
-ok(!$meta->{doc_sub_title}, 'Correct Doc Sub title');
-ok(!$meta->{doc_author}, 'Correct Doc author');
-ok(!$meta->{doc_editor}, 'Correct Doc editor');
+ok(!$meta->{T_doc_sub_title}, 'Correct Doc Sub title');
+ok(!$meta->{T_doc_author}, 'Correct Doc author');
+ok(!$meta->{A_doc_editor}, 'Correct Doc editor');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
@@ -116,10 +116,10 @@
$meta = $doc->meta;
-ok(!exists $meta->{doc_title}, 'No doc title');
+ok(!exists $meta->{T_doc_title}, 'No doc title');
-ok(!exists $meta->{translator}, 'No translator');
+ok(!exists $meta->{A_translator}, 'No translator');
-ok(!exists $meta->{text_class}, 'No translator');
+ok(!exists $meta->{K_text_class}, 'No text class');
@@ -131,7 +131,7 @@
is($meta->text_sigle, 'Corola-Journal/-/247_a_537', 'Text Sigle');
is($meta->doc_sigle, 'Corola-Journal/-', 'Doc Sigle');
is($meta->corpus_sigle, 'Corola-Journal', 'Corpus Sigle');
-is($meta->{text_class}->[0], 'Sport', 'Text class');
+is($meta->{K_text_class}->[0], 'Sport', 'Text class');
$path = catdir(dirname(__FILE__), '../corpus/CoRoLa/Corola-Journal/COLEGIUL NATIONAL „OCTAV BANCILA“ - IASI/326_a_562');
@@ -142,7 +142,7 @@
is($meta->text_sigle, 'Corola-Journal/COLEGIUL NATIONAL „OCTAV BANCILA“ - IASI/326_a_562', 'Text Sigle');
is($meta->doc_sigle, 'Corola-Journal/COLEGIUL NATIONAL „OCTAV BANCILA“ - IASI', 'Doc Sigle');
is($meta->corpus_sigle, 'Corola-Journal', 'Corpus Sigle');
-is($meta->{title}, 'APOGEUL ARHITECTURĂ ȘI DESIGN', 'Title');
+is($meta->{T_title}, 'APOGEUL ARHITECTURĂ ȘI DESIGN', 'Title');
done_testing;
__END__
diff --git a/t/real/goethe-2.t b/t/real/goethe-2.t
index 85484eb..ff3cba4 100644
--- a/t/real/goethe-2.t
+++ b/t/real/goethe-2.t
@@ -29,40 +29,40 @@
is($doc->corpus_sigle, 'GOE', 'Correct corpus sigle');
my $meta = $doc->meta;
-is($meta->{title}, 'Autobiographische Einzelheiten', 'Title');
-is($meta->{pub_place}, 'München', 'PubPlace');
-is($meta->{pub_date}, '19820000', 'Creation Date');
-ok(!$meta->{sub_title}, 'SubTitle');
-is($meta->{author}, 'Goethe, Johann Wolfgang von', 'Author');
+is($meta->{T_title}, 'Autobiographische Einzelheiten', 'Title');
+is($meta->{S_pub_place}, 'München', 'PubPlace');
+is($meta->{D_pub_date}, '19820000', 'Creation Date');
+ok(!$meta->{T_sub_title}, 'SubTitle');
+is($meta->{T_author}, 'Goethe, Johann Wolfgang von', 'Author');
-is($meta->{publisher}, 'Verlag C. H. Beck', 'Publisher');
-ok(!$meta->{editor}, 'Publisher');
-is($meta->{text_type}, 'Autobiographie', 'Correct Text Type');
-ok(!$meta->{text_type_art}, 'Correct Text Type Art');
-ok(!$meta->{text_type_ref}, 'Correct Text Type Ref');
-ok(!$meta->{text_column}, 'Correct Text Column');
-ok(!$meta->{text_domain}, 'Correct Text Domain');
-is($meta->{creation_date}, '18200000', 'Creation Date');
-is($meta->{availability}, 'QAO-NC', 'License');
-is($meta->{src_pages}, '529-547', 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Ed Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Ed Statement');
-is($meta->{reference} . "\n", <<'REF', 'Author');
+is($meta->{A_publisher}, 'Verlag C. H. Beck', 'Publisher');
+ok(!$meta->{A_editor}, 'Publisher');
+is($meta->{S_text_type}, 'Autobiographie', 'Correct Text Type');
+ok(!$meta->{S_text_type_art}, 'Correct Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'Correct Text Type Ref');
+ok(!$meta->{S_text_column}, 'Correct Text Column');
+ok(!$meta->{S_text_domain}, 'Correct Text Domain');
+is($meta->{D_creation_date}, '18200000', 'Creation Date');
+is($meta->{S_availability}, 'QAO-NC', 'License');
+is($meta->{A_src_pages}, '529-547', 'Pages');
+ok(!$meta->{A_file_edition_statement}, 'File Ed Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Ed Statement');
+is($meta->{A_reference} . "\n", <<'REF', 'Author');
Goethe, Johann Wolfgang von: Autobiographische Einzelheiten, (Geschrieben bis 1832), In: Goethe, Johann Wolfgang von: Goethes Werke, Bd. 10, Autobiographische Schriften II, Hrsg.: Trunz, Erich. München: Verlag C. H. Beck, 1982, S. 529-547
REF
-is($meta->{language}, 'de', 'Language');
+is($meta->{S_language}, 'de', 'Language');
-is($meta->{corpus_title}, 'Goethes Werke', 'Correct Corpus title');
-ok(!$meta->{corpus_sub_title}, 'Correct Corpus Sub title');
-is($meta->{corpus_author}, 'Goethe, Johann Wolfgang von', 'Correct Corpus author');
-is($meta->{corpus_editor}, 'Trunz, Erich', 'Correct Corpus editor');
+is($meta->{T_corpus_title}, 'Goethes Werke', 'Correct Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus Sub title');
+is($meta->{T_corpus_author}, 'Goethe, Johann Wolfgang von', 'Correct Corpus author');
+is($meta->{A_corpus_editor}, 'Trunz, Erich', 'Correct Corpus editor');
-is($meta->{doc_title}, 'Goethe: Autobiographische Schriften II, (1817-1825, 1832)',
+is($meta->{T_doc_title}, 'Goethe: Autobiographische Schriften II, (1817-1825, 1832)',
'Correct Doc title');
-ok(!$meta->{doc_sub_title}, 'Correct Doc Sub title');
-ok(!$meta->{doc_author}, 'Correct Doc author');
-ok(!$meta->{doc_editor}, 'Correct Doc editor');
+ok(!$meta->{T_doc_sub_title}, 'Correct Doc Sub title');
+ok(!$meta->{T_doc_author}, 'Correct Doc author');
+ok(!$meta->{A_doc_editor}, 'Correct Doc editor');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
diff --git a/t/real/goethe-tagged.t b/t/real/goethe-tagged.t
index 6e0e179..13d9365 100644
--- a/t/real/goethe-tagged.t
+++ b/t/real/goethe-tagged.t
@@ -29,41 +29,41 @@
is($doc->corpus_sigle, 'GOE', 'Correct corpus sigle');
my $meta = $doc->meta;
-is($meta->{title}, 'Autobiographische Einzelheiten', 'Title');
+is($meta->{T_title}, 'Autobiographische Einzelheiten', 'Title');
-is($meta->{pub_place}, 'München', 'PubPlace');
-is($meta->{pub_date}, '19820000', 'Creation Date');
-ok(!$meta->{sub_title}, 'SubTitle');
-is($meta->{author}, 'Goethe, Johann Wolfgang von', 'Author');
+is($meta->{S_pub_place}, 'München', 'PubPlace');
+is($meta->{D_pub_date}, '19820000', 'Publication Date');
+ok(!$meta->{T_sub_title}, 'SubTitle');
+is($meta->{T_author}, 'Goethe, Johann Wolfgang von', 'Author');
-is($meta->{publisher}, 'Verlag C. H. Beck', 'Publisher');
-ok(!$meta->{editor}, 'Publisher');
-is($meta->{text_type}, 'Autobiographie', 'Correct Text Type');
-ok(!$meta->{text_type_art}, 'Correct Text Type Art');
-ok(!$meta->{text_type_ref}, 'Correct Text Type Ref');
-ok(!$meta->{text_column}, 'Correct Text Column');
-ok(!$meta->{text_domain}, 'Correct Text Domain');
-is($meta->{creation_date}, '18200000', 'Creation Date');
+is($meta->{A_publisher}, 'Verlag C. H. Beck', 'Publisher');
+ok(!$meta->{A_editor}, 'Editor');
+is($meta->{S_text_type}, 'Autobiographie', 'Correct Text Type');
+ok(!$meta->{S_text_type_art}, 'Correct Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'Correct Text Type Ref');
+ok(!$meta->{S_text_column}, 'Correct Text Column');
+ok(!$meta->{S_text_domain}, 'Correct Text Domain');
+is($meta->{D_creation_date}, '18200000', 'Creation Date');
-is($meta->{src_pages}, '529-547', 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Ed Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Ed Statement');
-is($meta->{reference} . "\n", <<'REF', 'Author');
+is($meta->{A_src_pages}, '529-547', 'Pages');
+ok(!$meta->{A_file_edition_statement}, 'File Ed Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Ed Statement');
+is($meta->{A_reference} . "\n", <<'REF', 'Reference');
Goethe, Johann Wolfgang von: Autobiographische Einzelheiten, (Geschrieben bis 1832), In: Goethe, Johann Wolfgang von: Goethes Werke, Bd. 10, Autobiographische Schriften II, Hrsg.: Trunz, Erich. München: Verlag C. H. Beck, 1982, S. 529-547
REF
-is($meta->{language}, 'de', 'Language');
+is($meta->{S_language}, 'de', 'Language');
-is($meta->{corpus_title}, 'Goethes Werke', 'Correct Corpus title');
-ok(!$meta->{corpus_sub_title}, 'Correct Corpus Sub title');
-is($meta->{corpus_author}, 'Goethe, Johann Wolfgang von', 'Correct Corpus author');
-is($meta->{corpus_editor}, 'Trunz, Erich', 'Correct Corpus editor');
+is($meta->{T_corpus_title}, 'Goethes Werke', 'Correct Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus Sub title');
+is($meta->{T_corpus_author}, 'Goethe, Johann Wolfgang von', 'Correct Corpus author');
+is($meta->{A_corpus_editor}, 'Trunz, Erich', 'Correct Corpus editor');
-is($meta->{doc_title}, 'Goethe: Autobiographische Schriften II, (1817-1825, 1832)',
+is($meta->{T_doc_title}, 'Goethe: Autobiographische Schriften II, (1817-1825, 1832)',
'Correct Doc title');
-ok(!$meta->{doc_sub_title}, 'Correct Doc Sub title');
-ok(!$meta->{doc_author}, 'Correct Doc author');
-ok(!$meta->{doc_editor}, 'Correct Doc editor');
+ok(!$meta->{T_doc_sub_title}, 'Correct Doc Sub title');
+ok(!$meta->{T_doc_author}, 'Correct Doc author');
+ok(!$meta->{A_doc_editor}, 'Correct Doc editor');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
diff --git a/t/real/goethe.t b/t/real/goethe.t
index d63849b..5bc5eb2 100644
--- a/t/real/goethe.t
+++ b/t/real/goethe.t
@@ -28,40 +28,40 @@
is($doc->corpus_sigle, 'GOE', 'Correct corpus sigle');
my $meta = $doc->meta;
-is($meta->{title}, 'Autobiographische Einzelheiten', 'Title');
-is($meta->{pub_place}, 'München', 'PubPlace');
-is($meta->{pub_date}, '19820000', 'Creation Date');
-ok(!$meta->{sub_title}, 'SubTitle');
-is($meta->{author}, 'Goethe, Johann Wolfgang von', 'Author');
+is($meta->{T_title}, 'Autobiographische Einzelheiten', 'Title');
+is($meta->{S_pub_place}, 'München', 'PubPlace');
+is($meta->{D_pub_date}, '19820000', 'Publication Date');
+ok(!$meta->{T_sub_title}, 'SubTitle');
+is($meta->{T_author}, 'Goethe, Johann Wolfgang von', 'Author');
-is($meta->{publisher}, 'Verlag C. H. Beck', 'Publisher');
-ok(!$meta->{editor}, 'Publisher');
-is($meta->{text_type}, 'Autobiographie', 'Correct Text Type');
-ok(!$meta->{text_type_art}, 'Correct Text Type Art');
-ok(!$meta->{text_type_ref}, 'Correct Text Type Ref');
-ok(!$meta->{text_column}, 'Correct Text Column');
-ok(!$meta->{text_domain}, 'Correct Text Domain');
-is($meta->{creation_date}, '18200000', 'Creation Date');
-is($meta->{availability}, 'QAO-NC', 'License');
-is($meta->{src_pages}, '529-547', 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Ed Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Ed Statement');
-is($meta->{reference} . "\n", <<'REF', 'Author');
+is($meta->{A_publisher}, 'Verlag C. H. Beck', 'Publisher');
+ok(!$meta->{A_editor}, 'Editor');
+is($meta->{S_text_type}, 'Autobiographie', 'Correct Text Type');
+ok(!$meta->{S_text_type_art}, 'Correct Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'Correct Text Type Ref');
+ok(!$meta->{S_text_column}, 'Correct Text Column');
+ok(!$meta->{S_text_domain}, 'Correct Text Domain');
+is($meta->{D_creation_date}, '18200000', 'Creation Date');
+is($meta->{S_availability}, 'QAO-NC', 'License');
+is($meta->{A_src_pages}, '529-547', 'Pages');
+ok(!$meta->{A_file_edition_statement}, 'File Ed Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Ed Statement');
+is($meta->{A_reference} . "\n", <<'REF', 'Reference');
Goethe, Johann Wolfgang von: Autobiographische Einzelheiten, (Geschrieben bis 1832), In: Goethe, Johann Wolfgang von: Goethes Werke, Bd. 10, Autobiographische Schriften II, Hrsg.: Trunz, Erich. München: Verlag C. H. Beck, 1982, S. 529-547
REF
-is($meta->{language}, 'de', 'Language');
+is($meta->{S_language}, 'de', 'Language');
-is($meta->{corpus_title}, 'Goethes Werke', 'Correct Corpus title');
-ok(!$meta->{corpus_sub_title}, 'Correct Corpus Sub title');
-is($meta->{corpus_author}, 'Goethe, Johann Wolfgang von', 'Correct Corpus author');
-is($meta->{corpus_editor}, 'Trunz, Erich', 'Correct Corpus editor');
+is($meta->{T_corpus_title}, 'Goethes Werke', 'Correct Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus Sub title');
+is($meta->{T_corpus_author}, 'Goethe, Johann Wolfgang von', 'Correct Corpus author');
+is($meta->{A_corpus_editor}, 'Trunz, Erich', 'Correct Corpus editor');
-is($meta->{doc_title}, 'Goethe: Autobiographische Schriften II, (1817-1825, 1832)',
+is($meta->{T_doc_title}, 'Goethe: Autobiographische Schriften II, (1817-1825, 1832)',
'Correct Doc title');
-ok(!$meta->{doc_sub_title}, 'Correct Doc Sub title');
-ok(!$meta->{doc_author}, 'Correct Doc author');
-ok(!$meta->{doc_editor}, 'Correct Doc editor');
+ok(!$meta->{T_doc_sub_title}, 'Correct Doc Sub title');
+ok(!$meta->{T_doc_author}, 'Correct Doc author');
+ok(!$meta->{A_doc_editor}, 'Correct Doc editor');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
@@ -100,7 +100,7 @@
ok(!exists $output->{subTitle}, 'subTitle');
is($output->{publisher}, 'Verlag C. H. Beck', 'Publisher');
-ok(!exists $output->{editor}, 'Editor');
+ok(!exists $output->{editor}, 'Editor');
is($output->{textType}, 'Autobiographie', 'Correct Text Type');
ok(!exists $output->{textTypeArt}, 'Correct Text Type');
ok(!exists $output->{textTypeRef}, 'Correct Text Type');
diff --git a/t/real/hnc.t b/t/real/hnc.t
index cde02d9..df2a162 100644
--- a/t/real/hnc.t
+++ b/t/real/hnc.t
@@ -29,43 +29,43 @@
is($doc->corpus_sigle, 'HNC', 'Correct corpus sigle');
my $meta = $doc->meta;
-is($meta->{title}, 'GNU Free Documentation License', 'Title');
-is($meta->{pub_place}, 'H_PUBPLACE', 'PubPlace');
+is($meta->{T_title}, 'GNU Free Documentation License', 'Title');
+is($meta->{S_pub_place}, 'H_PUBPLACE', 'PubPlace');
# Defined on document level as
# idsHeader > fileDesc > publicationStmt > pubDate == 2005/08/16
# idsHeader > fileDesc > biblFull > publicationStmt > pubDate == 2003/07/08-2014/05/03
# idsHeader > fileDesc > biblFull > publicationStmt > sourceDesc > biblStruct > monogr > imprint > pubDate == 2003/07/08-2014/05/03
-# is($meta->{pub_date}, '20030708', 'Publication date');
-ok(!$meta->{sub_title}, 'SubTitle');
-is($meta->{author}, 'Addbot', 'Author');
+# is($meta->{D_pub_date}, '20030708', 'Publication date');
+ok(!$meta->{T_sub_title}, 'SubTitle');
+is($meta->{T_author}, 'Addbot', 'Author');
-is($meta->{publisher}, 'H_PUBLISHER', 'Publisher');
-ok(!$meta->{editor}, 'Editor');
+is($meta->{A_publisher}, 'H_PUBLISHER', 'Publisher');
+ok(!$meta->{A_editor}, 'Editor');
ok(!$meta->{translator}, 'Translator');
-ok(!$meta->{text_type}, 'Correct Text Type');
-ok(!$meta->{text_type_art}, 'Correct Text Type Art');
-ok(!$meta->{text_type_ref}, 'Correct Text Type Ref');
-ok(!$meta->{text_column}, 'Correct Text Column');
-ok(!$meta->{text_domain}, 'Correct Text Domain');
-is($meta->{creation_date}, '20130302', 'Creation Date');
+ok(!$meta->{S_text_type}, 'Correct Text Type');
+ok(!$meta->{S_text_type_art}, 'Correct Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'Correct Text Type Ref');
+ok(!$meta->{S_text_column}, 'Correct Text Column');
+ok(!$meta->{S_text_domain}, 'Correct Text Domain');
+is($meta->{D_creation_date}, '20130302', 'Creation Date');
ok(!$meta->{pages}, 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Ed Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Ed Statement');
-ok(!$meta->{reference}, 'Reference');
-is($meta->{language}, 'hu', 'Language');
+ok(!$meta->{A_file_edition_statement}, 'File Ed Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Ed Statement');
+ok(!$meta->{A_reference}, 'Reference');
+is($meta->{S_language}, 'hu', 'Language');
-ok(!$meta->{corpus_title}, 'Correct Corpus title');
-ok(!$meta->{corpus_sub_title}, 'Correct Corpus Sub title');
-ok(!$meta->{corpus_author}, 'Correct Corpus author');
-ok(!$meta->{corpus_editor}, 'Correct Corpus editor');
+ok(!$meta->{T_corpus_title}, 'Correct Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus Sub title');
+ok(!$meta->{T_corpus_author}, 'Correct Corpus author');
+ok(!$meta->{A_corpus_editor}, 'Correct Corpus editor');
-is($meta->{doc_title}, 'MNSZ hivatalos korpusz: Wikipédia cikkek', 'Correct Doc title');
-ok(!$meta->{doc_sub_title}, 'Correct Doc Sub title');
-ok(!$meta->{doc_author}, 'Correct Doc author');
-ok(!$meta->{doc_editor}, 'Correct Doc editor');
+is($meta->{T_doc_title}, 'MNSZ hivatalos korpusz: Wikipédia cikkek', 'Correct Doc title');
+ok(!$meta->{T_doc_sub_title}, 'Correct Doc Sub title');
+ok(!$meta->{T_doc_author}, 'Correct Doc author');
+ok(!$meta->{A_doc_editor}, 'Correct Doc editor');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
diff --git a/t/real/lwc.t b/t/real/lwc.t
index d69e3d2..1c104a5 100644
--- a/t/real/lwc.t
+++ b/t/real/lwc.t
@@ -30,37 +30,37 @@
is($doc->corpus_sigle, 'WPD17', 'Correct corpus sigle');
my $meta = $doc->meta;
-is($meta->{title}, '0er', 'Title');
-is($meta->{pub_place}, 'URL:http://de.wikipedia.org', 'PubPlace');
-is($meta->{pub_date}, '20170701', 'Creation Date');
-ok(!$meta->{sub_title}, 'SubTitle');
-is($meta->{author}, 'Rogi.Official, u.a.', 'Author');
+is($meta->{T_title}, '0er', 'Title');
+is($meta->{S_pub_place}, 'URL:http://de.wikipedia.org', 'PubPlace');
+is($meta->{D_pub_date}, '20170701', 'Publication Date');
+ok(!$meta->{T_sub_title}, 'SubTitle');
+is($meta->{T_author}, 'Rogi.Official, u.a.', 'Author');
-is($meta->{publisher}, 'Wikipedia', 'Publisher');
-is($meta->{editor},'wikipedia.org', 'Editor');
+is($meta->{A_publisher}, 'Wikipedia', 'Publisher');
+is($meta->{A_editor},'wikipedia.org', 'Editor');
ok(!$meta->{translator}, 'Translator');
-is($meta->{text_type}, 'Enzyklopädie', 'Correct Text Type');
-is($meta->{text_type_art}, 'Enzyklopädie-Artikel', 'Correct Text Type Art');
-ok(!$meta->{text_type_ref}, 'Correct Text Type Ref');
-ok(!$meta->{text_column}, 'Correct Text Column');
-ok(!$meta->{text_domain}, 'Correct Text Domain');
-is($meta->{creation_date},'20150511', 'Creation Date');
+is($meta->{S_text_type}, 'Enzyklopädie', 'Correct Text Type');
+is($meta->{S_text_type_art}, 'Enzyklopädie-Artikel', 'Correct Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'Correct Text Type Ref');
+ok(!$meta->{S_text_column}, 'Correct Text Column');
+ok(!$meta->{S_text_domain}, 'Correct Text Domain');
+is($meta->{D_creation_date},'20150511', 'Creation Date');
ok(!$meta->{pages}, 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Ed Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Ed Statement');
-is($meta->{reference}, '0er, In: Wikipedia - URL:http://de.wikipedia.org/wiki/0er: Wikipedia, 2017', 'Reference');
-is($meta->{language}, 'de', 'Language');
+ok(!$meta->{A_file_edition_statement}, 'File Ed Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Ed Statement');
+is($meta->{A_reference}, '0er, In: Wikipedia - URL:http://de.wikipedia.org/wiki/0er: Wikipedia, 2017', 'Reference');
+is($meta->{S_language}, 'de', 'Language');
-is($meta->{corpus_title}, 'Wikipedia', 'Correct Corpus title');
-ok(!$meta->{corpus_sub_title}, 'Correct Corpus Sub title');
-ok(!$meta->{corpus_author}, 'Correct Corpus author');
-is($meta->{corpus_editor}, 'wikipedia.org', 'Correct Corpus editor');
+is($meta->{T_corpus_title}, 'Wikipedia', 'Correct Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus Sub title');
+ok(!$meta->{T_corpus_author}, 'Correct Corpus author');
+is($meta->{A_corpus_editor}, 'wikipedia.org', 'Correct Corpus editor');
-is($meta->{doc_title}, 'Wikipedia, Artikel mit Anfangszahl 0, Teil 00', 'Correct Doc title');
-ok(!$meta->{doc_sub_title}, 'Correct Doc Sub title');
-ok(!$meta->{doc_author}, 'Correct Doc author');
-ok(!$meta->{doc_editor}, 'Correct Doc editor');
+is($meta->{T_doc_title}, 'Wikipedia, Artikel mit Anfangszahl 0, Teil 00', 'Correct Doc title');
+ok(!$meta->{T_doc_sub_title}, 'Correct Doc Sub title');
+ok(!$meta->{T_doc_author}, 'Correct Doc author');
+ok(!$meta->{A_doc_editor}, 'Correct Doc editor');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
@@ -112,11 +112,11 @@
$meta = $doc->meta;
-is($meta->{doc_title}, 'Wikipedia, Artikel mit Anfangszahl 0, Teil 60', 'No doc title');
+is($meta->{T_doc_title}, 'Wikipedia, Artikel mit Anfangszahl 0, Teil 60', 'No doc title');
ok(!exists $meta->{translator}, 'No translator');
-is($meta->{text_class}->[0], 'staat-gesellschaft', 'text class');
-is($meta->{text_class}->[1], 'verbrechen', 'text class');
+is($meta->{K_text_class}->[0], 'staat-gesellschaft', 'text class');
+is($meta->{K_text_class}->[1], 'verbrechen', 'text class');
diff --git a/t/real/rei.t b/t/real/rei.t
index 0d4d62c..b375c24 100644
--- a/t/real/rei.t
+++ b/t/real/rei.t
@@ -31,45 +31,45 @@
is($doc->corpus_sigle, 'REI', 'Correct corpus sigle');
my $meta = $doc->meta;
-is($meta->{title}, 'Friedensgutachten der führenden Friedensforschungsinstitute', 'Title');
-is($meta->{sub_title}, 'Rede im Deutschen Bundestag am 14.06.2002', 'SubTitle');
-is($meta->{author}, 'Nachtwei, Winfried', 'Author');
-ok(!$meta->{editor}, 'Editor');
-is($meta->{pub_place}, 'Berlin', 'PubPlace');
-ok(!$meta->{publisher}, 'Publisher');
+is($meta->{T_title}, 'Friedensgutachten der führenden Friedensforschungsinstitute', 'Title');
+is($meta->{T_sub_title}, 'Rede im Deutschen Bundestag am 14.06.2002', 'SubTitle');
+is($meta->{T_author}, 'Nachtwei, Winfried', 'Author');
+ok(!$meta->{A_editor}, 'Editor');
+is($meta->{S_pub_place}, 'Berlin', 'PubPlace');
+ok(!$meta->{A_publisher}, 'Publisher');
-ok(!$meta->{text_type}, 'No Text Type');
-ok(!$meta->{text_type_art}, 'No Text Type Art');
-ok(!$meta->{text_type_ref}, 'No Text Type Ref');
-ok(!$meta->{text_domain}, 'No Text Domain');
-ok(!$meta->{text_column}, 'No Text Column');
+ok(!$meta->{S_text_type}, 'No Text Type');
+ok(!$meta->{S_text_type_art}, 'No Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'No Text Type Ref');
+ok(!$meta->{S_text_domain}, 'No Text Domain');
+ok(!$meta->{S_text_column}, 'No Text Column');
-is($meta->{text_class}->[0], 'politik', 'Correct Text Class');
-is($meta->{text_class}->[1], 'inland', 'Correct Text Class');
-ok(!$meta->{text_class}->[2], 'Correct Text Class');
+is($meta->{K_text_class}->[0], 'politik', 'Correct Text Class');
+is($meta->{K_text_class}->[1], 'inland', 'Correct Text Class');
+ok(!$meta->{K_text_class}->[2], 'Correct Text Class');
-is($meta->{pub_date}, '20020614', 'Creation date');
-is($meta->{creation_date}, '20020614', 'Creation date');
-is($meta->{availability}, 'CC-BY-SA', 'License');
-ok(!$meta->{pages}, 'Pages');
+is($meta->{D_pub_date}, '20020614', 'Publication date');
+is($meta->{D_creation_date}, '20020614', 'Creation date');
+is($meta->{S_availability}, 'CC-BY-SA', 'License');
+ok(!$meta->{A_pages}, 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Statement');
+ok(!$meta->{A_file_edition_statement}, 'File Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Statement');
-is($meta->{reference} . "\n", <<'REF', 'Reference');
+is($meta->{A_reference} . "\n", <<'REF', 'Reference');
Nachtwei, Winfried: Friedensgutachten der führenden Friedensforschungsinstitute. Rede im Deutschen Bundestag am 14.06.2002, Hrsg: Bundestagsfraktion Bündnis 90/DIE GRÜNEN [Ausführliche Zitierung nicht verfügbar]
REF
-is($meta->{language}, 'de', 'Language');
+is($meta->{S_language}, 'de', 'Language');
-is($meta->{corpus_title}, 'Reden und Interviews', 'Correct Corpus title');
-ok(!$meta->{corpus_sub_title}, 'Correct Corpus sub title');
-ok(!$meta->{corpus_author}, 'Correct Corpus author');
-ok(!$meta->{corpus_editor}, 'Correct Corpus editor');
+is($meta->{T_corpus_title}, 'Reden und Interviews', 'Correct Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus sub title');
+ok(!$meta->{T_corpus_author}, 'Correct Corpus author');
+ok(!$meta->{A_corpus_editor}, 'Correct Corpus editor');
-is($meta->{doc_title}, 'Reden der Bundestagsfraktion Bündnis 90/DIE GRÜNEN, (2002-2006)', 'Correct Doc title');
-ok(!$meta->{doc_sub_title}, 'Correct Doc sub title');
-ok(!$meta->{doc_author}, 'Correct Doc author');
-ok(!$meta->{doc_editor}, 'Correct doc editor');
+is($meta->{T_doc_title}, 'Reden der Bundestagsfraktion Bündnis 90/DIE GRÜNEN, (2002-2006)', 'Correct Doc title');
+ok(!$meta->{T_doc_sub_title}, 'Correct Doc sub title');
+ok(!$meta->{T_doc_author}, 'Correct Doc author');
+ok(!$meta->{A_doc_editor}, 'Correct doc editor');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
diff --git a/t/real/wdd.t b/t/real/wdd.t
index 8daf74f..2327677 100644
--- a/t/real/wdd.t
+++ b/t/real/wdd.t
@@ -27,36 +27,36 @@
is($doc->corpus_sigle, 'WDD11', 'Correct corpus sigle');
my $meta = $doc->meta;
-is($meta->{title}, 'Diskussion:Gunter A. Pilz', 'Title');
-ok(!$meta->{sub_title}, 'No SubTitle');
-is($meta->{author}, '€pa, u.a.', 'Author');
-is($meta->{editor}, 'wikipedia.org', 'Editor');
+is($meta->{T_title}, 'Diskussion:Gunter A. Pilz', 'Title');
+ok(!$meta->{T_sub_title}, 'No SubTitle');
+is($meta->{T_author}, '€pa, u.a.', 'Author');
+is($meta->{A_editor}, 'wikipedia.org', 'Editor');
-is($meta->{pub_place}, 'URL:http://de.wikipedia.org', 'PubPlace');
-is($meta->{publisher}, 'Wikipedia', 'Publisher');
-is($meta->{text_type}, 'Diskussionen zu Enzyklopädie-Artikeln', 'Correct Text Type');
-ok(!$meta->{text_type_art}, 'Correct Text Type Art');
-ok(!$meta->{text_type_ref}, 'Correct Text Type Ref');
-ok(!$meta->{text_domain}, 'Correct Text Domain');
-is($meta->{creation_date}, '20070707', 'Creation date');
-is($meta->{availability}, 'CC-BY-SA', 'License');
+is($meta->{S_pub_place}, 'URL:http://de.wikipedia.org', 'PubPlace');
+is($meta->{A_publisher}, 'Wikipedia', 'Publisher');
+is($meta->{S_text_type}, 'Diskussionen zu Enzyklopädie-Artikeln', 'Correct Text Type');
+ok(!$meta->{S_text_type_art}, 'Correct Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'Correct Text Type Ref');
+ok(!$meta->{S_text_domain}, 'Correct Text Domain');
+is($meta->{D_creation_date}, '20070707', 'Creation date');
+is($meta->{S_availability}, 'CC-BY-SA', 'License');
ok(!$meta->{pages}, 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Statement');
-is($meta->{reference} . "\n", <<'REF', 'Reference');
+ok(!$meta->{A_file_edition_statement}, 'File Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Statement');
+is($meta->{A_reference} . "\n", <<'REF', 'Reference');
Diskussion:Gunter A. Pilz, In: Wikipedia - URL:http://de.wikipedia.org/wiki/Diskussion:Gunter_A._Pilz: Wikipedia, 2007
REF
-is($meta->{language}, 'de', 'Language');
+is($meta->{S_language}, 'de', 'Language');
-is($meta->{corpus_title}, 'Wikipedia', 'Correct Corpus title');
-ok(!$meta->{corpus_sub_title}, 'Correct Corpus sub title');
-ok(!$meta->{corpus_author}, 'Correct Corpus author');
-is($meta->{corpus_editor}, 'wikipedia.org', 'Correct Corpus editor');
+is($meta->{T_corpus_title}, 'Wikipedia', 'Correct Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus sub title');
+ok(!$meta->{T_corpus_author}, 'Correct Corpus author');
+is($meta->{A_corpus_editor}, 'wikipedia.org', 'Correct Corpus editor');
-is($meta->{doc_title}, 'Wikipedia, Diskussionen zu Artikeln mit Anfangsbuchstabe G, Teil 27', 'Correct Doc title');
-ok(!$meta->{doc_sub_title}, 'Correct Doc sub title');
-ok(!$meta->{doc_author}, 'Correct Doc author');
-ok(!$meta->{doc_editor}, 'Correct doc editor');
+is($meta->{T_doc_title}, 'Wikipedia, Diskussionen zu Artikeln mit Anfangsbuchstabe G, Teil 27', 'Correct Doc title');
+ok(!$meta->{T_doc_sub_title}, 'Correct Doc sub title');
+ok(!$meta->{T_doc_author}, 'Correct Doc author');
+ok(!$meta->{A_doc_editor}, 'Correct doc editor');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
diff --git a/t/real/wpd.t b/t/real/wpd.t
index cfadc87..344c636 100644
--- a/t/real/wpd.t
+++ b/t/real/wpd.t
@@ -26,22 +26,22 @@
is($doc->corpus_sigle, 'WPD', 'Correct corpus sigle');
my $meta = $doc->meta;
-is($meta->{title}, 'A', 'Title');
-is($meta->{pub_place}, 'URL:http://de.wikipedia.org', 'PubPlace');
-is($meta->{pub_date}, '20050328', 'Creation Date');
+is($meta->{T_title}, 'A', 'Title');
+is($meta->{S_pub_place}, 'URL:http://de.wikipedia.org', 'PubPlace');
+is($meta->{D_pub_date}, '20050328', 'Publication Date');
SKIP: {
skip 'Failure because corpus is no longer supported', 1;
- ok(!$meta->{sub_title}, 'SubTitle');
+ ok(!$meta->{T_sub_title}, 'SubTitle');
};
-is($meta->{author}, 'Ruru; Jens.Ol; Aglarech; u.a.', 'Author');
+is($meta->{T_author}, 'Ruru; Jens.Ol; Aglarech; u.a.', 'Author');
-ok(!$meta->{doc_title}, 'Correct Doc title');
-ok(!$meta->{doc_sub_title}, 'Correct Doc Sub title');
-ok(!$meta->{doc_author}, 'Correct Doc author');
-ok(!$meta->{doc_editor}, 'Correct Doc editor');
+ok(!$meta->{T_doc_title}, 'Correct Doc title');
+ok(!$meta->{T_doc_sub_title}, 'Correct Doc Sub title');
+ok(!$meta->{T_doc_author}, 'Correct Doc author');
+ok(!$meta->{A_doc_editor}, 'Correct Doc editor');
-ok(!$meta->{corpus_title}, 'Correct Corpus title');
-ok(!$meta->{corpus_sub_title}, 'Correct Corpus Sub title');
+ok(!$meta->{T_corpus_title}, 'Correct Corpus title');
+ok(!$meta->{T_corpus_sub_title}, 'Correct Corpus Sub title');
# Tokenization
use_ok('KorAP::XML::Tokenizer');
@@ -116,7 +116,7 @@
is($output->{data}->{foundries}, 'corenlp corenlp/constituency', 'Foundries');
is($output->{data}->{layerInfos}, 'corenlp/c=spans', 'layerInfos');
-is($doc->meta->{editor}, 'wikipedia.org', 'Editor');
+is($doc->meta->{A_editor}, 'wikipedia.org', 'Editor');
# Check offset problem
diff --git a/t/sgbr/meta.t b/t/sgbr/meta.t
index 20b5ad8..aa0fc10 100644
--- a/t/sgbr/meta.t
+++ b/t/sgbr/meta.t
@@ -26,46 +26,46 @@
my $meta = $doc->meta;
-is($meta->{title}, 'Sommerüberraschung', 'title');
-is($meta->{author}, 'TEST.BSP.Autoren.1', 'Author');
-is($meta->{'sgbr_author_age_class'}, 'X', 'AgeClass');
+is($meta->{T_title}, 'Sommerüberraschung', 'title');
+is($meta->{T_author}, 'TEST.BSP.Autoren.1', 'Author');
+is($meta->{'S_sgbr_author_age_class'}, 'X', 'AgeClass');
-is($meta->{'sgbr_author_sex'}, 'M', 'Sex');
-is($meta->{'sgbr_kodex'}, 'M', 'Kodex');
+is($meta->{'S_sgbr_author_sex'}, 'M', 'Sex');
+is($meta->{'S_sgbr_kodex'}, 'M', 'Kodex');
-is($meta->{doc_title}, 'Beispielkorpus', 'Doc: title');
-is($meta->{doc_sub_title}, 'Subkorpus Beispieltext', 'Doc: subtitle');
+is($meta->{T_doc_title}, 'Beispielkorpus', 'Doc: title');
+is($meta->{T_doc_sub_title}, 'Subkorpus Beispieltext', 'Doc: subtitle');
-is($meta->{language}, 'de', 'Language');
+is($meta->{S_language}, 'de', 'Language');
-ok(!$meta->{publisher}, 'Publisher');
-ok(!$meta->{editor}, 'Editor');
-ok(!$meta->{text_type}, 'Text Type');
-ok(!$meta->{text_type_art}, 'Text Type Art');
-ok(!$meta->{text_type_ref}, 'Text Type Ref');
-ok(!$meta->{text_column}, 'Text Column');
-ok(!$meta->{text_domain}, 'Text Domain');
-ok(!$meta->{creation_date}, 'Creation Date');
+ok(!$meta->{A_publisher}, 'Publisher');
+ok(!$meta->{A_editor}, 'Editor');
+ok(!$meta->{S_text_type}, 'Text Type');
+ok(!$meta->{S_text_type_art}, 'Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'Text Type Ref');
+ok(!$meta->{S_text_column}, 'Text Column');
+ok(!$meta->{S_text_domain}, 'Text Domain');
+ok(!$meta->{D_creation_date}, 'Creation Date');
ok(!$meta->{license}, 'License');
ok(!$meta->{pages}, 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Edition Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Edition Statement');
-ok(!$meta->{reference}, 'Reference');
+ok(!$meta->{A_file_edition_statement}, 'File Edition Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Edition Statement');
+ok(!$meta->{A_reference}, 'Reference');
-ok(!$meta->{doc_editor}, 'Doc: editor');
-ok(!$meta->{doc_author}, 'Doc: author');
+ok(!$meta->{A_doc_editor}, 'Doc: editor');
+ok(!$meta->{T_doc_author}, 'Doc: author');
-ok(!$meta->{corpus_title}, 'Corpus: title');
-ok(!$meta->{corpus_sub_title}, 'Corpus: subtitle');
-ok(!$meta->{corpus_editor}, 'Corpus: editor');
-ok(!$meta->{corpus_author}, 'Corpus: author');
+ok(!$meta->{T_corpus_title}, 'Corpus: title');
+ok(!$meta->{T_corpus_sub_title}, 'Corpus: subtitle');
+ok(!$meta->{A_corpus_editor}, 'Corpus: editor');
+ok(!$meta->{T_corpus_author}, 'Corpus: author');
my $hash = $doc->to_hash;
is($hash->{title}, 'Sommerüberraschung', 'Corpus title');
is($hash->{sgbrAuthorSex}, 'M', 'additional');
# Sgbr specific keywords
-is($meta->keywords('keywords'), 'sgbrAuthorAgeClass:X sgbrAuthorSex:M sgbrKodex:M');
+is($meta->keywords('K_keywords'), 'sgbrAuthorAgeClass:X sgbrAuthorSex:M sgbrKodex:M');
done_testing;
diff --git a/t/sgbr/meta_duden.t b/t/sgbr/meta_duden.t
index 14f9746..ca3f3f1 100644
--- a/t/sgbr/meta_duden.t
+++ b/t/sgbr/meta_duden.t
@@ -25,49 +25,49 @@
is($doc->corpus_sigle, 'PRO-DUD', 'ID-corpus');
my $meta = $doc->meta;
-is($meta->{title}, 'Nur Platt, kein Deutsch', 'title');
-ok(!$meta->{sub_title}, 'no subtitle');
+is($meta->{T_title}, 'Nur Platt, kein Deutsch', 'title');
+ok(!$meta->{T_sub_title}, 'no subtitle');
-is($meta->{publisher}, 'Dorfblatt GmbH', 'Publisher');
-is($meta->{pub_date}, '20130126');
-is($meta->{sgbr_date}, '2013-01-26');
-is($meta->{pub_place}, 'Stadtingen');
+is($meta->{A_publisher}, 'Dorfblatt GmbH', 'Publisher');
+is($meta->{D_pub_date}, '20130126');
+is($meta->{D_sgbr_date}, '2013-01-26');
+is($meta->{S_pub_place}, 'Stadtingen');
-is($meta->{doc_title}, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
-is($meta->{doc_sub_title}, 'Subkorpus Ortsblatt, Jahrgang 2013, Monat Januar', 'Doc Sub title');
+is($meta->{T_doc_title}, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
+is($meta->{T_doc_sub_title}, 'Subkorpus Ortsblatt, Jahrgang 2013, Monat Januar', 'Doc Sub title');
-is($meta->{'funder'}, 'Bundesministerium für Bildung und Forschung', 'Funder');
+is($meta->{'A_funder'}, 'Bundesministerium für Bildung und Forschung', 'Funder');
-is($meta->{author}, 'unbekannt', 'Author');
-ok(!$meta->{'sgbr_author_sex'}, 'No Sex');
-is($meta->{'sgbr_kodex'}, 'T', '');
+is($meta->{T_author}, 'unbekannt', 'Author');
+ok(!$meta->{'S_sgbr_author_sex'}, 'No Sex');
+is($meta->{'S_sgbr_kodex'}, 'T', '');
-is($meta->keywords('keywords'), 'sgbrKodex:T');
+is($meta->keywords('K_keywords'), 'sgbrKodex:T');
-is($meta->{language}, 'de', 'Language');
+is($meta->{S_language}, 'de', 'Language');
-ok(!$meta->{editor}, 'Editor');
+ok(!$meta->{A_editor}, 'Editor');
-ok(!$meta->{text_type}, 'Text Type');
-ok(!$meta->{text_type_art}, 'Text Type Art');
-ok(!$meta->{text_type_ref}, 'Text Type Ref');
-ok(!$meta->{text_column}, 'Text Column');
-ok(!$meta->{text_domain}, 'Text Domain');
-ok(!$meta->{creation_date}, 'Creation Date');
-ok(!$meta->{license}, 'License');
-ok(!$meta->{pages}, 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Edition Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Edition Statement');
-ok(!$meta->{reference}, 'Reference');
+ok(!$meta->{S_text_type}, 'Text Type');
+ok(!$meta->{S_text_type_art}, 'Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'Text Type Ref');
+ok(!$meta->{S_text_column}, 'Text Column');
+ok(!$meta->{S_text_domain}, 'Text Domain');
+ok(!$meta->{D_creation_date}, 'Creation Date');
+ok(!$meta->{A_license}, 'License');
+ok(!$meta->{A_pages}, 'Pages');
+ok(!$meta->{A_file_edition_statement}, 'File Edition Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Edition Statement');
+ok(!$meta->{A_reference}, 'Reference');
-ok(!$meta->{doc_editor}, 'Doc: editor');
-ok(!$meta->{doc_author}, 'Doc: author');
+ok(!$meta->{A_doc_editor}, 'Doc: editor');
+ok(!$meta->{T_doc_author}, 'Doc: author');
-ok(!$meta->{corpus_title}, 'Corpus: title');
-ok(!$meta->{corpus_sub_title}, 'Corpus: subtitle');
-ok(!$meta->{corpus_editor}, 'Corpus: editor');
-ok(!$meta->{corpus_author}, 'Corpus: author');
+ok(!$meta->{T_corpus_title}, 'Corpus: title');
+ok(!$meta->{T_corpus_sub_title}, 'Corpus: subtitle');
+ok(!$meta->{A_corpus_editor}, 'Corpus: editor');
+ok(!$meta->{T_corpus_author}, 'Corpus: author');
my $hash = $doc->to_hash;
is($hash->{title}, 'Nur Platt, kein Deutsch', 'Corpus title');
diff --git a/t/sgbr/meta_ids.t b/t/sgbr/meta_ids.t
index 8a4cc4e..c9a83c7 100644
--- a/t/sgbr/meta_ids.t
+++ b/t/sgbr/meta_ids.t
@@ -27,50 +27,50 @@
my $meta = $doc->meta;
-is($meta->{title}, '@ Koelle_am_Rhing 10:18', 'title');
+is($meta->{T_title}, '@ Koelle_am_Rhing 10:18', 'title');
-ok(!$meta->{sub_title}, 'no subtitle');
+ok(!$meta->{T_sub_title}, 'no subtitle');
-is($meta->{publisher}, 'tagesschau.de', 'Publisher');
+is($meta->{A_publisher}, 'tagesschau.de', 'Publisher');
-is($meta->{pub_date}, '20140930');
+is($meta->{D_pub_date}, '20140930');
-ok(!$meta->{pub_place}, 'No pub place');
+ok(!$meta->{S_pub_place}, 'No pub place');
-is($meta->{doc_title}, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
-is($meta->{doc_sub_title}, 'Subkorpus Internettexte, Subkorpus Leserkommentare Tagesschau, Subkorpus September 2014, Subkorpus Beispielauszug', 'Doc Sub title');
+is($meta->{T_doc_title}, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
+is($meta->{T_doc_sub_title}, 'Subkorpus Internettexte, Subkorpus Leserkommentare Tagesschau, Subkorpus September 2014, Subkorpus Beispielauszug', 'Doc Sub title');
-is($meta->{'funder'}, 'Bundesministerium für Bildung und Forschung', 'Funder');
+is($meta->{'A_funder'}, 'Bundesministerium für Bildung und Forschung', 'Funder');
-is($meta->{author}, 'privat23', 'Author');
-ok(!$meta->{'sgbr_author_sex'}, 'No Sex');
-ok(!$meta->{'sgbr_kodex'}, 'No kodex');
-is($meta->{reference}, 'http://meta.tagesschau.de/node/090285#comment-1732187', 'Publace ref');
+is($meta->{T_author}, 'privat23', 'Author');
+ok(!$meta->{'S_sgbr_author_sex'}, 'No Sex');
+ok(!$meta->{'S_sgbr_kodex'}, 'No kodex');
+is($meta->{A_reference}, 'http://meta.tagesschau.de/node/090285#comment-1732187', 'Publace ref');
-is($meta->keywords('keywords'), '');
+is($meta->keywords('K_keywords'), '');
-is($meta->{language}, 'de', 'Language');
+is($meta->{S_language}, 'de', 'Language');
-ok(!$meta->{editor}, 'Editor');
+ok(!$meta->{A_editor}, 'Editor');
-ok(!$meta->{text_type}, 'Text Type');
-ok(!$meta->{text_type_art}, 'Text Type Art');
-ok(!$meta->{text_type_ref}, 'Text Type Ref');
-ok(!$meta->{text_column}, 'Text Column');
-ok(!$meta->{text_domain}, 'Text Domain');
-ok(!$meta->{creation_date}, 'Creation Date');
-ok(!$meta->{license}, 'License');
-ok(!$meta->{pages}, 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Edition Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Edition Statement');
+ok(!$meta->{S_text_type}, 'Text Type');
+ok(!$meta->{S_text_type_art}, 'Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'Text Type Ref');
+ok(!$meta->{S_text_column}, 'Text Column');
+ok(!$meta->{S_text_domain}, 'Text Domain');
+ok(!$meta->{D_creation_date}, 'Creation Date');
+ok(!$meta->{S_license}, 'License');
+ok(!$meta->{A_pages}, 'Pages');
+ok(!$meta->{A_file_edition_statement}, 'File Edition Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Edition Statement');
-ok(!$meta->{doc_editor}, 'Doc: editor');
-ok(!$meta->{doc_author}, 'Doc: author');
+ok(!$meta->{A_doc_editor}, 'Doc: editor');
+ok(!$meta->{T_doc_author}, 'Doc: author');
-ok(!$meta->{corpus_title}, 'Corpus: title');
-ok(!$meta->{corpus_sub_title}, 'Corpus: subtitle');
-ok(!$meta->{corpus_editor}, 'Corpus: editor');
-ok(!$meta->{corpus_author}, 'Corpus: author');
+ok(!$meta->{T_corpus_title}, 'Corpus: title');
+ok(!$meta->{T_corpus_sub_title}, 'Corpus: subtitle');
+ok(!$meta->{A_corpus_editor}, 'Corpus: editor');
+ok(!$meta->{T_corpus_author}, 'Corpus: author');
my $hash = $doc->to_hash;
is($hash->{title}, '@ Koelle_am_Rhing 10:18', 'Corpus title');
@@ -95,51 +95,51 @@
$meta = $doc->meta;
-is($meta->{title}, '@fitnessfrosch', 'title');
+is($meta->{T_title}, '@fitnessfrosch', 'title');
-ok(!$meta->{sub_title}, 'no subtitle');
+ok(!$meta->{T_sub_title}, 'no subtitle');
-is($meta->{publisher}, 'tagesschau.de', 'Publisher');
+is($meta->{A_publisher}, 'tagesschau.de', 'Publisher');
-is($meta->{pub_date}, '20141001');
-is($meta->{'sgbr_date'}, '2014-10-01 00:50:00');
+is($meta->{D_pub_date}, '20141001');
+is($meta->{'D_sgbr_date'}, '2014-10-01 00:50:00');
-ok(!$meta->{pub_place}, 'No pub place');
+ok(!$meta->{S_pub_place}, 'No pub place');
-is($meta->{doc_title}, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
-is($meta->{doc_sub_title}, 'Subkorpus Internettexte, Subkorpus Leserkommentare Tagesschau, Subkorpus September 2014, Subkorpus Beispielauszug', 'Doc Sub title');
+is($meta->{T_doc_title}, 'Korpus zur Beobachtung des Schreibgebrauchs im Deutschen', 'Doc title');
+is($meta->{T_doc_sub_title}, 'Subkorpus Internettexte, Subkorpus Leserkommentare Tagesschau, Subkorpus September 2014, Subkorpus Beispielauszug', 'Doc Sub title');
-is($meta->{'funder'}, 'Bundesministerium für Bildung und Forschung', 'Funder');
+is($meta->{'A_funder'}, 'Bundesministerium für Bildung und Forschung', 'Funder');
-is($meta->{author}, 'weltoffen', 'Author');
-ok(!$meta->{'sgbr_author_sex'}, 'No Sex');
-ok(!$meta->{'sgbr_kodex'}, 'No kodex');
-is($meta->{reference}, 'http://meta.tagesschau.de/node/090308#comment-1732754', 'Publace ref');
+is($meta->{T_author}, 'weltoffen', 'Author');
+ok(!$meta->{'S_sgbr_author_sex'}, 'No Sex');
+ok(!$meta->{'S_sgbr_kodex'}, 'No kodex');
+is($meta->{A_reference}, 'http://meta.tagesschau.de/node/090308#comment-1732754', 'Publace ref');
-is($meta->keywords('keywords'), '');
+is($meta->keywords('K_keywords'), '');
-is($meta->{language}, 'de', 'Language');
+is($meta->{S_language}, 'de', 'Language');
-ok(!$meta->{editor}, 'Editor');
+ok(!$meta->{A_editor}, 'Editor');
-ok(!$meta->{text_type}, 'Text Type');
-ok(!$meta->{text_type_art}, 'Text Type Art');
-ok(!$meta->{text_type_ref}, 'Text Type Ref');
-ok(!$meta->{text_column}, 'Text Column');
-ok(!$meta->{text_domain}, 'Text Domain');
-ok(!$meta->{creation_date}, 'Creation Date');
-ok(!$meta->{license}, 'License');
-ok(!$meta->{pages}, 'Pages');
-ok(!$meta->{file_edition_statement}, 'File Edition Statement');
-ok(!$meta->{bibl_edition_statement}, 'Bibl Edition Statement');
+ok(!$meta->{S_text_type}, 'Text Type');
+ok(!$meta->{S_text_type_art}, 'Text Type Art');
+ok(!$meta->{S_text_type_ref}, 'Text Type Ref');
+ok(!$meta->{S_text_column}, 'Text Column');
+ok(!$meta->{S_text_domain}, 'Text Domain');
+ok(!$meta->{D_creation_date}, 'Creation Date');
+ok(!$meta->{S_license}, 'License');
+ok(!$meta->{A_pages}, 'Pages');
+ok(!$meta->{A_file_edition_statement}, 'File Edition Statement');
+ok(!$meta->{A_bibl_edition_statement}, 'Bibl Edition Statement');
-ok(!$meta->{doc_editor}, 'Doc: editor');
-ok(!$meta->{doc_author}, 'Doc: author');
+ok(!$meta->{A_doc_editor}, 'Doc: editor');
+ok(!$meta->{T_doc_author}, 'Doc: author');
-ok(!$meta->{corpus_title}, 'Corpus: title');
-ok(!$meta->{corpus_sub_title}, 'Corpus: subtitle');
-ok(!$meta->{corpus_editor}, 'Corpus: editor');
-ok(!$meta->{corpus_author}, 'Corpus: author');
+ok(!$meta->{T_corpus_title}, 'Corpus: title');
+ok(!$meta->{T_corpus_sub_title}, 'Corpus: subtitle');
+ok(!$meta->{A_corpus_editor}, 'Corpus: editor');
+ok(!$meta->{T_corpus_author}, 'Corpus: author');
$hash = $doc->to_hash;
is($hash->{title}, '@fitnessfrosch', 'Corpus title');
diff --git a/t/transform.t b/t/transform.t
index acb314a..4537bcb 100644
--- a/t/transform.t
+++ b/t/transform.t
@@ -79,20 +79,20 @@
# Metdata
my $meta = $doc->meta;
-is($meta->{title}, 'A', 'title');
-ok(!$meta->{sub_title}, 'subTitle');
+is($meta->{T_title}, 'A', 'title');
+ok(!$meta->{T_sub_title}, 'subTitle');
is($doc->text_sigle, 'WPD/AAA/00001', 'ID');
is($doc->corpus_sigle, 'WPD', 'corpusID');
-is($meta->{pub_date}, '20050328', 'pubDate');
-is($meta->{pub_place}, 'URL:http://de.wikipedia.org', 'pubPlace');
-is($meta->{text_class}->[0], 'freizeit-unterhaltung', 'TextClass');
-is($meta->{text_class}->[1], 'reisen', 'TextClass');
-is($meta->{text_class}->[2], 'wissenschaft', 'TextClass');
-is($meta->{text_class}->[3], 'populaerwissenschaft', 'TextClass');
-ok(!$meta->{text_class}->[4], 'TextClass');
-is($meta->{author}, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author');
+is($meta->{D_pub_date}, '20050328', 'pubDate');
+is($meta->{S_pub_place}, 'URL:http://de.wikipedia.org', 'pubPlace');
+is($meta->{K_text_class}->[0], 'freizeit-unterhaltung', 'TextClass');
+is($meta->{K_text_class}->[1], 'reisen', 'TextClass');
+is($meta->{K_text_class}->[2], 'wissenschaft', 'TextClass');
+is($meta->{K_text_class}->[3], 'populaerwissenschaft', 'TextClass');
+ok(!$meta->{K_text_class}->[4], 'TextClass');
+is($meta->{T_author}, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author');
# Get tokens
use_ok('KorAP::XML::Tokenizer');