Include field serialization in versioned JSON output
Change-Id: Ib48d28af2eaac4e550727f838903ebb0cf15bb74
diff --git a/lib/KorAP/XML/Batch/File.pm b/lib/KorAP/XML/Batch/File.pm
index 583bc8f..c982bdd 100644
--- a/lib/KorAP/XML/Batch/File.pm
+++ b/lib/KorAP/XML/Batch/File.pm
@@ -70,7 +70,7 @@
};
my $file;
- my $print_text = ($self->{pretty} ? $tokens->to_pretty_json($self->{primary}) : $tokens->to_json($self->{primary}));
+ my $print_text = ($self->{pretty} ? $tokens->to_pretty_json(undef, $self->{primary}) : $tokens->to_json(undef, $self->{primary}));
# There is an output file given
if ($output) {
diff --git a/lib/KorAP/XML/Tokenizer.pm b/lib/KorAP/XML/Tokenizer.pm
index dec20cf..39e1fe0 100644
--- a/lib/KorAP/XML/Tokenizer.pm
+++ b/lib/KorAP/XML/Tokenizer.pm
@@ -377,7 +377,7 @@
}
else {
my $perc = _perc(
- $tokens->should, $tokens->have, $self->should, $self->should - $self->have
+ $tokens->should, $tokens->have, $self->should, $self->should - $self->have
);
$self->log->debug('With an alignment quota of ' . $perc);
};
@@ -464,13 +464,13 @@
# Get all layers
foreach my $layer (@{$self->{support}->{$foundry}}) {
- my @layers = @$layer;
- push(@supports, $foundry . '/' . $layers[0]);
+ my @layers = @$layer;
+ push(@supports, $foundry . '/' . $layers[0]);
- # More information
- if ($layers[1]) {
- push(@supports, $foundry . '/' . join('/', @layers));
- };
+ # More information
+ if ($layers[1]) {
+ push(@supports, $foundry . '/' . join('/', @layers));
+ };
};
};
return lc ( join ' ', sort {$a cmp $b } @supports );
@@ -488,14 +488,14 @@
sub layer_info {
- my $self = shift;
- $self->{layer_info} //= [];
- if ($_[0]) {
- push(@{$self->{layer_info}}, @{$_[0]});
- }
- else {
- return join ' ', sort {$a cmp $b } uniq @{$self->{layer_info}};
- };
+ my $self = shift;
+ $self->{layer_info} //= [];
+ if ($_[0]) {
+ push(@{$self->{layer_info}}, @{$_[0]});
+ }
+ else {
+ return join ' ', sort {$a cmp $b } uniq @{$self->{layer_info}};
+ };
};
@@ -531,12 +531,15 @@
sub to_data {
my $self = shift;
my $primary = defined $_[0] ? $_[0] : 1;
- my $legacy = defined $_[1] ? $_[1] : 0;
+ my $version = defined $_[1] ? $_[1] : 0.03;
- my %data = %{$self->doc->to_hash};
- my @fields;
+ # Legacy version
+ if ($version == 0) {
- if ($legacy) {
+ # Serialize meta fields
+ my %data = %{$self->doc->to_hash};
+
+ my @fields;
push(@fields, { primaryData => $self->doc->primary->data }) if $primary;
push(@fields, {
@@ -548,17 +551,39 @@
});
$data{fields} = \@fields;
+
+ return \%data;
}
- else {
+ # Version 0.03 serialization
+ elsif ($version == 0.03) {
+
+ # Serialize meta fields
+ my %data = %{$self->doc->to_hash};
+
my $tokens = $self->to_hash;
$tokens->{text} = $self->doc->primary->data if $primary;
$data{data} = $tokens;
$data{version} = '0.03';
- };
- \%data;
+ return \%data;
+ }
+
+ # Version 0.4 serialization
+ elsif ($version == 0.4) {
+ my %data = (
+ '@context' => 'http://korap.ids-mannheim.de/ns/koral/0.4/context.jsonld',
+ '@type' => 'koral:corpus'
+ );
+ $data{fields} = $self->doc->meta->to_koral_fields;
+
+ my $tokens = $self->to_hash;
+ $tokens->{text} = $self->doc->primary->data if $primary;
+ $data{data} = $tokens;
+ $data{version} = '0.4';
+ return \%data;
+ };
};
sub to_hash {
@@ -574,16 +599,18 @@
sub to_json_legacy {
- encode_json($_[0]->to_data($_[1], 1));
+ encode_json($_[0]->to_data($_[1], 0));
};
sub to_json {
- encode_json($_[0]->to_data($_[1], 0));
+ my ($self, $version, $primary) = @_;
+ encode_json($self->to_data($primary, $version));
};
sub to_pretty_json {
- JSON::XS->new->pretty->encode($_[0]->to_data($_[1]));
+ my ($self, $version, $primary) = @_;
+ JSON::XS->new->pretty->encode($self->to_data($primary, $version));
};
diff --git a/t/meta_koral.t b/t/meta_koral.t
index 931f106..1de378d 100644
--- a/t/meta_koral.t
+++ b/t/meta_koral.t
@@ -47,8 +47,6 @@
_contains_not($fields, 'textDomain');
_contains_not($fields, 'keywords');
-# diag Dumper $fields;
-
_contains_not($fields, 'subTitle');
diff --git a/t/tokenization.t b/t/tokenization.t
index 4c4ddaa..4e883d1 100644
--- a/t/tokenization.t
+++ b/t/tokenization.t
@@ -60,6 +60,16 @@
like($tokens->stream->pos(14)->to_string, qr/s:Der/);
+my $json = decode_json $tokens->to_json;
+is($json->{docSigle}, 'WPD/AAA', 'DocSigle old');
+is($json->{author}, 'Ruru; Jens.Ol; Aglarech; u.a.', 'author old');
+
+$json = decode_json $tokens->to_json(0.4);
+is($json->{fields}->[0]->{key}, 'corpusSigle');
+is($json->{fields}->[0]->{value}, 'WPD');
+is($json->{fields}->[6]->{key}, 'creationDate');
+is($json->{fields}->[6]->{value}, '2005');
+
done_testing;
__END__